1 | ; $Id: ASMMultU64ByU32DivByU32.asm 106061 2024-09-16 14:03:52Z vboxsync $
|
---|
2 | ;; @file
|
---|
3 | ; IPRT - Assembly Functions, ASMMultU64ByU32DivByU32.
|
---|
4 | ;
|
---|
5 |
|
---|
6 | ;
|
---|
7 | ; Copyright (C) 2006-2024 Oracle and/or its affiliates.
|
---|
8 | ;
|
---|
9 | ; This file is part of VirtualBox base platform packages, as
|
---|
10 | ; available from https://www.virtualbox.org.
|
---|
11 | ;
|
---|
12 | ; This program is free software; you can redistribute it and/or
|
---|
13 | ; modify it under the terms of the GNU General Public License
|
---|
14 | ; as published by the Free Software Foundation, in version 3 of the
|
---|
15 | ; License.
|
---|
16 | ;
|
---|
17 | ; This program is distributed in the hope that it will be useful, but
|
---|
18 | ; WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
19 | ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
20 | ; General Public License for more details.
|
---|
21 | ;
|
---|
22 | ; You should have received a copy of the GNU General Public License
|
---|
23 | ; along with this program; if not, see <https://www.gnu.org/licenses>.
|
---|
24 | ;
|
---|
25 | ; The contents of this file may alternatively be used under the terms
|
---|
26 | ; of the Common Development and Distribution License Version 1.0
|
---|
27 | ; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
|
---|
28 | ; in the VirtualBox distribution, in which case the provisions of the
|
---|
29 | ; CDDL are applicable instead of those of the GPL.
|
---|
30 | ;
|
---|
31 | ; You may elect to license modified versions of this file under the
|
---|
32 | ; terms and conditions of either the GPL or the CDDL or both.
|
---|
33 | ;
|
---|
34 | ; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
|
---|
35 | ;
|
---|
36 |
|
---|
37 | %include "iprt/asmdefs.mac"
|
---|
38 |
|
---|
39 |
|
---|
40 | ;;
|
---|
41 | ; Multiply a 64-bit by a 32-bit integer and divide the result by a 32-bit integer
|
---|
42 | ; using a 96 bit intermediate result.
|
---|
43 | ;
|
---|
44 | ; @returns (u64A * u32B) / u32C.
|
---|
45 | ; @param u64A/rcx/rdi The 64-bit value.
|
---|
46 | ; @param u32B/edx/esi The 32-bit value to multiply A by.
|
---|
47 | ; @param u32C/r8d/edx The 32-bit value to divide A*B by.
|
---|
48 | ;
|
---|
49 | ; @cproto DECLASM(uint64_t) ASMMultU64ByU32DivByU32(uint64_t u64A, uint32_t u32B, uint32_t u32C);
|
---|
50 | ;
|
---|
RT_BEGINPROC ASMMultU64ByU32DivByU32
%if ARCH_BITS == 64

        ;
        ; 64-bit: one widening multiply followed by one divide.
        ; Writing a 32-bit register zero-extends into the full 64-bit register,
        ; so r9 = u32B and r8 = u32C with clean upper halves.
        ;
 %ifdef ASM_CALL64_MSC
        mov     rax, rcx                ; rax = u64A
        mov     r9d, edx                ; r9  = u32B (zero extended; don't trust the high bits of rdx)
        mov     r8d, r8d                ; r8  = u32C (zero extended; don't trust the high bits of r8)
 %else
        mov     rax, rdi                ; rax = u64A
        mov     r9d, esi                ; r9  = u32B (zero extended)
        mov     r8d, edx                ; r8  = u32C (zero extended); must be read before mul clobbers rdx
 %endif
        mul     r9                      ; rdx:rax = u64A * u32B
        div     r8                      ; rax = (u64A * u32B) / u32C; rdx = remainder
                                        ; Note: div faults (#DE) if u32C is zero or the quotient
                                        ;       does not fit in 64 bits.

%else ; 16 or 32 bit
        ;
        ; This implementation is converted from the GCC inline
        ; version of the code. Nothing additional has been done
        ; performance wise.
        ;
 %if ARCH_BITS == 16
        ; Set up a frame and reserve one dword slot per return register.  The
        ; epilogue overwrites the low word of each slot with a piece of the
        ; result, so the pops both deliver the result and keep the high words
        ; of the registers intact:
        ;       [bp - 1*4] = eax, [bp - 2*4] = ebx, [bp - 3*4] = ecx, [bp - 4*4] = edx
        push    bp
        mov     bp, sp
        push    eax                     ; push all return registers to preserve high value (paranoia)
        push    ebx
        push    ecx
        push    edx
 %endif
        push    esi
        push    edi

        ; Parameter locations.
        ; 32-bit: esp-relative; the extra 08h accounts for the esi and edi pushes above.
        ; NOTE(review): the 16-bit bp-relative offsets are kept as-is; confirm they
        ;               match the frame layout of the 16-bit callers.
 %if ARCH_BITS == 16
  %define u64A_Lo     [bp + 4 + 04h]
  %define u64A_Hi     [bp + 4 + 08h]
  %define u32B        [bp + 4 + 0ch]
  %define u32C        [bp + 4 + 10h]
 %else
  %define u64A_Lo     [esp + 04h + 08h]
  %define u64A_Hi     [esp + 08h + 08h]
  %define u32B        [esp + 0ch + 08h]
  %define u32C        [esp + 10h + 08h]
 %endif

        ; Load parameters into registers.
        mov     eax, u64A_Lo
        mov     esi, u64A_Hi
        mov     ecx, u32B
        mov     edi, u32C

        ; The body, just like the GCC inline version: two 32x32->64 multiplies
        ; build the 96-bit product, then two chained 64/32 divides produce the
        ; 64-bit quotient (the remainder of the first feeds the second).
        mul     ecx                     ; eax = u64Lo.lo = (u64A.lo * u32B).lo
                                        ; edx = u64Lo.hi = (u64A.lo * u32B).hi
        xchg    eax, esi                ; esi = u64Lo.lo
                                        ; eax = u64A.hi
        xchg    edx, edi                ; edi = u64Lo.hi
                                        ; edx = u32C
        xchg    edx, ecx                ; ecx = u32C
                                        ; edx = u32B
        mul     edx                     ; eax = u64Hi.lo = (u64A.hi * u32B).lo
                                        ; edx = u64Hi.hi = (u64A.hi * u32B).hi
        add     eax, edi                ; u64Hi.lo += u64Lo.hi
        adc     edx, 0                  ; u64Hi.hi += carry
        div     ecx                     ; eax = u64Hi / u32C
                                        ; edx = u64Hi % u32C
        mov     edi, eax                ; edi = u64Result.hi = u64Hi / u32C
        mov     eax, esi                ; eax = u64Lo.lo
        div     ecx                     ; u64Result.lo = ((u64Hi % u32C):u64Lo.lo) / u32C
        mov     edx, edi                ; u64Result.hi

        ; epilogue
        pop     edi
        pop     esi
 %if ARCH_BITS == 16
        ; The result is in edx:eax at this point.  The 16-bit return convention is
        ; DX:CX:BX:AX, where DX holds bits 15:0, CX bits 31:16, BX bits 47:32, and AX bits 63:48.
        ; Store each 16-bit piece into the low word of the matching saved register
        ; slot; the pops below then load the result into the return registers.
        mov     [bp - 4*4], ax          ; dx = bits 15:0
        shr     eax, 16
        mov     [bp - 3*4], ax          ; cx = bits 31:16
        mov     [bp - 2*4], dx          ; bx = bits 47:32
        shr     edx, 16
        mov     [bp - 1*4], dx          ; ax = bits 63:48
        pop     edx
        pop     ecx
        pop     ebx
        pop     eax
        leave
 %endif
%endif
        ret
ENDPROC ASMMultU64ByU32DivByU32
141 |
|
---|