; $Id: x86-aulldvrm.asm 98508 2023-02-08 15:31:06Z vboxsync $
;; @file
; IPRT - Visual C++ Compiler - unsigned 64-bit division support, x86.
;

;
; Copyright (C) 2023 Oracle and/or its affiliates.
;
; This file is part of VirtualBox base platform packages, as
; available from https://www.alldomusa.eu.org.
;
; This program is free software; you can redistribute it and/or
; modify it under the terms of the GNU General Public License
; as published by the Free Software Foundation, in version 3 of the
; License.
;
; This program is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with this program; if not, see <https://www.gnu.org/licenses>.
;
; The contents of this file may alternatively be used under the terms
; of the Common Development and Distribution License Version 1.0
; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
; in the VirtualBox distribution, in which case the provisions of the
; CDDL are applicable instead of those of the GPL.
;
; You may elect to license modified versions of this file under the
; terms and conditions of either the GPL or the CDDL or both.
;
; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
;


;*********************************************************************************************************************************
;*  Header Files                                                                                                                 *
;*********************************************************************************************************************************
%include "iprt/asmdefs.mac"


;*********************************************************************************************************************************
;*  External Symbols                                                                                                             *
;*********************************************************************************************************************************
; Only referenced by the C fallback branch of __aulldvrm, which is currently
; compiled out by the '%if 1' selecting the assembly implementation.
extern          NAME(RTVccUInt64Div)


;;
; Unsigned 64-bit divide that hands back quotient and remainder in one go.
;
; Arguments are taken from the stack and removed by this routine on return
; (ret 10h); both results come back in registers.
;
; @returns  Quotient in edx:eax, remainder in ebx:ecx.
; @param    [ebp+08h]   Dividend (64-bit), low dword first.
; @param    [ebp+10h]   Divisor (64-bit), low dword first.
;
; @note     The remainder registers are swapped relative to Watcom's I8D and U8D.
; @note     edi is saved and restored; eax, ebx, ecx and edx all carry results.
; @note     A zero divisor ends up in the 32-bit path and executes 'div' with a
;           zero operand there, i.e. the CPU raises its divide error.
;
BEGINPROC_RAW   __aulldvrm
        push    ebp
        mov     ebp, esp

        ; Stack argument slots, relative to the frame pointer set up above.
%define DIVIDEND_LO     ebp + 08h
%define DIVIDEND_HI     ebp + 0ch
%define DIVISOR_LO      ebp + 10h
%define DIVISOR_HI      ebp + 14h

        ;
        ; When the divisor fits in 32 bits, two chained 32-bit divisions
        ; (high dword first, then low dword) are all that is needed.
        ;
        mov     ebx, [DIVISOR_HI]
        test    ebx, ebx
        jnz     .wide_divisor

        ; Pass 1: dividend_hi / divisor.  The remainder stays in edx for pass 2.
        xor     edx, edx
        mov     eax, [DIVIDEND_HI]
        mov     ebx, [DIVISOR_LO]
        div     ebx
        mov     ecx, eax                ; park the high quotient dword.

        ; Pass 2: (pass 1 remainder : dividend_lo) / divisor.
        mov     eax, [DIVIDEND_LO]
        div     ebx                     ; eax = low quotient dword, edx = remainder.

        xor     ebx, ebx                ; remainder high dword is zero: the divisor is 32-bit.
        xchg    ecx, edx                ; ecx = remainder low dword, edx = high quotient dword.

        leave
        ret     10h

%if 1
        ;
        ; The divisor needs more than 32 bits.
        ;
        ; Shift divisor and dividend right by the same amount so that the
        ; divisor fits in 32 bits, then do a single 32-bit division to get an
        ; estimate of the quotient.  The estimate may come out one too large.
        ;
        ; The remainder is obtained by multiplying the estimate back with the
        ; full divisor and subtracting that from the dividend; the same step
        ; reveals whether the estimate needs correcting.
        ;
.wide_divisor:
        push    edi

        ; cl = number of significant bits in the divisor's high dword (1..32),
        ; which is the right shift that makes the divisor fit in 32 bits.
        bsr     ecx, ebx
        inc     cl
        test    cl, ~31
        jnz     .shift_by_32            ; cl == 32: handled separately below.

        ; Reduce the dividend (-> edx:eax) and the divisor (-> edi) by cl bits.
        mov     eax, [DIVIDEND_LO]
        mov     edx, [DIVIDEND_HI]
        mov     edi, [DIVISOR_LO]
        shrd    eax, edx, cl
        shr     edx, cl
        shrd    edi, ebx, cl            ; edi = reduced divisor
        jmp     .reduced

        ; Whole-dword shift: just take the high dwords.  (The shift instructions
        ; mask their count to 5 bits, so a count of 32 cannot be used above.)
.shift_by_32:
        xor     edx, edx
        mov     eax, [DIVIDEND_HI]
        mov     edi, ebx
.reduced:

        ; Estimate the quotient; it lives in edi from here on.
        div     edi
        mov     edi, eax                ; edi = Qapprox

        ; edx:eax = Qapprox * divisor, built from the two 32-bit partial products.
        mul     dword [DIVISOR_HI]      ; only the low dword (eax) of this product is used.
        mov     ecx, eax                ; keep it while eax/edx are reused.
        mov     eax, [DIVISOR_LO]
        mul     edi
        add     edx, ecx                ; CF set = the product does not fit in 64 bits.

        ; Fetch the dividend into ebx:ecx.  Plain moves leave the carry from
        ; the addition above untouched for the branch that follows.
        mov     ebx, [DIVIDEND_HI]
        mov     ecx, [DIVIDEND_LO]

        ; A product beyond 64 bits can only mean that Qapprox is too large.
        jc      .overshoot_wrapped

        ; remainder = dividend - product.  A borrow out of this subtraction
        ; likewise means Qapprox is one too large; fix up quotient and remainder.
        sub     ecx, eax
        sbb     ebx, edx
        jc      .overshoot
.finish:
        mov     eax, edi
        xor     edx, edx                ; the quotient fits in 32 bits since the divisor is >= 2^32.

        pop     edi
        leave
        ret     10h

.overshoot_wrapped:
        sub     ecx, eax
        sbb     ebx, edx
.overshoot:
        ; Take one divisor out of the quotient and give it back to the remainder.
        dec     edi
        add     ecx, [DIVISOR_LO]
        adc     ebx, [DIVISOR_HI]
        jmp     .finish

%else
        ;
        ; Alternative for wide divisors: defer to a rather slow C implementation.
        ;
.wide_divisor:
        ; RTVccUInt64Div(RTUINT64U const *paDividendDivisor, RTUINT64U *paQuotientRemainder)
        sub     esp, 10h                ; result buffer: quotient followed by remainder.
        mov     edx, esp
        push    edx                     ; 2nd argument: the result buffer.
        lea     ecx, [ebp + 8]
        push    ecx                     ; 1st argument: our own dividend + divisor stack arguments.
        call    NAME(RTVccUInt64Div)

        ; Pick the results up from the buffer.
        mov     eax, [ebp - 10h]
        mov     edx, [ebp - 10h + 4]

        mov     ecx, [ebp - 08h]
        mov     ebx, [ebp - 08h + 4]
        leave
        ret     10h

%endif
ENDPROC_RAW     __aulldvrm
