ftol2-vcc.asm

最後變更在這個檔案是 106510,由 vboxsync 提交於 5 月前
Runtime/ftol2-vcc.asm: Added ftoul2_legacy, ftoul2 and ftoui2 for the various /fpcvt+/arch modes of the 2022 compiler. jiraref:VBP-1171
屬性 svn:eol-style 設為 `native` 屬性 svn:keywords 設為 `Author Date Id Revision`
檔案大小: 9.4 KB

行
1	; $Id: ftol2-vcc.asm 106510 2024-10-20 00:47:58Z vboxsync $
2	;; @file
3	; IPRT - Floating Point to Integer related Visual C++ support routines.
4	;
5
6	;
7	; Copyright (C) 2022-2024 Oracle and/or its affiliates.
8	;
9	; This file is part of VirtualBox base platform packages, as
10	; available from https://www.alldomusa.eu.org.
11	;
12	; This program is free software; you can redistribute it and/or
13	; modify it under the terms of the GNU General Public License
14	; as published by the Free Software Foundation, in version 3 of the
15	; License.
16	;
17	; This program is distributed in the hope that it will be useful, but
18	; WITHOUT ANY WARRANTY; without even the implied warranty of
19	; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20	; General Public License for more details.
21	;
22	; You should have received a copy of the GNU General Public License
23	; along with this program; if not, see <https://www.gnu.org/licenses>.
24	;
25	; The contents of this file may alternatively be used under the terms
26	; of the Common Development and Distribution License Version 1.0
27	; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28	; in the VirtualBox distribution, in which case the provisions of the
29	; CDDL are applicable instead of those of the GPL.
30	;
31	; You may elect to license modified versions of this file under the
32	; terms and conditions of either the GPL or the CDDL or both.
33	;
34	; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35	;
36
37
38	%include "iprt/asmdefs.mac"
39	%include "iprt/x86.mac"
40
41
42	%define RTFLOAT80U_EXP_BIAS (16383)
43
44
45	;;
46	; Convert st0 to integer returning it in eax, popping st0.
47	;
48	; @returns value in eax
49	; @param st0 The value to convert. Will be popped.
50	; @uses eax, st0, FSW.TOP, FSW.exception
51	;
52	GLOBALNAME_RAW __ftol2_sse_excpt, function, RT_NOTHING ; NOTE(review): presumably emits a global label here (macro from iprt/asmdefs.mac), i.e. this name enters the __ftoi2 body below.
53	GLOBALNAME_RAW __ftol2_sse, function, RT_NOTHING ;; Same entry point under another name. @todo kind of expect __ftol2_sse to take input in xmm0 and return in edx:eax.
54	BEGINPROC_RAW __ftoi2 ; Truncate st0 to a 32-bit integer returned in eax; st0 is popped.
55	push ebp ; Set up a frame so 'leave' can undo the stack adjustment.
56	mov ebp, esp
57	sub esp, 8h ; Scratch space; only the low 4 bytes are stored to and read back.
58	fisttp dword [esp] ; Store st0 as a truncated 32-bit integer and pop it.
59	mov eax, [esp] ; Return value.
60	leave ; Restore esp and ebp.
61	ret
62	ENDPROC_RAW __ftoi2
63
64
65	;;
66	; Convert st0 to integer returning it in edx:eax, popping st0.
67	;
68	; @returns value in edx:eax
69	; @param st0 The value to convert. Will be popped.
70	; @uses eax, edx, st0, FSW.TOP, FSW.exception
71	;
72	BEGINPROC_RAW __ftol2 ; Truncate st0 to a 64-bit integer returned in edx:eax; st0 is popped. Also a jump target of __ftoul2_legacy.
73	push ebp
74	mov ebp, esp ; ebp remembers the entry esp, so 'leave' still works after the realignment below.
75	sub esp, 8h ; Room for the 64-bit result.
76	and esp, 0fffffff8h ; Round esp down to an 8 byte boundary for the qword store.
77	fisttp qword [esp] ; Store st0 as a truncated 64-bit integer and pop it.
78	mov eax, [esp] ; Low dword of the result.
79	mov edx, [esp + 4] ; High dword of the result.
80	leave ; Restore esp and ebp.
81	ret
82	ENDPROC_RAW __ftol2
83
84
85	;;
86	; Convert st0 to an unsigned 32-bit integer returning it in eax, popping st0.
87	;
88	; Used when /fpcvt:IA is given together with /ARCH:IA32 or /ARCH:SSE.
89	;
90	; @returns value in eax (edx is not modified)
91	; @param st0 The value to convert. Will be popped.
92	; @uses eax, st0, FSW.TOP
93	;
94	BEGINPROC_RAW __ftoui2 ; Convert st0 to an unsigned 32-bit integer in eax (edx is never written); st0 is popped on every path.
95	push ebp
96	mov ebp, esp
97
98	; Reserve 8 byte aligned scratch space for the qword store in the normal path (see note below).
99	sub esp, 8h
100	and esp, 0fffffff8h ; Round esp down to an 8 byte boundary.
101
102	; Check if the value is unordered or negative.
103	fldz
104	fucomip st0, st1 ; Compare 0.0 against the value, setting EFLAGS, and pop the 0.0 again.
105	jp .unordered ; PF=1 only when unordered.
106	ja .negative ; jumps if ZF=0 & CF=0, i.e. if zero > value.
107
108	; Check if the value is too large for uint32_t.
109	fld dword [g_r32TwoToThePowerOf32]
110	fucomip st0, st1 ; if 1*2^32 <= value
111	jbe .too_large ; then jmp
112
113	;
114	; The value is in the range [0, 2^32), so just convert and return it.
115	;
116	; Note! We do a 64-bit conversion here, not a 32-bit one, since the 32-bit store is signed
117	; and cannot hold values at or above 2**31. Only the low dword of the result is returned.
118	;
119	fisttp qword [esp] ; Raise exceptions as appropriate, pop ST0.
120	mov eax, [esp] ; Low dword = the uint32_t result.
121	.return:
122	leave ; Common exit: restore esp and ebp.
123	ret
124
125	;
126	; Negative value.
127	;
128	.negative:
129	; If the value is -1.0 or smaller, treat it as unordered.
130	fabs ; st0 = abs(value), so one compare against 1.0 is enough.
131	fld1
132	fucomip st0, st1 ; if 1.0 <= abs(value)
133	jbe .unordered ; then jmp (jbe = jmp if ZF or/and CF set)
134
135	; Values between -1.0 and 0 (both exclusively) are truncated to zero.
136	fisttp dword [esp] ; Raise exceptions as appropriate, pop ST0.
137	xor eax, eax ; The result is zero; the stored value is not read back.
138	jmp .return
139
140	; Unordered, too large, or -1.0 and below: return UINT32_MAX after maybe raising an exception.
141	.unordered:
142	.too_large:
143	fcomp dword [g_r32QNaN] ; Set C0-3, raise exceptions as appropriate, pop ST0.
144	mov eax, -1 ; UINT32_MAX.
145	jmp .return
146	ENDPROC_RAW __ftoui2
147
148
149	;;
150	; Convert st0 to an unsigned long integer returning it in edx:eax, popping st0.
151	;
152	; Used when /fpcvt:IA is given together with /ARCH:IA32 or /ARCH:SSE.
153	;
154	; @returns value in edx:eax
155	; @param st0 The value to convert. Will be popped.
156	; @uses eax, edx, st0, FSW.TOP
157	;
158	BEGINPROC_RAW __ftoul2 ; Convert st0 to an unsigned 64-bit integer in edx:eax; st0 is popped on every path. Also a jump target of __ftoul2_legacy.
159	push ebp
160	mov ebp, esp ; ebp remembers the entry esp, so 'leave' still works after the realignment below.
161
162	; We may need to store a RTFLOAT80U or a uint64_t at [esp], so reserve scratch space and make sure the stack is 16 byte aligned.
163	sub esp, 16h ; 16h = 22 bytes, which covers the 10 byte RTFLOAT80U store below.
164	and esp, 0fffffff0h ; Round esp down to a 16 byte boundary.
165
166	; Check if the value is unordered or negative.
167	fldz
168	fucomip st0, st1 ; Compare 0.0 against the value, setting EFLAGS, and pop the 0.0 again.
169	jp .unordered ; PF=1 only when unordered.
170	ja .negative ; jumps if ZF=0 & CF=0, i.e. if zero > value.
171
172	; Check if the value is in signed 64-bit integer range, i.e. less than 1.0*2^63.
173	fld dword [g_r32TwoToThePowerOf63]
174	fucomip st0, st1 ; if 1*2^63 <= value
175	jbe .larger_than_or_equal_to_2_to_the_power_of_63 ; then jmp;
176
177	;
178	; The value is in the range [0, 2^63), so the signed 64-bit conversion yields the unsigned result directly.
179	;
180	fisttp qword [esp] ; Raise exceptions as appropriate, pop ST0.
181	mov edx, [esp + 4] ; High dword of the result.
182	mov eax, [esp] ; Low dword of the result.
183	.return:
184	leave ; Common exit: restore esp and ebp.
185	ret
186
187	;
188	; We've got a value that is so large that fisttp can't handle it, however
189	; it may still be convertible to uint64_t, iff the exponent is 63.
190	;
191	.larger_than_or_equal_to_2_to_the_power_of_63:
192
193	; Save the value on the stack so we can examine it in its full 80-bit format.
194	fld st0 ; Duplicate the value, so the original stays in st0 after the fstp.
195	fstp tword [esp] ; RTFLOAT80U
196	movzx eax, word [esp + 8] ; The exponent and (zero) sign value.
197
198	%ifdef RT_STRICT
199	; Negative numbers shall not end up here, we checked for that in the zero compare above!
200	bt eax, 15
201	jnc .sign_clear_as_expected
202	int3
203	.sign_clear_as_expected:
204	; The exponent shall not be less than 63, we checked for that in the g_r32TwoToThePowerOf63 compare above!
205	cmp eax, RTFLOAT80U_EXP_BIAS+63
206	jae .exp_not_below_63_as_expected
207	int3
208	.exp_not_below_63_as_expected:
209	%endif
210
211	; Check that the exponent is 63, because if it isn't it must be higher
212	; and out of range for a conversion to uint64_t.
213	cmp eax, RTFLOAT80U_EXP_BIAS+63 ; The sign bit is clear, so eax is the plain biased exponent.
214	jne .too_large
215
216	; Check for unnormal values just to be on the safe side.
217	mov edx, [esp + 4] ; High dword of the mantissa, which doubles as the high dword of the result.
218	bt edx, 31 ; Check if the most significant bit in the mantissa is zero.
219	jnc .unnormal
220
221	; Load the rest of the value: the low dword of the mantissa is the low dword of the result.
222	mov eax, [esp]
223	frndint ; Clears C1 and raises exceptions as appropriate.
224	ffreep st0 ; Pop ST0 without storing it.
225	jmp .return
226
227	;
228	; Negative value.
229	;
230	.negative:
231	; If the value is -1.0 or smaller, treat it as unordered.
232	fabs ; st0 = abs(value), so one compare against 1.0 is enough.
233	fld1
234	fucomip st0, st1 ; if 1.0 <= abs(value)
235	jbe .unordered ; then jmp (jbe = jmp if ZF or/and CF set)
236
237	; Values between -1.0 and 0 (both exclusively) are truncated to zero.
238	fisttp qword [esp] ; Raise exceptions as appropriate, pop ST0.
239	xor edx, edx ; The result is zero; the stored value is not read back.
240	xor eax, eax
241	jmp .return
242
243	;
244	; Unordered, unnormal, too large, or -1.0 and below.
245	; Return UINT64_MAX (-1 in both edx and eax) after popping ST0.
246	;
247	.unordered:
248	.unnormal:
249	.too_large:
250	fcomp dword [g_r32QNaN] ; Set C0-3, raise exceptions as appropriate, pop ST0.
251	mov edx, -1
252	mov eax, -1
253	jmp .return
254	ENDPROC_RAW __ftoul2
255
256
257	;;
258	; Convert st0 to an unsigned 64-bit integer returning it in edx:eax, popping st0.
259	;
260	; This is new with VC 14.3 / 2022 and changes how extreme values are handled.
261	;
262	; This is used for /ARCH:IA32 & /ARCH:SSE; the compiler seems to generate inline
263	; conversion code for /ARCH:SSE2. If either of those two is used with /fpcvt:IA,
264	; __ftoul2 is used instead.
265	;
266	; @returns value in edx:eax
267	; @param st0 The value to convert. Will be popped.
268	; @uses eax, edx, st0, FSW.TOP
269	;
270	BEGINPROC_RAW __ftoul2_legacy ; Pick __ftol2 or __ftoul2 based on the single precision image of st0; st0 stays in place for the routine jumped to.
271	sub esp, 4 ; Temporary slot for the 32-bit float.
272	fst dword [esp] ; Store st0 as single precision without popping it.
273	pop edx ; edx = the RTFLOAT32U bit pattern; esp is back at its entry value.
274	cmp edx, 0x5f800000 ; RTFLOAT32U for 1.0*2^64. The compare is unsigned, so any pattern with the sign bit set is also above it.
275	jae __ftol2 ; Jump if exponent >= 64 or the value is signed.
276	jmp __ftoul2 ; Everything else, i.e. non-negative values below 2^64, takes the unsigned conversion.
277	ENDPROC_RAW __ftoul2_legacy
278
279
280	;
281	; Constants.
282	;
283	ALIGNCODE(4) ; NOTE(review): presumably 4 byte alignment (macro from iprt/asmdefs.mac). The single precision constants below are read as dword memory operands by the routines above.
284	g_r32TwoToThePowerOf32:
285	dd 0x4f800000 ; 1.0*2^32, the upper limit checked by __ftoui2.
286	g_r32TwoToThePowerOf63:
287	dd 0x5f000000 ; 1.0*2^63, the signed 64-bit limit checked by __ftoul2.
288	g_r32QNaN:
289	dd 0xffc00000 ; Quiet negative NaN (RTFLOAT32U_INIT_QNAN), the fcomp operand on the error paths.
290

注意: 瀏覽 TracBrowser 來幫助您使用儲存庫瀏覽器

source: vbox/trunk/src/VBox/Runtime/common/compiler/vcc/ftol2-vcc.asm

以其他格式下載: