sinf.asm

Last change on this file was 106061, checked in by vboxsync, 8 weeks ago
Copyright year updates by scm.
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 5.7 KB

Line
1	; $Id: sinf.asm 106061 2024-09-16 14:03:52Z vboxsync $
2	;; @file
3	; IPRT - No-CRT sinf - AMD64 & X86.
4	;
5
6	;
7	; Copyright (C) 2006-2024 Oracle and/or its affiliates.
8	;
9	; This file is part of VirtualBox base platform packages, as
10	; available from https://www.virtualbox.org.
11	;
12	; This program is free software; you can redistribute it and/or
13	; modify it under the terms of the GNU General Public License
14	; as published by the Free Software Foundation, in version 3 of the
15	; License.
16	;
17	; This program is distributed in the hope that it will be useful, but
18	; WITHOUT ANY WARRANTY; without even the implied warranty of
19	; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20	; General Public License for more details.
21	;
22	; You should have received a copy of the GNU General Public License
23	; along with this program; if not, see <https://www.gnu.org/licenses>.
24	;
25	; The contents of this file may alternatively be used under the terms
26	; of the Common Development and Distribution License Version 1.0
27	; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28	; in the VirtualBox distribution, in which case the provisions of the
29	; CDDL are applicable instead of those of the GPL.
30	;
31	; You may elect to license modified versions of this file under the
32	; terms and conditions of either the GPL or the CDDL or both.
33	;
34	; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35	;
36
37
38	%define RT_ASM_WITH_SEH64
39	%include "iprt/asmdefs.mac"
40	%include "iprt/x86.mac"
41
42
43	BEGINCODE
44
45
;;
; Compute the sine of a single precision floating point value, measured in
; radians.
;
; The input is loaded onto the x87 stack and classified with FXAM:
;   - zero, denormal, NaN and very tiny inputs are returned as they are,
;   - infinities, unsupported encodings and inputs with an absolute value
;     below 3pi/4 are handed straight to FSIN,
;   - everything else goes to the common rtNoCrtMathSinCore worker.
;
; Stack frame (relative to xBP):
;   [xBP - 10h]  dword scratch for moving the value between xmm0 and st0 (AMD64).
;   [xBP - 1ch]  modified FPU control word (Windows only).
;   [xBP - 20h]  original FPU control word, restored before returning (Windows only).
;
; @returns  st(0) on x86 / xmm0 on AMD64.
; @param    rf      [xBP + xCB*2] on x86 / xmm0 on AMD64.
;
RT_NOCRT_BEGINPROC sinf
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        sub     xSP, 20h
        SEH64_ALLOCATE_STACK 20h
        SEH64_END_PROLOGUE

%ifdef RT_OS_WINDOWS
        ;
        ; Make sure we use full precision and not the windows default of 53 bits.
        ; The original control word stays at [xBP - 20h] so that it can be
        ; restored on the way out.
        ;
        fnstcw  [xBP - 20h]
        mov     ax, [xBP - 20h]
        or      ax, X86_FCW_PC_64       ; includes both bits, so no need to clear the mask.
        mov     [xBP - 1ch], ax
        fldcw   [xBP - 1ch]
%endif

        ;
        ; Load the input into st0.
        ;
%ifdef RT_ARCH_AMD64
        movss   [xBP - 10h], xmm0
        fld     dword [xBP - 10h]
%else
        fld     dword [xBP + xCB*2]
%endif

        ;
        ; We examine the input and weed out non-finite numbers first.
        ;
        fxam
        fnstsw  ax
        and     ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0
        cmp     ax, X86_FSW_C2              ; Normal finite number (excluding zero)
        je      .finite
        cmp     ax, X86_FSW_C3              ; Zero
        je      .zero
        cmp     ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals - treat them as zero.
        je      .zero
        cmp     ax, X86_FSW_C0              ; NaN - must handle it special,
        je      .nan

        ; Pass infinities and unsupported inputs to fsin, assuming it does the right thing.
.do_sin:
        fsin
        jmp     .return_val

        ;
        ; Finite number.
        ;
.finite:
        ; For very tiny numbers, 0 < abs(input) < 2**-26, we can return the
        ; input value directly.
        fld     st0                     ; duplicate st0
        fabs                            ; make it an absolute (positive) value.
        fld     qword [.s_r64Tiny xWrtRIP]
        fcomip  st1                     ; compare s_r64Tiny and fabs(input)
        ja      .return_tiny_number_as_is ; jump if fabs(input) is smaller

        ; FSIN is documented to be reasonable for the range ]-3pi/4,3pi/4[, so
        ; while we have fabs(input) loaded already, check for that here and
        ; allow rtNoCrtMathSinCore to assume it won't see values very close to
        ; zero, except by cos -> sin conversion where they won't be relevant to
        ; any assumptions about precision approximation.
        fld     qword [.s_r64FSinOkay xWrtRIP]
        fcomip  st1
        ffreep  st0                     ; drop the fabs(input) value; the flags set by fcomip stay intact.
        ja      .do_sin                 ; jump if fabs(input) is below the FSIN limit

        ;
        ; Call common sine/cos worker.
        ;
        mov     ecx, 0                  ; float
        extern  NAME(rtNoCrtMathSinCore)
        call    NAME(rtNoCrtMathSinCore)

        ;
        ; Return st0.  On AMD64 the result is moved from st0 to xmm0.
        ;
.return_val:
%ifdef RT_ARCH_AMD64
        fstp    dword [xBP - 10h]
        movss   xmm0, [xBP - 10h]
%endif

        ;
        ; Common exit.  The paths returning the input as-is join here, so the
        ; control word changed in the prologue is restored for them as well.
        ;
.return:
%ifdef RT_OS_WINDOWS
        fldcw   [xBP - 20h]             ; restore original
%endif
        leave
        ret

        ;
        ; As explained already, we can return tiny numbers directly too as the
        ; output from sinf(input) = input given our precision.
        ; We can skip the st0 -> xmm0 translation here, so follow the same path
        ; as .zero & .nan, after we've removed the fabs(input) value.
        ;
.return_tiny_number_as_is:
        ffreep  st0

        ;
        ; sinf(+/-0.0) = +/-0.0 (preserve the sign)
        ; We can skip the st0 -> xmm0 translation here, so follow the .nan code path.
        ;
.zero:

        ;
        ; Input is NaN, output it unmodified as far as we can (FLD changes SNaN
        ; to QNaN when masked).
        ;
        ; On AMD64 xmm0 still holds the untouched input, so the x87 copy is
        ; simply dropped.  On x86 the input stays in st0 as the return value.
        ;
.nan:
%ifdef RT_ARCH_AMD64
        ffreep  st0
%endif
        jmp     .return

ALIGNCODE(8)
        ; Ca. 2**-26, absolute value.  Inputs closer to zero than this can be
        ; returned directly as the sinf(input) value should be basically the same
        ; given the precision we're working with and FSIN probably won't even
        ; manage that.
        ;; @todo experiment when FSIN gets better than this.
.s_r64Tiny:
        dq      1.49011612e-8
        ; The absolute limit of FSIN "good" range.
.s_r64FSinOkay:
        dq      2.356194490192344928845 ; 3pi/4
        ;dq     1.57079632679489661923  ; pi/2 - alternative.

ENDPROC   RT_NOCRT(sinf)
185

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/Runtime/common/math/sinf.asm

Download in other formats: