cos.asm@ 100765

Last change on this file since 100765 was 98103, checked in by vboxsync, 2 years ago
Copyright year updates by scm.
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 6.1 KB

Line
1	; $Id: cos.asm 98103 2023-01-17 14:15:46Z vboxsync $
2	;; @file
3	; IPRT - No-CRT cos - AMD64 & X86.
4	;
5
6	;
7	; Copyright (C) 2006-2023 Oracle and/or its affiliates.
8	;
9	; This file is part of VirtualBox base platform packages, as
10	; available from https://www.virtualbox.org.
11	;
12	; This program is free software; you can redistribute it and/or
13	; modify it under the terms of the GNU General Public License
14	; as published by the Free Software Foundation, in version 3 of the
15	; License.
16	;
17	; This program is distributed in the hope that it will be useful, but
18	; WITHOUT ANY WARRANTY; without even the implied warranty of
19	; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20	; General Public License for more details.
21	;
22	; You should have received a copy of the GNU General Public License
23	; along with this program; if not, see <https://www.gnu.org/licenses>.
24	;
25	; The contents of this file may alternatively be used under the terms
26	; of the Common Development and Distribution License Version 1.0
27	; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28	; in the VirtualBox distribution, in which case the provisions of the
29	; CDDL are applicable instead of those of the GPL.
30	;
31	; You may elect to license modified versions of this file under the
32	; terms and conditions of either the GPL or the CDDL or both.
33	;
34	; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35	;
36
37
38	%define RT_ASM_WITH_SEH64
39	%include "iprt/asmdefs.mac"
40	%include "iprt/x86.mac"
41
42
43	BEGINCODE
44
;;
; Compute the cosine of rd, measured in radians.
;
; Strategy:
;   - +/-0, denormals and any |rd| below about 2**-27 yield +1.0 directly.
;   - NaN input is handed back to the caller without being computed on.
;   - |rd| below 3pi/8, infinities and unsupported encodings go to FCOS.
;   - Everything else is shifted by a quarter period, cos(x) = sin(x + pi/2),
;     and passed on to the shared sine worker.
;
; Frame layout (20h bytes below xBP):
;   [xBP - 20h]  word   original FPU control word           (RT_OS_WINDOWS only)
;   [xBP - 1ch]  word   control word with 64-bit precision  (RT_OS_WINDOWS only)
;   [xBP - 10h]  qword  scratch for moving rd/result between xmm0 and st0 (AMD64 only)
;
; @returns  st(0) / xmm0
; @param    rd      [xBP + xCB*2] / xmm0
; @uses     eax, ecx, eflags and the FPU register stack, in addition to
;           whatever rtNoCrtMathSinCore uses.
;
RT_NOCRT_BEGINPROC cos
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        sub     xSP, 20h
        SEH64_ALLOCATE_STACK 20h
        SEH64_END_PROLOGUE

%ifdef RT_OS_WINDOWS
        ;
        ; Make sure we use full precision and not the windows default of 53 bits.
        ; The original control word is kept at [xBP - 20h] and reloaded on every
        ; exit path (see .return).
        ;
        ;; @todo not sure if this makes any difference...
        fnstcw  [xBP - 20h]
        mov     ax, [xBP - 20h]
        or      ax, X86_FCW_PC_64           ; includes both bits, so no need to clear the mask.
        mov     [xBP - 1ch], ax
        fldcw   [xBP - 1ch]
%endif

        ;
        ; Load the input into st0.
        ;
%ifdef RT_ARCH_AMD64
        movsd   [xBP - 10h], xmm0
        fld     qword [xBP - 10h]
%else
        fld     qword [xBP + xCB*2]
%endif

        ;
        ; The FCOS instruction has a very narrow range (-3pi/8 to 3pi/8) where it
        ; works reliably, so outside that we'll use the FSIN instruction instead
        ; as it has a larger good range (-5pi/4 to 1pi/4 for cosine).
        ; Input conversion follows: cos(x) = sin(x + pi/2)
        ;
        ; We examine the input and weed out non-finite numbers first.
        ;

        ; We only do the range check on normal finite numbers.
        fxam
        fnstsw  ax
        mov     ecx, eax                    ; Keep the unmasked status word: the mask below drops
                                            ; C1 (the sign), which .outside_fcos_range needs.
        and     ax, X86_FSW_C3 | X86_FSW_C2 | X86_FSW_C0
        cmp     ax, X86_FSW_C2              ; Normal finite number (excluding zero)
        je      .finite
        cmp     ax, X86_FSW_C3              ; Zero
        je      .zero
        cmp     ax, X86_FSW_C3 | X86_FSW_C2 ; Denormals - treat them as zero.
        je      .zero
        cmp     ax, X86_FSW_C0              ; NaN - must handle it special,
        je      .nan

        ; Pass infinities and unsupported inputs to fcos, assuming it does the right thing.
        ; We also jump here if we get a finite number in the "good" range, see below.
.do_fcos:
        fcos
        jmp     .return_val

        ;
        ; Finite number.
        ;
        ; First check if it's a very tiny number where we can simply return 1.
        ; Next check if it's in the range where FCOS is reasonable, otherwise
        ; go to FSIN to do the work.
        ;
.finite:
        fld     st0
        fabs                                ; st0=fabs(input); st1=input
        fld     qword [.s_r64TinyCosTo1 xWrtRIP]
        fcomip  st1                         ; compare the tiny limit with fabs(input), pop the limit
        ja      .zero_extra_pop             ; jmp if fabs(input) < .s_r64TinyCosTo1

.not_that_tiny_input:
        fld     qword [.s_r64FCosOkay xWrtRIP]
        fcomip  st1
        ffreep  st0                         ; pop fabs(input); the fcomip flags stay valid for the ja
        ja      .do_fcos                    ; jmp if fabs(input) < .s_r64FCosOkay

        ;
        ; If we have a positive number we subtract 3pi/2, for negative we add pi/2.
        ; The unmasked FXAM result is still in CX (AX had C1 masked off above).
        ;
.outside_fcos_range:
        test    cx, X86_FSW_C1              ; The sign bit.
        jnz     .adjust_negative_to_sine

        ; Calc -3pi/2 using FPU-internal pi constant.
        fldpi
        fadd    st0, st0                    ; st0=2pi
        fldpi
        fdiv    qword [.s_r64Two xWrtRIP]   ; st1=2pi; st0=pi/2
        fsubp   st1, st0                    ; st0=3pi/2
        fchs                                ; st0=-3pi/2
        jmp     .make_sine_adjustment

.adjust_negative_to_sine:
        ; Calc +pi/2.
        fldpi
        fdiv    qword [.s_r64Two xWrtRIP]   ; st1=input; st0=pi/2

.make_sine_adjustment:
        faddp   st1, st0                    ; st0=input + adjustment

        ;
        ; Call internal sine worker to calculate st0=sin(st0)
        ;
.do_sine:
        mov     ecx, 1                      ; double
        extern  NAME(rtNoCrtMathSinCore)
        call    NAME(rtNoCrtMathSinCore)

        ;
        ; Return st0.
        ;
.return_val:
%ifdef RT_ARCH_AMD64
        fstp    qword [xBP - 10h]
        movsd   xmm0, [xBP - 10h]
%endif
        ;
        ; Common exit.  The NaN path joins here (rather than after the FLDCW) so
        ; that the caller's control word is restored for every input.
        ;
.return:
%ifdef RT_OS_WINDOWS
        fldcw   [xBP - 20h]                 ; restore original
%endif
        leave
        ret

        ;
        ; cos(+/-0) = +1.0
        ;
.zero_extra_pop:
        ffreep  st0                         ; pop fabs(input)
.zero:
        ffreep  st0                         ; pop input
        fld1
        jmp     .return_val

        ;
        ; Input is NaN, output it unmodified as far as we can (FLD changes SNaN
        ; to QNaN when masked).
        ;
        ; On AMD64 xmm0 still holds the caller's value, so st0 is just dropped;
        ; otherwise st0 already is the return value.
        ;
.nan:
%ifdef RT_ARCH_AMD64
        ffreep  st0
%endif
        jmp     .return

        ;
        ; Local constants.
        ;
ALIGNCODE(8)
        ; About 2**-27. When fabs(input) is below this limit we can consider cos(input) ~= 1.0.
.s_r64TinyCosTo1:
        dq      7.4505806e-9

        ; The absolute limit for the range which FCOS is expected to produce reasonable results.
.s_r64FCosOkay:
        dq      1.1780972450961724644225    ; 3*pi/8

.s_r64Two:
        dq      2.0
ENDPROC   RT_NOCRT(cos)
213

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/Runtime/common/math/cos.asm@ 100765

Download in other formats: