VirtualBox

source: vbox/trunk/src/VBox/Runtime/common/math/fma-asm.asm@ 97242

Last change on this file since 97242 was 96407, checked in by vboxsync, 2 years ago

scm copyright and license note update

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 2.9 KB
Line 
1; $Id: fma-asm.asm 96407 2022-08-22 17:43:14Z vboxsync $
2;; @file
3; IPRT - No-CRT fma alternatives - AMD64 & X86.
4;
5
6;
7; Copyright (C) 2006-2022 Oracle and/or its affiliates.
8;
9; This file is part of VirtualBox base platform packages, as
10; available from https://www.virtualbox.org.
11;
12; This program is free software; you can redistribute it and/or
13; modify it under the terms of the GNU General Public License
14; as published by the Free Software Foundation, in version 3 of the
15; License.
16;
17; This program is distributed in the hope that it will be useful, but
18; WITHOUT ANY WARRANTY; without even the implied warranty of
19; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20; General Public License for more details.
21;
22; You should have received a copy of the GNU General Public License
23; along with this program; if not, see <https://www.gnu.org/licenses>.
24;
25; The contents of this file may alternatively be used under the terms
26; of the Common Development and Distribution License Version 1.0
27; (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28; in the VirtualBox distribution, in which case the provisions of the
29; CDDL are applicable instead of those of the GPL.
30;
31; You may elect to license modified versions of this file under the
32; terms and conditions of either the GPL or the CDDL or both.
33;
34; SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35;
36
37%define RT_ASM_WITH_SEH64
38%include "iprt/asmdefs.mac"
39
40BEGINCODE
41
;;
; Fused multiply-add, Intel (three-operand FMA) version.
;
; Computes rdFactor1 * rdFactor2 + rdAddend with a single vfmadd132sd
; instruction.  No CPU feature detection is performed here.
;
; @returns  x86: st(0);  AMD64: xmm0
; @param    rdFactor1   x86: [xBP + 08h];  AMD64: xmm0
; @param    rdFactor2   x86: [xBP + 10h];  AMD64: xmm1
; @param    rdAddend    x86: [xBP + 18h];  AMD64: xmm2
;
BEGINPROC rtNoCrtMathFma3
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        SEH64_END_PROLOGUE

%ifdef RT_ARCH_X86
        ; The arguments live on the stack, just past the saved xBP and the
        ; return address; bring them into the registers used further down.
        movsd   xmm0, qword [xBP + xCB*2]
        movsd   xmm1, qword [xBP + xCB*2 + 8]
        movsd   xmm2, qword [xBP + xCB*2 + 16]
%endif

        ; The '132' form multiplies operand 1 by operand 3 and adds operand 2:
        ;       xmm0 = xmm0 * xmm1 + xmm2
        vfmadd132sd xmm0, xmm2, xmm1

%ifdef RT_ARCH_X86
        ; The result goes back in st(0), so bounce it through a stack
        ; temporary.  The 'leave' below releases the temporary again.
        sub     xSP, 16
        movsd   [xSP], xmm0
        fld     qword [xSP]
%endif
        leave
        ret
ENDPROC   rtNoCrtMathFma3
72
73
;;
; Fused multiply-add, AMD (four-operand FMA4) version.
;
; Computes rdFactor1 * rdFactor2 + rdAddend with a single vfmaddsd
; instruction.  No CPU feature detection is performed here.
;
; @returns  x86: st(0);  AMD64: xmm0
; @param    rdFactor1   x86: [xBP + 08h];  AMD64: xmm0
; @param    rdFactor2   x86: [xBP + 10h];  AMD64: xmm1
; @param    rdAddend    x86: [xBP + 18h];  AMD64: xmm2
;
BEGINPROC rtNoCrtMathFma4
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        SEH64_END_PROLOGUE

%ifdef RT_ARCH_X86
        ; The arguments live on the stack, just past the saved xBP and the
        ; return address; bring them into the registers used further down.
        movsd   xmm0, qword [xBP + xCB*2]
        movsd   xmm1, qword [xBP + xCB*2 + 8]
        movsd   xmm2, qword [xBP + xCB*2 + 16]
%endif

        ; Four-operand form with an explicit destination:
        ;       xmm0 = xmm0 * xmm1 + xmm2
        vfmaddsd xmm0, xmm0, xmm1, xmm2

%ifdef RT_ARCH_X86
        ; The result goes back in st(0), so bounce it through a stack
        ; temporary.  The 'leave' below releases the temporary again.
        sub     xSP, 16
        movsd   [xSP], xmm0
        fld     qword [xSP]
%endif
        leave
        ret
ENDPROC   rtNoCrtMathFma4
104
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette