; $Id: fma-asm.asm 96108 2022-08-08 11:16:23Z vboxsync $
;; @file
; IPRT - No-CRT fma alternatives - AMD64 & X86.
;

;
; Copyright (C) 2006-2022 Oracle Corporation
;
; This file is part of VirtualBox Open Source Edition (OSE), as
; available from http://www.virtualbox.org. This file is free software;
; you can redistribute it and/or modify it under the terms of the GNU
; General Public License (GPL) as published by the Free Software
; Foundation, in version 2 as it comes in the "COPYING" file of the
; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
;
; The contents of this file may alternatively be used under the terms
; of the Common Development and Distribution License Version 1.0
; (CDDL) only, as it comes in the "COPYING.CDDL" file of the
; VirtualBox OSE distribution, in which case the provisions of the
; CDDL are applicable instead of those of the GPL.
;
; You may elect to license modified versions of this file under the
; terms and conditions of either the GPL or the CDDL or both.
;

;
; Assembler setup.
;
; RT_ASM_WITH_SEH64 is defined before pulling in asmdefs.mac; presumably it
; switches on the SEH64_* unwind annotations used by the procedures in this
; file - verify against iprt/asmdefs.mac.
;
%define RT_ASM_WITH_SEH64
%include "iprt/asmdefs.mac"

BEGINCODE

;;
; Fused multiplication and add, intel version (three-operand FMA3 encoding).
;
; Computes rdFactor1 * rdFactor2 + rdAddend with a single vfmadd132sd.
;
; @returns  st(0) on x86 / xmm0 on AMD64
; @param    rdFactor1   [ebp + 08h] on x86 / xmm0 on AMD64
; @param    rdFactor2   [ebp + 10h] on x86 / xmm1 on AMD64
; @param    rdAddend    [ebp + 18h] on x86 / xmm2 on AMD64
;
; @note     All SIMD instructions in here are VEX encoded.  The x86 loads and
;           the store use vmovsd rather than legacy movsd so that no legacy
;           SSE encoding is mixed with the VEX encoded FMA instruction.
; @note     Nothing in this routine checks for FMA3 support; presumably the
;           caller does that before picking this variant - verify at the
;           call site.
;
BEGINPROC   rtNoCrtMathFma3
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        SEH64_END_PROLOGUE

%ifdef RT_ARCH_X86
        ; x86: The arguments live on the stack; xCB*2 skips the saved xBP and
        ;      the return address.
        vmovsd  xmm0, qword [xBP + xCB*2 + 00h]
        vmovsd  xmm1, qword [xBP + xCB*2 + 08h]
        vmovsd  xmm2, qword [xBP + xCB*2 + 10h]
%endif

        vfmadd132sd xmm0, xmm2, xmm1    ; xmm0 = xmm0 * xmm1 + xmm2 (132 = multiply op1 with op3 and add op2)

%ifdef RT_ARCH_X86
        ; x86: The result goes back in st(0), so bounce it via a stack
        ;      temporary.  The 'leave' below releases the temporary again.
        sub     xSP, 10h
        vmovsd  qword [xSP], xmm0
        fld     qword [xSP]
%endif
        leave
        ret
ENDPROC     rtNoCrtMathFma3


;;
; Fused multiplication and add, amd version (four-operand FMA4 encoding).
;
; Computes rdFactor1 * rdFactor2 + rdAddend with a single vfmaddsd.
;
; @returns  st(0) on x86 / xmm0 on AMD64
; @param    rdFactor1   [ebp + 08h] on x86 / xmm0 on AMD64
; @param    rdFactor2   [ebp + 10h] on x86 / xmm1 on AMD64
; @param    rdAddend    [ebp + 18h] on x86 / xmm2 on AMD64
;
; @note     All SIMD instructions in here are VEX encoded.  The x86 loads and
;           the store use vmovsd rather than legacy movsd so that no legacy
;           SSE encoding is mixed with the VEX encoded FMA instruction.
; @note     Nothing in this routine checks for FMA4 support; presumably the
;           caller does that before picking this variant - verify at the
;           call site.
;
BEGINPROC   rtNoCrtMathFma4
        push    xBP
        SEH64_PUSH_xBP
        mov     xBP, xSP
        SEH64_SET_FRAME_xBP 0
        SEH64_END_PROLOGUE

%ifdef RT_ARCH_X86
        ; x86: The arguments live on the stack; xCB*2 skips the saved xBP and
        ;      the return address.
        vmovsd  xmm0, qword [xBP + xCB*2 + 00h]
        vmovsd  xmm1, qword [xBP + xCB*2 + 08h]
        vmovsd  xmm2, qword [xBP + xCB*2 + 10h]
%endif

        vfmaddsd xmm0, xmm0, xmm1, xmm2 ; xmm0 = xmm0 * xmm1 + xmm2

%ifdef RT_ARCH_X86
        ; x86: The result goes back in st(0), so bounce it via a stack
        ;      temporary.  The 'leave' below releases the temporary again.
        sub     xSP, 10h
        vmovsd  qword [xSP], xmm0
        fld     qword [xSP]
%endif
        leave
        ret
ENDPROC     rtNoCrtMathFma4
