1 | /* $Id: fmaf.cpp 106061 2024-09-16 14:03:52Z vboxsync $ */
|
---|
2 | /** @file
|
---|
3 | * IPRT - No-CRT - fmaf().
|
---|
4 | */
|
---|
5 |
|
---|
6 | /*
|
---|
7 | * Copyright (C) 2022-2024 Oracle and/or its affiliates.
|
---|
8 | *
|
---|
9 | * This file is part of VirtualBox base platform packages, as
|
---|
10 | * available from https://www.virtualbox.org.
|
---|
11 | *
|
---|
12 | * This program is free software; you can redistribute it and/or
|
---|
13 | * modify it under the terms of the GNU General Public License
|
---|
14 | * as published by the Free Software Foundation, in version 3 of the
|
---|
15 | * License.
|
---|
16 | *
|
---|
17 | * This program is distributed in the hope that it will be useful, but
|
---|
18 | * WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
19 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
---|
20 | * General Public License for more details.
|
---|
21 | *
|
---|
22 | * You should have received a copy of the GNU General Public License
|
---|
23 | * along with this program; if not, see <https://www.gnu.org/licenses>.
|
---|
24 | *
|
---|
25 | * The contents of this file may alternatively be used under the terms
|
---|
26 | * of the Common Development and Distribution License Version 1.0
|
---|
27 | * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
|
---|
28 | * in the VirtualBox distribution, in which case the provisions of the
|
---|
29 | * CDDL are applicable instead of those of the GPL.
|
---|
30 | *
|
---|
31 | * You may elect to license modified versions of this file under the
|
---|
32 | * terms and conditions of either the GPL or the CDDL or both.
|
---|
33 | *
|
---|
34 | * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
|
---|
35 | */
|
---|
36 |
|
---|
37 |
|
---|
38 | /*********************************************************************************************************************************
|
---|
39 | * Header Files *
|
---|
40 | *********************************************************************************************************************************/
|
---|
41 | #define IPRT_NO_CRT_FOR_3RD_PARTY
|
---|
42 | #include "internal/nocrt.h"
|
---|
43 | #include <iprt/nocrt/math.h>
|
---|
44 | #include <iprt/assertcompile.h>
|
---|
45 | #if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
|
---|
46 | # include <iprt/asm-amd64-x86.h>
|
---|
47 | # include <iprt/x86.h>
|
---|
48 | #endif
|
---|
49 | #include <softfloat.h>
|
---|
50 |
|
---|
51 |
|
---|
52 | /*********************************************************************************************************************************
|
---|
53 | * External Symbols *
|
---|
54 | *********************************************************************************************************************************/
|
---|
55 | DECLASM(float) rtNoCrtMathFma3f(float r32Factor1, float r32Factor2, float r32Addend);
|
---|
56 | DECLASM(float) rtNoCrtMathFma4f(float r32Factor1, float r32Factor2, float r32Addend);
|
---|
57 |
|
---|
58 |
|
---|
59 | #undef fmaf
|
---|
60 | float RT_NOCRT(fmaf)(float r32Factor1, float r32Factor2, float r32Addend)
|
---|
61 | {
|
---|
62 | /*
|
---|
63 | * We prefer using native FMA instructions when available.
|
---|
64 | */
|
---|
65 | #if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
|
---|
66 | typedef enum { kCpuDetect = 0, kCpuWithFma3, kCpuWithFma4, kCpuWithoutFma } CPUFMASUPPORT;
|
---|
67 | static CPUFMASUPPORT volatile s_enmSup = kCpuDetect;
|
---|
68 | CPUFMASUPPORT enmSup = s_enmSup;
|
---|
69 | if (enmSup != kCpuDetect)
|
---|
70 | { }
|
---|
71 | else
|
---|
72 | {
|
---|
73 | if (ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_FMA)
|
---|
74 | enmSup = kCpuWithFma3;
|
---|
75 | else if (ASMCpuId_ECX(UINT32_C(0x80000001)) & X86_CPUID_AMD_FEATURE_ECX_FMA4)
|
---|
76 | enmSup = kCpuWithFma4;
|
---|
77 | else
|
---|
78 | enmSup = kCpuWithoutFma;
|
---|
79 | s_enmSup = enmSup;
|
---|
80 | }
|
---|
81 | if (enmSup == kCpuWithFma3)
|
---|
82 | return rtNoCrtMathFma3f(r32Factor1, r32Factor2, r32Addend);
|
---|
83 | if (enmSup == kCpuWithFma4)
|
---|
84 | return rtNoCrtMathFma4f(r32Factor1, r32Factor2, r32Addend);
|
---|
85 | #endif
|
---|
86 |
|
---|
87 | /*
|
---|
88 | * Fall back on SoftFloat.
|
---|
89 | */
|
---|
90 | /** @todo couldn't we just use double as a fallback here? */
|
---|
91 | AssertCompile(sizeof(r32Factor1) == sizeof(RTFLOAT32U));
|
---|
92 | softfloat_state_t State = SOFTFLOAT_STATE_INIT_DEFAULTS(); /** @todo init from MXCSR/FCW */
|
---|
93 | union { RTFLOAT32U Iprt; float32_t SoftFloat; } uFactor1, uFactor2, uAddend, uResult;
|
---|
94 | uFactor1.Iprt.r = r32Factor1;
|
---|
95 | uFactor2.Iprt.r = r32Factor2;
|
---|
96 | uAddend.Iprt.r = r32Addend;
|
---|
97 | uResult.SoftFloat = f32_mulAdd(uFactor1.SoftFloat, uFactor2.SoftFloat, uAddend.SoftFloat, &State);
|
---|
98 | return uResult.Iprt.r;
|
---|
99 | }
|
---|
100 | RT_ALIAS_AND_EXPORT_NOCRT_SYMBOL(fmaf);
|
---|
101 |
|
---|