VirtualBox

source: vbox/trunk/src/VBox/ValidationKit/bootsectors/bs3-cpu-instr-4.c32@ 104792

Last change on this file since 104792 was 104785, checked in by vboxsync, 11 months ago

ValidationKit/bootsectors: bugref:10658 SIMD FP testcase: [V]ADDPD.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 79.9 KB
Line 
1/* $Id: bs3-cpu-instr-4.c32 104785 2024-05-27 07:20:34Z vboxsync $ */
2/** @file
3 * BS3Kit - bs3-cpu-instr-4 - SSE, AVX FPU instructions, C code template.
4 */
5
6/*
7 * Copyright (C) 2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * The contents of this file may alternatively be used under the terms
26 * of the Common Development and Distribution License Version 1.0
27 * (CDDL), a copy of it is provided in the "COPYING.CDDL" file included
28 * in the VirtualBox distribution, in which case the provisions of the
29 * CDDL are applicable instead of those of the GPL.
30 *
31 * You may elect to license modified versions of this file under the
32 * terms and conditions of either the GPL or the CDDL or both.
33 *
34 * SPDX-License-Identifier: GPL-3.0-only OR CDDL-1.0
35 */
36
37
38/*********************************************************************************************************************************
39* Header Files *
40*********************************************************************************************************************************/
41#include <bs3kit.h>
42#include "bs3-cpu-instr-4-asm-auto.h"
43
44#include <iprt/asm.h>
45#include <iprt/asm-amd64-x86.h>
46
47
48/*********************************************************************************************************************************
49* Defined Constants And Macros *
50*********************************************************************************************************************************/
/** Converts an execution mode (BS3_MODE_XXX) into an index into an array
 * initialized by BS3CPUINSTR4_TEST1_MODES_INIT etc.
 *
 * Yields 0 for 16-bit code modes, 1 for 32-bit code modes and 2 for everything
 * else.  The argument may be evaluated more than once, so it must be free of
 * side effects. */
#define BS3CPUINSTR4_TEST_MODES_INDEX(a_bMode) \
    (BS3_MODE_IS_16BIT_CODE(a_bMode) ? 0 : BS3_MODE_IS_32BIT_CODE(a_bMode) ? 1 : 2)
54
/** Buffer size (terminator included) large enough for the names of all six
 * SIMD FP exception flags when they are formatted together. */
#define BS3_FP_XCPT_NAMES_MAXLEN    (sizeof(" IE DE ZE OE UE PE "))
57
/*
 * Single-precision (32 bits) floating-point defines.
 */
/** The max exponent value for a single-precision floating-point normal. */
#define BS3_FP32_EXP_NORMAL_MAX                 254
/** The min exponent value for a single-precision floating-point normal.
 * (A biased exponent of 0 encodes zero and denormals, so the smallest normal
 * exponent is 1 - same as for the double-precision format.) */
#define BS3_FP32_EXP_NORMAL_MIN                 1
/** The max fraction value for a single-precision floating-point normal. */
#define BS3_FP32_FRACTION_NORMAL_MAX            0x7fffff
/** The min fraction value for a single-precision floating-point normal. */
#define BS3_FP32_FRACTION_NORMAL_MIN            0
/** The exponent bias for the single-precision floating-point format. */
#define BS3_FP32_EXP_BIAS                       RTFLOAT32U_EXP_BIAS
/** Fraction width (in bits) for the single-precision floating-point format. */
#define BS3_FP32_FRACTION_BITS                  RTFLOAT32U_FRACTION_BITS

/* Initializers for commonly used single-precision values; a_Sign is the sign bit. */
#define BS3_FP32_NORMAL_MAX(a_Sign)             RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_NORMAL_MAX)
#define BS3_FP32_NORMAL_MIN(a_Sign)             RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MIN, BS3_FP32_EXP_NORMAL_MIN)
#define BS3_FP32_ZERO(a_Sign)                   RTFLOAT32U_INIT_ZERO(a_Sign)
#define BS3_FP32_ONE(a_Sign)                    RTFLOAT32U_INIT_C(a_Sign, 0, RTFLOAT32U_EXP_BIAS)
#define BS3_FP32_VAL(a_Sign, a_Frac, a_Exp)     RTFLOAT32U_INIT_C(a_Sign, a_Frac, a_Exp)
#define BS3_FP32_INF(a_Sign)                    RTFLOAT32U_INIT_INF(a_Sign)
#define BS3_FP32_QNAN(a_Sign)                   RTFLOAT32U_INIT_QNAN(a_Sign)
#define BS3_FP32_QNAN_VAL(a_Sign, a_Val)        RTFLOAT32U_INIT_QNAN_EX(a_Sign, a_Val)
#define BS3_FP32_SNAN(a_Sign)                   RTFLOAT32U_INIT_SNAN(a_Sign)
83
/*
 * Single-precision floating normals.
 * Fraction - 23 bits, all usable.
 * Exponent - 8 bits, least significant bit MBZ.
 */
#define BS3_FP32_NORMAL_VAL_1(a_Sign)           RTFLOAT32U_INIT_C(a_Sign, 0x5fcabd, 0xbc)
#define BS3_FP32_NORMAL_VAL_2(a_Sign)           RTFLOAT32U_INIT_C(a_Sign, 0x7e117a, 0x7e)
#define BS3_FP32_NORMAL_VAL_3(a_Sign)           RTFLOAT32U_INIT_C(a_Sign, 0x5b5b5b, 0x9a)
/* The maximum integer value (all 23 + 1 implied bit of the fraction part set) without losing precision.
   This is a single-precision value, so it is built with the 32-bit initializer. */
#define BS3_FP32_NORMAL_SAFE_INT_MAX(a_Sign)    RTFLOAT32U_INIT_C(a_Sign, BS3_FP32_FRACTION_NORMAL_MAX, BS3_FP32_EXP_BIAS + BS3_FP32_FRACTION_BITS)
94
/*
 * Double-precision (64 bits) floating-point defines.
 */
/** Largest biased exponent a double-precision normal can have. */
#define BS3_FP64_EXP_NORMAL_MAX                 2046
/** Smallest biased exponent a double-precision normal can have. */
#define BS3_FP64_EXP_NORMAL_MIN                 1
/** Largest fraction value of a double-precision normal (all 52 bits set). */
#define BS3_FP64_FRACTION_NORMAL_MAX            0xfffffffffffff
/** Smallest fraction value of a double-precision normal. */
#define BS3_FP64_FRACTION_NORMAL_MIN            0
/** Exponent bias of the double-precision format. */
#define BS3_FP64_EXP_BIAS                       RTFLOAT64U_EXP_BIAS
/** Number of fraction bits in the double-precision format. */
#define BS3_FP64_FRACTION_BITS                  RTFLOAT64U_FRACTION_BITS

/* Initializers for commonly used double-precision values; a_Sign is the sign bit. */
#define BS3_FP64_VAL(a_Sign, a_Frac, a_Exp)     RTFLOAT64U_INIT_C(a_Sign, a_Frac, a_Exp)
#define BS3_FP64_NORMAL_MAX(a_Sign)             BS3_FP64_VAL(a_Sign, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_NORMAL_MAX)
#define BS3_FP64_NORMAL_MIN(a_Sign)             BS3_FP64_VAL(a_Sign, BS3_FP64_FRACTION_NORMAL_MIN, BS3_FP64_EXP_NORMAL_MIN)
#define BS3_FP64_ONE(a_Sign)                    BS3_FP64_VAL(a_Sign, 0, RTFLOAT64U_EXP_BIAS)
#define BS3_FP64_ZERO(a_Sign)                   RTFLOAT64U_INIT_ZERO(a_Sign)
#define BS3_FP64_INF(a_Sign)                    RTFLOAT64U_INIT_INF(a_Sign)
#define BS3_FP64_QNAN(a_Sign)                   RTFLOAT64U_INIT_QNAN(a_Sign)
#define BS3_FP64_QNAN_VAL(a_Sign, a_Val)        RTFLOAT64U_INIT_QNAN_EX(a_Sign, a_Val)
#define BS3_FP64_SNAN(a_Sign)                   RTFLOAT64U_INIT_SNAN(a_Sign)
#define BS3_FP64_SNAN_VAL(a_Sign, a_Val)        RTFLOAT64U_INIT_SNAN_EX(a_Sign, a_Val)
121
/*
 * Double-precision floating normals.
 * Fraction - 52 bits, all usable.
 * Exponent - 11 bits, least significant bit MBZ.
 */
#define BS3_FP64_NORMAL_VAL_1(a_Sign)           RTFLOAT64U_INIT_C(a_Sign, 0xf10a7ab1ec01a, 0x4bc)
#define BS3_FP64_NORMAL_VAL_2(a_Sign)           RTFLOAT64U_INIT_C(a_Sign, 0xca5cadea1b1ed, 0x3ae)
/* Exponent must fit the 11-bit field and stay below the Inf/NaN encoding (0x7ff). */
#define BS3_FP64_NORMAL_VAL_3(a_Sign)           RTFLOAT64U_INIT_C(a_Sign, 0xb5b5b5b5b5b5b, 0x7fe)
/* The maximum integer value (all 52 + 1 implied bit of the fraction part set) without losing precision. */
#define BS3_FP64_NORMAL_SAFE_INT_MAX(a_Sign)    RTFLOAT64U_INIT_C(a_Sign, BS3_FP64_FRACTION_NORMAL_MAX, BS3_FP64_EXP_BIAS + BS3_FP64_FRACTION_BITS)
132
133
134/*********************************************************************************************************************************
135* Structures and Typedefs *
136*********************************************************************************************************************************/
/**
 * Instruction set type and operand width.
 *
 * The values double as indexes into g_afTypeSupports, so T_MAX must stay last.
 */
typedef enum BS3CPUINSTRX_INSTRTYPE_T
{
    T_INVALID,
    /* MMX family: */
    T_MMX,
    T_MMX_SSE,                  /**< MMX instruction, but require the SSE CPUID to work. */
    T_MMX_SSE2,                 /**< MMX instruction, but require the SSE2 CPUID to work. */
    T_MMX_SSSE3,                /**< MMX instruction, but require the SSSE3 CPUID to work. */
    T_AXMMX,
    T_AXMMX_OR_SSE,
    /* 128-bit SSE family: */
    T_SSE,
    T_128BITS = T_SSE,          /**< Alias for the first 128-bit type. */
    T_SSE2,
    T_SSE3,
    T_SSSE3,
    T_SSE4_1,
    T_SSE4_2,
    T_SSE4A,
    T_PCLMUL,
    T_SHA,
    /* 128-bit AVX family: */
    T_AVX_128,
    T_AVX2_128,
    T_AVX_PCLMUL,
    /* 256-bit AVX family: */
    T_AVX_256,
    T_256BITS = T_AVX_256,      /**< Alias for the first 256-bit type. */
    T_AVX2_256,
    /** Number of types; not a valid type itself. */
    T_MAX
} BS3CPUINSTRX_INSTRTYPE_T;
165
/** Memory or register rm variant.
 * The RM_MEMxx values name the memory operand width in bits (see
 * bs3CpuInstrXMemOpSize, which maps them to 1, 2, 4 and 8 bytes). */
enum {
    RM_REG   = 0,   /**< Register operand. */
    RM_MEM   = 1,   /**< Memory operand of the full operand size. */
    RM_MEM8  = 2,   /**< Memory operand is 8 bits wide.  Hack for movss and similar. */
    RM_MEM16 = 3,   /**< Memory operand is 16 bits wide.  Hack for movss and similar. */
    RM_MEM32 = 4,   /**< Memory operand is 32 bits wide.  Hack for movss and similar. */
    RM_MEM64 = 5    /**< Memory operand is 64 bits wide.  Hack for movss and similar. */
};
175
/**
 * Execution environment configuration.
 *
 * Each flag selects the state of one control bit that
 * bs3CpuInstr4ConfigReconfigure applies to the test context.  The member
 * order matters: g_aXcptConfig1 initializes this structure positionally.
 */
typedef struct BS3CPUINSTR4_CONFIG_T
{
    uint16_t    fCr0Mp          : 1; /**< Value for CR0.MP. */
    uint16_t    fCr0Em          : 1; /**< Value for CR0.EM. */
    uint16_t    fCr0Ts          : 1; /**< Value for CR0.TS. */
    uint16_t    fCr4OsFxSR      : 1; /**< Value for CR4.OSFXSR. */
    uint16_t    fCr4OsXSave     : 1; /**< Value for CR4.OSXSAVE. */
    uint16_t    fCr4OsXmmExcpt  : 1; /**< Value for CR4.OSXMMEEXCPT (only set when SSE is supported). */
    uint16_t    fXcr0Sse        : 1; /**< Value for the SSE component bit in XCR0. */
    uint16_t    fXcr0Avx        : 1; /**< Value for the YMM component bit in XCR0 (only set when 256-bit AVX is supported). */
    uint16_t    fAligned        : 1; /**< Aligned mem operands.  If 0, they will be misaligned and tests w/o mem operands skipped. */
    uint16_t    fAlignCheck     : 1; /**< Enables alignment checking: sets both EFLAGS.AC and CR0.AM. */
    uint16_t    fMxCsrMM        : 1; /**< Value for MXCSR.MM.  AMD only */
    uint8_t     bXcptSse;            /**< Exception vector (X86_XCPT_XXX) for SSE instructions - presumably the expected one; usage not visible here. */
    uint8_t     bXcptAvx;            /**< Exception vector (X86_XCPT_XXX) for AVX instructions - presumably the expected one; usage not visible here. */
} BS3CPUINSTR4_CONFIG_T;
/** Pointer to an execution environment configuration. */
typedef BS3CPUINSTR4_CONFIG_T const BS3_FAR *PCBS3CPUINSTR4_CONFIG_T;
197
/** State saved by bs3CpuInstr4ConfigReconfigure and put back by
 * bs3CpuInstrXConfigRestore. */
typedef struct BS3CPUINSTRX_CONFIG_SAVED_T
{
    uint32_t    uCr0;   /**< CR0 from the register context. */
    uint32_t    uCr4;   /**< CR4 from the register context. */
    uint32_t    uEfl;   /**< EFLAGS from the register context. */
    uint16_t    uFcw;   /**< x87 FCW from the extended context. */
    uint16_t    uFsw;   /**< x87 FSW from the extended context. */
    uint32_t    uMxCsr; /**< MXCSR from the extended context. */
} BS3CPUINSTRX_CONFIG_SAVED_T;
/** Pointer to saved configuration state. */
typedef BS3CPUINSTRX_CONFIG_SAVED_T BS3_FAR *PBS3CPUINSTRX_CONFIG_SAVED_T;
/** Pointer to const saved configuration state. */
typedef BS3CPUINSTRX_CONFIG_SAVED_T const BS3_FAR *PCBS3CPUINSTRX_CONFIG_SAVED_T;
210
/**
 * YMM packed double-precision floating-point register.
 *
 * Overlays four 64-bit floating-point values on the 256-bit register image.
 * @todo move to x86.h?
 */
typedef union X86YMMFLOATPDREG
{
    /** Packed double-precision floating-point view. */
    RTFLOAT64U  ar64[4];
    /** 256-bit integer view. */
    RTUINT256U  ymm;
} X86YMMFLOATPDREG;
# ifndef VBOX_FOR_DTRACE_LIB
AssertCompileSize(X86YMMFLOATPDREG, 32);
# endif
/** Pointer to a YMM packed double-precision floating-point register. */
typedef X86YMMFLOATPDREG BS3_FAR *PX86YMMFLOATPDREG;
/** Pointer to a const YMM packed double-precision floating-point register. */
typedef X86YMMFLOATPDREG const BS3_FAR *PCX86YMMFLOATPDREG;
229
/**
 * YMM packed single-precision floating-point register.
 *
 * Overlays eight 32-bit floating-point values on the 256-bit register image.
 * @todo move to x86.h?
 */
typedef union X86YMMFLOATPSREG
{
    /** Packed single-precision floating-point view. */
    RTFLOAT32U  ar32[8];
    /** 256-bit integer view. */
    RTUINT256U  ymm;
} X86YMMFLOATPSREG;
# ifndef VBOX_FOR_DTRACE_LIB
AssertCompileSize(X86YMMFLOATPSREG, 32);
# endif
/** Pointer to a YMM packed single-precision floating-point register. */
typedef X86YMMFLOATPSREG BS3_FAR *PX86YMMFLOATPSREG;
/** Pointer to a const YMM packed single-precision floating-point register. */
typedef X86YMMFLOATPSREG const BS3_FAR *PCX86YMMFLOATPSREG;
248
249/**
250 * YMM scalar quadruple-precision floating-point register.
251 * @todo move to x86.h?
252 */
253typedef union X86YMMFLOATSQREG
254{
255 /** Scalar quadruple-precision floating point view. */
256 RTFLOAT128U ar128[2];
257 /** 256-bit integer view. */
258 RTUINT256U ymm;
259} X86YMMFLOATSQREG;
260# ifndef VBOX_FOR_DTRACE_LIB
261AssertCompileSize(X86YMMFLOATSQREG, 32);
262# endif
263/** Pointer to a YMM scalar quadruple-precision floating-point register. */
264typedef X86YMMFLOATSQREG *PX86YMMFLOATSQREG;
265/** Pointer to a const YMM scalar quadruple-precision floating-point register. */
266typedef X86YMMFLOATSQREG const *PCX86YMMFLOATSQREG;
267
268
269/*********************************************************************************************************************************
270* Global Variables *
271*********************************************************************************************************************************/
272static bool g_afTypeSupports[T_MAX] = { false, false, false, false, false, false, false, false, false, false };
273static bool g_fAmdMisalignedSse = false;
274static uint8_t g_enmExtCtxMethod = BS3EXTCTXMETHOD_INVALID;
275static bool g_fMxCsrDazSupported = false;
276
277/** Zero value (indexed by fSign). */
278RTFLOAT32U const g_ar32Zero[] = { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) };
279RTFLOAT64U const g_ar64Zero[] = { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) };
280
281/** One value (indexed by fSign). */
282RTFLOAT32U const g_ar32One[] = { RTFLOAT32U_INIT_C(0, 0, RTFLOAT32U_EXP_BIAS),
283 RTFLOAT32U_INIT_C(1, 0, RTFLOAT32U_EXP_BIAS) };
284RTFLOAT64U const g_ar64One[] = { RTFLOAT64U_INIT_C(0, 0, RTFLOAT64U_EXP_BIAS),
285 RTFLOAT64U_INIT_C(1, 0, RTFLOAT64U_EXP_BIAS) };
286
287/** Infinity (indexed by fSign). */
288RTFLOAT32U const g_ar32Infinity[] = { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(1) };
289RTFLOAT64U const g_ar64Infinity[] = { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(1) };
290
291/** Default QNaNs (indexed by fSign). */
292RTFLOAT32U const g_ar32QNaN[] = { RTFLOAT32U_INIT_QNAN(0), RTFLOAT32U_INIT_QNAN(1) };
293RTFLOAT64U const g_ar64QNaN[] = { RTFLOAT64U_INIT_QNAN(0), RTFLOAT64U_INIT_QNAN(1) };
294
/** Size of g_pbBuf - at least three pages. */
static uint32_t         g_cbBuf;
/** Buffer of g_cbBuf size. */
static uint8_t BS3_FAR *g_pbBuf;
/** RW alias for the buffer memory at g_pbBuf.  Set up by bs3CpuInstrXBufSetup:
 * g_pbBufAliasAlloc in paged modes, the buffer itself otherwise. */
static uint8_t BS3_FAR *g_pbBufAlias;
/** Address at which the RW alias of g_pbBuf is mapped in paged modes
 * (copied into g_pbBufAlias by bs3CpuInstrXBufSetup). */
static uint8_t BS3_FAR *g_pbBufAliasAlloc;
303
/** Exception type \#1 test configurations, 16 & 32 bytes strictly aligned.
 *
 * Each row is a positional BS3CPUINSTR4_CONFIG_T initializer; the column
 * header below names the members.  Row \#0 is the baseline; the note on each
 * following row lists what differs from it. */
static const BS3CPUINSTR4_CONFIG_T g_aXcptConfig1[] =
{
/*
 *   X87 SSE SSE SSE     AVX      SSE        AVX  AVX  SSE       AVX      AMD/SSE   <-- applies to
 *                       +AVX     +AVX                 +AMD/SSE  +AMD/SSE
 *   CR0 CR0 CR0 CR4     CR4      CR4        XCR0 XCR0                    MXCSR
 *   MP, EM, TS, OSFXSR, OSXSAVE, OSXMMEXCPT SSE, AVX, fAligned, AC/AM,   MM,   bXcptSse,    bXcptAvx */
    { 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #0 */
    { 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #1 - CR4.OSXMMEXCPT clear */
    { 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #2 - CR0.MP set */
    { 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #3 - CR0.EM set */
    { 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_NM, X86_XCPT_NM }, /* #4 - CR0.TS set */
    { 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_NM }, /* #5 - CR0.EM + CR0.TS set */
    { 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, X86_XCPT_UD, X86_XCPT_DB }, /* #6 - CR4.OSFXSR clear */
    { 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #7 - CR4.OSXSAVE clear */
    { 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #8 - XCR0.AVX clear */
    { 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, X86_XCPT_DB, X86_XCPT_UD }, /* #9 - XCR0.SSE + XCR0.AVX clear */
    /* Memory misalignment and alignment checks: */
    { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, X86_XCPT_GP, X86_XCPT_DB }, /* #10 - misaligned */
    { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, X86_XCPT_GP, X86_XCPT_DB }, /* #11 - misaligned, AC/AM set */
    { 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, X86_XCPT_DB, X86_XCPT_DB }, /* #12 - aligned, AC/AM set */
    /* AMD only: */
    { 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, X86_XCPT_DB, X86_XCPT_GP }, /* #13 - misaligned, MXCSR.MM set */
    { 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, X86_XCPT_AC, X86_XCPT_GP }, /* #14 - misaligned, AC/AM + MXCSR.MM set */
};
330
331
332
333/**
334 * Returns the name of an X86 exception given the vector.
335 *
336 * @returns Name of the exception.
337 * @param uVector The exception vector.
338 */
339static const char BS3_FAR *bs3CpuInstr4XcptName(uint8_t uVector)
340{
341 switch (uVector)
342 {
343 case X86_XCPT_DE: return "#DE";
344 case X86_XCPT_DB: return "#DB";
345 case X86_XCPT_NMI: return "#NMI";
346 case X86_XCPT_BP: return "#BP";
347 case X86_XCPT_OF: return "#OF";
348 case X86_XCPT_BR: return "#BR";
349 case X86_XCPT_UD: return "#UD";
350 case X86_XCPT_NM: return "#NM";
351 case X86_XCPT_DF: return "#DF";
352 case X86_XCPT_CO_SEG_OVERRUN: return "#CO_SEG_OVERRUN";
353 case X86_XCPT_TS: return "#TS";
354 case X86_XCPT_NP: return "#NP";
355 case X86_XCPT_SS: return "#SS";
356 case X86_XCPT_GP: return "#GP";
357 case X86_XCPT_PF: return "#PF";
358 case X86_XCPT_MF: return "#MF";
359 case X86_XCPT_AC: return "#AC";
360 case X86_XCPT_MC: return "#MC";
361 case X86_XCPT_XF: return "#XF";
362 case X86_XCPT_VE: return "#VE";
363 case X86_XCPT_CP: return "#CP";
364 case X86_XCPT_VC: return "#VC";
365 case X86_XCPT_SX: return "#SX";
366 }
367 return "UNKNOWN";
368}
369
370
371/**
372 * Gets the names of floating-point exception flags that are set for a given MXCSR.
373 *
374 * @returns Names of floating-point exception flags that are set.
375 * @param pszBuf Where to store the floating-point exception flags.
376 * @param cchBuf The size of the buffer.
377 * @param fMxCsr The MXCSR value.
378 */
379static size_t bs3CpuInstr4GetXcptFlags(char BS3_FAR *pszBuf, size_t cchBuf, uint32_t fMxCsr)
380{
381 BS3_ASSERT(cchBuf >= BS3_FP_XCPT_NAMES_MAXLEN);
382 if (!(fMxCsr & X86_MXCSR_XCPT_FLAGS))
383 return Bs3StrPrintf(pszBuf, cchBuf, " None");
384 return Bs3StrPrintf(pszBuf, cchBuf, "%s%s%s%s%s%s", fMxCsr & X86_MXCSR_IE ? " IE" : "", fMxCsr & X86_MXCSR_DE ? " DE" : "",
385 fMxCsr & X86_MXCSR_ZE ? " ZE" : "", fMxCsr & X86_MXCSR_OE ? " OE" : "",
386 fMxCsr & X86_MXCSR_UE ? " UE" : "", fMxCsr & X86_MXCSR_PE ? " PE" : "");
387}
388
389
390/**
391 * Reconfigures the execution environment according to @a pConfig.
392 *
393 * Call bs3CpuInstrXConfigRestore to undo the changes.
394 *
395 * @returns true on success, false if the configuration cannot be applied. In
396 * the latter case, no context changes are made.
397 * @param pSavedCfg Where to save state we modify.
398 * @param pCtx The register context to modify.
399 * @param pExtCtx The extended register context to modify.
400 * @param pConfig The configuration to apply.
401 * @param bMode The target mode.
402 */
403static bool bs3CpuInstr4ConfigReconfigure(PBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg, PBS3REGCTX pCtx, PBS3EXTCTX pExtCtx,
404 PCBS3CPUINSTR4_CONFIG_T pConfig, uint8_t bMode)
405{
406 /*
407 * Save context bits we may change here
408 */
409 pSavedCfg->uCr0 = pCtx->cr0.u32;
410 pSavedCfg->uCr4 = pCtx->cr4.u32;
411 pSavedCfg->uEfl = pCtx->rflags.u32;
412 pSavedCfg->uFcw = Bs3ExtCtxGetFcw(pExtCtx);
413 pSavedCfg->uFsw = Bs3ExtCtxGetFsw(pExtCtx);
414 pSavedCfg->uMxCsr = Bs3ExtCtxGetMxCsr(pExtCtx);
415
416 /*
417 * Can we make these changes?
418 */
419 if (pConfig->fMxCsrMM && !g_fAmdMisalignedSse)
420 return false;
421
422 /*
423 * Modify the test context.
424 */
425 if (pConfig->fCr0Mp)
426 pCtx->cr0.u32 |= X86_CR0_MP;
427 else
428 pCtx->cr0.u32 &= ~X86_CR0_MP;
429 if (pConfig->fCr0Em)
430 pCtx->cr0.u32 |= X86_CR0_EM;
431 else
432 pCtx->cr0.u32 &= ~X86_CR0_EM;
433 if (pConfig->fCr0Ts)
434 pCtx->cr0.u32 |= X86_CR0_TS;
435 else
436 pCtx->cr0.u32 &= ~X86_CR0_TS;
437
438 if (pConfig->fCr4OsFxSR)
439 pCtx->cr4.u32 |= X86_CR4_OSFXSR;
440 else
441 pCtx->cr4.u32 &= ~X86_CR4_OSFXSR;
442
443 if (pConfig->fCr4OsXmmExcpt && g_afTypeSupports[T_SSE])
444 pCtx->cr4.u32 |= X86_CR4_OSXMMEEXCPT;
445 else
446 pCtx->cr4.u32 &= ~X86_CR4_OSXMMEEXCPT;
447
448 if (pConfig->fCr4OsFxSR)
449 pCtx->cr4.u32 |= X86_CR4_OSFXSR;
450 else
451 pCtx->cr4.u32 &= ~X86_CR4_OSFXSR;
452
453 if (pConfig->fCr4OsXSave)
454 pCtx->cr4.u32 |= X86_CR4_OSXSAVE;
455 else
456 pCtx->cr4.u32 &= ~X86_CR4_OSXSAVE;
457
458 if (pConfig->fXcr0Sse)
459 pExtCtx->fXcr0Saved |= XSAVE_C_SSE;
460 else
461 pExtCtx->fXcr0Saved &= ~XSAVE_C_SSE;
462 if (pConfig->fXcr0Avx && g_afTypeSupports[T_AVX_256])
463 pExtCtx->fXcr0Saved |= XSAVE_C_YMM;
464 else
465 pExtCtx->fXcr0Saved &= ~XSAVE_C_YMM;
466
467 if (pConfig->fAlignCheck)
468 {
469 pCtx->rflags.u32 |= X86_EFL_AC;
470 pCtx->cr0.u32 |= X86_CR0_AM;
471 }
472 else
473 {
474 pCtx->rflags.u32 &= ~X86_EFL_AC;
475 pCtx->cr0.u32 &= ~X86_CR0_AM;
476 }
477
478 /** @todo Can we remove this? x87 FPU and SIMD are independent. */
479 Bs3ExtCtxSetFsw(pExtCtx, pSavedCfg->uFsw & ~(X86_FSW_ES | X86_FSW_B));
480
481 if (pConfig->fMxCsrMM)
482 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr | X86_MXCSR_MM);
483 else
484 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr & ~X86_MXCSR_MM);
485 return true;
486}
487
488
489/**
490 * Undoes changes made by bs3CpuInstr4ConfigReconfigure.
491 */
492static void bs3CpuInstrXConfigRestore(PCBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg, PBS3REGCTX pCtx, PBS3EXTCTX pExtCtx)
493{
494 pCtx->cr0.u32 = pSavedCfg->uCr0;
495 pCtx->cr4.u32 = pSavedCfg->uCr4;
496 pCtx->rflags.u32 = pSavedCfg->uEfl;
497 pExtCtx->fXcr0Saved = pExtCtx->fXcr0Nominal;
498 Bs3ExtCtxSetFcw(pExtCtx, pSavedCfg->uFcw);
499 Bs3ExtCtxSetFsw(pExtCtx, pSavedCfg->uFsw);
500 Bs3ExtCtxSetMxCsr(pExtCtx, pSavedCfg->uMxCsr);
501}
502
503
504/**
505 * Allocates three extended CPU contexts and initializes the first one
506 * with random data.
507 * @returns First extended context, initialized with randomish data. NULL on
508 * failure (complained).
509 * @param ppExtCtx2 Where to return the 2nd context.
510 */
511static PBS3EXTCTX bs3CpuInstrXAllocExtCtxs(PBS3EXTCTX BS3_FAR *ppExtCtx2)
512{
513 /* Allocate extended context structures. */
514 uint64_t fFlags;
515 uint16_t cb = Bs3ExtCtxGetSize(&fFlags);
516 PBS3EXTCTX pExtCtx1 = Bs3MemAlloc(BS3MEMKIND_TILED, cb * 3);
517 PBS3EXTCTX pExtCtx2 = (PBS3EXTCTX)((uint8_t BS3_FAR *)pExtCtx1 + cb);
518 if (pExtCtx1)
519 {
520 Bs3ExtCtxInit(pExtCtx1, cb, fFlags);
521 /** @todo populate with semi-random stuff. */
522
523 Bs3ExtCtxInit(pExtCtx2, cb, fFlags);
524 *ppExtCtx2 = pExtCtx2;
525 return pExtCtx1;
526 }
527 Bs3TestFailedF("Bs3MemAlloc(tiled,%#x)", cb * 2);
528 *ppExtCtx2 = NULL;
529 return NULL;
530}
531
532
533/**
534 * Frees the extended CPU contexts allocated by bs3CpuInstrXAllocExtCtxs.
535 *
536 * @param pExtCtx1 The first extended context.
537 * @param pExtCtx2 The second extended context.
538 */
539static void bs3CpuInstrXFreeExtCtxs(PBS3EXTCTX pExtCtx1, PBS3EXTCTX BS3_FAR pExtCtx2)
540{
541 RT_NOREF_PV(pExtCtx2);
542 Bs3MemFree(pExtCtx1, pExtCtx1->cb * 2);
543}
544
545
546/**
547 * Sets up SSE and AVX bits relevant for FPU instructions.
548 */
549static void bs3CpuInstr4SetupSseAndAvx(PBS3REGCTX pCtx, PCBS3EXTCTX pExtCtx)
550{
551 /* CR0: */
552 uint32_t cr0 = Bs3RegGetCr0();
553 cr0 &= ~(X86_CR0_TS | X86_CR0_MP | X86_CR0_EM);
554 cr0 |= X86_CR0_NE;
555 Bs3RegSetCr0(cr0);
556
557 /* If real mode context, the cr0 value will differ from the current one (we're in PE32 mode). */
558 pCtx->cr0.u32 &= ~(X86_CR0_TS | X86_CR0_MP | X86_CR0_EM);
559 pCtx->cr0.u32 |= X86_CR0_NE;
560
561 /* CR4: */
562 BS3_ASSERT( pExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE
563 || pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE);
564 {
565 uint32_t cr4 = Bs3RegGetCr4();
566 if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE)
567 {
568 cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT | X86_CR4_OSXSAVE;
569 Bs3RegSetCr4(cr4);
570 Bs3RegSetXcr0(pExtCtx->fXcr0Nominal);
571 }
572 else if (pExtCtx->enmMethod == BS3EXTCTXMETHOD_FXSAVE)
573 {
574 cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXMMEEXCPT;
575 Bs3RegSetCr4(cr4);
576 }
577 pCtx->cr4.u32 = cr4;
578 }
579}
580
581
582/**
583 * Configures the buffer with electric fences in paged modes.
584 *
585 * @returns Adjusted buffer pointer.
586 * @param pbBuf The buffer pointer.
587 * @param pcbBuf Pointer to the buffer size (input & output).
588 * @param bMode The testing target mode.
589 */
590DECLINLINE(uint8_t BS3_FAR *) bs3CpuInstrXBufSetup(uint8_t BS3_FAR *pbBuf, uint32_t *pcbBuf, uint8_t bMode)
591{
592 if (BS3_MODE_IS_PAGED(bMode))
593 {
594 int rc;
595 uint32_t cbBuf = *pcbBuf;
596 Bs3PagingProtectPtr(&pbBuf[0], X86_PAGE_SIZE, 0, X86_PTE_P);
597 Bs3PagingProtectPtr(&pbBuf[cbBuf - X86_PAGE_SIZE], X86_PAGE_SIZE, 0, X86_PTE_P);
598 pbBuf += X86_PAGE_SIZE;
599 cbBuf -= X86_PAGE_SIZE * 2;
600 *pcbBuf = cbBuf;
601
602 g_pbBufAlias = g_pbBufAliasAlloc;
603 rc = Bs3PagingAlias((uintptr_t)g_pbBufAlias, (uintptr_t)pbBuf, cbBuf + X86_PAGE_SIZE, /* must include the tail guard pg */
604 X86_PTE_P | X86_PTE_A | X86_PTE_D | X86_PTE_RW);
605 if (RT_FAILURE(rc))
606 Bs3TestFailedF("Bs3PagingAlias failed on %p/%p LB %#x: %d", g_pbBufAlias, pbBuf, cbBuf, rc);
607 }
608 else
609 g_pbBufAlias = pbBuf;
610 return pbBuf;
611}
612
613
614/**
615 * Undoes what bs3CpuInstrXBufSetup did.
616 *
617 * @param pbBuf The buffer pointer.
618 * @param cbBuf The buffer size.
619 * @param bMode The testing target mode.
620 */
621DECLINLINE(void) bs3CpuInstrXBufCleanup(uint8_t BS3_FAR *pbBuf, uint32_t cbBuf, uint8_t bMode)
622{
623 if (BS3_MODE_IS_PAGED(bMode))
624 {
625 Bs3PagingProtectPtr(&pbBuf[-X86_PAGE_SIZE], X86_PAGE_SIZE, X86_PTE_P, 0);
626 Bs3PagingProtectPtr(&pbBuf[cbBuf], X86_PAGE_SIZE, X86_PTE_P, 0);
627 }
628}
629
630
631/**
632 * Gets a buffer of a @a cbMemOp sized operand according to the given
633 * configuration and alignment restrictions.
634 *
635 * @returns Pointer to the buffer.
636 * @param pbBuf The buffer pointer.
637 * @param cbBuf The buffer size.
638 * @param cbMemOp The operand size.
639 * @param cbAlign The operand alignment restriction.
640 * @param pConfig The configuration.
641 * @param fPageFault The \#PF test setting.
642 */
643DECLINLINE(uint8_t BS3_FAR *) bs3CpuInstrXBufForOperand(uint8_t BS3_FAR *pbBuf, uint32_t cbBuf, uint8_t cbMemOp, uint8_t cbAlign,
644 PCBS3CPUINSTR4_CONFIG_T pConfig, unsigned fPageFault)
645{
646 /* All allocations are at the tail end of the buffer, so that we've got a
647 guard page following the operand. When asked to consistenly trigger
648 a #PF, we slide the buffer into that guard page. */
649 if (fPageFault)
650 cbBuf += X86_PAGE_SIZE;
651
652 if (pConfig->fAligned)
653 {
654 if (!pConfig->fAlignCheck)
655 return &pbBuf[cbBuf - cbMemOp];
656 return &pbBuf[cbBuf - cbMemOp - cbAlign];
657 }
658 return &pbBuf[cbBuf - cbMemOp - 1];
659}
660
661
662/**
663 * Determines the size of memory operands.
664 */
665DECLINLINE(uint8_t) bs3CpuInstrXMemOpSize(uint8_t cbOperand, uint8_t enmRm)
666{
667 if (enmRm <= RM_MEM)
668 return cbOperand;
669 if (enmRm == RM_MEM8)
670 return sizeof(uint8_t);
671 if (enmRm == RM_MEM16)
672 return sizeof(uint16_t);
673 if (enmRm == RM_MEM32)
674 return sizeof(uint32_t);
675 if (enmRm == RM_MEM64)
676 return sizeof(uint64_t);
677 BS3_ASSERT(0);
678 return cbOperand;
679}
680
681
682/*
683 * Code to make testing the tests faster. `bs3CpuInstrX_SkipIt()' randomly
684 * skips a large fraction of the micro-tests. It is sufficiently random
685 * that over a large number of runs, all micro-tests will be hit.
686 *
687 * This improves the runtime of the worst case (`#define ALL_TESTS' on a
688 * debug build, run with '--execute-all-in-iem') from ~9000 to ~800 seconds
689 * (on an Intel Core i7-10700, fwiw).
690 *
691 * To activate this 'developer's speed-testing mode', turn on
692 * `#define BS3_SKIPIT_DO_SKIP' here.
693 *
694 * BS3_SKIPIT_AVG_SKIP governs approximately how many micro-tests are
695 * skipped in a row; e.g. the default of 26 means about every 27th
696 * micro-test is run during a particular test run. (This is not 27x
697 * faster due to other activities which are not skipped!) Note this is
698 * only an average; the actual skips are random.
699 *
700 * You can also modify bs3CpuInstrX_SkipIt() to focus on specific sub-tests,
701 * using its (currently ignored) `bRing, iCfg, iTest, iVal, iVariant' args
702 * (to enable this: turn on `#define BS3_SKIPIT_DO_ARGS': which costs about
703 * 3% performance).
704 *
705 * Note! The skipping is not compatible with testing the native recompiler as
706 * it requires the test code to be run a number of times before it kicks
707 * in and does the native recompilation (currently around 16 times).
708 */
709#define BS3_SKIPIT_AVG_SKIP 26
710#define BS3_SKIPIT_REPORT_COUNT 150000
711#undef BS3_SKIPIT_DO_SKIP
712#undef BS3_SKIPIT_DO_ARGS
713
714#ifndef BS3_SKIPIT_DO_SKIP
715# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) (false)
716#else
717# include <iprt/asm-amd64-x86.h>
718# include <iprt/asm-math.h>
719
720DECLINLINE(uint32_t) bs3CpuInstrX_SimpleRand(void)
721{
722 /*
723 * A simple Lehmer linear congruential pseudo-random number
724 * generator using the constants suggested by Park & Miller:
725 *
726 * modulus = 2^31 - 1 (INT32_MAX)
727 * multiplier = 7^5 (16807)
728 *
729 * It produces numbers in the range [1..INT32_MAX-1] and is
730 * more chaotic in the higher bits.
731 *
732 * Note! Runtime/common/rand/randparkmiller.cpp is also use this algorithm,
733 * though the zero handling is different.
734 */
735 static uint32_t s_uSeedMemory = 0;
736 uint32_t uVal = s_uSeedMemory;
737 if (!uVal)
738 uVal = (uint32_t)ASMReadTSC();
739 uVal = ASMModU64ByU32RetU32(ASMMult2xU32RetU64(uVal, 16807), INT32_MAX);
740 s_uSeedMemory = uVal;
741 return uVal;
742}
743
/** Number of micro-tests bs3CpuInstrX_SkipIt has been asked about. */
static unsigned g_cSeen;
/** Number of micro-tests bs3CpuInstrX_SkipIt decided to skip. */
static unsigned g_cSkipped;

/** Prints how many micro-tests were seen, tested and skipped so far. */
static void bs3CpuInstrX_ShowTallies(void)
{
    unsigned const cTested = g_cSeen - g_cSkipped;
    Bs3TestPrintf("Micro-tests %d: tested %d / skipped %d\n", g_cSeen, cTested, g_cSkipped);
}
750
751# ifdef BS3_SKIPIT_DO_ARGS
752# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) bs3CpuInstrX_SkipIt(bRing, iCfg, iTest, iVal, iVariant)
753static bool bs3CpuInstrX_SkipIt(uint8_t bRing, unsigned iCfg, unsigned iTest, unsigned iVal, unsigned iVariant)
754# else
755# define BS3_SKIPIT(bRing, iCfg, iTest, iVal, iVariant) bs3CpuInstrX_SkipIt()
756static bool bs3CpuInstrX_SkipIt(void)
757# endif
758{
759 static unsigned s_uTimes = 0;
760 bool fSkip;
761
762 /* Cache calls to the relatively expensive random routine */
763 if (!s_uTimes)
764 s_uTimes = bs3CpuInstrX_SimpleRand() % (BS3_SKIPIT_AVG_SKIP * 2 + 1) + 1;
765 fSkip = --s_uTimes > 0;
766 if (fSkip)
767 ++g_cSkipped;
768
769 if (++g_cSeen % BS3_SKIPIT_REPORT_COUNT == 0)
770 bs3CpuInstrX_ShowTallies();
771 return fSkip;
772}
773
774#endif /* BS3_SKIPIT_DO_SKIP */
775
/*
 * Test type #1.
 * Generic YMM registers.
 *
 * Reference layout: the packed single/double precision variants of this
 * structure are compile-time checked to have the same size and member offsets.
 */
typedef struct BS3CPUINSTR4_TEST1_VALUES_T
{
    X86YMMREG           uSrc2;               /**< Second source operand. */
    X86YMMREG           uSrc1;               /**< uDstIn for SSE */
    X86YMMREG           uDstOut;             /**< Destination output. */
    uint32_t            fMxCsrMask;          /**< MXCSR exception mask to use. */
    bool                fDenormalsAreZero;   /**< Whether DAZ (Denormals-Are-Zero) is used. */
    bool                fFlushToZero;        /**< Whether Flush-To-Zero (FZ) is used. */
    uint32_t            fRoundingCtlMask;    /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
    uint32_t            fExpectedMxCsrFlags; /**< Expected MXCSR exception flags. */
} BS3CPUINSTR4_TEST1_VALUES_T;
791
/*
 * Test type #1.
 * Packed single-precision.
 *
 * Same layout as BS3CPUINSTR4_TEST1_VALUES_T, with the operands typed as
 * packed single-precision registers; the assertions below enforce that.
 */
typedef struct BS3CPUINSTR4_TEST1_VALUES_PS_T
{
    X86YMMFLOATPSREG    uSrc2;               /**< Second source operand. */
    X86YMMFLOATPSREG    uSrc1;               /**< uDstIn for SSE */
    X86YMMFLOATPSREG    uDstOut;             /**< Destination output. */
    uint32_t            fMxCsrMask;          /**< MXCSR exception mask to use. */
    bool                fDenormalsAreZero;   /**< Whether DAZ (Denormals-Are-Zero) is used. */
    bool                fFlushToZero;        /**< Whether Flush-To-Zero (FZ) is used. */
    uint32_t            fRoundingCtlMask;    /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
    uint32_t            fExpectedMxCsrFlags; /**< Expected MXCSR exception flags. */
} BS3CPUINSTR4_TEST1_VALUES_PS_T;
/* Layout must match the generic structure member by member. */
AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_PS_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PS_T, fExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, fExpectedMxCsrFlags);
816
817/*
818 * Test type #1.
819 * Packed double-precision.
820 */
821typedef struct BS3CPUINSTR4_TEST1_VALUES_PD_T
822{
823 X86YMMFLOATPDREG uSrc2; /**< Second source operand. */
824 X86YMMFLOATPDREG uSrc1; /**< uDstIn for SSE */
825 X86YMMFLOATPDREG uDstOut; /**< Destination output. */
826 uint32_t fMxCsrMask; /**< MXCSR exception mask to use. */
827 bool fDenormalsAreZero; /**< Whether DAZ (Denormals-Are-Zero) is used. */
828 bool fFlushToZero; /**< Whether Flush-To-Zero (FZ) is used. */
829 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
830 uint32_t fExpectedMxCsrFlags; /**< Expected MXCSR exception flags. */
831} BS3CPUINSTR4_TEST1_VALUES_PD_T;
832AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_PD_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
833AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
834AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
835AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
836AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
837AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
838AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
839AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
840AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_PD_T, fExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, fExpectedMxCsrFlags);
841
842/*
843 * Test type #1.
844 * Scalar quadruple-precision.
845 */
846typedef struct BS3CPUINSTR4_TEST1_VALUES_SQ_T
847{
848 X86YMMFLOATSQREG uSrc2; /**< Second source operand. */
849 X86YMMFLOATSQREG uSrc1; /**< uDstIn for SSE */
850 X86YMMFLOATSQREG uDstOut; /**< Destination output. */
851 uint32_t fMxCsrMask; /**< MXCSR exception mask to use. */
852 bool fDenormalsAreZero; /**< Whether DAZ (Denormals-Are-Zero) is used. */
853 bool fFlushToZero; /**< Whether Flush-To-Zero (FZ) is used. */
854 uint32_t fRoundingCtlMask; /**< Rounding control mask (X86_MXCSR_RC_MASK) to use. */
855 uint32_t fExpectedMxCsrFlags; /**< Expected MXCSR exception flags. */
856} BS3CPUINSTR4_TEST1_VALUES_SQ_T;
857AssertCompile(sizeof(BS3CPUINSTR4_TEST1_VALUES_SQ_T) == sizeof(BS3CPUINSTR4_TEST1_VALUES_T));
858AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uSrc2, BS3CPUINSTR4_TEST1_VALUES_T, uSrc2);
859AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uSrc1, BS3CPUINSTR4_TEST1_VALUES_T, uSrc1);
860AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, uDstOut, BS3CPUINSTR4_TEST1_VALUES_T, uDstOut);
861AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fMxCsrMask, BS3CPUINSTR4_TEST1_VALUES_T, fMxCsrMask);
862AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fDenormalsAreZero, BS3CPUINSTR4_TEST1_VALUES_T, fDenormalsAreZero);
863AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fFlushToZero, BS3CPUINSTR4_TEST1_VALUES_T, fFlushToZero);
864AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fRoundingCtlMask, BS3CPUINSTR4_TEST1_VALUES_T, fRoundingCtlMask);
865AssertCompileMembersSameSizeAndOffset(BS3CPUINSTR4_TEST1_VALUES_SQ_T, fExpectedMxCsrFlags, BS3CPUINSTR4_TEST1_VALUES_T, fExpectedMxCsrFlags);
866
867typedef struct BS3CPUINSTR4_TEST1_T
868{
869 FPFNBS3FAR pfnWorker; /**< Test function worker. */
870 uint8_t bAvxMisalignXcpt; /**< AVX misalignment exception. */
871 uint8_t enmRm; /**< R/M type. */
872 uint8_t enmType; /**< CPU instruction type (see T_XXX). */
873 uint8_t iRegDst; /**< Index of destination register, UINT8_MAX if N/A. */
874 uint8_t iRegSrc1; /**< Index of first source register, UINT8_MAX if N/A. */
875 uint8_t iRegSrc2; /**< Index of second source register, UINT8_MAX if N/A. */
876 uint8_t cValues; /**< Number of test values in @c paValues. */
877 BS3CPUINSTR4_TEST1_VALUES_T const BS3_FAR *paValues; /**< Test values. */
878} BS3CPUINSTR4_TEST1_T;
879
880typedef struct BS3CPUINSTR4_TEST1_MODE_T
881{
882 BS3CPUINSTR4_TEST1_T const BS3_FAR *paTests;
883 unsigned cTests;
884} BS3CPUINSTR4_TEST1_MODE_T;
885
/**
 * Initializer for a BS3CPUINSTR4_TEST1_MODE_T array (three entries).
 *
 * Each entry pairs one of the given tables with its element count, in the
 * order 16-bit, 32-bit, 64-bit.  The count is taken with RT_ELEMENTS, so the
 * arguments have to be the arrays themselves.
 */
#define BS3CPUINSTR4_TEST1_MODES_INIT(a_aTests16, a_aTests32, a_aTests64) \
    { \
        { a_aTests16, RT_ELEMENTS(a_aTests16) }, \
        { a_aTests32, RT_ELEMENTS(a_aTests32) }, \
        { a_aTests64, RT_ELEMENTS(a_aTests64) } \
    }
889
890typedef struct BS3CPUINSTR4_TEST1_CTX_T
891{
892 BS3CPUINSTR4_CONFIG_T const BS3_FAR *pConfig; /**< The test execution environment configuration. */
893 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest; /**< The instruction being tested. */
894 unsigned iVal; /**< Which iteration of the test value is this. */
895 const char BS3_FAR *pszMode; /**< The testing mode (e.g. real, protected, paged and permutations). */
896 PBS3TRAPFRAME pTrapFrame; /**< The exception (trap) frame. */
897 PBS3REGCTX pCtx; /**< The general-purpose register context. */
898 PBS3EXTCTX pExtCtx; /**< The extended (FPU) register context. */
899 PBS3EXTCTX pExtCtxOut; /**< The output extended (FPU) register context. */
900 uint8_t BS3_FAR *puMemOp; /**< The memory operand buffer. */
901 uint8_t BS3_FAR *puMemOpAlias; /**< The memory operand alias buffer for comparing result. */
902 uint8_t cbMemOp; /**< Size of the memory operand (and alias) buffer in bytes. */
903 uint8_t cbOperand; /**< Size of the instruction operand (8 for MMX, 16 for SSE etc). */
904 uint8_t cbInstr; /**< Size of the instruction opcode. */
905 uint8_t bXcptExpect; /**< The expected exception while/after executing the instruction. */
906 bool fSseInstr; /**< Whether this is an SSE instruction. */
907 bool fAvxInstr; /**< Whether this is an AVX instruction. */
908 uint16_t idTestStep; /**< The test iteration step. */
909} BS3CPUINSTR4_TEST1_CTX_T;
910/** Pointer to a test 1 context. */
911typedef BS3CPUINSTR4_TEST1_CTX_T BS3_FAR *PBS3CPUINSTR4_TEST1_CTX_T;
912
913
914/**
915 * Worker for bs3CpuInstrX_WorkerTestType1.
916 */
917static uint16_t bs3CpuInstr4_WorkerTestType1_Inner(uint8_t bMode, PBS3CPUINSTR4_TEST1_CTX_T pTestCtx,
918 PCBS3CPUINSTRX_CONFIG_SAVED_T pSavedCfg)
919{
920 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest = pTestCtx->pTest;
921 BS3CPUINSTR4_TEST1_VALUES_T const BS3_FAR *pValues = &pTestCtx->pTest->paValues[pTestCtx->iVal];
922 PBS3TRAPFRAME pTrapFrame = pTestCtx->pTrapFrame;
923 PBS3REGCTX pCtx = pTestCtx->pCtx;
924 PBS3EXTCTX pExtCtx = pTestCtx->pExtCtx;
925 PBS3EXTCTX pExtCtxOut = pTestCtx->pExtCtxOut;
926 uint8_t BS3_FAR *puMemOp = pTestCtx->puMemOp;
927 uint8_t BS3_FAR *puMemOpAlias = pTestCtx->puMemOpAlias;
928 uint8_t cbMemOp = pTestCtx->cbMemOp;
929 uint8_t const cbOperand = pTestCtx->cbOperand;
930 uint8_t const cbInstr = ((uint8_t const BS3_FAR *)(uintptr_t)pTestCtx->pTest->pfnWorker)[-1];
931 uint8_t bXcptExpect = pTestCtx->bXcptExpect;
932 uint8_t const bFpXcpt = pTestCtx->pConfig->fCr4OsXmmExcpt ? X86_XCPT_XF : X86_XCPT_UD;
933 bool const fFpFlagsExpect = RT_BOOL( (pValues->fExpectedMxCsrFlags
934 & (~pValues->fMxCsrMask >> X86_MXCSR_XCPT_MASK_SHIFT)) & X86_MXCSR_XCPT_FLAGS);
935 uint32_t uMxCsr;
936 X86YMMREG MemOpExpect;
937 uint16_t cErrors;
938
939 /*
940 * Set up the context and some expectations.
941 */
942 /* Destination. */
943 Bs3MemZero(&MemOpExpect, sizeof(MemOpExpect));
944 if (pTest->iRegDst == UINT8_MAX)
945 {
946 BS3_ASSERT(pTest->enmRm >= RM_MEM);
947 Bs3MemSet(puMemOpAlias, 0xcc, cbMemOp);
948 if (bXcptExpect == X86_XCPT_DB)
949 MemOpExpect.ymm = pValues->uDstOut.ymm;
950 else
951 Bs3MemSet(&MemOpExpect, 0xcc, sizeof(MemOpExpect));
952 }
953
954 /* Source #1 (/ destination for SSE). */
955 if (pTest->iRegSrc1 == UINT8_MAX)
956 {
957 BS3_ASSERT(pTest->enmRm >= RM_MEM);
958 Bs3MemCpy(puMemOpAlias, &pValues->uSrc1, cbMemOp);
959 if (pTest->iRegDst == UINT8_MAX)
960 BS3_ASSERT(pTestCtx->fSseInstr);
961 else
962 MemOpExpect.ymm = pValues->uSrc1.ymm;
963 }
964 else if (pTestCtx->fSseInstr)
965 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegSrc1, &pValues->uSrc1.ymm.DQWords.dqw0);
966 else
967 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegSrc1, &pValues->uSrc1.ymm, 32);
968
969 /* Source #2. */
970 if (pTest->iRegSrc2 == UINT8_MAX)
971 {
972 BS3_ASSERT(pTest->enmRm >= RM_MEM);
973 BS3_ASSERT(pTest->iRegDst != UINT8_MAX && pTest->iRegSrc1 != UINT8_MAX);
974 Bs3MemCpy(puMemOpAlias, &pValues->uSrc2, cbMemOp);
975 MemOpExpect.ymm = pValues->uSrc2.ymm;
976 }
977 else if (pTestCtx->fSseInstr)
978 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegSrc2, &pValues->uSrc2.ymm.DQWords.dqw0);
979 else
980 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegSrc2, &pValues->uSrc2.ymm, 32);
981
982 /* Memory pointer. */
983 if (pTest->enmRm >= RM_MEM)
984 {
985 BS3_ASSERT( pTest->iRegDst == UINT8_MAX
986 || pTest->iRegSrc1 == UINT8_MAX
987 || pTest->iRegSrc2 == UINT8_MAX);
988 Bs3RegCtxSetGrpSegFromCurPtr(pCtx, &pCtx->rbx, &pCtx->fs, puMemOp);
989 }
990
991 /* Setup MXCSR for the current test. */
992 uMxCsr = (pSavedCfg->uMxCsr & ~(X86_MXCSR_XCPT_MASK | X86_MXCSR_RC_MASK))
993 | (pValues->fMxCsrMask & X86_MXCSR_XCPT_MASK)
994 | (pValues->fRoundingCtlMask & X86_MXCSR_RC_MASK);
995 if ( pValues->fDenormalsAreZero
996 && g_fMxCsrDazSupported)
997 uMxCsr |= X86_MXCSR_DAZ;
998 if (pValues->fFlushToZero)
999 uMxCsr |= X86_MXCSR_FZ;
1000 Bs3ExtCtxSetMxCsr(pExtCtx, uMxCsr);
1001
1002 /*
1003 * Prepare globals and execute.
1004 */
1005 g_uBs3TrapEipHint = pCtx->rip.u32;
1006 if ( bXcptExpect == X86_XCPT_DB
1007 && !fFpFlagsExpect)
1008 g_uBs3TrapEipHint += cbInstr + 1;
1009 Bs3TrapSetJmpAndRestoreWithExtCtxAndRm(pCtx, pExtCtx, pTrapFrame, pExtCtxOut);
1010
1011 /*
1012 * Check the result.
1013 */
1014 cErrors = Bs3TestSubErrorCount();
1015 if ( bXcptExpect == X86_XCPT_DB
1016 && pTest->iRegDst != UINT8_MAX)
1017 {
1018 if (pTestCtx->fSseInstr)
1019 Bs3ExtCtxSetXmm(pExtCtx, pTest->iRegDst, &pValues->uDstOut.ymm.DQWords.dqw0);
1020 else
1021 Bs3ExtCtxSetYmm(pExtCtx, pTest->iRegDst, &pValues->uDstOut.ymm, cbOperand);
1022 }
1023#if defined(DEBUG_aeichner) /** @todo Necessary kludge on a i7-1068NG7. */
1024 if ( pExtCtx->enmMethod == BS3EXTCTXMETHOD_XSAVE
1025 && pExtCtx->Ctx.x.Hdr.bmXState == 0x7
1026 && pExtCtxOut->Ctx.x.Hdr.bmXState == 0x3)
1027 pExtCtxOut->Ctx.x.Hdr.bmXState = 0x7;
1028#endif
1029 if (bXcptExpect == X86_XCPT_DB)
1030 Bs3ExtCtxSetMxCsr(pExtCtx, (uMxCsr & ~X86_MXCSR_XCPT_FLAGS)
1031 | (pValues->fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS));
1032 Bs3TestCheckExtCtx(pExtCtxOut, pExtCtx, 0 /*fFlags*/, pTestCtx->pszMode, pTestCtx->idTestStep);
1033
1034 if (bXcptExpect == X86_XCPT_DB)
1035 {
1036 uint32_t const fMxCsrXcptFlags = Bs3ExtCtxGetMxCsr(pExtCtxOut) & X86_MXCSR_XCPT_FLAGS;
1037
1038 /* Check if the SIMD FP exception flags (or lack of) are as expected. */
1039 if (fMxCsrXcptFlags != (pValues->fExpectedMxCsrFlags & X86_MXCSR_XCPT_FLAGS))
1040 {
1041 char szGotBuf[BS3_FP_XCPT_NAMES_MAXLEN];
1042 char szExpectBuf[BS3_FP_XCPT_NAMES_MAXLEN];
1043 bs3CpuInstr4GetXcptFlags(&szExpectBuf[0], sizeof(szExpectBuf), pValues->fExpectedMxCsrFlags);
1044 bs3CpuInstr4GetXcptFlags(&szGotBuf[0], sizeof(szGotBuf), fMxCsrXcptFlags);
1045 Bs3TestFailedF("Expected floating-point xcpt flags%s, got%s", szExpectBuf, szGotBuf);
1046 }
1047
1048 /* Check if the SIMD FP exception (or lack of) is as expected. */
1049 if (fFpFlagsExpect)
1050 {
1051 if (pTrapFrame->bXcpt == bFpXcpt)
1052 { /* likely */ }
1053 else
1054 Bs3TestFailedF("Expected floating-point xcpt %s, got %s", bs3CpuInstr4XcptName(bFpXcpt),
1055 bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
1056 }
1057 else if (pTrapFrame->bXcpt == X86_XCPT_DB)
1058 { /* likely */ }
1059 else
1060 Bs3TestFailedF("Expected no xcpt, got %s", bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
1061 }
1062 /* Check if non-FP exception is as expected. */
1063 else if (pTrapFrame->bXcpt != bXcptExpect)
1064 Bs3TestFailedF("Expected xcpt %s, got %s", bs3CpuInstr4XcptName(bXcptExpect), bs3CpuInstr4XcptName(pTrapFrame->bXcpt));
1065
1066 /* Kludge! Looks like EFLAGS.AC is cleared when raising #GP in real mode on the 10980XE. WEIRD! */
1067 if (bMode == BS3_MODE_RM && (pCtx->rflags.u32 & X86_EFL_AC))
1068 {
1069 if (pTrapFrame->Ctx.rflags.u32 & X86_EFL_AC)
1070 Bs3TestFailedF("Expected EFLAGS.AC to be cleared (bXcpt=%d)", pTrapFrame->bXcpt);
1071 pTrapFrame->Ctx.rflags.u32 |= X86_EFL_AC;
1072 }
1073 if (bXcptExpect == X86_XCPT_PF)
1074 pCtx->cr2.u = (uintptr_t)puMemOp;
1075 Bs3TestCheckRegCtxEx(&pTrapFrame->Ctx, pCtx, bXcptExpect == X86_XCPT_DB && !fFpFlagsExpect ? cbInstr + 1 : 0, 0 /*cbSpAdjust*/,
1076 (bXcptExpect == X86_XCPT_DB && !fFpFlagsExpect) || BS3_MODE_IS_16BIT_SYS(bMode) ? 0 : X86_EFL_RF,
1077 pTestCtx->pszMode, pTestCtx->idTestStep);
1078 pCtx->cr2.u = 0;
1079
1080 if ( pTest->enmRm >= RM_MEM
1081 && Bs3MemCmp(puMemOpAlias, &MemOpExpect, cbMemOp) != 0)
1082 Bs3TestFailedF("Expected uMemOp %.*Rhxs, got %.*Rhxs", cbMemOp, &MemOpExpect, cbMemOp, puMemOpAlias);
1083
1084 return cErrors;
1085}
1086
1087
1088/**
1089 * Test type #1 worker.
1090 */
1091static uint8_t bs3CpuInstrX_WorkerTestType1(uint8_t bMode, BS3CPUINSTR4_TEST1_T const BS3_FAR *paTests, unsigned cTests,
1092 PCBS3CPUINSTR4_CONFIG_T paConfigs, unsigned cConfigs)
1093{
1094 BS3REGCTX Ctx;
1095 BS3TRAPFRAME TrapFrame;
1096 const char BS3_FAR * const pszMode = Bs3GetModeName(bMode);
1097 uint8_t bRing = BS3_MODE_IS_V86(bMode) ? 3 : 0;
1098 uint8_t BS3_FAR *pbBuf = g_pbBuf;
1099 uint32_t cbBuf = g_cbBuf;
1100 PBS3EXTCTX pExtCtxOut;
1101 PBS3EXTCTX pExtCtx = bs3CpuInstrXAllocExtCtxs(&pExtCtxOut);
1102 if (pExtCtx)
1103 { /* likely */ }
1104 else
1105 return 0;
1106 if (pExtCtx->enmMethod != BS3EXTCTXMETHOD_ANCIENT)
1107 { /* likely */ }
1108 else
1109 {
1110 Bs3TestPrintf("Skipped due to ancient FPU state format\n");
1111 return 0;
1112 }
1113
1114 /* Ensure the structures are allocated before we sample the stack pointer. */
1115 Bs3MemSet(&Ctx, 0, sizeof(Ctx));
1116 Bs3MemSet(&TrapFrame, 0, sizeof(TrapFrame));
1117
1118 /*
1119 * Create test context.
1120 */
1121 pbBuf = bs3CpuInstrXBufSetup(pbBuf, &cbBuf, bMode);
1122 Bs3RegCtxSaveForMode(&Ctx, bMode, 1024);
1123 bs3CpuInstr4SetupSseAndAvx(&Ctx, pExtCtx);
1124
1125 /*
1126 * Run the tests in all rings since alignment issues may behave
1127 * differently in ring-3 compared to ring-0.
1128 */
1129 for (;;)
1130 {
1131 unsigned fPf = 0;
1132 do
1133 {
1134 unsigned iCfg;
1135 for (iCfg = 0; iCfg < cConfigs; iCfg++)
1136 {
1137 unsigned iTest;
1138 BS3CPUINSTRX_CONFIG_SAVED_T SavedCfg;
1139 if (!bs3CpuInstr4ConfigReconfigure(&SavedCfg, &Ctx, pExtCtx, &paConfigs[iCfg], bMode))
1140 continue; /* unsupported config */
1141
1142 /*
1143 * Iterate the tests.
1144 */
1145 for (iTest = 0; iTest < cTests; iTest++)
1146 {
1147 BS3CPUINSTR4_TEST1_T const BS3_FAR *pTest = &paTests[iTest];
1148 unsigned const cValues = pTest->cValues;
1149 bool const fSseInstr = pTest->enmType >= T_SSE && pTest->enmType < T_AVX_128;
1150 bool const fAvxInstr = pTest->enmType >= T_AVX_128;
1151 uint8_t const cbOperand = pTest->enmType < T_128BITS ? 64/8
1152 : pTest->enmType < T_256BITS ? 128/8 : 256/8;
1153 uint8_t const cbMemOp = bs3CpuInstrXMemOpSize(cbOperand, pTest->enmRm);
1154 uint8_t const cbAlign = cbMemOp;
1155 uint8_t BS3_FAR *puMemOp = bs3CpuInstrXBufForOperand(pbBuf, cbBuf, cbMemOp, cbAlign, &paConfigs[iCfg], fPf);
1156 uint8_t *puMemOpAlias = &g_pbBufAlias[(uintptr_t)puMemOp - (uintptr_t)pbBuf];
1157 uint8_t bXcptExpect = !g_afTypeSupports[pTest->enmType] ? X86_XCPT_UD
1158 : fSseInstr ? paConfigs[iCfg].bXcptSse
1159 : BS3_MODE_IS_RM_OR_V86(bMode) ? X86_XCPT_UD : paConfigs[iCfg].bXcptAvx;
1160 uint16_t idTestStep = bRing * 10000 + iCfg * 100 + iTest * 10;
1161 unsigned cRecompRuns = 0;
1162 unsigned const cMaxRecompRuns = g_cBs3ThresholdNativeRecompiler + cValues;
1163 unsigned iVal;
1164
1165 /* If testing unaligned memory accesses (or #PF), skip register-only tests. This
1166 allows setting bXcptSse and bXcptAvx to reflect the misaligned exceptions. */
1167 if ( (pTest->enmRm == RM_REG || pTest->enmRm == RM_MEM8)
1168 && (!paConfigs[iCfg].fAligned || paConfigs[iCfg].fAlignCheck || fPf))
1169 continue;
1170
1171 /* #AC is only raised in ring-3. */
1172 if (bXcptExpect == X86_XCPT_AC)
1173 {
1174 if (bRing != 3)
1175 bXcptExpect = X86_XCPT_DB;
1176 else if (fAvxInstr)
1177 bXcptExpect = pTest->bAvxMisalignXcpt; /* they generally don't raise #AC */
1178 }
1179
1180 if (fPf && bXcptExpect == X86_XCPT_DB)
1181 bXcptExpect = X86_XCPT_PF;
1182
1183 Bs3RegCtxSetRipCsFromCurPtr(&Ctx, pTest->pfnWorker);
1184
1185 /*
1186 * Iterate the test values and do the actual testing.
1187 */
1188 while (cRecompRuns < cMaxRecompRuns)
1189 {
1190 for (iVal = 0; iVal < cValues; iVal++, idTestStep++, cRecompRuns++)
1191 {
1192 uint16_t cErrors;
1193 BS3CPUINSTR4_TEST1_CTX_T TestCtx;
1194 if (BS3_SKIPIT(bRing, iCfg, iTest, iVal, 0))
1195 continue;
1196
1197 /*
1198 * Setup the test instruction context and pass it to the worker.
1199 * A few of these can be figured out by the worker but initializing
1200 * it outside the inner most loop is more optimal.
1201 */
1202 TestCtx.pConfig = &paConfigs[iCfg];
1203 TestCtx.pTest = pTest;
1204 TestCtx.iVal = iVal;
1205 TestCtx.pszMode = pszMode;
1206 TestCtx.pTrapFrame = &TrapFrame;
1207 TestCtx.pCtx = &Ctx;
1208 TestCtx.pExtCtx = pExtCtx;
1209 TestCtx.pExtCtxOut = pExtCtxOut;
1210 TestCtx.puMemOp = (uint8_t *)puMemOp;
1211 TestCtx.puMemOpAlias = puMemOpAlias;
1212 TestCtx.cbMemOp = cbMemOp;
1213 TestCtx.cbOperand = cbOperand;
1214 TestCtx.bXcptExpect = bXcptExpect;
1215 TestCtx.fSseInstr = fSseInstr;
1216 TestCtx.fAvxInstr = fAvxInstr;
1217 TestCtx.idTestStep = idTestStep;
1218 cErrors = bs3CpuInstr4_WorkerTestType1_Inner(bMode, &TestCtx, &SavedCfg);
1219 if (cErrors != Bs3TestSubErrorCount())
1220 {
1221 if (paConfigs[iCfg].fAligned)
1222 Bs3TestFailedF("%s: ring-%d/cfg#%u/test#%u/value#%u failed (bXcptExpect=%u %s)",
1223 Bs3GetModeName(bMode), bRing, iCfg, iTest, iVal,
1224 bXcptExpect, bs3CpuInstr4XcptName(bXcptExpect));
1225 else
1226 Bs3TestFailedF("%s: ring-%d/cfg#%u/test#%u/value#%u failed (bXcptExpect=%u %s, puMemOp=%p, EFLAGS=%#RX32, CR0=%#RX32)",
1227 Bs3GetModeName(bMode), bRing, iCfg, iTest, iVal,
1228 bXcptExpect, bs3CpuInstr4XcptName(bXcptExpect), puMemOp,
1229 TrapFrame.Ctx.rflags.u32, TrapFrame.Ctx.cr0);
1230 Bs3TestPrintf("\n");
1231 }
1232 }
1233 }
1234 }
1235 bs3CpuInstrXConfigRestore(&SavedCfg, &Ctx, pExtCtx);
1236 }
1237 } while (fPf++ == 0 && BS3_MODE_IS_PAGED(bMode));
1238
1239 /*
1240 * Next ring.
1241 */
1242 bRing++;
1243 if (bRing > 3 || bMode == BS3_MODE_RM)
1244 break;
1245 Bs3RegCtxConvertToRingX(&Ctx, bRing);
1246 }
1247
1248 /*
1249 * Cleanup.
1250 */
1251 bs3CpuInstrXBufCleanup(pbBuf, cbBuf, bMode);
1252 bs3CpuInstrXFreeExtCtxs(pExtCtx, pExtCtxOut);
1253 return 0;
1254}
1255
1256
1257/*
1258 * [V]ADDPS.
1259 */
1260BS3_DECL_FAR(uint8_t) bs3CpuInstrX_v_addps(uint8_t bMode)
1261{
1262 static BS3CPUINSTR4_TEST1_VALUES_PS_T const s_aValues[] =
1263 {
1264 /* 0*/{ { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1265 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1266 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1267 /*mask */ X86_MXCSR_XCPT_MASK,
1268 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1269 /*flags */ 0 },
1270 /* 1*/{ { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1271 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1272 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0) } },
1273 /*mask */ ~X86_MXCSR_XCPT_MASK,
1274 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1275 /*flags */ 0 },
1276 /* 2*/{ { /*src2 */ { BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0)} },
1277 { /*src1 */ { BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0)} },
1278 { /* => */ { BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0)} },
1279 /*mask */ ~X86_MXCSR_IM,
1280 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1281 /*flags */ X86_MXCSR_IE },
1282 /* 3*/{ { /*src2 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0)} },
1283 { /*src1 */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_INF(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0)} },
1284 { /* => */ { BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_QNAN(1), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0), BS3_FP32_ZERO(0)} },
1285 /*mask */ X86_MXCSR_XCPT_MASK,
1286 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1287 /*flags */ X86_MXCSR_IE },
1288 };
1289
1290 static BS3CPUINSTR4_TEST1_T const s_aTests16[] =
1291 {
1292 { bs3CpuInstrX_addps_XMM1_XMM2_icebp_c16, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1293 { bs3CpuInstrX_addps_XMM1_FSxBX_icebp_c16, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1294 };
1295 static BS3CPUINSTR4_TEST1_T const s_aTests32[] =
1296 {
1297 { bs3CpuInstrX_addps_XMM1_XMM2_icebp_c32, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1298 { bs3CpuInstrX_addps_XMM1_FSxBX_icebp_c32, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1299 };
1300 static BS3CPUINSTR4_TEST1_T const s_aTests64[] =
1301 {
1302 { bs3CpuInstrX_addps_XMM1_XMM2_icebp_c64, 255, RM_REG, T_SSE, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1303 { bs3CpuInstrX_addps_XMM1_FSxBX_icebp_c64, 255, RM_MEM, T_SSE, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1304 { bs3CpuInstrX_addps_XMM8_XMM9_icebp_c64, 255, RM_REG, T_SSE, 8, 8, 9, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1305 { bs3CpuInstrX_addps_XMM8_FSxBX_icebp_c64, 255, RM_MEM, T_SSE, 8, 8, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1306 };
1307
1308 static BS3CPUINSTR4_TEST1_MODE_T const s_aTests[3] = BS3CPUINSTR4_TEST1_MODES_INIT(s_aTests16, s_aTests32, s_aTests64);
1309 unsigned const iTest = BS3CPUINSTR4_TEST_MODES_INDEX(bMode);
1310 return bs3CpuInstrX_WorkerTestType1(bMode, s_aTests[iTest].paTests, s_aTests[iTest].cTests,
1311 g_aXcptConfig1, RT_ELEMENTS(g_aXcptConfig1));
1312}
1313
1314
1315/*
1316 * [V]ADDPD.
1317 */
1318BS3_DECL_FAR(uint8_t) bs3CpuInstrX_v_addpd(uint8_t bMode)
1319{
1320 static BS3CPUINSTR4_TEST1_VALUES_PD_T const s_aValues[] =
1321 {
1322 /*
1323 * Zero.
1324 */
1325 /* 0*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1326 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1327 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1328 /*mask */ X86_MXCSR_XCPT_MASK,
1329 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1330 /*flags */ 0 },
1331 /* 1*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1332 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1333 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1334 /*mask */ ~X86_MXCSR_XCPT_MASK,
1335 /*daz,fz,rc*/ 0, 1, X86_MXCSR_RC_NEAREST,
1336 /*flags */ 0 },
1337 /* 2*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1338 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1339 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1340 /*mask */ X86_MXCSR_XCPT_MASK,
1341 /*daz,fz,rc*/ 1, 0, X86_MXCSR_RC_DOWN,
1342 /*flags */ 0 },
1343 /* 3*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1344 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1345 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1346 /*mask */ ~X86_MXCSR_XCPT_MASK,
1347 /*daz,fz,rc*/ 1, 1, X86_MXCSR_RC_UP,
1348 /*flags */ 0 },
1349 /* 4*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1350 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1351 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1352 /*mask */ X86_MXCSR_XCPT_MASK,
1353 /*daz,fz,rc*/ 1, 1, X86_MXCSR_RC_ZERO,
1354 /*flags */ 0 },
1355 /*
1356 * Infinity.
1357 */
1358#if 0
1359 /* 5*/{ { /*src2 */ { BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1360 { /*src1 */ { BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1361 { /* => */ { BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1362 /*mask */ ~X86_MXCSR_IM,
1363 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1364 /*flags */ X86_MXCSR_IE },
1365 /* 6*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1366 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1367 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1368 /*mask */ ~X86_MXCSR_IM,
1369 /*daz,fz,rc*/ 0, 1, X86_MXCSR_RC_DOWN,
1370 /*flags */ X86_MXCSR_IE },
1371 /* 7*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1372 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1373 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1374 /*mask */ ~X86_MXCSR_IM,
1375 /*daz,fz,rc*/ 1, 1, X86_MXCSR_RC_UP,
1376 /*flags */ X86_MXCSR_IE },
1377#endif
1378 /* 8*/{ { /*src2 */ { BS3_FP64_INF(0), BS3_FP64_INF(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1379 { /*src1 */ { BS3_FP64_INF(1), BS3_FP64_INF(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1380 { /* => */ { BS3_FP64_QNAN(1), BS3_FP64_QNAN(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1381 /*mask */ X86_MXCSR_XCPT_MASK,
1382 /*daz,fz,rc*/ 1, 0, X86_MXCSR_RC_ZERO,
1383 /*flags */ X86_MXCSR_IE },
1384 /*
1385 * Overflow.
1386 */
1387#if 0
1388 /* 9*/{ { /*src2 */ { BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1389 { /*src1 */ { BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1390 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1391 /*mask */ ~X86_MXCSR_XCPT_MASK,
1392 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1393 /*flags */ X86_MXCSR_OE },
1394 /*10*/{ { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1395 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1396 { /* => */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1397 /*mask */ ~X86_MXCSR_XCPT_MASK,
1398 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1399 /*flags */ X86_MXCSR_OE },
1400#endif
1401 /*11*/{ { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MIN(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1402 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MIN(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1403 { /* => */ { BS3_FP64_INF(0), BS3_FP64_VAL(1, 0, 2), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1404 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1405 /*daz,fz,rc*/ 1, 1, X86_MXCSR_RC_NEAREST,
1406 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE },
1407 /*12*/{ { /*src2 */ { BS3_FP64_NORMAL_MIN(1), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1408 { /*src1 */ { BS3_FP64_NORMAL_MIN(1), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1409 { /* => */ { BS3_FP64_VAL(1, 0, 2), BS3_FP64_NORMAL_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1410 /*mask */ X86_MXCSR_OM | X86_MXCSR_PM,
1411 /*daz,fz,rc*/ 1, 1, X86_MXCSR_RC_ZERO,
1412 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE },
1413 /*13*/{ { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1414 { /*src1 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1415 { /* => */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1416 /*mask */ X86_MXCSR_XCPT_MASK,
1417 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_ZERO,
1418 /*flags */ X86_MXCSR_OE | X86_MXCSR_PE },
1419 /*
1420 * Normals.
1421 */
1422 /*14*/{ { /*src2 */ { BS3_FP64_NORMAL_MAX(0), BS3_FP64_NORMAL_VAL_1(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1423 { /*src1 */ { BS3_FP64_NORMAL_MAX(1), BS3_FP64_NORMAL_VAL_1(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1424 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1425 /*mask */ ~X86_MXCSR_XCPT_MASK,
1426 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1427 /*flags */ 0 },
1428 /*15*/{ { /*src2 */ { BS3_FP64_VAL(0, 0, 0x409)/*1024*/, BS3_FP64_VAL(0, 0xb800000000000, 0x404)/*55*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1429 { /*src1 */ { BS3_FP64_VAL(0, 0, 0x408)/* 512*/, BS3_FP64_VAL(0, 0xc000000000000, 0x401)/* 7*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1430 { /* => */ { BS3_FP64_VAL(0, 0x8000000000000, 0x409)/*1536*/, BS3_FP64_VAL(0, 0xf000000000000, 0x404)/*62*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1431 /*mask */ X86_MXCSR_XCPT_MASK,
1432 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1433 /*flags */ 0 },
1434 /*16*/{ { /*src2 */ { BS3_FP64_VAL(0, 0x26580b4800000, 0x41d)/* 1234567890*/, BS3_FP64_VAL(0, 0xd6f3458800000, 0x41c)/*987654321*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1435 { /*src1 */ { BS3_FP64_VAL(1, 0x26580b4800000, 0x41d)/*-1234567890*/, BS3_FP64_VAL(1, 0x9000000000000, 0x405)/* -100*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1436 { /* => */ { BS3_FP64_ZERO(0), BS3_FP64_VAL(0, 0xd6f3426800000, 0x41c)/*987654221*/, BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1437 /*mask */ ~X86_MXCSR_XCPT_MASK,
1438 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1439 /*flags */ 0 },
1440 /*17*/{ { /*src2 */ { BS3_FP64_VAL(0, BS3_FP64_FRACTION_NORMAL_MAX - 1, BS3_FP64_EXP_BIAS + BS3_FP64_FRACTION_BITS), BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1441 { /*src1 */ { BS3_FP64_ONE(0), BS3_FP64_ONE(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1442 { /* => */ { BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_VAL(0, BS3_FP64_FRACTION_NORMAL_MAX - 1, BS3_FP64_EXP_BIAS + BS3_FP64_FRACTION_BITS), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1443 /*mask */ X86_MXCSR_XCPT_MASK,
1444 /*daz,fz,rc*/ 1, 1, X86_MXCSR_RC_ZERO,
1445 /*flags */ 0 },
1446 /*18*/{ { /*src2 */ { BS3_FP64_NORMAL_SAFE_INT_MAX(0), BS3_FP64_ONE(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1447 { /*src1 */ { BS3_FP64_ONE(0), BS3_FP64_NORMAL_SAFE_INT_MAX(1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1448 { /* => */ { BS3_FP64_VAL(0, 0, BS3_FP64_EXP_BIAS + BS3_FP64_FRACTION_BITS + 1), BS3_FP64_VAL(1, 0, BS3_FP64_EXP_BIAS + BS3_FP64_FRACTION_BITS + 1), BS3_FP64_ZERO(0), BS3_FP64_ZERO(0) } },
1449 /*mask */ ~X86_MXCSR_XCPT_MASK,
1450 /*daz,fz,rc*/ 0, 0, X86_MXCSR_RC_NEAREST,
1451 /*flags */ 0 },
1452 };
1453
1454 static BS3CPUINSTR4_TEST1_T const s_aTests16[] =
1455 {
1456 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c16, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1457 { bs3CpuInstrX_addpd_XMM1_FSxBX_icebp_c16, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1458
1459 { bs3CpuInstrX_vaddpd_XMM1_XMM2_XMM3_icebp_c16, X86_XCPT_GP, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1460 { bs3CpuInstrX_vaddpd_XMM1_XMM2_FSxBX_icebp_c16, X86_XCPT_GP, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1461
1462 { bs3CpuInstrX_vaddpd_YMM1_YMM2_YMM3_icebp_c16, X86_XCPT_GP, RM_REG, T_AVX2_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1463 { bs3CpuInstrX_vaddpd_YMM1_YMM2_FSxBX_icebp_c16, X86_XCPT_GP, RM_MEM, T_AVX2_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1464 };
1465 static BS3CPUINSTR4_TEST1_T const s_aTests32[] =
1466 {
1467 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c32, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1468 { bs3CpuInstrX_addpd_XMM1_FSxBX_icebp_c32, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1469
1470 { bs3CpuInstrX_vaddpd_XMM1_XMM2_XMM3_icebp_c32, X86_XCPT_GP, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1471 { bs3CpuInstrX_vaddpd_XMM1_XMM2_FSxBX_icebp_c32, X86_XCPT_GP, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1472
1473 { bs3CpuInstrX_vaddpd_YMM1_YMM2_YMM3_icebp_c32, X86_XCPT_GP, RM_REG, T_AVX2_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1474 { bs3CpuInstrX_vaddpd_YMM1_YMM2_FSxBX_icebp_c32, X86_XCPT_GP, RM_MEM, T_AVX2_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1475 };
1476 static BS3CPUINSTR4_TEST1_T const s_aTests64[] =
1477 {
1478 { bs3CpuInstrX_addpd_XMM1_XMM2_icebp_c64, 255, RM_REG, T_SSE2, 1, 1, 2, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1479 { bs3CpuInstrX_addpd_XMM1_FSxBX_icebp_c64, 255, RM_MEM, T_SSE2, 1, 1, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1480
1481 { bs3CpuInstrX_vaddpd_XMM1_XMM2_XMM3_icebp_c64, X86_XCPT_GP, RM_REG, T_AVX_128, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1482 { bs3CpuInstrX_vaddpd_XMM1_XMM2_FSxBX_icebp_c64, X86_XCPT_GP, RM_MEM, T_AVX_128, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1483
1484 { bs3CpuInstrX_vaddpd_YMM1_YMM2_YMM3_icebp_c64, X86_XCPT_GP, RM_REG, T_AVX2_256, 1, 2, 3, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1485 { bs3CpuInstrX_vaddpd_YMM1_YMM2_FSxBX_icebp_c64, X86_XCPT_GP, RM_MEM, T_AVX2_256, 1, 2, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1486
1487 { bs3CpuInstrX_addpd_XMM8_XMM9_icebp_c64, 255, RM_REG, T_SSE2, 8, 8, 9, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1488 { bs3CpuInstrX_addpd_XMM8_FSxBX_icebp_c64, 255, RM_MEM, T_SSE2, 8, 8, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1489
1490 { bs3CpuInstrX_vaddpd_YMM8_YMM9_YMM10_icebp_c64, X86_XCPT_GP, RM_REG, T_AVX_256, 8, 9, 10, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1491// { bs3CpuInstrX_vaddpd_YMM8_YMM9_FSxBX_icebp_c64, X86_XCPT_GP, RM_MEM, T_AVX_256, 8, 9, 255, RT_ELEMENTS(s_aValues), (BS3CPUINSTR4_TEST1_VALUES_T *)s_aValues },
1492
1493 };
1494
1495 static BS3CPUINSTR4_TEST1_MODE_T const s_aTests[3] = BS3CPUINSTR4_TEST1_MODES_INIT(s_aTests16, s_aTests32, s_aTests64);
1496 unsigned const iTest = BS3CPUINSTR4_TEST_MODES_INDEX(bMode);
1497 return bs3CpuInstrX_WorkerTestType1(bMode, s_aTests[iTest].paTests, s_aTests[iTest].cTests,
1498 g_aXcptConfig1, RT_ELEMENTS(g_aXcptConfig1));
1499}
1500
1501
1502/**
1503 * The 32-bit protected mode main function.
1504 *
1505 * The tests a driven by 32-bit test drivers, even for real-mode tests (though
1506 * we'll switch between PE32 and RM for each test step we perform). Given that
1507 * we test SSE and AVX here, we don't need to worry about 286 or 8086.
1508 *
1509 * Some extra steps needs to be taken to properly handle extended state in LM64
1510 * (Bs3ExtCtxRestoreEx & Bs3ExtCtxSaveEx) and when testing real mode
1511 * (Bs3RegCtxSaveForMode & Bs3TrapSetJmpAndRestoreWithExtCtxAndRm).
1512 */
1513BS3_DECL(void) Main_pe32()
1514{
1515 static const BS3TESTMODEBYONEENTRY g_aTests[] =
1516 {
1517#if 1 /*ndef DEBUG_bird*/
1518# define ALL_TESTS
1519#endif
1520#if defined(ALL_TESTS)
1521 { "[v]addps", bs3CpuInstrX_v_addps, 0 },
1522 { "[v]addpd", bs3CpuInstrX_v_addpd, 0 },
1523#endif
1524 };
1525 Bs3TestInit("bs3-cpu-instr-4");
1526
1527 /*
1528 * Initialize globals.
1529 */
1530 if (g_uBs3CpuDetected & BS3CPU_F_CPUID)
1531 {
1532 uint32_t fEbx, fEcx, fEdx;
1533 ASMCpuIdExSlow(1, 0, 0, 0, NULL, NULL, &fEcx, &fEdx);
1534 g_afTypeSupports[T_MMX] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_MMX);
1535 g_afTypeSupports[T_MMX_SSE] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE);
1536 g_afTypeSupports[T_MMX_SSE2] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE2);
1537 g_afTypeSupports[T_MMX_SSSE3] = RT_BOOL(fEdx & X86_CPUID_FEATURE_ECX_SSSE3);
1538 g_afTypeSupports[T_SSE] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE);
1539 g_afTypeSupports[T_SSE2] = RT_BOOL(fEdx & X86_CPUID_FEATURE_EDX_SSE2);
1540 g_afTypeSupports[T_SSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE3);
1541 g_afTypeSupports[T_SSSE3] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSSE3);
1542 g_afTypeSupports[T_SSE4_1] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE4_1);
1543 g_afTypeSupports[T_SSE4_2] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_SSE4_2);
1544 g_afTypeSupports[T_PCLMUL] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_PCLMUL);
1545 g_afTypeSupports[T_AVX_128] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1546 g_afTypeSupports[T_AVX_256] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1547 g_afTypeSupports[T_AVX_PCLMUL] = RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_PCLMUL)
1548 && RT_BOOL(fEcx & X86_CPUID_FEATURE_ECX_AVX);
1549
1550 if (ASMCpuId_EAX(0) >= 7)
1551 {
1552 ASMCpuIdExSlow(7, 0, 0, 0, NULL, &fEbx, NULL, NULL);
1553 g_afTypeSupports[T_AVX2_128] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2);
1554 g_afTypeSupports[T_AVX2_256] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_AVX2);
1555 g_afTypeSupports[T_SHA] = RT_BOOL(fEbx & X86_CPUID_STEXT_FEATURE_EBX_SHA);
1556 }
1557
1558 if (g_uBs3CpuDetected & BS3CPU_F_CPUID_EXT_LEAVES)
1559 {
1560 ASMCpuIdExSlow(UINT32_C(0x80000001), 0, 0, 0, NULL, NULL, &fEcx, &fEdx);
1561 g_afTypeSupports[T_AXMMX] = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_EDX_AXMMX);
1562 g_afTypeSupports[T_SSE4A] = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_ECX_SSE4A);
1563 g_fAmdMisalignedSse = RT_BOOL(fEcx & X86_CPUID_AMD_FEATURE_ECX_MISALNSSE);
1564 }
1565 g_afTypeSupports[T_AXMMX_OR_SSE] = g_afTypeSupports[T_AXMMX] || g_afTypeSupports[T_SSE];
1566
1567 /*
1568 * Figure out FPU save/restore method and support for DAZ bit.
1569 */
1570 {
1571 /** @todo Add bs3kit API to just get the ext ctx method without needing to
1572 * alloc/free a context. Replicating the logic in the bs3kit here, though
1573 * doable, runs a risk of not updating this when the other logic is
1574 * changed. */
1575 uint64_t fFlags;
1576 uint16_t const cbExtCtx = Bs3ExtCtxGetSize(&fFlags);
1577 PBS3EXTCTX pExtCtx = Bs3MemAlloc(BS3MEMKIND_TILED, cbExtCtx);
1578 if (pExtCtx)
1579 {
1580 Bs3ExtCtxInit(pExtCtx, cbExtCtx, fFlags);
1581 g_enmExtCtxMethod = pExtCtx->enmMethod;
1582 if ( ( (g_enmExtCtxMethod == BS3EXTCTXMETHOD_XSAVE
1583 && (pExtCtx->Ctx.x.x87.MXCSR_MASK & X86_MXCSR_DAZ)))
1584 || ( (g_enmExtCtxMethod == BS3EXTCTXMETHOD_FXSAVE)
1585 && (pExtCtx->Ctx.x87.MXCSR_MASK & X86_MXCSR_DAZ)))
1586 g_fMxCsrDazSupported = true;
1587 }
1588 else
1589 Bs3TestFailedF("Failed to allocate %u bytes for extended CPU context (tiled addressable)\n", cbExtCtx);
1590 }
1591
1592 /*
1593 * Allocate a buffer for testing.
1594 */
1595 g_cbBuf = X86_PAGE_SIZE * 4;
1596 g_pbBuf = (uint8_t BS3_FAR *)Bs3MemAlloc(BS3MEMKIND_REAL, g_cbBuf);
1597 if (g_pbBuf)
1598 {
1599 g_pbBufAliasAlloc = (uint8_t BS3_FAR *)Bs3MemAlloc(BS3MEMKIND_TILED, g_cbBuf);
1600 if (g_pbBufAliasAlloc)
1601 {
1602 /*
1603 * Do the tests.
1604 */
1605 Bs3TestDoModesByOne_pe32(g_aTests, RT_ELEMENTS(g_aTests), BS3TESTMODEBYONEENTRY_F_REAL_MODE_READY);
1606#ifdef BS3_SKIPIT_DO_SKIP
1607 bs3CpuInstrX_ShowTallies();
1608#endif
1609 }
1610 else
1611 Bs3TestFailed("Failed to allocate 16K alias buffer (tiled addressable)");
1612 }
1613 else
1614 Bs3TestFailed("Failed to allocate 16K buffer (real mode addressable)");
1615 }
1616
1617 Bs3TestTerm();
1618}
1619
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette