VirtualBox

source: vbox/trunk/src/VBox/VMM/testcase/tstIEMAImpl.cpp@103099

Last change on this file since 103099 was 103099, checked in by vboxsync, 13 months ago

tstIEMAImpl,VMM/IEM: Regenerated integer tests on intel, increasing the number to 1024 entries per test. Fixed some issues. bugref:9898 bugref:10591

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 484.8 KB
1/* $Id: tstIEMAImpl.cpp 103099 2024-01-26 23:34:32Z vboxsync $ */
2/** @file
3 * IEM Assembly Instruction Helper Testcase.
4 */
5
6/*
7 * Copyright (C) 2022-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#include "../include/IEMInternal.h"
33
34#include <iprt/errcore.h>
35#include <VBox/log.h>
36#include <iprt/assert.h>
37#include <iprt/buildconfig.h>
38#include <iprt/ctype.h>
39#include <iprt/err.h>
40#include <iprt/getopt.h>
41#include <iprt/initterm.h>
42#include <iprt/file.h>
43#include <iprt/mem.h>
44#include <iprt/message.h>
45#include <iprt/mp.h>
46#include <iprt/rand.h>
47#include <iprt/stream.h>
48#include <iprt/string.h>
49#include <iprt/test.h>
50#include <iprt/time.h>
51#include <iprt/thread.h>
52#include <iprt/vfs.h>
53#include <iprt/zip.h>
54#include <VBox/version.h>
55
56#include "tstIEMAImpl.h"
57
58
59/*********************************************************************************************************************************
60* Defined Constants And Macros *
61*********************************************************************************************************************************/
62#define ENTRY_BIN_FIX(a_Name) ENTRY_BIN_FIX_EX(a_Name, 0)
63#ifdef TSTIEMAIMPL_WITH_GENERATOR
64# define ENTRY_BIN_FIX_EX(a_Name, a_uExtra) \
65 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
66 g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
67 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */, \
68 RT_ELEMENTS(g_aFixedTests_ ## a_Name), g_aFixedTests_ ## a_Name }
69#else
70# define ENTRY_BIN_FIX_EX(a_Name, a_uExtra) ENTRY_BIN_EX(a_Name, a_uExtra)
71#endif
72
73#define ENTRY_BIN_PFN_CAST(a_Name, a_pfnType) ENTRY_BIN_PFN_CAST_EX(a_Name, a_pfnType, 0)
74#define ENTRY_BIN_PFN_CAST_EX(a_Name, a_pfnType, a_uExtra) \
75 { RT_XSTR(a_Name), (a_pfnType)iemAImpl_ ## a_Name, NULL, \
76 g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
77 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
78
79#define ENTRY_BIN(a_Name) ENTRY_BIN_EX(a_Name, 0)
80#define ENTRY_BIN_EX(a_Name, a_uExtra) \
81 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
82 g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
83 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
84
85#define ENTRY_BIN_AVX(a_Name) ENTRY_BIN_AVX_EX(a_Name, 0)
86#ifndef IEM_WITHOUT_ASSEMBLY
87# define ENTRY_BIN_AVX_EX(a_Name, a_uExtra) \
88 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
89 g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
90 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
91#else
92# define ENTRY_BIN_AVX_EX(a_Name, a_uExtra) \
93 { RT_XSTR(a_Name), iemAImpl_ ## a_Name ## _fallback, NULL, \
94 g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
95 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
96#endif
97
98#define ENTRY_BIN_SSE_OPT(a_Name) ENTRY_BIN_SSE_OPT_EX(a_Name, 0)
99#ifndef IEM_WITHOUT_ASSEMBLY
100# define ENTRY_BIN_SSE_OPT_EX(a_Name, a_uExtra) \
101 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
102 g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
103 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
104#else
105# define ENTRY_BIN_SSE_OPT_EX(a_Name, a_uExtra) \
106 { RT_XSTR(a_Name), iemAImpl_ ## a_Name ## _fallback, NULL, \
107 g_abTests_ ## a_Name, &g_cbTests_ ## a_Name, \
108 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
109#endif
110
111#define ENTRY_BIN_INTEL(a_Name, a_fEflUndef) ENTRY_BIN_INTEL_EX(a_Name, a_fEflUndef, 0)
112#define ENTRY_BIN_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
113 { RT_XSTR(a_Name) "_intel", iemAImpl_ ## a_Name ## _intel, iemAImpl_ ## a_Name, \
114 g_abTests_ ## a_Name ## _intel, &g_cbTests_ ## a_Name ## _intel, \
115 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_INTEL }
116
117#define ENTRY_BIN_AMD(a_Name, a_fEflUndef) ENTRY_BIN_AMD_EX(a_Name, a_fEflUndef, 0)
118#define ENTRY_BIN_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
119 { RT_XSTR(a_Name) "_amd", iemAImpl_ ## a_Name ## _amd, iemAImpl_ ## a_Name, \
120 g_abTests_ ## a_Name ## _amd, &g_cbTests_ ## a_Name ## _amd, \
121 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_AMD }
122
123#define ENTRY_BIN_FIX_INTEL(a_Name, a_fEflUndef) ENTRY_BIN_FIX_INTEL_EX(a_Name, a_fEflUndef, 0)
124#ifdef TSTIEMAIMPL_WITH_GENERATOR
125# define ENTRY_BIN_FIX_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
126 { RT_XSTR(a_Name) "_intel", iemAImpl_ ## a_Name ## _intel, iemAImpl_ ## a_Name, \
127 g_abTests_ ## a_Name ## _intel, &g_cbTests_ ## a_Name ## _intel, \
128 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_INTEL, \
129 RT_ELEMENTS(g_aFixedTests_ ## a_Name), g_aFixedTests_ ## a_Name }
130#else
131# define ENTRY_BIN_FIX_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) ENTRY_BIN_INTEL_EX(a_Name, a_fEflUndef, a_uExtra)
132#endif
133
134#define ENTRY_BIN_FIX_AMD(a_Name, a_fEflUndef) ENTRY_BIN_FIX_AMD_EX(a_Name, a_fEflUndef, 0)
135#ifdef TSTIEMAIMPL_WITH_GENERATOR
136# define ENTRY_BIN_FIX_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
137 { RT_XSTR(a_Name) "_amd", iemAImpl_ ## a_Name ## _amd, iemAImpl_ ## a_Name, \
138 g_abTests_ ## a_Name ## _amd, &g_cbTests_ ## a_Name ## _amd, \
139 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_AMD, \
140 RT_ELEMENTS(g_aFixedTests_ ## a_Name), g_aFixedTests_ ## a_Name }
141#else
142# define ENTRY_BIN_FIX_AMD_EX(a_Name, a_fEflUndef, a_uExtra) ENTRY_BIN_AMD_EX(a_Name, a_fEflUndef, a_uExtra)
143#endif
144
145
146#define TYPEDEF_SUBTEST_TYPE(a_TypeName, a_TestType, a_FunctionPtrType) \
147 typedef struct a_TypeName \
148 { \
149 const char *pszName; \
150 const a_FunctionPtrType pfn; \
151 const a_FunctionPtrType pfnNative; \
152 void const * const pvCompressedTests; \
153 uint32_t const *pcbCompressedTests; \
154 uint32_t const uExtra; \
155 uint8_t const idxCpuEflFlavour; \
156 uint16_t const cFixedTests; \
157 a_TestType const * const paFixedTests; \
158 a_TestType const *paTests; /**< The decompressed info. */ \
159 uint32_t cTests; /**< The decompressed info. */ \
160 IEMTESTENTRYINFO Info; \
161 } a_TypeName
162
163#define COUNT_VARIATIONS(a_SubTest) \
164 (1 + ((a_SubTest).idxCpuEflFlavour == g_idxCpuEflFlavour && (a_SubTest).pfnNative) )
165
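/* For illustration, a subtest table is typically declared by combining
   TYPEDEF_SUBTEST_TYPE with the ENTRY_BIN* macros, roughly like this
   (the worker names and flag masks shown here are only illustrative):

       TYPEDEF_SUBTEST_TYPE(BINU32_T, BINU32_TEST_T, PFNIEMAIMPLBINU32);
       static BINU32_T g_aBinU32[] =
       {
           ENTRY_BIN(add_u32),
           ENTRY_BIN_INTEL(bsf_u32, X86_EFL_AF),
           ENTRY_BIN_AMD(  bsf_u32, X86_EFL_AF),
       };

   Each entry records the worker (iemAImpl_<name>[_intel|_amd]), a pointer to
   the compressed test data blob (g_abTests_<name>) and which CPU EFLAGS
   flavour the entry targets. */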
166
167/*********************************************************************************************************************************
168* Structures and Typedefs *
169*********************************************************************************************************************************/
170typedef struct IEMBINARYHEADER
171{
172 char szMagic[16];
173 uint32_t cbEntry;
174 uint32_t uSvnRev;
175 uint32_t auUnused[6];
176 char szCpuDesc[80];
177} IEMBINARYHEADER;
178AssertCompileSize(IEMBINARYHEADER, 128);
179
180 // 01234567890123456
181#define IEMBINARYHEADER_MAGIC "IEMAImpl Bin v1"
182AssertCompile(sizeof(IEMBINARYHEADER_MAGIC) == 16);
183
184
185typedef struct IEMBINARYFOOTER
186{
187 char szMagic[24];
188 uint32_t cbEntry;
189 uint32_t cEntries;
190} IEMBINARYFOOTER;
191AssertCompileSize(IEMBINARYFOOTER, 32);
192 // 012345678901234567890123
193#define IEMBINARYFOOTER_MAGIC "\nIEMAImpl Bin Footer v1"
194AssertCompile(sizeof(IEMBINARYFOOTER_MAGIC) == 24);
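
/* For reference, the decompressed payload of a generated test data file is
   laid out as follows (the files themselves are gzip compressed, see
   GenerateBinaryOpen/Close and DecompressBinaryTest below):

       IEMBINARYHEADER     - 128 bytes: magic, cbEntry, SVN revision, CPU description.
       test entry 0..N-1   - N * cbEntry bytes of fixed-size test records.
       IEMBINARYFOOTER     -  32 bytes: magic, cbEntry, cEntries.

   Files without header and footer are also accepted, in which case the entry
   count is simply the decompressed size divided by cbEntry. */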
195
196
197/** Fixed part of TYPEDEF_SUBTEST_TYPE and friends. */
198typedef struct IEMTESTENTRYINFO
199{
200 void *pvUncompressed;
201 uint32_t cbUncompressed;
202 const char *pszCpuDesc;
203 uint32_t uSvnRev;
204} IEMTESTENTRYINFO;
205
206
207#ifdef TSTIEMAIMPL_WITH_GENERATOR
208typedef struct IEMBINARYOUTPUT
209{
210 /** The output file. */
211 RTVFSFILE hVfsFile;
212 /** The stream we write uncompressed binary test data to. */
213 RTVFSIOSTREAM hVfsUncompressed;
214 /** The number of bytes written (ignoring write failures). */
215 size_t cbWritten;
216 /** The entry size. */
217 uint32_t cbEntry;
218 /** Write status. */
219 int rcWrite;
220 /** Set if the output goes nowhere (no filename format given). */
221 bool fNull;
222 /** Set if we wrote a header and should write a footer as well. */
223 bool fWroteHeader;
224 /** Filename. */
225 char szFilename[94];
226} IEMBINARYOUTPUT;
227typedef IEMBINARYOUTPUT *PIEMBINARYOUTPUT;
228#endif /* TSTIEMAIMPL_WITH_GENERATOR */
229
230
231/*********************************************************************************************************************************
232* Global Variables *
233*********************************************************************************************************************************/
234static RTTEST g_hTest;
235static uint8_t g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
236#ifdef TSTIEMAIMPL_WITH_GENERATOR
237static uint32_t g_cZeroDstTests = 2;
238static uint32_t g_cZeroSrcTests = 4;
239#endif
240static uint8_t *g_pu8, *g_pu8Two;
241static uint16_t *g_pu16, *g_pu16Two;
242static uint32_t *g_pu32, *g_pu32Two, *g_pfEfl;
243static uint64_t *g_pu64, *g_pu64Two;
244static RTUINT128U *g_pu128, *g_pu128Two;
245
246static char g_aszBuf[32][256];
247static unsigned g_idxBuf = 0;
248
249static uint32_t g_cIncludeTestPatterns;
250static uint32_t g_cExcludeTestPatterns;
251static const char *g_apszIncludeTestPatterns[64];
252static const char *g_apszExcludeTestPatterns[64];
253
254/** Higher value means longer benchmarking. */
255static uint64_t g_cPicoSecBenchmark = 0;
256
257static unsigned g_cVerbosity = 0;
258
259
260#ifdef TSTIEMAIMPL_WITH_GENERATOR
261/** The SVN revision (for use in the binary headers). */
262static uint32_t g_uSvnRev = 0;
263/** The CPU description (for use in the binary headers). */
264static char g_szCpuDesc[80] = "";
265#endif
266
267
268/*********************************************************************************************************************************
269* Internal Functions *
270*********************************************************************************************************************************/
271static const char *FormatR80(PCRTFLOAT80U pr80);
272static const char *FormatR64(PCRTFLOAT64U pr64);
273static const char *FormatR32(PCRTFLOAT32U pr32);
274
275
276/*
277 * Random helpers.
278 */
279
280static uint32_t RandEFlags(void)
281{
282 uint32_t fEfl = RTRandU32();
283 return (fEfl & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK;
284}
285
286#ifdef TSTIEMAIMPL_WITH_GENERATOR
287
288static uint8_t RandU8(void)
289{
290 return RTRandU32Ex(0, 0xff);
291}
292
293
294static uint16_t RandU16(void)
295{
296 return RTRandU32Ex(0, 0xffff);
297}
298
299
300static uint32_t RandU32(void)
301{
302 return RTRandU32();
303}
304
305#endif
306
307static uint64_t RandU64(void)
308{
309 return RTRandU64();
310}
311
312
313static RTUINT128U RandU128(void)
314{
315 RTUINT128U Ret;
316 Ret.s.Hi = RTRandU64();
317 Ret.s.Lo = RTRandU64();
318 return Ret;
319}
320
321#ifdef TSTIEMAIMPL_WITH_GENERATOR
322
323static uint8_t RandU8Dst(uint32_t iTest)
324{
325 if (iTest < g_cZeroDstTests)
326 return 0;
327 return RandU8();
328}
329
330
331static uint8_t RandU8Src(uint32_t iTest)
332{
333 if (iTest < g_cZeroSrcTests)
334 return 0;
335 return RandU8();
336}
337
338
339static uint16_t RandU16Dst(uint32_t iTest)
340{
341 if (iTest < g_cZeroDstTests)
342 return 0;
343 return RandU16();
344}
345
346
347static uint16_t RandU16Src(uint32_t iTest)
348{
349 if (iTest < g_cZeroSrcTests)
350 return 0;
351 return RandU16();
352}
353
354
355static uint32_t RandU32Dst(uint32_t iTest)
356{
357 if (iTest < g_cZeroDstTests)
358 return 0;
359 return RandU32();
360}
361
362
363static uint32_t RandU32Src(uint32_t iTest)
364{
365 if (iTest < g_cZeroSrcTests)
366 return 0;
367 return RandU32();
368}
369
370
371static uint64_t RandU64Dst(uint32_t iTest)
372{
373 if (iTest < g_cZeroDstTests)
374 return 0;
375 return RandU64();
376}
377
378
379static uint64_t RandU64Src(uint32_t iTest)
380{
381 if (iTest < g_cZeroSrcTests)
382 return 0;
383 return RandU64();
384}
385
386
387/** 2nd operand for an FPU instruction, pairing with RandR80Src1. */
388static int16_t RandI16Src2(uint32_t iTest)
389{
390 if (iTest < 18 * 4)
391 switch (iTest % 4)
392 {
393 case 0: return 0;
394 case 1: return INT16_MAX;
395 case 2: return INT16_MIN;
396 case 3: break;
397 }
398 return (int16_t)RandU16();
399}
400
401
402/** 2nd operand for an FPU instruction, pairing with RandR80Src1. */
403static int32_t RandI32Src2(uint32_t iTest)
404{
405 if (iTest < 18 * 4)
406 switch (iTest % 4)
407 {
408 case 0: return 0;
409 case 1: return INT32_MAX;
410 case 2: return INT32_MIN;
411 case 3: break;
412 }
413 return (int32_t)RandU32();
414}
415
416
417static int64_t RandI64Src(uint32_t iTest)
418{
419 RT_NOREF(iTest);
420 return (int64_t)RandU64();
421}
422
423
424static uint16_t RandFcw(void)
425{
426 return RandU16() & ~X86_FCW_ZERO_MASK;
427}
428
429
430static uint16_t RandFsw(void)
431{
432 AssertCompile((X86_FSW_C_MASK | X86_FSW_XCPT_ES_MASK | X86_FSW_TOP_MASK | X86_FSW_B) == 0xffff);
433 return RandU16();
434}
435
436
437static uint32_t RandMxcsr(void)
438{
439 return RandU32() & ~X86_MXCSR_ZERO_MASK;
440}
441
442
443static void SafeR80FractionShift(PRTFLOAT80U pr80, uint8_t cShift)
444{
445 if (pr80->sj64.uFraction >= RT_BIT_64(cShift))
446 pr80->sj64.uFraction >>= cShift;
447 else
448 pr80->sj64.uFraction = (cShift % 19) + 1;
449}
450
451
452
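/**
 * Produces an 80-bit value of the class selected by bType (masked to 0..31):
 * 0=zero, 1=pseudo-infinity, 2=infinity, 3=indefinite, 4/5=denormals,
 * 6/7=pseudo-denormals, 8/9=pseudo-NaNs, 10/11=quiet NaNs, 12/13=signalling
 * NaNs, 14/15=unnormals, 16..25=normals (16 uses an all-ones fraction to
 * exercise rounding), and 26..31 leaves the raw random bits as they are.
 * For 64-bit, 32-bit and integer targets the exponent of normal values is
 * kept within the target's representable range.
 */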
453static RTFLOAT80U RandR80Ex(uint8_t bType, unsigned cTarget = 80, bool fIntTarget = false)
454{
455 Assert(cTarget == (!fIntTarget ? 80U : 16U) || cTarget == 64U || cTarget == 32U || (cTarget == 59U && fIntTarget));
456
457 RTFLOAT80U r80;
458 r80.au64[0] = RandU64();
459 r80.au16[4] = RandU16();
460
461 /*
462 * Adjust the random stuff according to bType.
463 */
464 bType &= 0x1f;
465 if (bType == 0 || bType == 1 || bType == 2 || bType == 3)
466 {
467 /* Zero (0), Pseudo-Infinity (1), Infinity (2), Indefinite (3). We only keep fSign here. */
468 r80.sj64.uExponent = bType == 0 ? 0 : 0x7fff;
469 r80.sj64.uFraction = bType <= 2 ? 0 : RT_BIT_64(62);
470 r80.sj64.fInteger = bType >= 2 ? 1 : 0;
471 AssertMsg(bType != 0 || RTFLOAT80U_IS_ZERO(&r80), ("%s\n", FormatR80(&r80)));
472 AssertMsg(bType != 1 || RTFLOAT80U_IS_PSEUDO_INF(&r80), ("%s\n", FormatR80(&r80)));
473 Assert( bType != 1 || RTFLOAT80U_IS_387_INVALID(&r80));
474 AssertMsg(bType != 2 || RTFLOAT80U_IS_INF(&r80), ("%s\n", FormatR80(&r80)));
475 AssertMsg(bType != 3 || RTFLOAT80U_IS_INDEFINITE(&r80), ("%s\n", FormatR80(&r80)));
476 }
477 else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
478 {
479 /* Denormals (4,5) and Pseudo denormals (6,7) */
480 if (bType & 1)
481 SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
482 else if (r80.sj64.uFraction == 0 && bType < 6)
483 r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
484 r80.sj64.uExponent = 0;
485 r80.sj64.fInteger = bType >= 6;
486 AssertMsg(bType >= 6 || RTFLOAT80U_IS_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
487 AssertMsg(bType < 6 || RTFLOAT80U_IS_PSEUDO_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
488 }
489 else if (bType == 8 || bType == 9)
490 {
491 /* Pseudo NaN. */
492 if (bType & 1)
493 SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
494 else if (r80.sj64.uFraction == 0 && !r80.sj64.fInteger)
495 r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
496 r80.sj64.uExponent = 0x7fff;
497 if (r80.sj64.fInteger)
498 r80.sj64.uFraction |= RT_BIT_64(62);
499 else
500 r80.sj64.uFraction &= ~RT_BIT_64(62);
501 r80.sj64.fInteger = 0;
502 AssertMsg(RTFLOAT80U_IS_PSEUDO_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
503 AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
504 Assert(RTFLOAT80U_IS_387_INVALID(&r80));
505 }
506 else if (bType == 10 || bType == 11 || bType == 12 || bType == 13)
507 {
508 /* Quiet and signalling NaNs. */
509 if (bType & 1)
510 SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
511 else if (r80.sj64.uFraction == 0)
512 r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
513 r80.sj64.uExponent = 0x7fff;
514 if (bType < 12)
515 r80.sj64.uFraction |= RT_BIT_64(62); /* quiet */
516 else
517 r80.sj64.uFraction &= ~RT_BIT_64(62); /* signaling */
518 r80.sj64.fInteger = 1;
519 AssertMsg(bType >= 12 || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
520 AssertMsg(bType < 12 || RTFLOAT80U_IS_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
521 AssertMsg(RTFLOAT80U_IS_SIGNALLING_NAN(&r80) || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
522 AssertMsg(RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
523 AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s\n", FormatR80(&r80)));
524 }
525 else if (bType == 14 || bType == 15)
526 {
527 /* Unnormals */
528 if (bType & 1)
529 SafeR80FractionShift(&r80, RandU8() % 62);
530 r80.sj64.fInteger = 0;
531 if (r80.sj64.uExponent == RTFLOAT80U_EXP_MAX || r80.sj64.uExponent == 0)
532 r80.sj64.uExponent = (uint16_t)RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 1);
533 AssertMsg(RTFLOAT80U_IS_UNNORMAL(&r80), ("%s\n", FormatR80(&r80)));
534 Assert(RTFLOAT80U_IS_387_INVALID(&r80));
535 }
536 else if (bType < 26)
537 {
538 /* Make sure we have lots of normalized values. */
539 if (!fIntTarget)
540 {
541 const unsigned uMinExp = cTarget == 64 ? RTFLOAT80U_EXP_BIAS - RTFLOAT64U_EXP_BIAS
542 : cTarget == 32 ? RTFLOAT80U_EXP_BIAS - RTFLOAT32U_EXP_BIAS : 0;
543 const unsigned uMaxExp = cTarget == 64 ? uMinExp + RTFLOAT64U_EXP_MAX
544 : cTarget == 32 ? uMinExp + RTFLOAT32U_EXP_MAX : RTFLOAT80U_EXP_MAX;
545 r80.sj64.fInteger = 1;
546 if (r80.sj64.uExponent <= uMinExp)
547 r80.sj64.uExponent = uMinExp + 1;
548 else if (r80.sj64.uExponent >= uMaxExp)
549 r80.sj64.uExponent = uMaxExp - 1;
550
551 if (bType == 16)
552 { /* All 1s is useful for testing rounding. Also try to trigger special
553 behaviour by sometimes rounding out of range, while we're at it. */
554 r80.sj64.uFraction = RT_BIT_64(63) - 1;
555 uint8_t bExp = RandU8();
556 if ((bExp & 3) == 0)
557 r80.sj64.uExponent = uMaxExp - 1;
558 else if ((bExp & 3) == 1)
559 r80.sj64.uExponent = uMinExp + 1;
560 else if ((bExp & 3) == 2)
561 r80.sj64.uExponent = uMinExp - (bExp & 15); /* (small numbers are mapped to subnormal values) */
562 }
563 }
564 else
565 {
566 /* integer target: */
567 const unsigned uMinExp = RTFLOAT80U_EXP_BIAS;
568 const unsigned uMaxExp = RTFLOAT80U_EXP_BIAS + cTarget - 2;
569 r80.sj64.fInteger = 1;
570 if (r80.sj64.uExponent < uMinExp)
571 r80.sj64.uExponent = uMinExp;
572 else if (r80.sj64.uExponent > uMaxExp)
573 r80.sj64.uExponent = uMaxExp;
574
575 if (bType == 16)
576 { /* All 1s is useful for testing rounding. Also try to trigger special
577 behaviour by sometimes rounding out of range, while we're at it. */
578 r80.sj64.uFraction = RT_BIT_64(63) - 1;
579 uint8_t bExp = RandU8();
580 if ((bExp & 3) == 0)
581 r80.sj64.uExponent = uMaxExp;
582 else if ((bExp & 3) == 1)
583 r80.sj64.uFraction &= ~(RT_BIT_64(cTarget - 1 - r80.sj64.uExponent) - 1); /* no rounding */
584 }
585 }
586
587 AssertMsg(RTFLOAT80U_IS_NORMAL(&r80), ("%s\n", FormatR80(&r80)));
588 }
589 return r80;
590}
591
592
593static RTFLOAT80U RandR80(unsigned cTarget = 80, bool fIntTarget = false)
594{
595 /*
596 * Make it more likely that we get a good selection of special values.
597 */
598 return RandR80Ex(RandU8(), cTarget, fIntTarget);
599
600}
601
602
603static RTFLOAT80U RandR80Src(uint32_t iTest, unsigned cTarget = 80, bool fIntTarget = false)
604{
605 /* Make sure we cover all the basic types first before going for random selection: */
606 if (iTest <= 18)
607 return RandR80Ex(18 - iTest, cTarget, fIntTarget); /* Starting with 3 normals. */
608 return RandR80(cTarget, fIntTarget);
609}
610
611
612/**
613 * Helper for RandR80Src1 and RandR80Src2 that converts bType from a 0..11 range
614 * to a 0..17, covering all basic value types.
615 */
616static uint8_t RandR80Src12RemapType(uint8_t bType)
617{
618 switch (bType)
619 {
620 case 0: return 18; /* normal */
621 case 1: return 16; /* normal extreme rounding */
622 case 2: return 14; /* unnormal */
623 case 3: return 12; /* Signalling NaN */
624 case 4: return 10; /* Quiet NaN */
625 case 5: return 8; /* PseudoNaN */
626 case 6: return 6; /* Pseudo Denormal */
627 case 7: return 4; /* Denormal */
628 case 8: return 3; /* Indefinite */
629 case 9: return 2; /* Infinity */
630 case 10: return 1; /* Pseudo-Infinity */
631 case 11: return 0; /* Zero */
632 default: AssertFailedReturn(18);
633 }
634}
635
636
637/**
638 * This works in tandem with RandR80Src2 to make sure we cover all operand
639 * type mixes first before we venture into regular random testing.
640 *
641 * There are 11 basic variations when we leave out the five odd ones using
642 * SafeR80FractionShift. Because of the special normalized value targeting
643 * rounding, we make it an even 12. So 144 combinations for two operands.
644 */
645static RTFLOAT80U RandR80Src1(uint32_t iTest, unsigned cPartnerBits = 80, bool fPartnerInt = false)
646{
647 if (cPartnerBits == 80)
648 {
649 Assert(!fPartnerInt);
650 if (iTest < 12 * 12)
651 return RandR80Ex(RandR80Src12RemapType(iTest / 12));
652 }
653 else if ((cPartnerBits == 64 || cPartnerBits == 32) && !fPartnerInt)
654 {
655 if (iTest < 12 * 10)
656 return RandR80Ex(RandR80Src12RemapType(iTest / 10));
657 }
658 else if (iTest < 18 * 4 && fPartnerInt)
659 return RandR80Ex(iTest / 4);
660 return RandR80();
661}
662
663
664/** Partner to RandR80Src1. */
665static RTFLOAT80U RandR80Src2(uint32_t iTest)
666{
667 if (iTest < 12 * 12)
668 return RandR80Ex(RandR80Src12RemapType(iTest % 12));
669 return RandR80();
670}
671
672
673static void SafeR64FractionShift(PRTFLOAT64U pr64, uint8_t cShift)
674{
675 if (pr64->s64.uFraction >= RT_BIT_64(cShift))
676 pr64->s64.uFraction >>= cShift;
677 else
678 pr64->s64.uFraction = (cShift % 19) + 1;
679}
680
681
682static RTFLOAT64U RandR64Ex(uint8_t bType)
683{
684 RTFLOAT64U r64;
685 r64.u = RandU64();
686
687 /*
688 * Make it more likely that we get a good selection of special values.
689 * On average 6 out of 16 calls should return a special value.
690 */
691 bType &= 0xf;
692 if (bType == 0 || bType == 1)
693 {
694 /* 0 or Infinity. We only keep fSign here. */
695 r64.s.uExponent = bType == 0 ? 0 : 0x7ff;
696 r64.s.uFractionHigh = 0;
697 r64.s.uFractionLow = 0;
698 AssertMsg(bType != 0 || RTFLOAT64U_IS_ZERO(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
699 AssertMsg(bType != 1 || RTFLOAT64U_IS_INF(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
700 }
701 else if (bType == 2 || bType == 3)
702 {
703 /* Subnormals */
704 if (bType == 3)
705 SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
706 else if (r64.s64.uFraction == 0)
707 r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
708 r64.s64.uExponent = 0;
709 AssertMsg(RTFLOAT64U_IS_SUBNORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
710 }
711 else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
712 {
713 /* NaNs */
714 if (bType & 1)
715 SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
716 else if (r64.s64.uFraction == 0)
717 r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
718 r64.s64.uExponent = 0x7ff;
719 if (bType < 6)
720 r64.s64.uFraction |= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* quiet */
721 else
722 r64.s64.uFraction &= ~RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* signalling */
723 AssertMsg(bType >= 6 || RTFLOAT64U_IS_QUIET_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
724 AssertMsg(bType < 6 || RTFLOAT64U_IS_SIGNALLING_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
725 AssertMsg(RTFLOAT64U_IS_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
726 }
727 else if (bType < 12)
728 {
729 /* Make sure we have lots of normalized values. */
730 if (r64.s.uExponent == 0)
731 r64.s.uExponent = 1;
732 else if (r64.s.uExponent == 0x7ff)
733 r64.s.uExponent = 0x7fe;
734 AssertMsg(RTFLOAT64U_IS_NORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
735 }
736 return r64;
737}
738
739
740static RTFLOAT64U RandR64Src(uint32_t iTest)
741{
742 if (iTest < 16)
743 return RandR64Ex(iTest);
744 return RandR64Ex(RandU8());
745}
746
747
748/** Pairing with an 80-bit floating point arg. */
749static RTFLOAT64U RandR64Src2(uint32_t iTest)
750{
751 if (iTest < 12 * 10)
752 return RandR64Ex(9 - iTest % 10); /* start with normal values */
753 return RandR64Ex(RandU8());
754}
755
756
757static void SafeR32FractionShift(PRTFLOAT32U pr32, uint8_t cShift)
758{
759 if (pr32->s.uFraction >= RT_BIT_32(cShift))
760 pr32->s.uFraction >>= cShift;
761 else
762 pr32->s.uFraction = (cShift % 19) + 1;
763}
764
765
766static RTFLOAT32U RandR32Ex(uint8_t bType)
767{
768 RTFLOAT32U r32;
769 r32.u = RandU32();
770
771 /*
772 * Make it more likely that we get a good selection of special values.
773 * On average 6 out of 16 calls should return a special value.
774 */
775 bType &= 0xf;
776 if (bType == 0 || bType == 1)
777 {
778 /* 0 or Infinity. We only keep fSign here. */
779 r32.s.uExponent = bType == 0 ? 0 : 0xff;
780 r32.s.uFraction = 0;
781 AssertMsg(bType != 0 || RTFLOAT32U_IS_ZERO(&r32), ("%s\n", FormatR32(&r32)));
782 AssertMsg(bType != 1 || RTFLOAT32U_IS_INF(&r32), ("%s\n", FormatR32(&r32)));
783 }
784 else if (bType == 2 || bType == 3)
785 {
786 /* Subnormals */
787 if (bType == 3)
788 SafeR32FractionShift(&r32, r32.s.uExponent % 22);
789 else if (r32.s.uFraction == 0)
790 r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
791 r32.s.uExponent = 0;
792 AssertMsg(RTFLOAT32U_IS_SUBNORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
793 }
794 else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
795 {
796 /* NaNs */
797 if (bType & 1)
798 SafeR32FractionShift(&r32, r32.s.uExponent % 22);
799 else if (r32.s.uFraction == 0)
800 r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
801 r32.s.uExponent = 0xff;
802 if (bType < 6)
803 r32.s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* quiet */
804 else
805 r32.s.uFraction &= ~RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* signalling */
806 AssertMsg(bType >= 6 || RTFLOAT32U_IS_QUIET_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
807 AssertMsg(bType < 6 || RTFLOAT32U_IS_SIGNALLING_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
808 AssertMsg(RTFLOAT32U_IS_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
809 }
810 else if (bType < 12)
811 {
812 /* Make sure we have lots of normalized values. */
813 if (r32.s.uExponent == 0)
814 r32.s.uExponent = 1;
815 else if (r32.s.uExponent == 0xff)
816 r32.s.uExponent = 0xfe;
817 AssertMsg(RTFLOAT32U_IS_NORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
818 }
819 return r32;
820}
821
822
823static RTFLOAT32U RandR32Src(uint32_t iTest)
824{
825 if (iTest < 16)
826 return RandR32Ex(iTest);
827 return RandR32Ex(RandU8());
828}
829
830
831/** Pairing with an 80-bit floating point arg. */
832static RTFLOAT32U RandR32Src2(uint32_t iTest)
833{
834 if (iTest < 12 * 10)
835 return RandR32Ex(9 - iTest % 10); /* start with normal values */
836 return RandR32Ex(RandU8());
837}
838
839
840static RTPBCD80U RandD80Src(uint32_t iTest)
841{
842 if (iTest < 3)
843 {
844 RTPBCD80U d80Zero = RTPBCD80U_INIT_ZERO(!(iTest & 1));
845 return d80Zero;
846 }
847 if (iTest < 5)
848 {
849 RTPBCD80U d80Ind = RTPBCD80U_INIT_INDEFINITE();
850 return d80Ind;
851 }
852
853 RTPBCD80U d80;
854 uint8_t b = RandU8();
855 d80.s.fSign = b & 1;
856
857 if ((iTest & 7) >= 6)
858 {
859 /* Illegal */
860 d80.s.uPad = (iTest & 7) == 7 ? b >> 1 : 0;
861 for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
862 d80.s.abPairs[iPair] = RandU8();
863 }
864 else
865 {
866 /* Normal */
867 d80.s.uPad = 0;
868 for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
869 {
870 uint8_t const uLo = (uint8_t)RTRandU32Ex(0, 9);
871 uint8_t const uHi = (uint8_t)RTRandU32Ex(0, 9);
872 d80.s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(uHi, uLo);
873 }
874 }
875 return d80;
876}
877
878# if 0 /* unused */
879
880static const char *GenFormatR80(PCRTFLOAT80U plrd)
881{
882 if (RTFLOAT80U_IS_ZERO(plrd))
883 return plrd->s.fSign ? "RTFLOAT80U_INIT_ZERO(1)" : "RTFLOAT80U_INIT_ZERO(0)";
884 if (RTFLOAT80U_IS_INF(plrd))
885 return plrd->s.fSign ? "RTFLOAT80U_INIT_INF(1)" : "RTFLOAT80U_INIT_INF(0)";
886 if (RTFLOAT80U_IS_INDEFINITE(plrd))
887 return plrd->s.fSign ? "RTFLOAT80U_INIT_IND(1)" : "RTFLOAT80U_INIT_IND(0)";
888 if (RTFLOAT80U_IS_QUIET_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
889 return plrd->s.fSign ? "RTFLOAT80U_INIT_QNAN(1)" : "RTFLOAT80U_INIT_QNAN(0)";
890 if (RTFLOAT80U_IS_SIGNALLING_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
891 return plrd->s.fSign ? "RTFLOAT80U_INIT_SNAN(1)" : "RTFLOAT80U_INIT_SNAN(0)";
892
893 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
894 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT80U_INIT_C(%d,%#RX64,%u)",
895 plrd->s.fSign, plrd->s.uMantissa, plrd->s.uExponent);
896 return pszBuf;
897}
898
899static const char *GenFormatR64(PCRTFLOAT64U prd)
900{
901 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
902 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT64U_INIT_C(%d,%#RX64,%u)",
903 prd->s.fSign, RT_MAKE_U64(prd->s.uFractionLow, prd->s.uFractionHigh), prd->s.uExponent);
904 return pszBuf;
905}
906
907
908static const char *GenFormatR32(PCRTFLOAT32U pr)
909{
910 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
911 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT32U_INIT_C(%d,%#RX32,%u)", pr->s.fSign, pr->s.uFraction, pr->s.uExponent);
912 return pszBuf;
913}
914
915
916static const char *GenFormatD80(PCRTPBCD80U pd80)
917{
918 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
919 size_t off;
920 if (pd80->s.uPad == 0)
921 off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_C(%d", pd80->s.fSign);
922 else
923 off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_EX_C(%#x,%d", pd80->s.uPad, pd80->s.fSign);
924 size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
925 while (iPair-- > 0)
926 off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, ",%d,%d",
927 RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair]),
928 RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair]));
929 pszBuf[off++] = ')';
930 pszBuf[off++] = '\0';
931 return pszBuf;
932}
933
934
935static const char *GenFormatI64(int64_t i64)
936{
937 if (i64 == INT64_MIN) /* This one is problematic */
938 return "INT64_MIN";
939 if (i64 == INT64_MAX)
940 return "INT64_MAX";
941 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
942 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT64_C(%RI64)", i64);
943 return pszBuf;
944}
945
946# if 0 /* unused */
947static const char *GenFormatI64(int64_t const *pi64)
948{
949 return GenFormatI64(*pi64);
950}
951# endif
952
953static const char *GenFormatI32(int32_t i32)
954{
955 if (i32 == INT32_MIN) /* This one is problematic */
956 return "INT32_MIN";
957 if (i32 == INT32_MAX)
958 return "INT32_MAX";
959 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
960 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT32_C(%RI32)", i32);
961 return pszBuf;
962}
963
964
965const char *GenFormatI32(int32_t const *pi32)
966{
967 return GenFormatI32(*pi32);
968}
969
970
971const char *GenFormatI16(int16_t i16)
972{
973 if (i16 == INT16_MIN) /* This one is problematic */
974 return "INT16_MIN";
975 if (i16 == INT16_MAX)
976 return "INT16_MAX";
977 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
978 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT16_C(%RI16)", i16);
979 return pszBuf;
980}
981
982
983const char *GenFormatI16(int16_t const *pi16)
984{
985 return GenFormatI16(*pi16);
986}
987
988
989static void GenerateHeader(PRTSTREAM pOut, const char *pszCpuDesc, const char *pszCpuType)
990{
991 /* We want to tag the generated source code with the revision that produced it. */
992 static char s_szRev[] = "$Revision: 103099 $";
993 const char *pszRev = RTStrStripL(strchr(s_szRev, ':') + 1);
994 size_t cchRev = 0;
995 while (RT_C_IS_DIGIT(pszRev[cchRev]))
996 cchRev++;
997
998 RTStrmPrintf(pOut,
999 "/* $Id: tstIEMAImpl.cpp 103099 2024-01-26 23:34:32Z vboxsync $ */\n"
1000 "/** @file\n"
1001 " * IEM Assembly Instruction Helper Testcase Data%s%s - r%.*s on %s.\n"
1002 " */\n"
1003 "\n"
1004 "/*\n"
1005 " * Copyright (C) 2022-" VBOX_C_YEAR " Oracle and/or its affiliates.\n"
1006 " *\n"
1007 " * This file is part of VirtualBox base platform packages, as\n"
1008 " * available from https://www.virtualbox.org.\n"
1009 " *\n"
1010 " * This program is free software; you can redistribute it and/or\n"
1011 " * modify it under the terms of the GNU General Public License\n"
1012 " * as published by the Free Software Foundation, in version 3 of the\n"
1013 " * License.\n"
1014 " *\n"
1015 " * This program is distributed in the hope that it will be useful, but\n"
1016 " * WITHOUT ANY WARRANTY; without even the implied warranty of\n"
1017 " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n"
1018 " * General Public License for more details.\n"
1019 " *\n"
1020 " * You should have received a copy of the GNU General Public License\n"
1021 " * along with this program; if not, see <https://www.gnu.org/licenses>.\n"
1022 " *\n"
1023 " * SPDX-License-Identifier: GPL-3.0-only\n"
1024 " */\n"
1025 "\n"
1026 "#include \"tstIEMAImpl.h\"\n"
1027 "\n"
1028 ,
1029 pszCpuType ? " " : "", pszCpuType ? pszCpuType : "", cchRev, pszRev, pszCpuDesc);
1030}
1031
1032
1033static PRTSTREAM GenerateOpenWithHdr(const char *pszFilename, const char *pszCpuDesc, const char *pszCpuType)
1034{
1035 PRTSTREAM pOut = NULL;
1036 int rc = RTStrmOpen(pszFilename, "w", &pOut);
1037 if (RT_SUCCESS(rc))
1038 {
1039 GenerateHeader(pOut, pszCpuDesc, pszCpuType);
1040 return pOut;
1041 }
1042 RTMsgError("Failed to open %s for writing: %Rrc", pszFilename, rc);
1043 return NULL;
1044}
1045
1046
1047static RTEXITCODE GenerateFooterAndClose(PRTSTREAM pOut, const char *pszFilename, RTEXITCODE rcExit)
1048{
1049 RTStrmPrintf(pOut,
1050 "\n"
1051 "/* end of file */\n");
1052 int rc = RTStrmClose(pOut);
1053 if (RT_SUCCESS(rc))
1054 return rcExit;
1055 return RTMsgErrorExitFailure("RTStrmClose failed on %s: %Rrc", pszFilename, rc);
1056}
1057
1058
1059static void GenerateArrayStart(PRTSTREAM pOut, const char *pszName, const char *pszType)
1060{
1061 RTStrmPrintf(pOut, "%s const g_aTests_%s[] =\n{\n", pszType, pszName);
1062}
1063
1064
1065static void GenerateArrayEnd(PRTSTREAM pOut, const char *pszName)
1066{
1067 RTStrmPrintf(pOut,
1068 "};\n"
1069 "uint32_t const g_cTests_%s = RT_ELEMENTS(g_aTests_%s);\n"
1070 "\n",
1071 pszName, pszName);
1072}
1073
1074# endif /* unused */
1075
1076static void GenerateBinaryWrite(PIEMBINARYOUTPUT pBinOut, const void *pvData, size_t cbData)
1077{
1078 pBinOut->cbWritten += cbData; /* ignore errors - makes entry calculation simpler */
1079 if (RT_SUCCESS_NP(pBinOut->rcWrite))
1080 {
1081 pBinOut->rcWrite = RTVfsIoStrmWrite(pBinOut->hVfsUncompressed, pvData, cbData, true /*fBlocking*/, NULL);
1082 if (RT_SUCCESS(pBinOut->rcWrite))
1083 return;
1084 RTMsgError("Error writing '%s': %Rrc", pBinOut->szFilename, pBinOut->rcWrite);
1085 }
1086}
1087
1088static bool GenerateBinaryOpen(PIEMBINARYOUTPUT pBinOut, const char *pszFilenameFmt, const char *pszName,
1089 IEMTESTENTRYINFO const *pInfoToPreserve, uint32_t cbEntry)
1090{
1091 pBinOut->cbEntry = cbEntry;
1092 pBinOut->cbWritten = 0;
1093 pBinOut->hVfsFile = NIL_RTVFSFILE;
1094 pBinOut->hVfsUncompressed = NIL_RTVFSIOSTREAM;
1095 if (pszFilenameFmt)
1096 {
1097 pBinOut->fNull = false;
1098 if (RTStrPrintf2(pBinOut->szFilename, sizeof(pBinOut->szFilename), pszFilenameFmt, pszName) > 0)
1099 {
1100 RTMsgInfo("GenerateBinaryOpen: %s...\n", pBinOut->szFilename);
1101 pBinOut->rcWrite = RTVfsFileOpenNormal(pBinOut->szFilename,
1102 RTFILE_O_CREATE_REPLACE | RTFILE_O_WRITE | RTFILE_O_DENY_READWRITE,
1103 &pBinOut->hVfsFile);
1104 if (RT_SUCCESS(pBinOut->rcWrite))
1105 {
1106 RTVFSIOSTREAM hVfsIoFile = RTVfsFileToIoStream(pBinOut->hVfsFile);
1107 if (hVfsIoFile != NIL_RTVFSIOSTREAM)
1108 {
1109 pBinOut->rcWrite = RTZipGzipCompressIoStream(hVfsIoFile, 0 /*fFlags*/, 9, &pBinOut->hVfsUncompressed);
1110 RTVfsIoStrmRelease(hVfsIoFile);
1111 if (RT_SUCCESS(pBinOut->rcWrite))
1112 {
1113 pBinOut->rcWrite = VINF_SUCCESS;
1114 pBinOut->fWroteHeader = false;
1115
1116 /* Write the header if applicable. */
1117 if ( !pInfoToPreserve
1118 || (pInfoToPreserve->uSvnRev != 0 && *pInfoToPreserve->pszCpuDesc))
1119 {
1120 IEMBINARYHEADER Hdr;
1121 RT_ZERO(Hdr);
1122 memcpy(Hdr.szMagic, IEMBINARYHEADER_MAGIC, sizeof(IEMBINARYHEADER_MAGIC));
1123 Hdr.cbEntry = cbEntry;
1124 Hdr.uSvnRev = pInfoToPreserve ? pInfoToPreserve->uSvnRev : g_uSvnRev;
1125 RTStrCopy(Hdr.szCpuDesc, sizeof(Hdr.szCpuDesc),
1126 pInfoToPreserve ? pInfoToPreserve->pszCpuDesc : g_szCpuDesc);
1127 GenerateBinaryWrite(pBinOut, &Hdr, sizeof(Hdr));
1128 pBinOut->fWroteHeader = true;
1129 }
1130
1131 return true;
1132 }
1133
1134 RTMsgError("RTZipGzipCompressIoStream: %Rrc", pBinOut->rcWrite);
1135 }
1136 else
1137 {
1138 RTMsgError("RTVfsFileToIoStream failed!");
1139 pBinOut->rcWrite = VERR_VFS_CHAIN_CAST_FAILED;
1140 }
1141 RTVfsFileRelease(pBinOut->hVfsFile);
1142 RTFileDelete(pBinOut->szFilename);
1143 }
1144 else
1145 RTMsgError("Failed to open '%s' for writing: %Rrc", pBinOut->szFilename, pBinOut->rcWrite);
1146 }
1147 else
1148 {
1149 RTMsgError("filename too long: %s + %s", pszFilenameFmt, pszName);
1150 pBinOut->rcWrite = VERR_BUFFER_OVERFLOW;
1151 }
1152 return false;
1153 }
1154 RTMsgInfo("GenerateBinaryOpen: %s -> /dev/null\n", pszName);
1155 pBinOut->rcWrite = VERR_IGNORED;
1156 pBinOut->fNull = true;
1157 pBinOut->fWroteHeader = false;
1158 pBinOut->szFilename[0] = '\0';
1159 return true;
1160}
1161
1162# define GENERATE_BINARY_OPEN(a_pBinOut, a_papszNameFmts, a_Entry) \
1163 GenerateBinaryOpen((a_pBinOut), a_papszNameFmts[(a_Entry).idxCpuEflFlavour], (a_Entry).pszName, \
1164 NULL /*pInfo*/, sizeof((a_Entry).paTests[0]))
1165
1166static bool GenerateBinaryClose(PIEMBINARYOUTPUT pBinOut)
1167{
1168 if (!pBinOut->fNull)
1169 {
1170 /* Write footer if we've written a header. */
1171 if (pBinOut->fWroteHeader)
1172 {
1173 IEMBINARYFOOTER Ftr;
1174 RT_ZERO(Ftr);
1175 memcpy(Ftr.szMagic, IEMBINARYFOOTER_MAGIC, sizeof(IEMBINARYFOOTER_MAGIC));
1176 Ftr.cbEntry = pBinOut->cbEntry;
1177 Ftr.cEntries = (uint32_t)((pBinOut->cbWritten - sizeof(IEMBINARYHEADER)) / pBinOut->cbEntry);
1178 Assert(Ftr.cEntries * pBinOut->cbEntry + sizeof(IEMBINARYHEADER) == pBinOut->cbWritten);
1179 GenerateBinaryWrite(pBinOut, &Ftr, sizeof(Ftr));
1180 }
1181
1182 /* This is rather jovial about rcWrite. */
1183 int const rc1 = RTVfsIoStrmFlush(pBinOut->hVfsUncompressed);
1184 RTVfsIoStrmRelease(pBinOut->hVfsUncompressed);
1185 pBinOut->hVfsUncompressed = NIL_RTVFSIOSTREAM;
1186 if (RT_FAILURE(rc1))
1187 RTMsgError("Error flushing '%s' (uncompressed stream): %Rrc", pBinOut->szFilename, rc1);
1188
1189 int const rc2 = RTVfsFileFlush(pBinOut->hVfsFile);
1190 RTVfsFileRelease(pBinOut->hVfsFile);
1191 pBinOut->hVfsFile = NIL_RTVFSFILE;
1192 if (RT_FAILURE(rc2))
1193 RTMsgError("Error flushing '%s' (compressed file): %Rrc", pBinOut->szFilename, rc2);
1194
1195 return RT_SUCCESS(rc2) && RT_SUCCESS(rc1) && RT_SUCCESS(pBinOut->rcWrite);
1196 }
1197 return true;
1198}
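
/* For illustration, the typical write sequence in a generator function is
   (error handling omitted; see the GEN_BINARY_TESTS macro further down for a
   real instance; TESTENTRY is an illustrative type name):

       IEMBINARYOUTPUT BinOut;
       GenerateBinaryOpen(&BinOut, pszNameFmt, pszName, NULL /-pInfo-/, sizeof(TESTENTRY));
       for (uint32_t iTest = 0; iTest < cTests; iTest++)
       {
           TESTENTRY Test;
           // ... fill in random inputs and run the worker to produce outputs ...
           GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
       }
       GenerateBinaryClose(&BinOut);

   GenerateBinaryOpen writes the IEMBINARYHEADER when revision/CPU info is
   available, and GenerateBinaryClose appends the IEMBINARYFOOTER before
   flushing both the gzip stream and the output file. */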
1199
1200/* Helper for DumpAll. */
1201# define DUMP_ALL_FN(a_FnBaseName, a_aSubTests) \
1202 static RTEXITCODE a_FnBaseName ## DumpAll(const char * const * papszNameFmts) \
1203 { \
1204 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
1205 { \
1206 AssertReturn(DECOMPRESS_TESTS(a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
1207 IEMBINARYOUTPUT BinOut; \
1208 AssertReturn(GenerateBinaryOpen(&BinOut, papszNameFmts[a_aSubTests[iFn].idxCpuEflFlavour], \
1209 a_aSubTests[iFn].pszName, &a_aSubTests[iFn].Info, \
1210 sizeof(a_aSubTests[iFn].paTests[0])), \
1211 RTEXITCODE_FAILURE); \
1212 GenerateBinaryWrite(&BinOut, a_aSubTests[iFn].paTests, a_aSubTests[iFn].cTests); \
1213 FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
1214 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
1215 } \
1216 return RTEXITCODE_SUCCESS; \
1217 }
1218#endif /* TSTIEMAIMPL_WITH_GENERATOR */
1219
1220
1221/*
1222 * Test helpers.
1223 */
1224static bool IsTestEnabled(const char *pszName)
1225{
1226 /* Process excludes first: */
1227 uint32_t i = g_cExcludeTestPatterns;
1228 while (i-- > 0)
1229 if (RTStrSimplePatternMultiMatch(g_apszExcludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
1230 return false;
1231
1232 /* If no include patterns, everything is included: */
1233 i = g_cIncludeTestPatterns;
1234 if (!i)
1235 return true;
1236
1237 /* Otherwise only tests in the include patterns get tested: */
1238 while (i-- > 0)
1239 if (RTStrSimplePatternMultiMatch(g_apszIncludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
1240 return true;
1241
1242 return false;
1243}
1244
1245
1246static bool SubTestAndCheckIfEnabled(const char *pszName)
1247{
1248 RTTestSub(g_hTest, pszName);
1249 if (IsTestEnabled(pszName))
1250 return true;
1251 RTTestSkipped(g_hTest, g_cVerbosity > 0 ? "excluded" : NULL);
1252 return false;
1253}
1254
1255
1256/** Decompresses test data before use as required. */
1257static int DecompressBinaryTest(void const *pvCompressed, uint32_t cbCompressed, size_t cbEntry, const char *pszWhat,
1258 void **ppvTests, uint32_t *pcTests, IEMTESTENTRYINFO *pInfo)
1259{
1260 /* Don't do it again. */
1261 if (pInfo->pvUncompressed && *ppvTests)
1262 return VINF_SUCCESS;
1263
1264 /* Open a memory stream for the compressed binary data. */
1265 RTVFSIOSTREAM hVfsIos = NIL_RTVFSIOSTREAM;
1266 int rc = RTVfsIoStrmFromBuffer(RTFILE_O_READ, pvCompressed, cbCompressed, &hVfsIos);
1267 RTTESTI_CHECK_RC_OK_RET(rc, rc);
1268
1269 /* Open a decompressed stream for it. */
1270 RTVFSIOSTREAM hVfsIosDecomp = NIL_RTVFSIOSTREAM;
1271 rc = RTZipGzipDecompressIoStream(hVfsIos, RTZIPGZIPDECOMP_F_ALLOW_ZLIB_HDR, &hVfsIosDecomp);
1272 RTTESTI_CHECK_RC_OK(rc);
1273 if (RT_SUCCESS(rc))
1274 {
1275 /* Initial output buffer allocation. */
1276 size_t cbDecompressedAlloc = cbCompressed <= _16M ? (size_t)cbCompressed * 16 : (size_t)cbCompressed * 4;
1277 uint8_t *pbDecompressed = (uint8_t *)RTMemAllocZ(cbDecompressedAlloc);
1278 if (pbDecompressed)
1279 {
1280 size_t off = 0;
1281 for (;;)
1282 {
1283 size_t cbRead = 0;
1284 rc = RTVfsIoStrmRead(hVfsIosDecomp, &pbDecompressed[off], cbDecompressedAlloc - off, true /*fBlocking*/, &cbRead);
1285 if (RT_FAILURE(rc))
1286 break;
1287 if (rc == VINF_EOF && cbRead == 0)
1288 break;
1289 off += cbRead;
1290
1291 if (cbDecompressedAlloc < off + 256)
1292 {
1293 size_t const cbNew = cbDecompressedAlloc < _128M ? cbDecompressedAlloc * 2 : cbDecompressedAlloc + _32M;
1294 void * const pvNew = RTMemRealloc(pbDecompressed, cbNew);
1295 AssertBreakStmt(pvNew, rc = VERR_NO_MEMORY);
1296 cbDecompressedAlloc = cbNew;
1297 pbDecompressed = (uint8_t *)pvNew;
1298 }
1299 }
1300 if (RT_SUCCESS(rc))
1301 {
1302 size_t const cbUncompressed = off;
1303
1304 /* Validate the header and footer if present and subtract them from 'off'. */
1305 IEMBINARYHEADER const *pHdr = NULL;
1306 if ( off >= sizeof(IEMTESTENTRYINFO)
1307 && memcmp(pbDecompressed, IEMBINARYHEADER_MAGIC, sizeof(IEMBINARYHEADER_MAGIC)) == 0)
1308 {
1309 pHdr = (IEMBINARYHEADER const *)pbDecompressed;
1310 IEMBINARYFOOTER const *pFtr = (IEMBINARYFOOTER const *)&pbDecompressed[off - sizeof(IEMBINARYFOOTER)];
1311
1312 off -= sizeof(*pHdr) + sizeof(*pFtr);
1313 rc = VERR_IO_BAD_UNIT;
1314 if (pHdr->cbEntry != cbEntry)
1315 RTTestIFailed("Test entry size differs for '%s': %#x (header r%u), expected %#zx (uncompressed size %#zx)",
1316 pszWhat, pHdr->cbEntry, pHdr->uSvnRev, cbEntry, off + sizeof(*pHdr) + sizeof(*pFtr));
1317 else if (memcmp(pFtr->szMagic, IEMBINARYFOOTER_MAGIC, sizeof(IEMBINARYFOOTER_MAGIC)) != 0)
1318 RTTestIFailed("Wrong footer magic for '%s': %.*Rhxs\n", pszWhat, sizeof(pFtr->szMagic), pFtr->szMagic);
1319 else if (pFtr->cbEntry != cbEntry)
1320 RTTestIFailed("Wrong footer entry size for '%s': %#x, expected %#x\n", pszWhat, pFtr->cbEntry, cbEntry);
1321 else if (pFtr->cEntries != off / cbEntry)
1322 RTTestIFailed("Wrong footer entry count for '%s': %#x, expected %#x\n",
1323 pszWhat, pFtr->cEntries, off / cbEntry);
1324 else
1325 rc = VINF_SUCCESS;
1326 }
1327
1328 /* Validate the decompressed size wrt entry size. */
1329 if ((off % cbEntry) != 0 && RT_SUCCESS(rc))
1330 {
1331 RTTestIFailed("Uneven decompressed data size for '%s': %#zx vs entry size %#zx -> %#zx",
1332 pszWhat, off, cbEntry, off % cbEntry);
1333 rc = VERR_IO_BAD_LENGTH;
1334 }
1335
1336 if (RT_SUCCESS(rc))
1337 {
1338 /*
1339 * We're good.
1340 */
1341 /* Reallocate the block if it's way too big. */
1342 if (cbDecompressedAlloc - cbUncompressed > _512K)
1343 {
1344 void * const pvNew = RTMemRealloc(pbDecompressed, cbUncompressed);
1345 if (pvNew)
1346 {
1347 pbDecompressed = (uint8_t *)pvNew;
1348 if (pHdr)
1349 pHdr = (IEMBINARYHEADER const *)pbDecompressed;
1350 }
1351 }
1352 RTMEM_MAY_LEAK(pbDecompressed);
1353
1354 /* Fill in the info and other return values. */
1355 pInfo->cbUncompressed = (uint32_t)cbUncompressed;
1356 pInfo->pvUncompressed = pbDecompressed;
1357 pInfo->pszCpuDesc = pHdr ? pHdr->szCpuDesc : NULL;
1358 pInfo->uSvnRev = pHdr ? pHdr->uSvnRev : 0;
1359 *pcTests = (uint32_t)(off / cbEntry);
1360 *ppvTests = pHdr ? (uint8_t *)(pHdr + 1) : pbDecompressed;
1361
1362 pbDecompressed = NULL;
1363 rc = VINF_SUCCESS;
1364 }
1365 }
1366 else
1367 RTTestIFailed("Failed to decompress binary stream '%s': %Rrc (off=%#zx, cbCompressed=%#x)",
1368 pszWhat, rc, off, cbCompressed);
1369 RTMemFree(pbDecompressed);
1370 }
1371 else
1372 {
1373 RTTestIFailed("Out of memory decompressing test data '%s'", pszWhat);
1374 rc = VERR_NO_MEMORY;
1375 }
1376 RTVfsIoStrmRelease(hVfsIosDecomp);
1377 }
1378 RTVfsIoStrmRelease(hVfsIos);
1379 return rc;
1380}
1381
1382#define DECOMPRESS_TESTS(a_Entry) \
1383 RT_SUCCESS(DecompressBinaryTest((a_Entry).pvCompressedTests, *(a_Entry).pcbCompressedTests, \
1384 sizeof((a_Entry).paTests[0]), (a_Entry).pszName, \
1385 (void **)&(a_Entry).paTests, &(a_Entry).cTests, &(a_Entry).Info))
1386
1387/** Frees the decompressed test data. */
1388static void FreeDecompressedTests(void **ppvTests, uint32_t *pcTests, IEMTESTENTRYINFO *pInfo)
1389{
1390 RTMemFree(pInfo->pvUncompressed);
1391 pInfo->pvUncompressed = NULL;
1392 pInfo->cbUncompressed = 0;
1393 *ppvTests = NULL;
1394 *pcTests = 0;
1395}
1396
1397#define FREE_DECOMPRESSED_TESTS(a_Entry) \
1398 FreeDecompressedTests((void **)&(a_Entry).paTests, &(a_Entry).cTests, &(a_Entry).Info)
1399
1400
1401/** Checks if the test is enabled and decompresses the test data. */
1402static int SubTestAndCheckIfEnabledAndDecompress(const char *pszName, void const *pvCompressed, uint32_t cbCompressed,
1403 size_t cbEntry, void **ppvTests, uint32_t *pcTests, IEMTESTENTRYINFO *pInfo)
1404{
1405 if (SubTestAndCheckIfEnabled(pszName))
1406 {
1407 int const rc = DecompressBinaryTest(pvCompressed, cbCompressed, cbEntry, pszName, ppvTests, pcTests, pInfo);
1408 if (RT_SUCCESS(rc))
1409 return true;
1410 }
1411 return false;
1412}
1413
1414#define SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_Entry) \
1415 SubTestAndCheckIfEnabledAndDecompress((a_Entry).pszName, (a_Entry).pvCompressedTests, *(a_Entry).pcbCompressedTests, \
1416 sizeof((a_Entry).paTests[0]), \
1417 (void **)&(a_Entry).paTests, &(a_Entry).cTests, &(a_Entry).Info)
1418
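/* For illustration, a test function typically consumes an entry like this
   (table and type names match the binary-operation subtests declared further
   down; the loop body is only sketched):

       if (SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aBinU32[iFn]))
       {
           BINU32_TEST_T const * const paTests = g_aBinU32[iFn].paTests;
           uint32_t const              cTests  = g_aBinU32[iFn].cTests;
           for (uint32_t iTest = 0; iTest < cTests; iTest++)
           {
               // ... run the entry's worker(s) on the recorded inputs and
               //     compare against the recorded outputs ...
           }
           FREE_DECOMPRESSED_TESTS(g_aBinU32[iFn]);
       }
*/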
1419
1420static const char *EFlagsDiff(uint32_t fActual, uint32_t fExpected)
1421{
1422 if (fActual == fExpected)
1423 return "";
1424
1425 uint32_t const fXor = fActual ^ fExpected;
1426 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1427 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1428
1429 static struct
1430 {
1431 const char *pszName;
1432 uint32_t fFlag;
1433 } const s_aFlags[] =
1434 {
1435#define EFL_ENTRY(a_Flags) { #a_Flags, X86_EFL_ ## a_Flags }
1436 EFL_ENTRY(CF),
1437 EFL_ENTRY(PF),
1438 EFL_ENTRY(AF),
1439 EFL_ENTRY(ZF),
1440 EFL_ENTRY(SF),
1441 EFL_ENTRY(TF),
1442 EFL_ENTRY(IF),
1443 EFL_ENTRY(DF),
1444 EFL_ENTRY(OF),
1445 EFL_ENTRY(IOPL),
1446 EFL_ENTRY(NT),
1447 EFL_ENTRY(RF),
1448 EFL_ENTRY(VM),
1449 EFL_ENTRY(AC),
1450 EFL_ENTRY(VIF),
1451 EFL_ENTRY(VIP),
1452 EFL_ENTRY(ID),
1453 };
1454 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1455 if (s_aFlags[i].fFlag & fXor)
1456 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1457 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1458 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1459 return pszBuf;
1460}
1461
1462
1463static const char *FswDiff(uint16_t fActual, uint16_t fExpected)
1464{
1465 if (fActual == fExpected)
1466 return "";
1467
1468 uint16_t const fXor = fActual ^ fExpected;
1469 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1470 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1471
1472 static struct
1473 {
1474 const char *pszName;
1475 uint32_t fFlag;
1476 } const s_aFlags[] =
1477 {
1478#define FSW_ENTRY(a_Flags) { #a_Flags, X86_FSW_ ## a_Flags }
1479 FSW_ENTRY(IE),
1480 FSW_ENTRY(DE),
1481 FSW_ENTRY(ZE),
1482 FSW_ENTRY(OE),
1483 FSW_ENTRY(UE),
1484 FSW_ENTRY(PE),
1485 FSW_ENTRY(SF),
1486 FSW_ENTRY(ES),
1487 FSW_ENTRY(C0),
1488 FSW_ENTRY(C1),
1489 FSW_ENTRY(C2),
1490 FSW_ENTRY(C3),
1491 FSW_ENTRY(B),
1492 };
1493 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1494 if (s_aFlags[i].fFlag & fXor)
1495 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1496 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1497 if (fXor & X86_FSW_TOP_MASK)
1498 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "/TOP%u!%u",
1499 X86_FSW_TOP_GET(fActual), X86_FSW_TOP_GET(fExpected));
1500#if 0 /* For debugging fprem & fprem1 */
1501 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " - Q=%d (vs %d)",
1502 X86_FSW_CX_TO_QUOTIENT(fActual), X86_FSW_CX_TO_QUOTIENT(fExpected));
1503#endif
1504 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1505 return pszBuf;
1506}
1507
1508
1509static const char *MxcsrDiff(uint32_t fActual, uint32_t fExpected)
1510{
1511 if (fActual == fExpected)
1512 return "";
1513
1514 uint16_t const fXor = fActual ^ fExpected;
1515 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1516 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1517
1518 static struct
1519 {
1520 const char *pszName;
1521 uint32_t fFlag;
1522 } const s_aFlags[] =
1523 {
1524#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1525 MXCSR_ENTRY(IE),
1526 MXCSR_ENTRY(DE),
1527 MXCSR_ENTRY(ZE),
1528 MXCSR_ENTRY(OE),
1529 MXCSR_ENTRY(UE),
1530 MXCSR_ENTRY(PE),
1531
1532 MXCSR_ENTRY(IM),
1533 MXCSR_ENTRY(DM),
1534 MXCSR_ENTRY(ZM),
1535 MXCSR_ENTRY(OM),
1536 MXCSR_ENTRY(UM),
1537 MXCSR_ENTRY(PM),
1538
1539 MXCSR_ENTRY(DAZ),
1540 MXCSR_ENTRY(FZ),
1541#undef MXCSR_ENTRY
1542 };
1543 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1544 if (s_aFlags[i].fFlag & fXor)
1545 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1546 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1547 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1548 return pszBuf;
1549}
1550
1551
1552static const char *FormatFcw(uint16_t fFcw)
1553{
1554 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1555
1556 const char *pszPC = NULL; /* (msc+gcc are too stupid) */
1557 switch (fFcw & X86_FCW_PC_MASK)
1558 {
1559 case X86_FCW_PC_24: pszPC = "PC24"; break;
1560 case X86_FCW_PC_RSVD: pszPC = "PCRSVD!"; break;
1561 case X86_FCW_PC_53: pszPC = "PC53"; break;
1562 case X86_FCW_PC_64: pszPC = "PC64"; break;
1563 }
1564
1565 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1566 switch (fFcw & X86_FCW_RC_MASK)
1567 {
1568 case X86_FCW_RC_NEAREST: pszRC = "NEAR"; break;
1569 case X86_FCW_RC_DOWN: pszRC = "DOWN"; break;
1570 case X86_FCW_RC_UP: pszRC = "UP"; break;
1571 case X86_FCW_RC_ZERO: pszRC = "ZERO"; break;
1572 }
1573 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s %s", pszPC, pszRC);
1574
1575 static struct
1576 {
1577 const char *pszName;
1578 uint32_t fFlag;
1579 } const s_aFlags[] =
1580 {
1581#define FCW_ENTRY(a_Flags) { #a_Flags, X86_FCW_ ## a_Flags }
1582 FCW_ENTRY(IM),
1583 FCW_ENTRY(DM),
1584 FCW_ENTRY(ZM),
1585 FCW_ENTRY(OM),
1586 FCW_ENTRY(UM),
1587 FCW_ENTRY(PM),
1588 { "6M", 64 },
1589 };
1590 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1591 if (fFcw & s_aFlags[i].fFlag)
1592 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1593
1594 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1595 return pszBuf;
1596}
1597
1598
1599static const char *FormatMxcsr(uint32_t fMxcsr)
1600{
1601 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1602
1603 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1604 switch (fMxcsr & X86_MXCSR_RC_MASK)
1605 {
1606 case X86_MXCSR_RC_NEAREST: pszRC = "NEAR"; break;
1607 case X86_MXCSR_RC_DOWN: pszRC = "DOWN"; break;
1608 case X86_MXCSR_RC_UP: pszRC = "UP"; break;
1609 case X86_MXCSR_RC_ZERO: pszRC = "ZERO"; break;
1610 }
1611
1612 const char *pszDAZ = fMxcsr & X86_MXCSR_DAZ ? " DAZ" : "";
1613 const char *pszFZ = fMxcsr & X86_MXCSR_FZ ? " FZ" : "";
1614 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s%s%s", pszRC, pszDAZ, pszFZ);
1615
1616 static struct
1617 {
1618 const char *pszName;
1619 uint32_t fFlag;
1620 } const s_aFlags[] =
1621 {
1622#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1623 MXCSR_ENTRY(IE),
1624 MXCSR_ENTRY(DE),
1625 MXCSR_ENTRY(ZE),
1626 MXCSR_ENTRY(OE),
1627 MXCSR_ENTRY(UE),
1628 MXCSR_ENTRY(PE),
1629
1630 MXCSR_ENTRY(IM),
1631 MXCSR_ENTRY(DM),
1632 MXCSR_ENTRY(ZM),
1633 MXCSR_ENTRY(OM),
1634 MXCSR_ENTRY(UM),
1635 MXCSR_ENTRY(PM),
1636 { "6M", 64 },
1637 };
1638 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1639 if (fMxcsr & s_aFlags[i].fFlag)
1640 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1641
1642 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1643 return pszBuf;
1644}
1645
1646
1647static const char *FormatR80(PCRTFLOAT80U pr80)
1648{
1649 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1650 RTStrFormatR80(pszBuf, sizeof(g_aszBuf[0]), pr80, 0, 0, RTSTR_F_SPECIAL);
1651 return pszBuf;
1652}
1653
1654
1655static const char *FormatR64(PCRTFLOAT64U pr64)
1656{
1657 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1658 RTStrFormatR64(pszBuf, sizeof(g_aszBuf[0]), pr64, 0, 0, RTSTR_F_SPECIAL);
1659 return pszBuf;
1660}
1661
1662
1663static const char *FormatR32(PCRTFLOAT32U pr32)
1664{
1665 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1666 RTStrFormatR32(pszBuf, sizeof(g_aszBuf[0]), pr32, 0, 0, RTSTR_F_SPECIAL);
1667 return pszBuf;
1668}
1669
1670
1671static const char *FormatD80(PCRTPBCD80U pd80)
1672{
1673 /* There is only one indefinite encoding (same as for 80-bit
1674 floating point), so get it out of the way first: */
1675 if (RTPBCD80U_IS_INDEFINITE(pd80))
1676 return "Ind";
1677
1678 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1679 size_t off = 0;
1680 pszBuf[off++] = pd80->s.fSign ? '-' : '+';
1681 unsigned cBadDigits = 0;
1682 size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
1683 while (iPair-- > 0)
1684 {
1685 static const char s_szDigits[] = "0123456789abcdef";
1686 static const uint8_t s_bBadDigits[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
1687 pszBuf[off++] = s_szDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])];
1688 pszBuf[off++] = s_szDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1689 cBadDigits += s_bBadDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])]
1690 + s_bBadDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1691 }
1692 if (cBadDigits || pd80->s.uPad != 0)
1693 off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, "[%u,%#x]", cBadDigits, pd80->s.uPad);
1694 pszBuf[off] = '\0';
1695 return pszBuf;
1696}
1697
1698
1699#if 0
1700static const char *FormatI64(int64_t const *piVal)
1701{
1702 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1703 RTStrFormatU64(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1704 return pszBuf;
1705}
1706#endif
1707
1708
1709static const char *FormatI32(int32_t const *piVal)
1710{
1711 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1712 RTStrFormatU32(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1713 return pszBuf;
1714}
1715
1716
1717static const char *FormatI16(int16_t const *piVal)
1718{
1719 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1720 RTStrFormatU16(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1721 return pszBuf;
1722}
1723
1724
1725static const char *FormatU128(PCRTUINT128U puVal)
1726{
1727 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1728 RTStrFormatU128(pszBuf, sizeof(g_aszBuf[0]), puVal, 16, 0, 0, RTSTR_F_SPECIAL);
1729 return pszBuf;
1730}
1731
1732
1733/*
1734 * Binary operations.
1735 */
1736TYPEDEF_SUBTEST_TYPE(BINU8_T, BINU8_TEST_T, PFNIEMAIMPLBINU8);
1737TYPEDEF_SUBTEST_TYPE(BINU16_T, BINU16_TEST_T, PFNIEMAIMPLBINU16);
1738TYPEDEF_SUBTEST_TYPE(BINU32_T, BINU32_TEST_T, PFNIEMAIMPLBINU32);
1739TYPEDEF_SUBTEST_TYPE(BINU64_T, BINU64_TEST_T, PFNIEMAIMPLBINU64);
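/* A minimal usage sketch (not compiled) of the worker calling convention the
   binary-op tests below exercise: the destination and EFLAGS are updated in
   place while the source is passed by value. The values and the sketch
   function name are made up for illustration only. */
#if 0
static void BinU32UsageSketch(void)
{
    uint32_t fEfl = X86_EFL_RA1_MASK;                      /* only the always-one bit set on input */
    uint32_t uDst = UINT32_C(0x00000001);
    iemAImpl_add_u32(&uDst, UINT32_C(0xffffffff), &fEfl);  /* uDst becomes 0; CF and ZF end up set */
}
#endif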
1740
1741#ifdef TSTIEMAIMPL_WITH_GENERATOR
1742# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
1743static RTEXITCODE BinU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
1744{ \
1745 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aBinU ## a_cBits); iFn++) \
1746 { \
1747 PFNIEMAIMPLBINU ## a_cBits const pfn = g_aBinU ## a_cBits[iFn].pfnNative \
1748 ? g_aBinU ## a_cBits[iFn].pfnNative : g_aBinU ## a_cBits[iFn].pfn; \
1749 IEMBINARYOUTPUT BinOut; \
1750 if ( g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
1751 && g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
1752 continue; \
1753 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aBinU ## a_cBits[iFn]), RTEXITCODE_FAILURE); \
1754 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1755 { \
1756 a_TestType Test; \
1757 Test.fEflIn = RandEFlags(); \
1758 Test.fEflOut = Test.fEflIn; \
1759 Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
1760 Test.uDstOut = Test.uDstIn; \
1761 Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
1762 if (g_aBinU ## a_cBits[iFn].uExtra) \
1763 Test.uSrcIn &= a_cBits - 1; /* Restrict bit index according to operand width */ \
1764 Test.uMisc = 0; \
1765 pfn(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut); \
1766 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
1767 } \
1768 for (uint32_t iTest = 0; iTest < g_aBinU ## a_cBits[iFn].cFixedTests; iTest++ ) \
1769 { \
1770 a_TestType Test; \
1771 Test.fEflIn = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].fEflIn == UINT32_MAX ? RandEFlags() \
1772 : g_aBinU ## a_cBits[iFn].paFixedTests[iTest].fEflIn; \
1773 Test.fEflOut = Test.fEflIn; \
1774 Test.uDstIn = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].uDstIn; \
1775 Test.uDstOut = Test.uDstIn; \
1776 Test.uSrcIn = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].uSrcIn; \
1777 Test.uMisc = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].uMisc; \
1778 pfn(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut); \
1779 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
1780 } \
1781 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
1782 } \
1783 return RTEXITCODE_SUCCESS; \
1784} \
1785DUMP_ALL_FN(BinU ## a_cBits, g_aBinU ## a_cBits)
1786
1787#else
1788# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType)
1789#endif
1790
1791
1792/** Based on a quick probe run, guess how long to run the benchmark. */
1793static uint32_t EstimateIterations(uint32_t cProbeIterations, uint64_t cNsProbe)
1794{
1795 uint64_t cPicoSecPerIteration = cNsProbe * 1000 / cProbeIterations;
1796 uint64_t cIterations = g_cPicoSecBenchmark / cPicoSecPerIteration;
1797 if (cIterations > _2G)
1798 return _2G;
1799 if (cIterations < _4K)
1800 return _4K;
1801 return RT_ALIGN_32((uint32_t)cIterations, _4K);
1802}
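/* Worked example with made-up numbers: a 64K-iteration probe that took 1638400 ns
   works out to 1638400000 ps / 65536 = 25000 ps per call. With a benchmark budget
   of 256000000000 ps (0.256 s) that suggests 10240000 iterations, which is already
   a 4K multiple and inside the [_4K, _2G] clamp, so it is returned unchanged. */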
1803
1804
1805#define TEST_BINARY_OPS(a_cBits, a_uType, a_Fmt, a_TestType, a_aSubTests) \
1806GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
1807\
1808static uint64_t BinU ## a_cBits ## Bench(uint32_t cIterations, PFNIEMAIMPLBINU ## a_cBits pfn, a_TestType const *pEntry) \
1809{ \
1810 uint32_t const fEflIn = pEntry->fEflIn; \
1811 a_uType const uDstIn = pEntry->uDstIn; \
1812 a_uType const uSrcIn = pEntry->uSrcIn; \
1813 cIterations /= 4; \
1814 RTThreadYield(); \
1815 uint64_t const nsStart = RTTimeNanoTS(); \
1816 for (uint32_t i = 0; i < cIterations; i++) \
1817 { \
1818 uint32_t fBenchEfl = fEflIn; \
1819 a_uType uBenchDst = uDstIn; \
1820 pfn(&uBenchDst, uSrcIn, &fBenchEfl); \
1821 \
1822 fBenchEfl = fEflIn; \
1823 uBenchDst = uDstIn; \
1824 pfn(&uBenchDst, uSrcIn, &fBenchEfl); \
1825 \
1826 fBenchEfl = fEflIn; \
1827 uBenchDst = uDstIn; \
1828 pfn(&uBenchDst, uSrcIn, &fBenchEfl); \
1829 \
1830 fBenchEfl = fEflIn; \
1831 uBenchDst = uDstIn; \
1832 pfn(&uBenchDst, uSrcIn, &fBenchEfl); \
1833 } \
1834 return RTTimeNanoTS() - nsStart; \
1835} \
1836\
1837static void BinU ## a_cBits ## Test(void) \
1838{ \
1839 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
1840 { \
1841 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
1842 continue; \
1843 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
1844 uint32_t const cTests = a_aSubTests[iFn].cTests; \
1845 PFNIEMAIMPLBINU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
1846 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
1847 if (!cTests) { RTTestSkipped(g_hTest, "no tests"); continue; } \
1848 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
1849 { \
1850 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1851 { \
1852 uint32_t fEfl = paTests[iTest].fEflIn; \
1853 a_uType uDst = paTests[iTest].uDstIn; \
1854 pfn(&uDst, paTests[iTest].uSrcIn, &fEfl); \
1855 if ( uDst != paTests[iTest].uDstOut \
1856 || fEfl != paTests[iTest].fEflOut ) \
1857 RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s - %s\n", \
1858 iTest, !iVar ? "" : "/n", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
1859 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
1860 EFlagsDiff(fEfl, paTests[iTest].fEflOut), \
1861 uDst == paTests[iTest].uDstOut ? "eflags" : fEfl == paTests[iTest].fEflOut ? "dst" : "both"); \
1862 else \
1863 { \
1864 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1865 *g_pfEfl = paTests[iTest].fEflIn; \
1866 pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, g_pfEfl); \
1867 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
1868 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
1869 } \
1870 } \
1871 \
1872 /* Benchmark if all succeeded. */ \
1873 if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0) \
1874 { \
1875 uint32_t const iTest = cTests / 2; \
1876 uint32_t const cIterations = EstimateIterations(_64K, BinU ## a_cBits ## Bench(_64K, pfn, &paTests[iTest])); \
1877 uint64_t const cNsRealRun = BinU ## a_cBits ## Bench(cIterations, pfn, &paTests[iTest]); \
1878 RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL, \
1879 "%s%s", a_aSubTests[iFn].pszName, iVar ? "-native" : ""); \
1880 } \
1881 \
1882 /* Next variation is native. */ \
1883 pfn = a_aSubTests[iFn].pfnNative; \
1884 } \
1885 FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
1886 } \
1887}
1888
1889
1890/*
1891 * 8-bit binary operations.
1892 */
1893static BINU8_T g_aBinU8[] =
1894{
1895 ENTRY_BIN(add_u8),
1896 ENTRY_BIN(add_u8_locked),
1897 ENTRY_BIN(adc_u8),
1898 ENTRY_BIN(adc_u8_locked),
1899 ENTRY_BIN(sub_u8),
1900 ENTRY_BIN(sub_u8_locked),
1901 ENTRY_BIN(sbb_u8),
1902 ENTRY_BIN(sbb_u8_locked),
1903 ENTRY_BIN(or_u8),
1904 ENTRY_BIN(or_u8_locked),
1905 ENTRY_BIN(xor_u8),
1906 ENTRY_BIN(xor_u8_locked),
1907 ENTRY_BIN(and_u8),
1908 ENTRY_BIN(and_u8_locked),
1909 ENTRY_BIN_PFN_CAST(cmp_u8, PFNIEMAIMPLBINU8),
1910 ENTRY_BIN_PFN_CAST(test_u8, PFNIEMAIMPLBINU8),
1911};
1912TEST_BINARY_OPS(8, uint8_t, "%#04x", BINU8_TEST_T, g_aBinU8)
1913
1914
1915/*
1916 * 16-bit binary operations.
1917 */
1918#ifdef TSTIEMAIMPL_WITH_GENERATOR
1919static const BINU16_TEST_T g_aFixedTests_add_u16[] =
1920{
1921 /* efl in, efl out, uDstIn, uDstOut, uSrcIn, uMisc */
1922 { UINT32_MAX, 0, 1, 0, UINT16_MAX, 0 },
1923};
1924#endif
1925static BINU16_T g_aBinU16[] =
1926{
1927 ENTRY_BIN_FIX(add_u16),
1928 ENTRY_BIN(add_u16_locked),
1929 ENTRY_BIN(adc_u16),
1930 ENTRY_BIN(adc_u16_locked),
1931 ENTRY_BIN(sub_u16),
1932 ENTRY_BIN(sub_u16_locked),
1933 ENTRY_BIN(sbb_u16),
1934 ENTRY_BIN(sbb_u16_locked),
1935 ENTRY_BIN(or_u16),
1936 ENTRY_BIN(or_u16_locked),
1937 ENTRY_BIN(xor_u16),
1938 ENTRY_BIN(xor_u16_locked),
1939 ENTRY_BIN(and_u16),
1940 ENTRY_BIN(and_u16_locked),
1941 ENTRY_BIN_PFN_CAST(cmp_u16, PFNIEMAIMPLBINU16),
1942 ENTRY_BIN_PFN_CAST(test_u16, PFNIEMAIMPLBINU16),
1943 ENTRY_BIN_PFN_CAST_EX(bt_u16, PFNIEMAIMPLBINU16, 1),
1944 ENTRY_BIN_EX(btc_u16, 1),
1945 ENTRY_BIN_EX(btc_u16_locked, 1),
1946 ENTRY_BIN_EX(btr_u16, 1),
1947 ENTRY_BIN_EX(btr_u16_locked, 1),
1948 ENTRY_BIN_EX(bts_u16, 1),
1949 ENTRY_BIN_EX(bts_u16_locked, 1),
1950 ENTRY_BIN_AMD( bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1951 ENTRY_BIN_INTEL(bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1952 ENTRY_BIN_AMD( bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1953 ENTRY_BIN_INTEL(bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1954 ENTRY_BIN_AMD( imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1955 ENTRY_BIN_INTEL(imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1956 ENTRY_BIN(arpl),
1957};
1958TEST_BINARY_OPS(16, uint16_t, "%#06x", BINU16_TEST_T, g_aBinU16)
1959
1960
1961/*
1962 * 32-bit binary operations.
1963 */
1964#ifdef TSTIEMAIMPL_WITH_GENERATOR
1965static const BINU32_TEST_T g_aFixedTests_add_u32[] =
1966{
1967 /* efl in, efl out, uDstIn, uDstOut, uSrcIn, uMisc */
1968 { UINT32_MAX, 0, 1, 0, UINT32_MAX, 0 },
1969};
1970#endif
1971static BINU32_T g_aBinU32[] =
1972{
1973 ENTRY_BIN_FIX(add_u32),
1974 ENTRY_BIN(add_u32_locked),
1975 ENTRY_BIN(adc_u32),
1976 ENTRY_BIN(adc_u32_locked),
1977 ENTRY_BIN(sub_u32),
1978 ENTRY_BIN(sub_u32_locked),
1979 ENTRY_BIN(sbb_u32),
1980 ENTRY_BIN(sbb_u32_locked),
1981 ENTRY_BIN(or_u32),
1982 ENTRY_BIN(or_u32_locked),
1983 ENTRY_BIN(xor_u32),
1984 ENTRY_BIN(xor_u32_locked),
1985 ENTRY_BIN(and_u32),
1986 ENTRY_BIN(and_u32_locked),
1987 ENTRY_BIN_PFN_CAST(cmp_u32, PFNIEMAIMPLBINU32),
1988 ENTRY_BIN_PFN_CAST(test_u32, PFNIEMAIMPLBINU32),
1989 ENTRY_BIN_PFN_CAST_EX(bt_u32, PFNIEMAIMPLBINU32, 1),
1990 ENTRY_BIN_EX(btc_u32, 1),
1991 ENTRY_BIN_EX(btc_u32_locked, 1),
1992 ENTRY_BIN_EX(btr_u32, 1),
1993 ENTRY_BIN_EX(btr_u32_locked, 1),
1994 ENTRY_BIN_EX(bts_u32, 1),
1995 ENTRY_BIN_EX(bts_u32_locked, 1),
1996 ENTRY_BIN_AMD( bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1997 ENTRY_BIN_INTEL(bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1998 ENTRY_BIN_AMD( bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1999 ENTRY_BIN_INTEL(bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
2000 ENTRY_BIN_AMD( imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
2001 ENTRY_BIN_INTEL(imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
2002 ENTRY_BIN(adcx_u32),
2003 ENTRY_BIN(adox_u32),
2004};
2005TEST_BINARY_OPS(32, uint32_t, "%#010RX32", BINU32_TEST_T, g_aBinU32)
2006
2007
2008/*
2009 * 64-bit binary operations.
2010 */
2011#ifdef TSTIEMAIMPL_WITH_GENERATOR
2012static const BINU64_TEST_T g_aFixedTests_add_u64[] =
2013{
2014 /* efl in, efl out, uDstIn, uDstOut, uSrcIn, uMisc */
2015 { UINT32_MAX, 0, 1, 0, UINT64_MAX, 0 },
2016};
2017#endif
2018static BINU64_T g_aBinU64[] =
2019{
2020 ENTRY_BIN_FIX(add_u64),
2021 ENTRY_BIN(add_u64_locked),
2022 ENTRY_BIN(adc_u64),
2023 ENTRY_BIN(adc_u64_locked),
2024 ENTRY_BIN(sub_u64),
2025 ENTRY_BIN(sub_u64_locked),
2026 ENTRY_BIN(sbb_u64),
2027 ENTRY_BIN(sbb_u64_locked),
2028 ENTRY_BIN(or_u64),
2029 ENTRY_BIN(or_u64_locked),
2030 ENTRY_BIN(xor_u64),
2031 ENTRY_BIN(xor_u64_locked),
2032 ENTRY_BIN(and_u64),
2033 ENTRY_BIN(and_u64_locked),
2034 ENTRY_BIN_PFN_CAST(cmp_u64, PFNIEMAIMPLBINU64),
2035 ENTRY_BIN_PFN_CAST(test_u64, PFNIEMAIMPLBINU64),
2036 ENTRY_BIN_PFN_CAST_EX(bt_u64, PFNIEMAIMPLBINU64, 1),
2037 ENTRY_BIN_EX(btc_u64, 1),
2038 ENTRY_BIN_EX(btc_u64_locked, 1),
2039 ENTRY_BIN_EX(btr_u64, 1),
2040 ENTRY_BIN_EX(btr_u64_locked, 1),
2041 ENTRY_BIN_EX(bts_u64, 1),
2042 ENTRY_BIN_EX(bts_u64_locked, 1),
2043 ENTRY_BIN_AMD( bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
2044 ENTRY_BIN_INTEL(bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
2045 ENTRY_BIN_AMD( bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
2046 ENTRY_BIN_INTEL(bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
2047 ENTRY_BIN_AMD( imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
2048 ENTRY_BIN_INTEL(imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
2049 ENTRY_BIN(adcx_u64),
2050 ENTRY_BIN(adox_u64),
2051};
2052TEST_BINARY_OPS(64, uint64_t, "%#018RX64", BINU64_TEST_T, g_aBinU64)
2053
2054
2055/*
2056 * XCHG
2057 */
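/* For reference, the swap being verified below with made-up values: starting
   with *puMem = 0x11 and *puReg = 0x22, xchg_u8 (locked or unlocked) must leave
   *puMem = 0x22 and *puReg = 0x11; the same applies to the wider variants. */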
2058static void XchgTest(void)
2059{
2060 if (!SubTestAndCheckIfEnabled("xchg"))
2061 return;
2062 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU8, (uint8_t *pu8Mem, uint8_t *pu8Reg));
2063 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU16,(uint16_t *pu16Mem, uint16_t *pu16Reg));
2064 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU32,(uint32_t *pu32Mem, uint32_t *pu32Reg));
2065 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU64,(uint64_t *pu64Mem, uint64_t *pu64Reg));
2066
2067 static struct
2068 {
2069 uint8_t cb; uint64_t fMask;
2070 union
2071 {
2072 uintptr_t pfn;
2073 FNIEMAIMPLXCHGU8 *pfnU8;
2074 FNIEMAIMPLXCHGU16 *pfnU16;
2075 FNIEMAIMPLXCHGU32 *pfnU32;
2076 FNIEMAIMPLXCHGU64 *pfnU64;
2077 } u;
2078 }
2079 s_aXchgWorkers[] =
2080 {
2081 { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_locked } },
2082 { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_locked } },
2083 { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_locked } },
2084 { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_locked } },
2085 { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_unlocked } },
2086 { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_unlocked } },
2087 { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_unlocked } },
2088 { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_unlocked } },
2089 };
2090 for (size_t i = 0; i < RT_ELEMENTS(s_aXchgWorkers); i++)
2091 {
2092 RTUINT64U uIn1, uIn2, uMem, uDst;
2093 uMem.u = uIn1.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
2094 uDst.u = uIn2.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
2095 if (uIn1.u == uIn2.u)
2096 uDst.u = uIn2.u = ~uIn2.u;
2097
2098 switch (s_aXchgWorkers[i].cb)
2099 {
2100 case 1:
2101 s_aXchgWorkers[i].u.pfnU8(g_pu8, g_pu8Two);
2102 s_aXchgWorkers[i].u.pfnU8(&uMem.au8[0], &uDst.au8[0]);
2103 break;
2104 case 2:
2105 s_aXchgWorkers[i].u.pfnU16(g_pu16, g_pu16Two);
2106 s_aXchgWorkers[i].u.pfnU16(&uMem.Words.w0, &uDst.Words.w0);
2107 break;
2108 case 4:
2109 s_aXchgWorkers[i].u.pfnU32(g_pu32, g_pu32Two);
2110 s_aXchgWorkers[i].u.pfnU32(&uMem.DWords.dw0, &uDst.DWords.dw0);
2111 break;
2112 case 8:
2113 s_aXchgWorkers[i].u.pfnU64(g_pu64, g_pu64Two);
2114 s_aXchgWorkers[i].u.pfnU64(&uMem.u, &uDst.u);
2115 break;
2116 default: RTTestFailed(g_hTest, "%d\n", s_aXchgWorkers[i].cb); break;
2117 }
2118
2119 if (uMem.u != uIn2.u || uDst.u != uIn1.u)
2120 RTTestFailed(g_hTest, "i=%u: %#RX64, %#RX64 -> %#RX64, %#RX64\n", i, uIn1.u, uIn2.u, uMem.u, uDst.u);
2121 }
2122}
2123
2124
2125/*
2126 * XADD
2127 */
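/* XADD reuses the add_u<N> test data: the destination receives uDstIn + uSrcIn
   (i.e. uDstOut from the add test) and the source operand receives the old
   destination value. So for a made-up add record with uDstIn=1, uSrcIn=2 and
   uDstOut=3, the checks below expect *g_pu<N> == 3 and uSrc == 1 afterwards,
   with the same EFLAGS as the plain add. */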
2128static void XaddTest(void)
2129{
2130#define TEST_XADD(a_cBits, a_Type, a_Fmt) do { \
2131 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXADDU ## a_cBits, (a_Type *, a_Type *, uint32_t *)); \
2132 static struct \
2133 { \
2134 const char * const pszName; \
2135 FNIEMAIMPLXADDU ## a_cBits * const pfn; \
2136 void const * const pvCompressedTests; \
2137 uint32_t const * const pcbCompressedTests; \
2138 BINU ## a_cBits ## _TEST_T const *paTests; \
2139 uint32_t cTests; \
2140 IEMTESTENTRYINFO Info; \
2141 } s_aFuncs[] = \
2142 { \
2143 { "xadd_u" # a_cBits, iemAImpl_xadd_u ## a_cBits, \
2144 g_abTests_add_u ## a_cBits, &g_cbTests_add_u ## a_cBits }, \
 2145 { "xadd_u" # a_cBits "_locked", iemAImpl_xadd_u ## a_cBits ## _locked, \
2146 g_abTests_add_u ## a_cBits, &g_cbTests_add_u ## a_cBits }, \
2147 }; \
2148 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
2149 { \
2150 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(s_aFuncs[iFn])) continue; \
2151 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
2152 uint32_t const cTests = s_aFuncs[iFn].cTests; \
2153 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2154 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2155 { \
2156 uint32_t fEfl = paTests[iTest].fEflIn; \
2157 a_Type uSrc = paTests[iTest].uSrcIn; \
2158 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2159 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uSrc, &fEfl); \
2160 if ( fEfl != paTests[iTest].fEflOut \
2161 || *g_pu ## a_cBits != paTests[iTest].uDstOut \
2162 || uSrc != paTests[iTest].uDstIn) \
2163 RTTestFailed(g_hTest, "%s/#%u: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt " src=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
2164 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
2165 fEfl, *g_pu ## a_cBits, uSrc, paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].uDstIn, \
2166 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2167 } \
2168 FREE_DECOMPRESSED_TESTS(s_aFuncs[iFn]); \
2169 } \
2170 } while(0)
2171 TEST_XADD(8, uint8_t, "%#04x");
2172 TEST_XADD(16, uint16_t, "%#06x");
2173 TEST_XADD(32, uint32_t, "%#010RX32");
 2174 TEST_XADD(64, uint64_t, "%#018RX64");
2175}
2176
2177
2178/*
2179 * CMPXCHG
2180 */
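/* CMPXCHG reuses the cmp_u<N> test data. The accumulator (uA) is compared with
   the destination: on a mismatch ZF ends up clear, the destination is left
   untouched and uA receives the old destination value; on a match ZF is set and
   the destination receives the new value. With made-up values *g_pu<N> = 5,
   uA = 5, uNew = 9 both the locked and unlocked workers must store 9 and set ZF,
   whereas *g_pu<N> = 6, uA = 5 must leave the 6 in place, clear ZF and return 6
   in uA. The remaining EFLAGS are those of the corresponding cmp/sub. */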
2181
2182static void CmpXchgTest(void)
2183{
2184#define TEST_CMPXCHG(a_cBits, a_Type, a_Fmt) do {\
2185 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHGU ## a_cBits, (a_Type *, a_Type *, a_Type, uint32_t *)); \
2186 static struct \
2187 { \
2188 const char * const pszName; \
2189 FNIEMAIMPLCMPXCHGU ## a_cBits * const pfn; \
2190 PFNIEMAIMPLBINU ## a_cBits const pfnSub; \
2191 void const * const pvCompressedTests; \
2192 uint32_t const * const pcbCompressedTests; \
2193 BINU ## a_cBits ## _TEST_T const *paTests; \
2194 uint32_t cTests; \
2195 IEMTESTENTRYINFO Info; \
2196 } s_aFuncs[] = \
2197 { \
2198 { "cmpxchg_u" # a_cBits, iemAImpl_cmpxchg_u ## a_cBits, iemAImpl_sub_u ## a_cBits, \
2199 g_abTests_cmp_u ## a_cBits, &g_cbTests_cmp_u ## a_cBits }, \
2200 { "cmpxchg_u" # a_cBits "_locked", iemAImpl_cmpxchg_u ## a_cBits ## _locked, iemAImpl_sub_u ## a_cBits, \
2201 g_abTests_cmp_u ## a_cBits, &g_cbTests_cmp_u ## a_cBits }, \
2202 }; \
2203 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
2204 { \
2205 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(s_aFuncs[iFn])) continue; \
2206 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
2207 uint32_t const cTests = s_aFuncs[iFn].cTests; \
2208 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2209 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2210 { \
2211 /* as is (99% likely to be negative). */ \
2212 uint32_t fEfl = paTests[iTest].fEflIn; \
2213 a_Type const uNew = paTests[iTest].uSrcIn + 0x42; \
2214 a_Type uA = paTests[iTest].uDstIn; \
2215 *g_pu ## a_cBits = paTests[iTest].uSrcIn; \
2216 a_Type const uExpect = uA != paTests[iTest].uSrcIn ? paTests[iTest].uSrcIn : uNew; \
2217 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
2218 if ( fEfl != paTests[iTest].fEflOut \
2219 || *g_pu ## a_cBits != uExpect \
2220 || uA != paTests[iTest].uSrcIn) \
2221 RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
2222 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uSrcIn, paTests[iTest].uDstIn, \
2223 uNew, fEfl, *g_pu ## a_cBits, uA, paTests[iTest].fEflOut, uExpect, paTests[iTest].uSrcIn, \
2224 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2225 /* positive */ \
2226 uint32_t fEflExpect = paTests[iTest].fEflIn; \
2227 uA = paTests[iTest].uDstIn; \
2228 s_aFuncs[iFn].pfnSub(&uA, uA, &fEflExpect); \
2229 fEfl = paTests[iTest].fEflIn; \
2230 uA = paTests[iTest].uDstIn; \
2231 *g_pu ## a_cBits = uA; \
2232 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
2233 if ( fEfl != fEflExpect \
2234 || *g_pu ## a_cBits != uNew \
2235 || uA != paTests[iTest].uDstIn) \
2236 RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
2237 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uDstIn, \
2238 uNew, fEfl, *g_pu ## a_cBits, uA, fEflExpect, uNew, paTests[iTest].uDstIn, \
2239 EFlagsDiff(fEfl, fEflExpect)); \
2240 } \
2241 FREE_DECOMPRESSED_TESTS(s_aFuncs[iFn]); \
2242 } \
2243 } while(0)
2244 TEST_CMPXCHG(8, uint8_t, "%#04RX8");
2245 TEST_CMPXCHG(16, uint16_t, "%#06x");
2246 TEST_CMPXCHG(32, uint32_t, "%#010RX32");
2247#if ARCH_BITS != 32 /* calling convention issue, skipping as it's an unsupported host */
 2248 TEST_CMPXCHG(64, uint64_t, "%#018RX64");
2249#endif
2250}
2251
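/* CMPXCHG8B: the 64-bit memory operand is compared against the EDX:EAX pair
   (uA below); on a match ZF is set and the ECX:EBX pair (uB) is stored, on a
   mismatch ZF is cleared and the memory value is loaded into EDX:EAX. Either
   way uB must be left untouched, which is what the RTTEST_CHECK on uB asserts. */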
2252static void CmpXchg8bTest(void)
2253{
2254 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG8B,(uint64_t *, PRTUINT64U, PRTUINT64U, uint32_t *));
2255 static struct
2256 {
2257 const char *pszName;
2258 FNIEMAIMPLCMPXCHG8B *pfn;
2259 } const s_aFuncs[] =
2260 {
2261 { "cmpxchg8b", iemAImpl_cmpxchg8b },
2262 { "cmpxchg8b_locked", iemAImpl_cmpxchg8b_locked },
2263 };
2264 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
2265 {
2266 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
2267 continue;
2268 for (uint32_t iTest = 0; iTest < 4; iTest += 2)
2269 {
2270 uint64_t const uOldValue = RandU64();
2271 uint64_t const uNewValue = RandU64();
2272
2273 /* positive test. */
2274 RTUINT64U uA, uB;
2275 uB.u = uNewValue;
2276 uA.u = uOldValue;
2277 *g_pu64 = uOldValue;
2278 uint32_t fEflIn = RandEFlags();
2279 uint32_t fEfl = fEflIn;
2280 s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
2281 if ( fEfl != (fEflIn | X86_EFL_ZF)
2282 || *g_pu64 != uNewValue
2283 || uA.u != uOldValue)
2284 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
2285 iTest, fEflIn, uOldValue, uOldValue, uNewValue,
2286 fEfl, *g_pu64, uA.u,
2287 (fEflIn | X86_EFL_ZF), uNewValue, uOldValue, EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
2288 RTTEST_CHECK(g_hTest, uB.u == uNewValue);
2289
2290 /* negative */
2291 uint64_t const uExpect = ~uOldValue;
2292 *g_pu64 = uExpect;
2293 uA.u = uOldValue;
2294 uB.u = uNewValue;
2295 fEfl = fEflIn = RandEFlags();
2296 s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
2297 if ( fEfl != (fEflIn & ~X86_EFL_ZF)
2298 || *g_pu64 != uExpect
2299 || uA.u != uExpect)
2300 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
2301 iTest + 1, fEflIn, uExpect, uOldValue, uNewValue,
2302 fEfl, *g_pu64, uA.u,
2303 (fEflIn & ~X86_EFL_ZF), uExpect, uExpect, EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
2304 RTTEST_CHECK(g_hTest, uB.u == uNewValue);
2305 }
2306 }
2307}
2308
2309static void CmpXchg16bTest(void)
2310{
2311 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG16B,(PRTUINT128U, PRTUINT128U, PRTUINT128U, uint32_t *));
2312 static struct
2313 {
2314 const char *pszName;
2315 FNIEMAIMPLCMPXCHG16B *pfn;
2316 } const s_aFuncs[] =
2317 {
2318 { "cmpxchg16b", iemAImpl_cmpxchg16b },
2319 { "cmpxchg16b_locked", iemAImpl_cmpxchg16b_locked },
2320#if !defined(RT_ARCH_ARM64)
2321 { "cmpxchg16b_fallback", iemAImpl_cmpxchg16b_fallback },
2322#endif
2323 };
2324 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
2325 {
2326 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
2327 continue;
2328#if !defined(IEM_WITHOUT_ASSEMBLY) && defined(RT_ARCH_AMD64)
2329 if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_CX16))
2330 {
2331 RTTestSkipped(g_hTest, "no hardware cmpxchg16b");
2332 continue;
2333 }
2334#endif
2335 for (uint32_t iTest = 0; iTest < 4; iTest += 2)
2336 {
2337 RTUINT128U const uOldValue = RandU128();
2338 RTUINT128U const uNewValue = RandU128();
2339
2340 /* positive test. */
2341 RTUINT128U uA, uB;
2342 uB = uNewValue;
2343 uA = uOldValue;
2344 *g_pu128 = uOldValue;
2345 uint32_t fEflIn = RandEFlags();
2346 uint32_t fEfl = fEflIn;
2347 s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
2348 if ( fEfl != (fEflIn | X86_EFL_ZF)
2349 || g_pu128->s.Lo != uNewValue.s.Lo
2350 || g_pu128->s.Hi != uNewValue.s.Hi
2351 || uA.s.Lo != uOldValue.s.Lo
2352 || uA.s.Hi != uOldValue.s.Hi)
2353 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
2354 " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
2355 " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
2356 iTest, fEflIn, uOldValue.s.Hi, uOldValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
2357 fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
2358 (fEflIn | X86_EFL_ZF), uNewValue.s.Hi, uNewValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo,
2359 EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
2360 RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
2361
2362 /* negative */
2363 RTUINT128U const uExpect = RTUINT128_INIT(~uOldValue.s.Hi, ~uOldValue.s.Lo);
2364 *g_pu128 = uExpect;
2365 uA = uOldValue;
2366 uB = uNewValue;
2367 fEfl = fEflIn = RandEFlags();
2368 s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
2369 if ( fEfl != (fEflIn & ~X86_EFL_ZF)
2370 || g_pu128->s.Lo != uExpect.s.Lo
2371 || g_pu128->s.Hi != uExpect.s.Hi
2372 || uA.s.Lo != uExpect.s.Lo
2373 || uA.s.Hi != uExpect.s.Hi)
2374 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
2375 " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
2376 " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
2377 iTest + 1, fEflIn, uExpect.s.Hi, uExpect.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
2378 fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
2379 (fEflIn & ~X86_EFL_ZF), uExpect.s.Hi, uExpect.s.Lo, uExpect.s.Hi, uExpect.s.Lo,
2380 EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
2381 RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
2382 }
2383 }
2384}
2385
2386
2387/*
2388 * Double shifts.
2389 *
2390 * Note! We use BINUxx_TEST_T with the shift value in the uMisc field.
2391 */
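/* Worked example with made-up operands: shld_u16 with uDstIn=0x1234,
   uSrcIn=0xabcd and uMisc=4 shifts the destination left by four and fills the
   vacated low bits from the top of the source, giving 0x234a, with CF taking
   the last bit shifted out of the destination (bit 12 of 0x1234, i.e. 1).
   shrd works the same way in the opposite direction. */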
2392#ifdef TSTIEMAIMPL_WITH_GENERATOR
2393# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2394static RTEXITCODE ShiftDblU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
2395{ \
2396 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2397 { \
2398 if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
2399 && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
2400 continue; \
2401 IEMBINARYOUTPUT BinOut; \
2402 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
2403 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2404 { \
2405 a_TestType Test; \
2406 Test.fEflIn = RandEFlags(); \
2407 Test.fEflOut = Test.fEflIn; \
2408 Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
2409 Test.uDstOut = Test.uDstIn; \
2410 Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
2411 Test.uMisc = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
2412 a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, Test.uMisc, &Test.fEflOut); \
2413 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
2414 } \
2415 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
2416 } \
2417 return RTEXITCODE_SUCCESS; \
2418} \
2419DUMP_ALL_FN(ShiftDblU ## a_cBits, a_aSubTests)
2420
2421#else
2422# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests)
2423#endif
2424
2425#define TEST_SHIFT_DBL(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2426TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTDBLU ## a_cBits); \
2427\
2428static a_SubTestType a_aSubTests[] = \
2429{ \
2430 ENTRY_BIN_AMD(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
2431 ENTRY_BIN_INTEL(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
2432 ENTRY_BIN_AMD(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
2433 ENTRY_BIN_INTEL(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
2434}; \
2435\
2436GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2437\
2438static void ShiftDblU ## a_cBits ## Test(void) \
2439{ \
2440 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2441 { \
2442 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
2443 continue; \
2444 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2445 uint32_t const cTests = a_aSubTests[iFn].cTests; \
2446 PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2447 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2448 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2449 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2450 { \
2451 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2452 { \
2453 uint32_t fEfl = paTests[iTest].fEflIn; \
2454 a_Type uDst = paTests[iTest].uDstIn; \
2455 pfn(&uDst, paTests[iTest].uSrcIn, paTests[iTest].uMisc, &fEfl); \
2456 if ( uDst != paTests[iTest].uDstOut \
2457 || fEfl != paTests[iTest].fEflOut) \
2458 RTTestFailed(g_hTest, "#%03u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " shift=%-2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s%s\n", \
2459 iTest, iVar == 0 ? "" : "/n", paTests[iTest].fEflIn, \
2460 paTests[iTest].uDstIn, paTests[iTest].uSrcIn, (unsigned)paTests[iTest].uMisc, \
2461 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2462 EFlagsDiff(fEfl, paTests[iTest].fEflOut), uDst == paTests[iTest].uDstOut ? "" : " dst!"); \
2463 else \
2464 { \
2465 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2466 *g_pfEfl = paTests[iTest].fEflIn; \
2467 pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, paTests[iTest].uMisc, g_pfEfl); \
2468 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2469 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2470 } \
2471 } \
2472 pfn = a_aSubTests[iFn].pfnNative; \
2473 } \
2474 FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
2475 } \
2476}
2477TEST_SHIFT_DBL(16, uint16_t, "%#06RX16", BINU16_TEST_T, SHIFT_DBL_U16_T, g_aShiftDblU16)
2478TEST_SHIFT_DBL(32, uint32_t, "%#010RX32", BINU32_TEST_T, SHIFT_DBL_U32_T, g_aShiftDblU32)
2479TEST_SHIFT_DBL(64, uint64_t, "%#018RX64", BINU64_TEST_T, SHIFT_DBL_U64_T, g_aShiftDblU64)
2480
2481#ifdef TSTIEMAIMPL_WITH_GENERATOR
2482static RTEXITCODE ShiftDblGenerate(uint32_t cTests, const char * const * papszNameFmts)
2483{
2484 RTEXITCODE rcExit = ShiftDblU16Generate(cTests, papszNameFmts);
2485 if (rcExit == RTEXITCODE_SUCCESS)
2486 rcExit = ShiftDblU32Generate(cTests, papszNameFmts);
2487 if (rcExit == RTEXITCODE_SUCCESS)
2488 rcExit = ShiftDblU64Generate(cTests, papszNameFmts);
2489 return rcExit;
2490}
2491
2492static RTEXITCODE ShiftDblDumpAll(const char * const * papszNameFmts)
2493{
2494 RTEXITCODE rcExit = ShiftDblU16DumpAll(papszNameFmts);
2495 if (rcExit == RTEXITCODE_SUCCESS)
2496 rcExit = ShiftDblU32DumpAll(papszNameFmts);
2497 if (rcExit == RTEXITCODE_SUCCESS)
2498 rcExit = ShiftDblU64DumpAll(papszNameFmts);
2499 return rcExit;
2500}
2501#endif
2502
2503static void ShiftDblTest(void)
2504{
2505 ShiftDblU16Test();
2506 ShiftDblU32Test();
2507 ShiftDblU64Test();
2508}
2509
2510
2511/*
2512 * Unary operators.
2513 *
 2514 * Note! We use BINUxx_TEST_T ignoring uSrcIn and uMisc.
2515 */
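/* Worked example with a made-up operand: neg_u8 on uDstIn=0x01 must yield
   uDstOut=0xff with CF set (CF is only clear when the input is zero), while
   not_u8 on the same input yields 0xfe and leaves EFLAGS untouched. The
   workers only take the destination and EFLAGS, hence uSrcIn/uMisc stay zero. */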
2516#ifdef TSTIEMAIMPL_WITH_GENERATOR
2517# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
2518static RTEXITCODE UnaryU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
2519{ \
2520 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
2521 { \
2522 IEMBINARYOUTPUT BinOut; \
2523 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aUnaryU ## a_cBits[iFn]), RTEXITCODE_FAILURE); \
2524 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2525 { \
2526 a_TestType Test; \
2527 Test.fEflIn = RandEFlags(); \
2528 Test.fEflOut = Test.fEflIn; \
2529 Test.uDstIn = RandU ## a_cBits(); \
2530 Test.uDstOut = Test.uDstIn; \
2531 Test.uSrcIn = 0; \
2532 Test.uMisc = 0; \
2533 g_aUnaryU ## a_cBits[iFn].pfn(&Test.uDstOut, &Test.fEflOut); \
2534 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
2535 } \
2536 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
2537 } \
2538 return RTEXITCODE_SUCCESS; \
2539} \
2540DUMP_ALL_FN(UnaryU ## a_cBits, g_aUnaryU ## a_cBits)
2541#else
2542# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType)
2543#endif
2544
2545#define TEST_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
2546TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLUNARYU ## a_cBits); \
2547static a_SubTestType g_aUnaryU ## a_cBits [] = \
2548{ \
2549 ENTRY_BIN(inc_u ## a_cBits), \
2550 ENTRY_BIN(inc_u ## a_cBits ## _locked), \
2551 ENTRY_BIN(dec_u ## a_cBits), \
2552 ENTRY_BIN(dec_u ## a_cBits ## _locked), \
2553 ENTRY_BIN(not_u ## a_cBits), \
2554 ENTRY_BIN(not_u ## a_cBits ## _locked), \
2555 ENTRY_BIN(neg_u ## a_cBits), \
2556 ENTRY_BIN(neg_u ## a_cBits ## _locked), \
2557}; \
2558\
2559GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
2560\
2561static void UnaryU ## a_cBits ## Test(void) \
2562{ \
2563 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
2564 { \
2565 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aUnaryU ## a_cBits[iFn])) \
2566 continue; \
2567 a_TestType const * const paTests = g_aUnaryU ## a_cBits[iFn].paTests; \
2568 uint32_t const cTests = g_aUnaryU ## a_cBits[iFn].cTests; \
2569 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2570 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2571 { \
2572 uint32_t fEfl = paTests[iTest].fEflIn; \
2573 a_Type uDst = paTests[iTest].uDstIn; \
2574 g_aUnaryU ## a_cBits[iFn].pfn(&uDst, &fEfl); \
2575 if ( uDst != paTests[iTest].uDstOut \
2576 || fEfl != paTests[iTest].fEflOut) \
2577 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
2578 iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, \
2579 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2580 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2581 else \
2582 { \
2583 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2584 *g_pfEfl = paTests[iTest].fEflIn; \
2585 g_aUnaryU ## a_cBits[iFn].pfn(g_pu ## a_cBits, g_pfEfl); \
2586 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2587 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2588 } \
2589 } \
2590 FREE_DECOMPRESSED_TESTS(g_aUnaryU ## a_cBits[iFn]); \
2591 } \
2592}
2593TEST_UNARY(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_UNARY_U8_T)
2594TEST_UNARY(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_UNARY_U16_T)
2595TEST_UNARY(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_UNARY_U32_T)
2596TEST_UNARY(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_UNARY_U64_T)
2597
2598#ifdef TSTIEMAIMPL_WITH_GENERATOR
2599static RTEXITCODE UnaryGenerate(uint32_t cTests, const char * const * papszNameFmts)
2600{
2601 RTEXITCODE rcExit = UnaryU8Generate(cTests, papszNameFmts);
2602 if (rcExit == RTEXITCODE_SUCCESS)
2603 rcExit = UnaryU16Generate(cTests, papszNameFmts);
2604 if (rcExit == RTEXITCODE_SUCCESS)
2605 rcExit = UnaryU32Generate(cTests, papszNameFmts);
2606 if (rcExit == RTEXITCODE_SUCCESS)
2607 rcExit = UnaryU64Generate(cTests, papszNameFmts);
2608 return rcExit;
2609}
2610
2611static RTEXITCODE UnaryDumpAll(const char * const * papszNameFmts)
2612{
2613 RTEXITCODE rcExit = UnaryU8DumpAll(papszNameFmts);
2614 if (rcExit == RTEXITCODE_SUCCESS)
2615 rcExit = UnaryU16DumpAll(papszNameFmts);
2616 if (rcExit == RTEXITCODE_SUCCESS)
2617 rcExit = UnaryU32DumpAll(papszNameFmts);
2618 if (rcExit == RTEXITCODE_SUCCESS)
2619 rcExit = UnaryU64DumpAll(papszNameFmts);
2620 return rcExit;
2621}
2622#endif
2623
2624static void UnaryTest(void)
2625{
2626 UnaryU8Test();
2627 UnaryU16Test();
2628 UnaryU32Test();
2629 UnaryU64Test();
2630}
2631
2632
2633/*
2634 * Shifts.
2635 *
 2636 * Note! We use BINUxx_TEST_T with the shift count in uMisc; uSrcIn is unused.
2637 */
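/* Worked example with a made-up operand: rol_u8 on uDstIn=0x81 with a count of
   one rotates the top bit into bit 0, giving 0x03 with CF=1 (CF mirrors the bit
   rotated into the LSB). The generator below additionally writes a second
   record with the incoming EFLAGS inverted so that both CF-in states get
   covered for rcl/rcr. */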
2638#ifdef TSTIEMAIMPL_WITH_GENERATOR
2639# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2640static RTEXITCODE ShiftU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
2641{ \
2642 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2643 { \
2644 if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
2645 && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
2646 continue; \
2647 IEMBINARYOUTPUT BinOut; \
2648 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
2649 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2650 { \
2651 a_TestType Test; \
2652 Test.fEflIn = RandEFlags(); \
2653 Test.fEflOut = Test.fEflIn; \
2654 Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
2655 Test.uDstOut = Test.uDstIn; \
2656 Test.uSrcIn = 0; \
2657 Test.uMisc = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
2658 a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
2659 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
2660 \
2661 Test.fEflIn = (~Test.fEflIn & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK; \
2662 Test.fEflOut = Test.fEflIn; \
2663 Test.uDstOut = Test.uDstIn; \
2664 a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
2665 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
2666 } \
2667 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
2668 } \
2669 return RTEXITCODE_SUCCESS; \
2670} \
2671DUMP_ALL_FN(ShiftU ## a_cBits, a_aSubTests)
2672#else
2673# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests)
2674#endif
2675
2676#define TEST_SHIFT(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2677TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTU ## a_cBits); \
2678static a_SubTestType a_aSubTests[] = \
2679{ \
2680 ENTRY_BIN_AMD( rol_u ## a_cBits, X86_EFL_OF), \
2681 ENTRY_BIN_INTEL(rol_u ## a_cBits, X86_EFL_OF), \
2682 ENTRY_BIN_AMD( ror_u ## a_cBits, X86_EFL_OF), \
2683 ENTRY_BIN_INTEL(ror_u ## a_cBits, X86_EFL_OF), \
2684 ENTRY_BIN_AMD( rcl_u ## a_cBits, X86_EFL_OF), \
2685 ENTRY_BIN_INTEL(rcl_u ## a_cBits, X86_EFL_OF), \
2686 ENTRY_BIN_AMD( rcr_u ## a_cBits, X86_EFL_OF), \
2687 ENTRY_BIN_INTEL(rcr_u ## a_cBits, X86_EFL_OF), \
2688 ENTRY_BIN_AMD( shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2689 ENTRY_BIN_INTEL(shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2690 ENTRY_BIN_AMD( shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2691 ENTRY_BIN_INTEL(shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2692 ENTRY_BIN_AMD( sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2693 ENTRY_BIN_INTEL(sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2694}; \
2695\
2696GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2697\
2698static void ShiftU ## a_cBits ## Test(void) \
2699{ \
2700 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2701 { \
2702 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
2703 continue; \
2704 PFNIEMAIMPLSHIFTU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2705 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2706 uint32_t const cTests = a_aSubTests[iFn].cTests; \
2707 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2708 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2709 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2710 { \
2711 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2712 { \
2713 uint32_t fEfl = paTests[iTest].fEflIn; \
2714 a_Type uDst = paTests[iTest].uDstIn; \
2715 pfn(&uDst, paTests[iTest].uMisc, &fEfl); \
2716 if ( uDst != paTests[iTest].uDstOut \
2717 || fEfl != paTests[iTest].fEflOut ) \
2718 RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " shift=%2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
2719 iTest, iVar == 0 ? "" : "/n", \
2720 paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uMisc, \
2721 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2722 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2723 else \
2724 { \
2725 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2726 *g_pfEfl = paTests[iTest].fEflIn; \
2727 pfn(g_pu ## a_cBits, paTests[iTest].uMisc, g_pfEfl); \
2728 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2729 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2730 } \
2731 } \
2732 pfn = a_aSubTests[iFn].pfnNative; \
2733 } \
2734 FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
2735 } \
2736}
2737TEST_SHIFT(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_BINARY_U8_T, g_aShiftU8)
2738TEST_SHIFT(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_BINARY_U16_T, g_aShiftU16)
2739TEST_SHIFT(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_BINARY_U32_T, g_aShiftU32)
2740TEST_SHIFT(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_BINARY_U64_T, g_aShiftU64)
2741
2742#ifdef TSTIEMAIMPL_WITH_GENERATOR
2743static RTEXITCODE ShiftGenerate(uint32_t cTests, const char * const * papszNameFmts)
2744{
2745 RTEXITCODE rcExit = ShiftU8Generate(cTests, papszNameFmts);
2746 if (rcExit == RTEXITCODE_SUCCESS)
2747 rcExit = ShiftU16Generate(cTests, papszNameFmts);
2748 if (rcExit == RTEXITCODE_SUCCESS)
2749 rcExit = ShiftU32Generate(cTests, papszNameFmts);
2750 if (rcExit == RTEXITCODE_SUCCESS)
2751 rcExit = ShiftU64Generate(cTests, papszNameFmts);
2752 return rcExit;
2753}
2754
2755static RTEXITCODE ShiftDumpAll(const char * const * papszNameFmts)
2756{
2757 RTEXITCODE rcExit = ShiftU8DumpAll(papszNameFmts);
2758 if (rcExit == RTEXITCODE_SUCCESS)
2759 rcExit = ShiftU16DumpAll(papszNameFmts);
2760 if (rcExit == RTEXITCODE_SUCCESS)
2761 rcExit = ShiftU32DumpAll(papszNameFmts);
2762 if (rcExit == RTEXITCODE_SUCCESS)
2763 rcExit = ShiftU64DumpAll(papszNameFmts);
2764 return rcExit;
2765}
2766#endif
2767
2768static void ShiftTest(void)
2769{
2770 ShiftU8Test();
2771 ShiftU16Test();
2772 ShiftU32Test();
2773 ShiftU64Test();
2774}
2775
2776
2777/*
2778 * Multiplication and division.
2779 *
 2780 * Note! The 8-bit functions have a different format, so we need to duplicate things.
2781 * Note! Currently ignoring undefined bits.
2782 */
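/* Worked example taken from the idiv_u8 fixed tests below: the 8-bit divide
   treats the 16-bit destination as AX, so idiv_u8 with uDstIn=0x3f7f (16255)
   and uSrcIn=0x7f (127) yields quotient 127 and remainder 126, i.e.
   uDstOut=0x7e7f and rc=0, whereas 0x3f80 / 0x7f would need a quotient of 128
   and therefore raises #DE (rc=-1). */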
2783
2784/* U8 */
2785#ifdef TSTIEMAIMPL_WITH_GENERATOR
2786static const MULDIVU8_TEST_T g_aFixedTests_idiv_u8[] =
2787{
2788 /* efl in, efl out, uDstIn, uDstOut, uSrcIn, rc (0 or -1 for actual; -128 for auto) */
2789 { UINT32_MAX, 0, 0x8000, 0, 0xc7, -1 }, /* -32768 / -57 = #DE (574.8771929824...) */
2790 { UINT32_MAX, 0, 0x8000, 0, 0xdd, -128 }, /* -32768 / -35 = #DE (936.2285714285...) */
2791 { UINT32_MAX, 0, 0x7f00, 0, 0x7f, -1 }, /* 0x7f00 / 0x7f = #DE (0x100) */
2792 { UINT32_MAX, 0, 0x3f80, 0, 0x7f, -1 }, /* 0x3F80 / 0x7f = #DE (0x80) */
2793 { UINT32_MAX, 0, 0x3f7f, 0, 0x7f, 0 }, /* 0x3F7F / 0x7f = 127.992125984... */
2794 { UINT32_MAX, 0, 0xc000, 0, 0x80, -1 }, /* -16384 / -128 = #DE (0x80) */
2795 { UINT32_MAX, 0, 0xc001, 0, 0x80, 0 }, /* -16383 / -128 = 127.9921875 */
2796};
2797#endif
2798TYPEDEF_SUBTEST_TYPE(INT_MULDIV_U8_T, MULDIVU8_TEST_T, PFNIEMAIMPLMULDIVU8);
2799static INT_MULDIV_U8_T g_aMulDivU8[] =
2800{
2801 ENTRY_BIN_AMD_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
2802 X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
2803 ENTRY_BIN_INTEL_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
2804 ENTRY_BIN_AMD_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
2805 X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
2806 ENTRY_BIN_INTEL_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
2807 ENTRY_BIN_AMD_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2808 ENTRY_BIN_INTEL_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2809 ENTRY_BIN_FIX_AMD_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2810 ENTRY_BIN_FIX_INTEL_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2811};
2812
2813#ifdef TSTIEMAIMPL_WITH_GENERATOR
2814DUMP_ALL_FN(MulDivU8, g_aMulDivU8)
2815static RTEXITCODE MulDivU8Generate(uint32_t cTests, const char * const * papszNameFmts)
2816{
2817 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2818 {
2819 if ( g_aMulDivU8[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
2820 && g_aMulDivU8[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
2821 continue;
 2822 IEMBINARYOUTPUT BinOut;
 2823 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aMulDivU8[iFn]), RTEXITCODE_FAILURE);
2824 for (uint32_t iTest = 0; iTest < cTests; iTest++ )
2825 {
2826 MULDIVU8_TEST_T Test;
2827 Test.fEflIn = RandEFlags();
2828 Test.fEflOut = Test.fEflIn;
2829 Test.uDstIn = RandU16Dst(iTest);
2830 Test.uDstOut = Test.uDstIn;
2831 Test.uSrcIn = RandU8Src(iTest);
2832 Test.rc = g_aMulDivU8[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut);
2833 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
2834 }
2835 for (uint32_t iTest = 0; iTest < g_aMulDivU8[iFn].cFixedTests; iTest++)
2836 {
2837 MULDIVU8_TEST_T Test;
2838 Test.fEflIn = g_aMulDivU8[iFn].paFixedTests[iTest].fEflIn == UINT32_MAX ? RandEFlags()
2839 : g_aMulDivU8[iFn].paFixedTests[iTest].fEflIn;
2840 Test.fEflOut = Test.fEflIn;
2841 Test.uDstIn = g_aMulDivU8[iFn].paFixedTests[iTest].uDstIn;
2842 Test.uDstOut = Test.uDstIn;
2843 Test.uSrcIn = g_aMulDivU8[iFn].paFixedTests[iTest].uSrcIn;
2844 Test.rc = g_aMulDivU8[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut);
2845 if (g_aMulDivU8[iFn].paFixedTests[iTest].rc == 0 || g_aMulDivU8[iFn].paFixedTests[iTest].rc == -1)
2846 Test.rc = g_aMulDivU8[iFn].paFixedTests[iTest].rc;
2847 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
2848 }
2849 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
2850 }
2851 return RTEXITCODE_SUCCESS;
2852}
2853#endif
2854
2855static void MulDivU8Test(void)
2856{
2857 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2858 {
 2859 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aMulDivU8[iFn]))
 2860 continue;
2861 MULDIVU8_TEST_T const * const paTests = g_aMulDivU8[iFn].paTests;
2862 uint32_t const cTests = g_aMulDivU8[iFn].cTests;
2863 uint32_t const fEflIgn = g_aMulDivU8[iFn].uExtra;
2864 PFNIEMAIMPLMULDIVU8 pfn = g_aMulDivU8[iFn].pfn;
 2865 uint32_t const cVars = COUNT_VARIATIONS(g_aMulDivU8[iFn]);
2866 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2867 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2868 {
2869 for (uint32_t iTest = 0; iTest < cTests; iTest++ )
2870 {
2871 uint32_t fEfl = paTests[iTest].fEflIn;
2872 uint16_t uDst = paTests[iTest].uDstIn;
 2873 int rc = pfn(&uDst, paTests[iTest].uSrcIn, &fEfl);
2874 if ( uDst != paTests[iTest].uDstOut
2875 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)
2876 || rc != paTests[iTest].rc)
2877 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst=%#06RX16 src=%#04RX8\n"
2878 " %s-> efl=%#08x dst=%#06RX16 rc=%d\n"
2879 "%sexpected %#08x %#06RX16 %d%s\n",
2880 iTest, iVar ? "/n" : "", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn,
2881 iVar ? " " : "", fEfl, uDst, rc,
2882 iVar ? " " : "", paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].rc,
2883 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn));
2884 else
2885 {
2886 *g_pu16 = paTests[iTest].uDstIn;
2887 *g_pfEfl = paTests[iTest].fEflIn;
 2888 rc = pfn(g_pu16, paTests[iTest].uSrcIn, g_pfEfl);
2889 RTTEST_CHECK(g_hTest, *g_pu16 == paTests[iTest].uDstOut);
2890 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn));
2891 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc);
2892 }
2893 }
2894 pfn = g_aMulDivU8[iFn].pfnNative;
2895 }
 2896 FREE_DECOMPRESSED_TESTS(g_aMulDivU8[iFn]);
2897 }
2898}
2899
2900#ifdef TSTIEMAIMPL_WITH_GENERATOR
2901static const MULDIVU16_TEST_T g_aFixedTests_idiv_u16[] =
2902{
2903 /* low high */
2904 /* --- eflags ---, -- uDst1 --, -- uDst2 --, */
2905 /* in, out, in , out, in , out, uSrcIn, rc (0 or -1 for actual; -128 for auto) */
2906 { UINT32_MAX, 0, 0x0000, 0, 0x8000, 0, 0xc004, -1 }, /* -2147483648 /-16380 = #DE (131104.00781...) */
2907 { UINT32_MAX, 0, 0xffff, 0, 0x7fff, 0, 0x7fff, -1 }, /* 2147483647 / 32767 = #DE (65538.000030...) */
2908 { UINT32_MAX, 0, 0x8000, 0, 0x3fff, 0, 0x7fff, -1 }, /* 0x3fff8000 / 0x7fff = #DE (0x8000) */
2909 { UINT32_MAX, 0, 0x7fff, 0, 0x3fff, 0, 0x7fff, 0 }, /* 0x3fff7fff / 0x7fff = 32767.99996948... */
2910 { UINT32_MAX, 0, 0x0000, 0, 0xc000, 0, 0x8000, -1 }, /* -1073741824 / -32768 = #DE (0x8000) */
2911 { UINT32_MAX, 0, 0x0001, 0, 0xc000, 0, 0x8000, 0 }, /* -1073741823 / -32768 = 32767.999969482421875 */
2912};
2913
2914static const MULDIVU32_TEST_T g_aFixedTests_idiv_u32[] =
2915{
2916 /* low high */
2917 /* --- eflags ---, ---- uDst1 ----, ---- uDst2 ----, */
2918 /* in, out, in , out, in , out, uSrcIn, rc (0 or -1 for actual; -128 for auto) */
2919 { UINT32_MAX, 0, 0x00000000, 0, 0x80000000, 0, 0xc0000004, -1 },
2920 { UINT32_MAX, 0, 0xffffffff, 0, 0x7fffffff, 0, 0x7fffffff, -1 },
2921 { UINT32_MAX, 0, 0x80000000, 0, 0x3fffffff, 0, 0x7fffffff, -1 },
2922 { UINT32_MAX, 0, 0x7fffffff, 0, 0x3fffffff, 0, 0x7fffffff, 0 },
2923 { UINT32_MAX, 0, 0x00000000, 0, 0xc0000000, 0, 0x80000000, -1 },
2924 { UINT32_MAX, 0, 0x00000001, 0, 0xc0000000, 0, 0x80000000, 0 },
2925};
2926
2927static const MULDIVU64_TEST_T g_aFixedTests_idiv_u64[] =
2928{
2929 /* low high */
2930 /* --- eflags ---, -------- uDst1 --------, -------- uDst2 --------, */
2931 /* in, out, in , out, in , out, uSrcIn, rc (0 or -1 for actual; -128 for auto) */
2932 { UINT32_MAX, 0, 0x0000000000000000, 0, 0x8000000000000000, 0, 0xc000000000000004, -1 },
2933 { UINT32_MAX, 0, 0xffffffffffffffff, 0, 0x7fffffffffffffff, 0, 0x7fffffffffffffff, -1 },
2934 { UINT32_MAX, 0, 0x8000000000000000, 0, 0x3fffffffffffffff, 0, 0x7fffffffffffffff, -1 },
2935 { UINT32_MAX, 0, 0x7fffffffffffffff, 0, 0x3fffffffffffffff, 0, 0x7fffffffffffffff, 0 },
2936 { UINT32_MAX, 0, 0x0000000000000000, 0, 0xc000000000000000, 0, 0x8000000000000000, -1 },
2937 { UINT32_MAX, 0, 0x0000000000000001, 0, 0xc000000000000000, 0, 0x8000000000000000, 0 },
2938};
2939
2940# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2941DUMP_ALL_FN(MulDivU ## a_cBits, a_aSubTests) \
2942static RTEXITCODE MulDivU ## a_cBits ## Generate(uint32_t cTests, const char * const * papszNameFmts) \
2943{ \
2944 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2945 { \
2946 if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
2947 && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
2948 continue; \
2949 IEMBINARYOUTPUT BinOut; \
2950 a_TestType Test; \
2951 RT_ZERO(Test); /* 64-bit variant contains alignment padding */ \
2952 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
2953 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2954 { \
2955 Test.fEflIn = RandEFlags(); \
2956 Test.fEflOut = Test.fEflIn; \
2957 Test.uDst1In = RandU ## a_cBits ## Dst(iTest); \
2958 Test.uDst1Out = Test.uDst1In; \
2959 Test.uDst2In = RandU ## a_cBits ## Dst(iTest); \
2960 Test.uDst2Out = Test.uDst2In; \
2961 Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
2962 Test.rc = a_aSubTests[iFn].pfnNative(&Test.uDst1Out, &Test.uDst2Out, Test.uSrcIn, &Test.fEflOut); \
2963 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
2964 } \
2965 for (uint32_t iTest = 0; iTest < a_aSubTests[iFn].cFixedTests; iTest++ ) \
2966 { \
2967 Test.fEflIn = a_aSubTests[iFn].paFixedTests[iTest].fEflIn == UINT32_MAX ? RandEFlags() \
2968 : a_aSubTests[iFn].paFixedTests[iTest].fEflIn; \
2969 Test.fEflOut = Test.fEflIn; \
2970 Test.uDst1In = a_aSubTests[iFn].paFixedTests[iTest].uDst1In; \
2971 Test.uDst1Out = Test.uDst1In; \
2972 Test.uDst2In = a_aSubTests[iFn].paFixedTests[iTest].uDst2In; \
2973 Test.uDst2Out = Test.uDst2In; \
2974 Test.uSrcIn = a_aSubTests[iFn].paFixedTests[iTest].uSrcIn; \
2975 Test.rc = a_aSubTests[iFn].pfnNative(&Test.uDst1Out, &Test.uDst2Out, Test.uSrcIn, &Test.fEflOut); \
2976 if (a_aSubTests[iFn].paFixedTests[iTest].rc == 0 || a_aSubTests[iFn].paFixedTests[iTest].rc == -1) \
2977 Test.rc = a_aSubTests[iFn].paFixedTests[iTest].rc; \
2978 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
2979 } \
2980 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
2981 } \
2982 return RTEXITCODE_SUCCESS; \
2983}
2984#else
2985# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests)
2986#endif
2987
2988#define TEST_MULDIV(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2989TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLMULDIVU ## a_cBits); \
2990static a_SubTestType a_aSubTests [] = \
2991{ \
2992 ENTRY_BIN_AMD_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2993 ENTRY_BIN_INTEL_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2994 ENTRY_BIN_AMD_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2995 ENTRY_BIN_INTEL_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2996 ENTRY_BIN_AMD_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2997 ENTRY_BIN_INTEL_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2998 ENTRY_BIN_FIX_AMD_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2999 ENTRY_BIN_FIX_INTEL_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
3000}; \
3001\
3002GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
3003\
3004static void MulDivU ## a_cBits ## Test(void) \
3005{ \
3006 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3007 { \
3008 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
3009 continue; \
3010 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
3011 uint32_t const cTests = a_aSubTests[iFn].cTests; \
3012 uint32_t const fEflIgn = a_aSubTests[iFn].uExtra; \
3013 PFNIEMAIMPLMULDIVU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
3014 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
3015 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
3016 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
3017 { \
3018 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
3019 { \
3020 uint32_t fEfl = paTests[iTest].fEflIn; \
3021 a_Type uDst1 = paTests[iTest].uDst1In; \
3022 a_Type uDst2 = paTests[iTest].uDst2In; \
3023 int rc = pfn(&uDst1, &uDst2, paTests[iTest].uSrcIn, &fEfl); \
3024 if ( uDst1 != paTests[iTest].uDst1Out \
3025 || uDst2 != paTests[iTest].uDst2Out \
3026 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)\
3027 || rc != paTests[iTest].rc) \
 3028 { \
3029 RTTestFailed(g_hTest, "#%04u%s: efl=%#010x dst1=" a_Fmt " dst2=" a_Fmt " src=" a_Fmt "\n" \
3030 " -> efl=%#010x dst1=" a_Fmt " dst2=" a_Fmt " rc=%d\n" \
3031 " expected %#010x " a_Fmt " " a_Fmt " %d%s -%s%s%s\n", \
3032 iTest, iVar == 0 ? " " : "/n", \
3033 paTests[iTest].fEflIn, paTests[iTest].uDst1In, paTests[iTest].uDst2In, paTests[iTest].uSrcIn, \
3034 fEfl, uDst1, uDst2, rc, \
3035 paTests[iTest].fEflOut, paTests[iTest].uDst1Out, paTests[iTest].uDst2Out, paTests[iTest].rc, \
3036 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn), \
3037 uDst1 != paTests[iTest].uDst1Out ? " dst1" : "", uDst2 != paTests[iTest].uDst2Out ? " dst2" : "", \
3038 (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn) ? " eflags" : ""); \
3039 } \
3040 else \
3041 { \
3042 *g_pu ## a_cBits = paTests[iTest].uDst1In; \
3043 *g_pu ## a_cBits ## Two = paTests[iTest].uDst2In; \
3044 *g_pfEfl = paTests[iTest].fEflIn; \
3045 rc = pfn(g_pu ## a_cBits, g_pu ## a_cBits ## Two, paTests[iTest].uSrcIn, g_pfEfl); \
3046 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDst1Out); \
3047 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits ## Two == paTests[iTest].uDst2Out); \
3048 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn)); \
3049 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc); \
3050 } \
3051 } \
3052 pfn = a_aSubTests[iFn].pfnNative; \
3053 } \
3054 FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
3055 } \
3056} //1068553096 = 0x3FB0D388 (1068553096)
3057TEST_MULDIV(16, uint16_t, "%#06RX16", MULDIVU16_TEST_T, INT_MULDIV_U16_T, g_aMulDivU16)
3058TEST_MULDIV(32, uint32_t, "%#010RX32", MULDIVU32_TEST_T, INT_MULDIV_U32_T, g_aMulDivU32)
3059TEST_MULDIV(64, uint64_t, "%#018RX64", MULDIVU64_TEST_T, INT_MULDIV_U64_T, g_aMulDivU64)
3060
3061#ifdef TSTIEMAIMPL_WITH_GENERATOR
3062static RTEXITCODE MulDivGenerate(uint32_t cTests, const char * const * papszNameFmts)
3063{
3064 RTEXITCODE rcExit = MulDivU8Generate(cTests, papszNameFmts);
3065 if (rcExit == RTEXITCODE_SUCCESS)
3066 rcExit = MulDivU16Generate(cTests, papszNameFmts);
3067 if (rcExit == RTEXITCODE_SUCCESS)
3068 rcExit = MulDivU32Generate(cTests, papszNameFmts);
3069 if (rcExit == RTEXITCODE_SUCCESS)
3070 rcExit = MulDivU64Generate(cTests, papszNameFmts);
3071 return rcExit;
3072}
3073
3074static RTEXITCODE MulDivDumpAll(const char * const * papszNameFmts)
3075{
3076 RTEXITCODE rcExit = MulDivU8DumpAll(papszNameFmts);
3077 if (rcExit == RTEXITCODE_SUCCESS)
3078 rcExit = MulDivU16DumpAll(papszNameFmts);
3079 if (rcExit == RTEXITCODE_SUCCESS)
3080 rcExit = MulDivU32DumpAll(papszNameFmts);
3081 if (rcExit == RTEXITCODE_SUCCESS)
3082 rcExit = MulDivU64DumpAll(papszNameFmts);
3083 return rcExit;
3084}
3085#endif
3086
3087static void MulDivTest(void)
3088{
3089 MulDivU8Test();
3090 MulDivU16Test();
3091 MulDivU32Test();
3092 MulDivU64Test();
3093}
3094
3095
3096/*
3097 * BSWAP
3098 */
3099static void BswapTest(void)
3100{
3101 if (SubTestAndCheckIfEnabled("bswap_u16"))
3102 {
3103 *g_pu32 = UINT32_C(0x12345678);
3104 iemAImpl_bswap_u16(g_pu32);
3105#if 0
3106 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12347856), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
3107#else
3108 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12340000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
3109#endif
3110 *g_pu32 = UINT32_C(0xffff1122);
3111 iemAImpl_bswap_u16(g_pu32);
3112#if 0
3113 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff2211), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
3114#else
3115 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff0000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
3116#endif
3117 }
3118
3119 if (SubTestAndCheckIfEnabled("bswap_u32"))
3120 {
3121 *g_pu32 = UINT32_C(0x12345678);
3122 iemAImpl_bswap_u32(g_pu32);
3123 RTTEST_CHECK(g_hTest, *g_pu32 == UINT32_C(0x78563412));
3124 }
3125
3126 if (SubTestAndCheckIfEnabled("bswap_u64"))
3127 {
3128 *g_pu64 = UINT64_C(0x0123456789abcdef);
3129 iemAImpl_bswap_u64(g_pu64);
3130 RTTEST_CHECK(g_hTest, *g_pu64 == UINT64_C(0xefcdab8967452301));
3131 }
3132}
3133
3134
3135
3136/*********************************************************************************************************************************
3137* Floating point (x87 style) *
3138*********************************************************************************************************************************/
3139
3140/*
3141 * FPU constant loading.
3142 */
3143TYPEDEF_SUBTEST_TYPE(FPU_LD_CONST_T, FPU_LD_CONST_TEST_T, PFNIEMAIMPLFPUR80LDCONST);
3144
3145static FPU_LD_CONST_T g_aFpuLdConst[] =
3146{
3147 ENTRY_BIN(fld1),
3148 ENTRY_BIN(fldl2t),
3149 ENTRY_BIN(fldl2e),
3150 ENTRY_BIN(fldpi),
3151 ENTRY_BIN(fldlg2),
3152 ENTRY_BIN(fldln2),
3153 ENTRY_BIN(fldz),
3154};
3155
3156#ifdef TSTIEMAIMPL_WITH_GENERATOR
3157static RTEXITCODE FpuLdConstGenerate(uint32_t cTests, const char * const *papszNameFmts)
3158{
3159 X86FXSTATE State;
3160 RT_ZERO(State);
3161 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
3162 {
3163 IEMBINARYOUTPUT BinOut;
3164 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuLdConst[iFn]), RTEXITCODE_FAILURE);
3165 for (uint32_t iTest = 0; iTest < cTests; iTest += 4)
3166 {
3167 State.FCW = RandFcw();
3168 State.FSW = RandFsw();
3169
3170 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3171 {
3172 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3173 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
3174 g_aFpuLdConst[iFn].pfn(&State, &Res);
3175 FPU_LD_CONST_TEST_T const Test = { State.FCW, State.FSW, Res.FSW, Res.r80Result };
3176 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
3177 }
3178 }
3179 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
3180 }
3181 return RTEXITCODE_SUCCESS;
3182}
3183DUMP_ALL_FN(FpuLdConst, g_aFpuLdConst)
3184#endif
3185
3186static void FpuLdConstTest(void)
3187{
3188 /*
3189 * Inputs:
3190 * - FSW: C0, C1, C2, C3
3191 * - FCW: Exception masks, Precision control, Rounding control.
3192 *
3193 * C1 set to 1 on stack overflow, zero otherwise. C0, C2, and C3 are "undefined".
3194 */
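/*
 * For reference when reading the loops below and the generator above: per the
 * usual x87 layout the FCW rounding control sits in bits 11:10 (0 = nearest-even,
 * 1 = down, 2 = up, 3 = toward zero) and precision control in bits 9:8
 * (0 = 24-bit, 2 = 53-bit, 3 = 64-bit), which is what the
 * (FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT) sweeps cycle through.
 */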
3195 X86FXSTATE State;
3196 RT_ZERO(State);
3197 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
3198 {
3199 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuLdConst[iFn]))
3200 continue;
3201
3202 FPU_LD_CONST_TEST_T const *paTests = g_aFpuLdConst[iFn].paTests;
3203 uint32_t const cTests = g_aFpuLdConst[iFn].cTests;
3204 PFNIEMAIMPLFPUR80LDCONST pfn = g_aFpuLdConst[iFn].pfn;
3205 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdConst[iFn]);
3206 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3207 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3208 {
3209 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3210 {
3211 State.FCW = paTests[iTest].fFcw;
3212 State.FSW = paTests[iTest].fFswIn;
3213 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3214 pfn(&State, &Res);
3215 if ( Res.FSW != paTests[iTest].fFswOut
3216 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
3217 RTTestFailed(g_hTest, "#%u%s: fcw=%#06x fsw=%#06x -> fsw=%#06x %s, expected %#06x %s%s%s (%s)\n",
3218 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3219 Res.FSW, FormatR80(&Res.r80Result),
3220 paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
3221 FswDiff(Res.FSW, paTests[iTest].fFswOut),
3222 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
3223 FormatFcw(paTests[iTest].fFcw) );
3224 }
3225 pfn = g_aFpuLdConst[iFn].pfnNative;
3226 }
3227
3228 FREE_DECOMPRESSED_TESTS(g_aFpuLdConst[iFn]);
3229 }
3230}
3231
3232
3233/*
3234 * Load floating point values from memory.
3235 */
3236#ifdef TSTIEMAIMPL_WITH_GENERATOR
3237# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
3238static RTEXITCODE FpuLdR ## a_cBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
3239{ \
3240 X86FXSTATE State; \
3241 RT_ZERO(State); \
3242 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3243 { \
3244 IEMBINARYOUTPUT BinOut; \
3245 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
3246 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3247 { \
3248 State.FCW = RandFcw(); \
3249 State.FSW = RandFsw(); \
3250 a_rdTypeIn InVal = RandR ## a_cBits ## Src(iTest); \
3251 \
3252 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
3253 { \
3254 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
3255 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
3256 a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
3257 a_TestType const Test = { State.FCW, State.FSW, Res.FSW, Res.r80Result, InVal }; \
3258 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
3259 } \
3260 } \
3261 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
3262 } \
3263 return RTEXITCODE_SUCCESS; \
3264} \
3265DUMP_ALL_FN(FpuLdR ## a_cBits, a_aSubTests)
3266#else
3267# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType)
3268#endif
3269
3270#define TEST_FPU_LOAD(a_cBits, a_rdTypeIn, a_SubTestType, a_aSubTests, a_TestType) \
3271typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROM ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, PC ## a_rdTypeIn)); \
3272typedef FNIEMAIMPLFPULDR80FROM ## a_cBits *PFNIEMAIMPLFPULDR80FROM ## a_cBits; \
3273TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROM ## a_cBits); \
3274\
3275static a_SubTestType a_aSubTests[] = \
3276{ \
3277 ENTRY_BIN(RT_CONCAT(fld_r80_from_r,a_cBits)) \
3278}; \
3279GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
3280\
3281static void FpuLdR ## a_cBits ## Test(void) \
3282{ \
3283 X86FXSTATE State; \
3284 RT_ZERO(State); \
3285 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3286 { \
3287 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
3288 continue; \
3289 \
3290 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
3291 uint32_t const cTests = a_aSubTests[iFn].cTests; \
3292 PFNIEMAIMPLFPULDR80FROM ## a_cBits pfn = a_aSubTests[iFn].pfn; \
3293 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
3294 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
3295 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
3296 { \
3297 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3298 { \
3299 a_rdTypeIn const InVal = paTests[iTest].InVal; \
3300 State.FCW = paTests[iTest].fFcw; \
3301 State.FSW = paTests[iTest].fFswIn; \
3302 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
3303 pfn(&State, &Res, &InVal); \
3304 if ( Res.FSW != paTests[iTest].fFswOut \
3305 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
3306 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n" \
3307 "%s -> fsw=%#06x %s\n" \
3308 "%s expected %#06x %s%s%s (%s)\n", \
3309 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
3310 FormatR ## a_cBits(&paTests[iTest].InVal), \
3311 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
3312 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
3313 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
3314 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
3315 FormatFcw(paTests[iTest].fFcw) ); \
3316 } \
3317 pfn = a_aSubTests[iFn].pfnNative; \
3318 } \
3319 \
3320 FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
3321 } \
3322}
3323
3324TEST_FPU_LOAD(80, RTFLOAT80U, FPU_LD_R80_T, g_aFpuLdR80, FPU_R80_IN_TEST_T)
3325TEST_FPU_LOAD(64, RTFLOAT64U, FPU_LD_R64_T, g_aFpuLdR64, FPU_R64_IN_TEST_T)
3326TEST_FPU_LOAD(32, RTFLOAT32U, FPU_LD_R32_T, g_aFpuLdR32, FPU_R32_IN_TEST_T)
3327
3328#ifdef TSTIEMAIMPL_WITH_GENERATOR
3329static RTEXITCODE FpuLdMemGenerate(uint32_t cTests, const char * const *papszNameFmts)
3330{
3331 RTEXITCODE rcExit = FpuLdR80Generate(cTests, papszNameFmts);
3332 if (rcExit == RTEXITCODE_SUCCESS)
3333 rcExit = FpuLdR64Generate(cTests, papszNameFmts);
3334 if (rcExit == RTEXITCODE_SUCCESS)
3335 rcExit = FpuLdR32Generate(cTests, papszNameFmts);
3336 return rcExit;
3337}
3338
3339static RTEXITCODE FpuLdMemDumpAll(const char * const *papszNameFmts)
3340{
3341 RTEXITCODE rcExit = FpuLdR80DumpAll(papszNameFmts);
3342 if (rcExit == RTEXITCODE_SUCCESS)
3343 rcExit = FpuLdR64DumpAll(papszNameFmts);
3344 if (rcExit == RTEXITCODE_SUCCESS)
3345 rcExit = FpuLdR32DumpAll(papszNameFmts);
3346 return rcExit;
3347}
3348#endif
3349
3350static void FpuLdMemTest(void)
3351{
3352 FpuLdR80Test();
3353 FpuLdR64Test();
3354 FpuLdR32Test();
3355}
3356
3357
3358/*
3359 * Load integer values from memory.
3360 */
3361#ifdef TSTIEMAIMPL_WITH_GENERATOR
3362# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
3363static RTEXITCODE FpuLdI ## a_cBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
3364{ \
3365 X86FXSTATE State; \
3366 RT_ZERO(State); \
3367 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3368 { \
3369 IEMBINARYOUTPUT BinOut; \
3370 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
3371 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3372 { \
3373 State.FCW = RandFcw(); \
3374 State.FSW = RandFsw(); \
3375 a_iTypeIn InVal = (a_iTypeIn)RandU ## a_cBits ## Src(iTest); \
3376 \
3377 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
3378 { \
3379 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
3380 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
3381 a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
3382 a_TestType const Test = { State.FCW, State.FSW, Res.FSW, Res.r80Result }; \
3383 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
3384 } \
3385 } \
3386 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
3387 } \
3388 return RTEXITCODE_SUCCESS; \
3389} \
3390DUMP_ALL_FN(FpuLdI ## a_cBits, a_aSubTests)
3391#else
3392# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType)
3393#endif
3394
3395#define TEST_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_SubTestType, a_aSubTests, a_TestType) \
3396typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMI ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, a_iTypeIn const *)); \
3397typedef FNIEMAIMPLFPULDR80FROMI ## a_cBits *PFNIEMAIMPLFPULDR80FROMI ## a_cBits; \
3398TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROMI ## a_cBits); \
3399\
3400static a_SubTestType a_aSubTests[] = \
3401{ \
3402 ENTRY_BIN(RT_CONCAT(fild_r80_from_i,a_cBits)) \
3403}; \
3404GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
3405\
3406static void FpuLdI ## a_cBits ## Test(void) \
3407{ \
3408 X86FXSTATE State; \
3409 RT_ZERO(State); \
3410 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3411 { \
3412 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
3413 continue; \
3414 \
3415 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
3416 uint32_t const cTests = a_aSubTests[iFn].cTests; \
3417 PFNIEMAIMPLFPULDR80FROMI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
3418 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
3419 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
3420 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
3421 { \
3422 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3423 { \
3424 a_iTypeIn const iInVal = paTests[iTest].iInVal; \
3425 State.FCW = paTests[iTest].fFcw; \
3426 State.FSW = paTests[iTest].fFswIn; \
3427 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
3428 pfn(&State, &Res, &iInVal); \
3429 if ( Res.FSW != paTests[iTest].fFswOut \
3430 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
3431 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=" a_szFmtIn "\n" \
3432 "%s -> fsw=%#06x %s\n" \
3433 "%s expected %#06x %s%s%s (%s)\n", \
3434 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, paTests[iTest].iInVal, \
3435 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
3436 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
3437 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
3438 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
3439 FormatFcw(paTests[iTest].fFcw) ); \
3440 } \
3441 pfn = a_aSubTests[iFn].pfnNative; \
3442 } \
3443 FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
3444 } \
3445}
3446
3447TEST_FPU_LOAD_INT(64, int64_t, "%RI64", FPU_LD_I64_T, g_aFpuLdU64, FPU_I64_IN_TEST_T)
3448TEST_FPU_LOAD_INT(32, int32_t, "%RI32", FPU_LD_I32_T, g_aFpuLdU32, FPU_I32_IN_TEST_T)
3449TEST_FPU_LOAD_INT(16, int16_t, "%RI16", FPU_LD_I16_T, g_aFpuLdU16, FPU_I16_IN_TEST_T)
3450
3451#ifdef TSTIEMAIMPL_WITH_GENERATOR
3452static RTEXITCODE FpuLdIntGenerate(uint32_t cTests, const char * const *papszNameFmts)
3453{
3454 RTEXITCODE rcExit = FpuLdI64Generate(cTests, papszNameFmts);
3455 if (rcExit == RTEXITCODE_SUCCESS)
3456 rcExit = FpuLdI32Generate(cTests, papszNameFmts);
3457 if (rcExit == RTEXITCODE_SUCCESS)
3458 rcExit = FpuLdI16Generate(cTests, papszNameFmts);
3459 return rcExit;
3460}
3461
3462static RTEXITCODE FpuLdIntDumpAll(const char * const *papszNameFmts)
3463{
3464 RTEXITCODE rcExit = FpuLdI64DumpAll(papszNameFmts);
3465 if (rcExit == RTEXITCODE_SUCCESS)
3466 rcExit = FpuLdI32DumpAll(papszNameFmts);
3467 if (rcExit == RTEXITCODE_SUCCESS)
3468 rcExit = FpuLdI16DumpAll(papszNameFmts);
3469 return rcExit;
3470}
3471#endif
3472
3473static void FpuLdIntTest(void)
3474{
3475 FpuLdI64Test();
3476 FpuLdI32Test();
3477 FpuLdI16Test();
3478}
3479
3480
3481/*
3482 * Load binary coded decimal values from memory.
3483 */
3484typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMD80,(PCX86FXSTATE, PIEMFPURESULT, PCRTPBCD80U));
3485typedef FNIEMAIMPLFPULDR80FROMD80 *PFNIEMAIMPLFPULDR80FROMD80;
3486TYPEDEF_SUBTEST_TYPE(FPU_LD_D80_T, FPU_D80_IN_TEST_T, PFNIEMAIMPLFPULDR80FROMD80);
3487
3488static FPU_LD_D80_T g_aFpuLdD80[] =
3489{
3490 ENTRY_BIN(fld_r80_from_d80)
3491};
3492
3493#ifdef TSTIEMAIMPL_WITH_GENERATOR
3494static RTEXITCODE FpuLdD80Generate(uint32_t cTests, const char * const *papszNameFmts)
3495{
3496 X86FXSTATE State;
3497 RT_ZERO(State);
3498 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
3499 {
3500 IEMBINARYOUTPUT BinOut;
3501 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuLdD80[iFn]), RTEXITCODE_FAILURE);
3502 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3503 {
3504 State.FCW = RandFcw();
3505 State.FSW = RandFsw();
3506 RTPBCD80U InVal = RandD80Src(iTest);
3507
3508 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3509 {
3510 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3511 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
3512 g_aFpuLdD80[iFn].pfn(&State, &Res, &InVal);
3513 FPU_D80_IN_TEST_T const Test = { State.FCW, State.FSW, Res.FSW, Res.r80Result, InVal };
3514 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
3515 }
3516 }
3517 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
3518 }
3519 return RTEXITCODE_SUCCESS;
3520}
3521DUMP_ALL_FN(FpuLdD80, g_aFpuLdD80)
3522#endif
3523
3524static void FpuLdD80Test(void)
3525{
3526 X86FXSTATE State;
3527 RT_ZERO(State);
3528 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
3529 {
3530 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuLdD80[iFn]))
3531 continue;
3532
3533 FPU_D80_IN_TEST_T const * const paTests = g_aFpuLdD80[iFn].paTests;
3534 uint32_t const cTests = g_aFpuLdD80[iFn].cTests;
3535 PFNIEMAIMPLFPULDR80FROMD80 pfn = g_aFpuLdD80[iFn].pfn;
3536 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdD80[iFn]);
3537 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3538 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3539 {
3540 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3541 {
3542 RTPBCD80U const InVal = paTests[iTest].InVal;
3543 State.FCW = paTests[iTest].fFcw;
3544 State.FSW = paTests[iTest].fFswIn;
3545 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3546 pfn(&State, &Res, &InVal);
3547 if ( Res.FSW != paTests[iTest].fFswOut
3548 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
3549 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n"
3550 "%s -> fsw=%#06x %s\n"
3551 "%s expected %#06x %s%s%s (%s)\n",
3552 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3553 FormatD80(&paTests[iTest].InVal),
3554 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
3555 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
3556 FswDiff(Res.FSW, paTests[iTest].fFswOut),
3557 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
3558 FormatFcw(paTests[iTest].fFcw) );
3559 }
3560 pfn = g_aFpuLdD80[iFn].pfnNative;
3561 }
3562
3563 FREE_DECOMPRESSED_TESTS(g_aFpuLdD80[iFn]);
3564 }
3565}
3566
3567
3568/*
3569 * Store floating point values to memory.
3570 */
3571#ifdef TSTIEMAIMPL_WITH_GENERATOR
3572static const RTFLOAT80U g_aFpuStR32Specials[] =
3573{
3574 RTFLOAT80U_INIT_C(0, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
3575 RTFLOAT80U_INIT_C(1, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
3576 RTFLOAT80U_INIT_C(0, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
3577 RTFLOAT80U_INIT_C(1, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
3578};
3579static const RTFLOAT80U g_aFpuStR64Specials[] =
3580{
3581 RTFLOAT80U_INIT_C(0, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
3582 RTFLOAT80U_INIT_C(1, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
3583 RTFLOAT80U_INIT_C(0, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
3584 RTFLOAT80U_INIT_C(1, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
3585 RTFLOAT80U_INIT_C(0, 0xd0b9e6fdda887400, 687 + RTFLOAT80U_EXP_BIAS), /* random example for this */
3586};
3587static const RTFLOAT80U g_aFpuStR80Specials[] =
3588{
3589 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* placeholder */
3590};
3591# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
3592static RTEXITCODE FpuStR ## a_cBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
3593{ \
3594 uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStR ## a_cBits ## Specials); \
3595 X86FXSTATE State; \
3596 RT_ZERO(State); \
3597 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3598 { \
3599 IEMBINARYOUTPUT BinOut; \
3600 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
3601 for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
3602 { \
3603 uint16_t const fFcw = RandFcw(); \
3604 State.FSW = RandFsw(); \
3605 RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits) \
3606 : g_aFpuStR ## a_cBits ## Specials[iTest - cTests]; \
3607 \
3608 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
3609 { \
3610 /* PC doesn't influence these, so leave as is. */ \
3611 AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
3612 for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
3613 { \
3614 uint16_t uFswOut = 0; \
3615 a_rdType OutVal; \
3616 RT_ZERO(OutVal); \
3617 memset(&OutVal, 0xfe, sizeof(OutVal)); \
3618 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
3619 | (iRounding << X86_FCW_RC_SHIFT); \
3620 /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
3621 State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
3622 a_aSubTests[iFn].pfn(&State, &uFswOut, &OutVal, &InVal); \
3623 a_TestType const Test = { State.FCW, State.FSW, uFswOut, InVal, OutVal }; \
3624 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
3625 } \
3626 } \
3627 } \
3628 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
3629 } \
3630 return RTEXITCODE_SUCCESS; \
3631} \
3632DUMP_ALL_FN(FpuStR ## a_cBits, a_aSubTests)
3633#else
3634# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType)
3635#endif
3636
3637#define TEST_FPU_STORE(a_cBits, a_rdType, a_SubTestType, a_aSubTests, a_TestType) \
3638typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOR ## a_cBits,(PCX86FXSTATE, uint16_t *, \
3639 PRTFLOAT ## a_cBits ## U, PCRTFLOAT80U)); \
3640typedef FNIEMAIMPLFPUSTR80TOR ## a_cBits *PFNIEMAIMPLFPUSTR80TOR ## a_cBits; \
3641TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPUSTR80TOR ## a_cBits); \
3642\
3643static a_SubTestType a_aSubTests[] = \
3644{ \
3645 ENTRY_BIN(RT_CONCAT(fst_r80_to_r,a_cBits)) \
3646}; \
3647GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
3648\
3649static void FpuStR ## a_cBits ## Test(void) \
3650{ \
3651 X86FXSTATE State; \
3652 RT_ZERO(State); \
3653 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3654 { \
3655 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
3656 continue; \
3657 \
3658 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
3659 uint32_t const cTests = a_aSubTests[iFn].cTests; \
3660 PFNIEMAIMPLFPUSTR80TOR ## a_cBits pfn = a_aSubTests[iFn].pfn; \
3661 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
3662 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
3663 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
3664 { \
3665 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3666 { \
3667 RTFLOAT80U const InVal = paTests[iTest].InVal; \
3668 uint16_t uFswOut = 0; \
3669 a_rdType OutVal; \
3670 RT_ZERO(OutVal); \
3671 memset(&OutVal, 0xfe, sizeof(OutVal)); \
3672 State.FCW = paTests[iTest].fFcw; \
3673 State.FSW = paTests[iTest].fFswIn; \
3674 pfn(&State, &uFswOut, &OutVal, &InVal); \
3675 if ( uFswOut != paTests[iTest].fFswOut \
3676 || !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal)) \
3677 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
3678 "%s -> fsw=%#06x %s\n" \
3679 "%s expected %#06x %s%s%s (%s)\n", \
3680 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
3681 FormatR80(&paTests[iTest].InVal), \
3682 iVar ? " " : "", uFswOut, FormatR ## a_cBits(&OutVal), \
3683 iVar ? " " : "", paTests[iTest].fFswOut, FormatR ## a_cBits(&paTests[iTest].OutVal), \
3684 FswDiff(uFswOut, paTests[iTest].fFswOut), \
3685 !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "", \
3686 FormatFcw(paTests[iTest].fFcw) ); \
3687 } \
3688 pfn = a_aSubTests[iFn].pfnNative; \
3689 } \
3690 FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
3691 } \
3692}
3693
3694TEST_FPU_STORE(80, RTFLOAT80U, FPU_ST_R80_T, g_aFpuStR80, FPU_ST_R80_TEST_T)
3695TEST_FPU_STORE(64, RTFLOAT64U, FPU_ST_R64_T, g_aFpuStR64, FPU_ST_R64_TEST_T)
3696TEST_FPU_STORE(32, RTFLOAT32U, FPU_ST_R32_T, g_aFpuStR32, FPU_ST_R32_TEST_T)
3697
3698#ifdef TSTIEMAIMPL_WITH_GENERATOR
3699static RTEXITCODE FpuStMemGenerate(uint32_t cTests, const char * const *papszNameFmts)
3700{
3701 RTEXITCODE rcExit = FpuStR80Generate(cTests, papszNameFmts);
3702 if (rcExit == RTEXITCODE_SUCCESS)
3703 rcExit = FpuStR64Generate(cTests, papszNameFmts);
3704 if (rcExit == RTEXITCODE_SUCCESS)
3705 rcExit = FpuStR32Generate(cTests, papszNameFmts);
3706 return rcExit;
3707}
3708
3709static RTEXITCODE FpuStMemDumpAll(const char * const *papszNameFmts)
3710{
3711 RTEXITCODE rcExit = FpuStR80DumpAll(papszNameFmts);
3712 if (rcExit == RTEXITCODE_SUCCESS)
3713 rcExit = FpuStR64DumpAll(papszNameFmts);
3714 if (rcExit == RTEXITCODE_SUCCESS)
3715 rcExit = FpuStR32DumpAll(papszNameFmts);
3716 return rcExit;
3717}
3718#endif
3719
3720static void FpuStMemTest(void)
3721{
3722 FpuStR80Test();
3723 FpuStR64Test();
3724 FpuStR32Test();
3725}
3726
3727
3728/*
3729 * Store integer values to memory or register.
3730 */
3731TYPEDEF_SUBTEST_TYPE(FPU_ST_I16_T, FPU_ST_I16_TEST_T, PFNIEMAIMPLFPUSTR80TOI16);
3732TYPEDEF_SUBTEST_TYPE(FPU_ST_I32_T, FPU_ST_I32_TEST_T, PFNIEMAIMPLFPUSTR80TOI32);
3733TYPEDEF_SUBTEST_TYPE(FPU_ST_I64_T, FPU_ST_I64_TEST_T, PFNIEMAIMPLFPUSTR80TOI64);
3734
3735static FPU_ST_I16_T g_aFpuStI16[] =
3736{
3737 ENTRY_BIN(fist_r80_to_i16),
3738 ENTRY_BIN_AMD( fistt_r80_to_i16, 0),
3739 ENTRY_BIN_INTEL(fistt_r80_to_i16, 0),
3740};
3741static FPU_ST_I32_T g_aFpuStI32[] =
3742{
3743 ENTRY_BIN(fist_r80_to_i32),
3744 ENTRY_BIN(fistt_r80_to_i32),
3745};
3746static FPU_ST_I64_T g_aFpuStI64[] =
3747{
3748 ENTRY_BIN(fist_r80_to_i64),
3749 ENTRY_BIN(fistt_r80_to_i64),
3750};
3751
3752#ifdef TSTIEMAIMPL_WITH_GENERATOR
3753static const RTFLOAT80U g_aFpuStI16Specials[] = /* 16-bit variant borrows properties from the 32-bit one, thus all this stuff. */
3754{
3755 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 13 + RTFLOAT80U_EXP_BIAS),
3756 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 13 + RTFLOAT80U_EXP_BIAS),
3757 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3758 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3759 RTFLOAT80U_INIT_C(0, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
3760 RTFLOAT80U_INIT_C(1, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
3761 RTFLOAT80U_INIT_C(0, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
3762 RTFLOAT80U_INIT_C(1, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
3763 RTFLOAT80U_INIT_C(0, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
3764 RTFLOAT80U_INIT_C(1, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
3765 RTFLOAT80U_INIT_C(0, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
3766 RTFLOAT80U_INIT_C(1, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
3767 RTFLOAT80U_INIT_C(0, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
3768 RTFLOAT80U_INIT_C(1, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
3769 RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 14 + RTFLOAT80U_EXP_BIAS),
3770 RTFLOAT80U_INIT_C(0, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3771 RTFLOAT80U_INIT_C(1, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3772 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
3773 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
3774 RTFLOAT80U_INIT_C(0, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
3775 RTFLOAT80U_INIT_C(0, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3776 RTFLOAT80U_INIT_C(0, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3777 RTFLOAT80U_INIT_C(1, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
3778 RTFLOAT80U_INIT_C(1, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* min */
3779 RTFLOAT80U_INIT_C(1, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3780 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS),
3781 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 15 + RTFLOAT80U_EXP_BIAS),
3782 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS),
3783 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 17 + RTFLOAT80U_EXP_BIAS),
3784 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS),
3785 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS),
3786 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 28 + RTFLOAT80U_EXP_BIAS),
3787 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
3788 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
3789 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
3790 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
3791 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3792 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3793 RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3794 RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3795 RTFLOAT80U_INIT_C(0, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
3796 RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
3797 RTFLOAT80U_INIT_C(0, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3798 RTFLOAT80U_INIT_C(1, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3799 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3800 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3801 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 32 + RTFLOAT80U_EXP_BIAS),
3802};
3803static const RTFLOAT80U g_aFpuStI32Specials[] =
3804{
3805 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
3806 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
3807 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3808 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* min */
3809 RTFLOAT80U_INIT_C(0, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3810 RTFLOAT80U_INIT_C(1, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
3811 RTFLOAT80U_INIT_C(0, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3812 RTFLOAT80U_INIT_C(1, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
3813 RTFLOAT80U_INIT_C(0, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
3814 RTFLOAT80U_INIT_C(1, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
3815 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3816 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3817 RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3818 RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3819 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3820 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3821};
3822static const RTFLOAT80U g_aFpuStI64Specials[] =
3823{
3824 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 61 + RTFLOAT80U_EXP_BIAS),
3825 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 61 + RTFLOAT80U_EXP_BIAS),
3826 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
3827 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
3828 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
3829 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
3830 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3831 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* min */
3832 RTFLOAT80U_INIT_C(0, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
3833 RTFLOAT80U_INIT_C(1, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
3834 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
3835 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
3836 RTFLOAT80U_INIT_C(0, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
3837 RTFLOAT80U_INIT_C(1, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
3838 RTFLOAT80U_INIT_C(0, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
3839 RTFLOAT80U_INIT_C(1, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
3840 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 63 + RTFLOAT80U_EXP_BIAS),
3841};
3842
3843# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
3844static RTEXITCODE FpuStI ## a_cBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
3845{ \
3846 X86FXSTATE State; \
3847 RT_ZERO(State); \
3848 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3849 { \
3850 PFNIEMAIMPLFPUSTR80TOI ## a_cBits const pfn = a_aSubTests[iFn].pfnNative \
3851 ? a_aSubTests[iFn].pfnNative : a_aSubTests[iFn].pfn; \
3852 if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
3853 && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
3854 continue; \
3855 \
3856 IEMBINARYOUTPUT BinOut; \
3857 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
3858 uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStI ## a_cBits ## Specials); \
3859 for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
3860 { \
3861 uint16_t const fFcw = RandFcw(); \
3862 State.FSW = RandFsw(); \
3863 RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits, true) \
3864 : g_aFpuStI ## a_cBits ## Specials[iTest - cTests]; \
3865 \
3866 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
3867 { \
3868 /* PC doesn't influence these, so leave as is. */ \
3869 AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
3870 for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
3871 { \
3872 uint16_t uFswOut = 0; \
3873 a_iType iOutVal = ~(a_iType)2; \
3874 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
3875 | (iRounding << X86_FCW_RC_SHIFT); \
3876 /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
3877 State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
3878 pfn(&State, &uFswOut, &iOutVal, &InVal); \
3879 a_TestType const Test = { State.FCW, State.FSW, uFswOut, InVal, iOutVal }; \
3880 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
3881 } \
3882 } \
3883 } \
3884 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
3885 } \
3886 return RTEXITCODE_SUCCESS; \
3887} \
3888DUMP_ALL_FN(FpuStI ## a_cBits, a_aSubTests)
3889#else
3890# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType)
3891#endif
3892
3893#define TEST_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_SubTestType, a_aSubTests, a_TestType) \
3894GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
3895\
3896static void FpuStI ## a_cBits ## Test(void) \
3897{ \
3898 X86FXSTATE State; \
3899 RT_ZERO(State); \
3900 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3901 { \
3902 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
3903 continue; \
3904 \
3905 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
3906 uint32_t const cTests = a_aSubTests[iFn].cTests; \
3907 PFNIEMAIMPLFPUSTR80TOI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
3908 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
3909 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
3910 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
3911 { \
3912 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3913 { \
3914 RTFLOAT80U const InVal = paTests[iTest].InVal; \
3915 uint16_t uFswOut = 0; \
3916 a_iType iOutVal = ~(a_iType)2; \
3917 State.FCW = paTests[iTest].fFcw; \
3918 State.FSW = paTests[iTest].fFswIn; \
3919 pfn(&State, &uFswOut, &iOutVal, &InVal); \
3920 if ( uFswOut != paTests[iTest].fFswOut \
3921 || iOutVal != paTests[iTest].iOutVal) \
3922 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
3923 "%s -> fsw=%#06x " a_szFmt "\n" \
3924 "%s expected %#06x " a_szFmt "%s%s (%s)\n", \
3925 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
3926 FormatR80(&paTests[iTest].InVal), \
3927 iVar ? " " : "", uFswOut, iOutVal, \
3928 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].iOutVal, \
3929 FswDiff(uFswOut, paTests[iTest].fFswOut), \
3930 iOutVal != paTests[iTest].iOutVal ? " - val" : "", FormatFcw(paTests[iTest].fFcw) ); \
3931 } \
3932 pfn = a_aSubTests[iFn].pfnNative; \
3933 } \
3934 FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
3935 } \
3936}
3937
3938//fistt_r80_to_i16 diffs for AMD, of course :-)
3939
3940TEST_FPU_STORE_INT(64, int64_t, "%RI64", FPU_ST_I64_T, g_aFpuStI64, FPU_ST_I64_TEST_T)
3941TEST_FPU_STORE_INT(32, int32_t, "%RI32", FPU_ST_I32_T, g_aFpuStI32, FPU_ST_I32_TEST_T)
3942TEST_FPU_STORE_INT(16, int16_t, "%RI16", FPU_ST_I16_T, g_aFpuStI16, FPU_ST_I16_TEST_T)
3943
3944#ifdef TSTIEMAIMPL_WITH_GENERATOR
3945static RTEXITCODE FpuStIntGenerate(uint32_t cTests, const char * const *papszNameFmts)
3946{
3947 RTEXITCODE rcExit = FpuStI64Generate(cTests, papszNameFmts);
3948 if (rcExit == RTEXITCODE_SUCCESS)
3949 rcExit = FpuStI32Generate(cTests, papszNameFmts);
3950 if (rcExit == RTEXITCODE_SUCCESS)
3951 rcExit = FpuStI16Generate(cTests, papszNameFmts);
3952 return rcExit;
3953}
3954static RTEXITCODE FpuStIntDumpAll(const char * const *papszNameFmts)
3955{
3956 RTEXITCODE rcExit = FpuStI64DumpAll(papszNameFmts);
3957 if (rcExit == RTEXITCODE_SUCCESS)
3958 rcExit = FpuStI32DumpAll(papszNameFmts);
3959 if (rcExit == RTEXITCODE_SUCCESS)
3960 rcExit = FpuStI16DumpAll(papszNameFmts);
3961 return rcExit;
3962}
3963#endif
3964
3965static void FpuStIntTest(void)
3966{
3967 FpuStI64Test();
3968 FpuStI32Test();
3969 FpuStI16Test();
3970}
3971
3972
3973/*
3974 * Store as packed BCD value (memory).
3975 */
3976typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOD80,(PCX86FXSTATE, uint16_t *, PRTPBCD80U, PCRTFLOAT80U));
3977typedef FNIEMAIMPLFPUSTR80TOD80 *PFNIEMAIMPLFPUSTR80TOD80;
3978TYPEDEF_SUBTEST_TYPE(FPU_ST_D80_T, FPU_ST_D80_TEST_T, PFNIEMAIMPLFPUSTR80TOD80);
3979
3980static FPU_ST_D80_T g_aFpuStD80[] =
3981{
3982 ENTRY_BIN(fst_r80_to_d80),
3983};
3984
3985#ifdef TSTIEMAIMPL_WITH_GENERATOR
3986static RTEXITCODE FpuStD80Generate(uint32_t cTests, const char * const *papszNameFmts)
3987{
3988 static RTFLOAT80U const s_aSpecials[] =
3989 {
3990 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 below max */
3991 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 above min */
3992 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact max */
3993 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact min */
3994 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* max & all rounded off bits set */
3995 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* min & all rounded off bits set */
3996 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* max & some rounded off bits set */
3997 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* min & some rounded off bits set */
3998 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* max & some other rounded off bits set */
3999 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* min & some other rounded off bits set */
4000 RTFLOAT80U_INIT_C(0, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 above max */
4001 RTFLOAT80U_INIT_C(1, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 below min */
4002 };
4003
4004 X86FXSTATE State;
4005 RT_ZERO(State);
4006 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
4007 {
4008 IEMBINARYOUTPUT BinOut;
4009 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuStD80[iFn]), RTEXITCODE_FAILURE);
4010 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4011 {
4012 uint16_t const fFcw = RandFcw();
4013 State.FSW = RandFsw();
4014 RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, 59, true) : s_aSpecials[iTest - cTests];
4015
4016 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4017 {
4018 /* PC doesn't influence these, so leave as is. */
4019 AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT);
4020 for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/)
4021 {
4022 uint16_t uFswOut = 0;
4023 RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
4024 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM))
4025 | (iRounding << X86_FCW_RC_SHIFT);
4026 /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/
4027 State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT;
4028 g_aFpuStD80[iFn].pfn(&State, &uFswOut, &OutVal, &InVal);
4029 FPU_ST_D80_TEST_T const Test = { State.FCW, State.FSW, uFswOut, InVal, OutVal };
4030 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
4031 }
4032 }
4033 }
4034 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
4035 }
4036 return RTEXITCODE_SUCCESS;
4037}
4038DUMP_ALL_FN(FpuStD80, g_aFpuStD80)
4039#endif
4040
4041
4042static void FpuStD80Test(void)
4043{
4044 X86FXSTATE State;
4045 RT_ZERO(State);
4046 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
4047 {
4048 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuStD80[iFn]))
4049 continue;
4050
4051 FPU_ST_D80_TEST_T const * const paTests = g_aFpuStD80[iFn].paTests;
4052 uint32_t const cTests = g_aFpuStD80[iFn].cTests;
4053 PFNIEMAIMPLFPUSTR80TOD80 pfn = g_aFpuStD80[iFn].pfn;
4054 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuStD80[iFn]);
4055 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4056 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4057 {
4058 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4059 {
4060 RTFLOAT80U const InVal = paTests[iTest].InVal;
4061 uint16_t uFswOut = 0;
4062 RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
4063 State.FCW = paTests[iTest].fFcw;
4064 State.FSW = paTests[iTest].fFswIn;
4065 pfn(&State, &uFswOut, &OutVal, &InVal);
4066 if ( uFswOut != paTests[iTest].fFswOut
4067 || !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal))
4068 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4069 "%s -> fsw=%#06x %s\n"
4070 "%s expected %#06x %s%s%s (%s)\n",
4071 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4072 FormatR80(&paTests[iTest].InVal),
4073 iVar ? " " : "", uFswOut, FormatD80(&OutVal),
4074 iVar ? " " : "", paTests[iTest].fFswOut, FormatD80(&paTests[iTest].OutVal),
4075 FswDiff(uFswOut, paTests[iTest].fFswOut),
4076 !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "",
4077 FormatFcw(paTests[iTest].fFcw) );
4078 }
4079 pfn = g_aFpuStD80[iFn].pfnNative;
4080 }
4081
4082 FREE_DECOMPRESSED_TESTS(g_aFpuStD80[iFn]);
4083 }
4084}
4085
4086
4087
4088/*********************************************************************************************************************************
4089* x87 FPU Binary Operations *
4090*********************************************************************************************************************************/
4091
4092/*
4093 * Binary FPU operations on two 80-bit floating point values.
4094 */
4095TYPEDEF_SUBTEST_TYPE(FPU_BINARY_R80_T, FPU_BINARY_R80_TEST_T, PFNIEMAIMPLFPUR80);
4096enum { kFpuBinaryHint_fprem = 1, };
4097
4098static FPU_BINARY_R80_T g_aFpuBinaryR80[] =
4099{
4100 ENTRY_BIN(fadd_r80_by_r80),
4101 ENTRY_BIN(fsub_r80_by_r80),
4102 ENTRY_BIN(fsubr_r80_by_r80),
4103 ENTRY_BIN(fmul_r80_by_r80),
4104 ENTRY_BIN(fdiv_r80_by_r80),
4105 ENTRY_BIN(fdivr_r80_by_r80),
4106 ENTRY_BIN_EX(fprem_r80_by_r80, kFpuBinaryHint_fprem),
4107 ENTRY_BIN_EX(fprem1_r80_by_r80, kFpuBinaryHint_fprem),
4108 ENTRY_BIN(fscale_r80_by_r80),
4109 ENTRY_BIN_AMD( fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
4110 ENTRY_BIN_INTEL(fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
4111 ENTRY_BIN_AMD( fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
4112 ENTRY_BIN_INTEL(fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
4113 ENTRY_BIN_AMD( fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
4114 ENTRY_BIN_INTEL(fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
4115};
4116
4117#ifdef TSTIEMAIMPL_WITH_GENERATOR
4118static RTEXITCODE FpuBinaryR80Generate(uint32_t cTests, const char * const *papszNameFmts)
4119{
4120 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
4121
4122 static struct { RTFLOAT80U Val1, Val2; } const s_aSpecials[] =
4123 {
4124 { RTFLOAT80U_INIT_C(1, 0xdd762f07f2e80eef, 30142), /* causes weird overflows with DOWN and NEAR rounding. */
4125 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
4126 { RTFLOAT80U_INIT_ZERO(0), /* causes weird overflows with UP and NEAR rounding when precision is lower than 64. */
4127 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
4128 { RTFLOAT80U_INIT_ZERO(0), /* minus variant */
4129 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
4130 { RTFLOAT80U_INIT_C(0, 0xcef238bb9a0afd86, 577 + RTFLOAT80U_EXP_BIAS), /* for fprem and fprem1, max sequence length */
4131 RTFLOAT80U_INIT_C(0, 0xf11684ec0beaad94, 1 + RTFLOAT80U_EXP_BIAS) },
4132 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, -13396 + RTFLOAT80U_EXP_BIAS), /* for fdiv. We missed PE. */
4133 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 16383 + RTFLOAT80U_EXP_BIAS) },
4134 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
4135 RTFLOAT80U_INIT_C(0, 0xe000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
4136 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
4137 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
4138 /* fscale: This may seriously increase the exponent, and it turns out overflow and underflow behaviour changes
4139 once RTFLOAT80U_EXP_BIAS_ADJUST is exceeded. */
4140 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1 */
4141 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
4142 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^64 */
4143 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 6 + RTFLOAT80U_EXP_BIAS) },
4144 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1024 */
4145 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 10 + RTFLOAT80U_EXP_BIAS) },
4146 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^4096 */
4147 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 12 + RTFLOAT80U_EXP_BIAS) },
4148 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16384 */
4149 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 49150 */
4150 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
4151 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
4152 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
4153 RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
4154 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^32768 - result is within range on 10980XE */
4155 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 65534 */
4156 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^65536 */
4157 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS) },
4158 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1048576 */
4159 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS) },
4160 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16777216 */
4161 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS) },
4162 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
4163 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24575 - within 10980XE range */
4164 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: max * 2^-24577 (RTFLOAT80U_EXP_BIAS_ADJUST) */
4165 RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24576 - outside 10980XE range, behaviour changes! */
4166 /* fscale: Negative variants for the essentials of the above. */
4167 { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
4168 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
4169 { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
4170 RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
4171 { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
4172 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57342 - within 10980XE range */
4173 { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: max * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
4174 RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57343 - outside 10980XE range, behaviour changes! */
4175 /* fscale: Some fun with denormals and pseudo-denormals. */
4176 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^-4 */
4177 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
4178 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^+1 */
4179 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
4180 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), RTFLOAT80U_INIT_ZERO(0) }, /* for fscale: max * 2^+0 */
4181 { RTFLOAT80U_INIT_C(0, 0x0000000000000008, 0), /* for fscale: max * 2^-4 => underflow */
4182 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
4183 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
4184 { RTFLOAT80U_INIT_C(1, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
4185 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^-4 */
4186 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
4187 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+0 */
4188 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
4189 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+1 */
4190 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS) },
4191 };
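/*
 * Quick arithmetic behind the fscale entries above: the scale operand
 * 0xc000000000000000 with exponent 14 + bias is 1.5 * 2^14, which fscale
 * truncates to 24576 (RTFLOAT80U_EXP_BIAS_ADJUST per the comments), so scaling
 * the largest finite exponent RTFLOAT80U_EXP_MAX - 1 = 32766 gives
 * 32766 + 24576 = 57342, still within the reference 10980XE range, whereas
 * 0xc002000000000000 adds 2^-14 and scales by 24577, crossing the boundary where
 * the noted behaviour change kicks in.  The negative entries probe the matching
 * underflow edge around -24576.
 */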
4192
4193 X86FXSTATE State;
4194 RT_ZERO(State);
4195 uint32_t cMinNormalPairs = (cTests - 144) / 4;
4196 uint32_t cMinTargetRangeInputs = cMinNormalPairs / 2;
4197 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
4198 {
4199 PFNIEMAIMPLFPUR80 const pfn = g_aFpuBinaryR80[iFn].pfnNative ? g_aFpuBinaryR80[iFn].pfnNative : g_aFpuBinaryR80[iFn].pfn;
4200 if ( g_aFpuBinaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
4201 && g_aFpuBinaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4202 continue;
4203
4204 IEMBINARYOUTPUT BinOut;
4205 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuBinaryR80[iFn]), RTEXITCODE_FAILURE);
4206 uint32_t cNormalInputPairs = 0;
4207 uint32_t cTargetRangeInputs = 0;
4208 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4209 {
4210 RTFLOAT80U InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aSpecials[iTest - cTests].Val1;
4211 RTFLOAT80U InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
4212 bool fTargetRange = false;
4213 if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
4214 {
4215 cNormalInputPairs++;
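/* For fprem/fprem1 we specifically look for pairs where InVal1's exponent exceeds
   InVal2's by 64..576: the unsigned subtraction below makes any smaller (or
   negative) difference wrap around and fail the <= 512 comparison. */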
4216 if ( g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem
4217 && (uint32_t)InVal1.s.uExponent - (uint32_t)InVal2.s.uExponent - (uint32_t)64 <= (uint32_t)512)
4218 cTargetRangeInputs += fTargetRange = true;
4219 else if (cTargetRangeInputs < cMinTargetRangeInputs && iTest < cTests)
4220 if (g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
4221 { /* The aim is two values with an exponent difference between 64 and 640 so we can do the whole sequence. */
4222 InVal2.s.uExponent = RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 66);
4223 InVal1.s.uExponent = RTRandU32Ex(InVal2.s.uExponent + 64, RT_MIN(InVal2.s.uExponent + 512, RTFLOAT80U_EXP_MAX - 1));
4224 cTargetRangeInputs += fTargetRange = true;
4225 }
4226 }
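/* When nearing the end of the random tests without enough normal/normal pairs
   collected, retry the current index so another draw can produce a normal pair. */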
4227 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
4228 {
4229 iTest -= 1;
4230 continue;
4231 }
4232
4233 uint16_t const fFcwExtra = 0;
4234 uint16_t const fFcw = RandFcw();
4235 State.FSW = RandFsw();
4236
4237 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4238 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4239 {
4240 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4241 | (iRounding << X86_FCW_RC_SHIFT)
4242 | (iPrecision << X86_FCW_PC_SHIFT)
4243 | X86_FCW_MASK_ALL;
4244 IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4245 pfn(&State, &ResM, &InVal1, &InVal2);
4246 FPU_BINARY_R80_TEST_T const TestM
4247 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResM.FSW, InVal1, InVal2, ResM.r80Result };
4248 GenerateBinaryWrite(&BinOut, &TestM, sizeof(TestM));
4249
4250 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4251 IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4252 pfn(&State, &ResU, &InVal1, &InVal2);
4253 FPU_BINARY_R80_TEST_T const TestU
4254 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResU.FSW, InVal1, InVal2, ResU.r80Result };
4255 GenerateBinaryWrite(&BinOut, &TestU, sizeof(TestU));
4256
4257 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4258 if (fXcpt)
4259 {
4260 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4261 IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4262 pfn(&State, &Res1, &InVal1, &InVal2);
4263 FPU_BINARY_R80_TEST_T const Test1
4264 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res1.FSW, InVal1, InVal2, Res1.r80Result };
4265 GenerateBinaryWrite(&BinOut, &Test1, sizeof(Test1));
4266
4267 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4268 {
4269 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
4270 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4271 IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4272 pfn(&State, &Res2, &InVal1, &InVal2);
4273 FPU_BINARY_R80_TEST_T const Test2
4274 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res2.FSW, InVal1, InVal2, Res2.r80Result };
4275 GenerateBinaryWrite(&BinOut, &Test2, sizeof(Test2));
4276 }
4277 if (!RT_IS_POWER_OF_TWO(fXcpt))
4278 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4279 if (fUnmasked & fXcpt)
4280 {
4281 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
4282 IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4283 pfn(&State, &Res3, &InVal1, &InVal2);
4284 FPU_BINARY_R80_TEST_T const Test3
4285 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res3.FSW, InVal1, InVal2, Res3.r80Result };
4286 GenerateBinaryWrite(&BinOut, &Test3, sizeof(Test3));
4287 }
4288 }
4289
4290 /* If the values are in range and caused no exceptions, do the whole series of
4291                     partial remainders until we get the non-partial one or run into an exception. */
4292 if (fTargetRange && fXcpt == 0 && g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
4293 {
4294 IEMFPURESULT ResPrev = ResM;
4295 for (unsigned i = 0; i < 32 && (ResPrev.FSW & (X86_FSW_C2 | X86_FSW_XCPT_MASK)) == X86_FSW_C2; i++)
4296 {
4297 State.FCW = State.FCW | X86_FCW_MASK_ALL;
4298 State.FSW = ResPrev.FSW;
4299 IEMFPURESULT ResSeq = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4300 pfn(&State, &ResSeq, &ResPrev.r80Result, &InVal2);
4301 FPU_BINARY_R80_TEST_T const TestSeq
4302 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResSeq.FSW, ResPrev.r80Result, InVal2, ResSeq.r80Result };
4303 GenerateBinaryWrite(&BinOut, &TestSeq, sizeof(TestSeq));
4304 ResPrev = ResSeq;
4305 }
4306 }
4307 }
4308 }
4309 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
4310 }
4311 return RTEXITCODE_SUCCESS;
4312}
4313DUMP_ALL_FN(FpuBinaryR80, g_aFpuBinaryR80)
4314#endif
4315
4316
4317static void FpuBinaryR80Test(void)
4318{
4319 X86FXSTATE State;
4320 RT_ZERO(State);
4321 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
4322 {
4323 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuBinaryR80[iFn]))
4324 continue;
4325
4326 FPU_BINARY_R80_TEST_T const * const paTests = g_aFpuBinaryR80[iFn].paTests;
4327 uint32_t const cTests = g_aFpuBinaryR80[iFn].cTests;
4328 PFNIEMAIMPLFPUR80 pfn = g_aFpuBinaryR80[iFn].pfn;
4329 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryR80[iFn]);
4330 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4331 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4332 {
4333 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4334 {
4335 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
4336 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
4337 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4338 State.FCW = paTests[iTest].fFcw;
4339 State.FSW = paTests[iTest].fFswIn;
4340 pfn(&State, &Res, &InVal1, &InVal2);
4341 if ( Res.FSW != paTests[iTest].fFswOut
4342 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal))
4343 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
4344 "%s -> fsw=%#06x %s\n"
4345 "%s expected %#06x %s%s%s (%s)\n",
4346 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4347 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
4348 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
4349 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
4350 FswDiff(Res.FSW, paTests[iTest].fFswOut),
4351 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
4352 FormatFcw(paTests[iTest].fFcw) );
4353 }
4354 pfn = g_aFpuBinaryR80[iFn].pfnNative;
4355 }
4356
4357 FREE_DECOMPRESSED_TESTS(g_aFpuBinaryR80[iFn]);
4358 }
4359}
4360
4361
4362/*
4363  * Binary FPU operations on one 80-bit floating point value and one 64-bit or 32-bit floating point value, or one 32-bit or 16-bit integer.
4364 */
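/* Integer operands have no normal/denormal distinction, so the input classification
   in the generator macros below simply treats them as always being normal. */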
4365#define int64_t_IS_NORMAL(a) 1
4366#define int32_t_IS_NORMAL(a) 1
4367#define int16_t_IS_NORMAL(a) 1
4368
4369#ifdef TSTIEMAIMPL_WITH_GENERATOR
4370static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryR64Specials[] =
4371{
4372 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
4373 RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
4374};
4375static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryR32Specials[] =
4376{
4377 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
4378 RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
4379};
4380static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryI32Specials[] =
4381{
4382 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
4383};
4384static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryI16Specials[] =
4385{
4386 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
4387};
4388
4389# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
4390static RTEXITCODE FpuBinary ## a_UpBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
4391{ \
4392 cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
4393 \
4394 X86FXSTATE State; \
4395 RT_ZERO(State); \
4396 uint32_t cMinNormalPairs = (cTests - 144) / 4; \
4397 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
4398 { \
4399 IEMBINARYOUTPUT BinOut; \
4400 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
4401 uint32_t cNormalInputPairs = 0; \
4402 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinary ## a_UpBits ## Specials); iTest += 1) \
4403 { \
4404 RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
4405 : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val1; \
4406 a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
4407 : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val2; \
4408 if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
4409 cNormalInputPairs++; \
4410 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
4411 { \
4412 iTest -= 1; \
4413 continue; \
4414 } \
4415 \
4416 uint16_t const fFcw = RandFcw(); \
4417 State.FSW = RandFsw(); \
4418 \
4419 for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
4420 { \
4421 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++) \
4422 { \
4423 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
4424 { \
4425 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL)) \
4426 | (iRounding << X86_FCW_RC_SHIFT) \
4427 | (iPrecision << X86_FCW_PC_SHIFT) \
4428 | iMask; \
4429 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
4430 a_aSubTests[iFn].pfn(&State, &Res, &InVal1, &InVal2); \
4431 a_TestType const Test = { State.FCW, State.FSW, Res.FSW, InVal1, InVal2, Res.r80Result }; \
4432 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
4433 } \
4434 } \
4435 } \
4436 } \
4437 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
4438 } \
4439 return RTEXITCODE_SUCCESS; \
4440} \
4441DUMP_ALL_FN(FpuBinary ## a_UpBits, a_aSubTests)
4442#else
4443# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
4444#endif
4445
4446#define TEST_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_I, a_Type2, a_SubTestType, a_aSubTests, a_TestType) \
4447TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits); \
4448\
4449static a_SubTestType a_aSubTests[] = \
4450{ \
4451 ENTRY_BIN(RT_CONCAT4(f, a_I, add_r80_by_, a_LoBits)), \
4452 ENTRY_BIN(RT_CONCAT4(f, a_I, mul_r80_by_, a_LoBits)), \
4453 ENTRY_BIN(RT_CONCAT4(f, a_I, sub_r80_by_, a_LoBits)), \
4454 ENTRY_BIN(RT_CONCAT4(f, a_I, subr_r80_by_, a_LoBits)), \
4455 ENTRY_BIN(RT_CONCAT4(f, a_I, div_r80_by_, a_LoBits)), \
4456 ENTRY_BIN(RT_CONCAT4(f, a_I, divr_r80_by_, a_LoBits)), \
4457}; \
4458\
4459GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
4460\
4461static void FpuBinary ## a_UpBits ## Test(void) \
4462{ \
4463 X86FXSTATE State; \
4464 RT_ZERO(State); \
4465 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
4466 { \
4467 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
4468 continue; \
4469 \
4470 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
4471 uint32_t const cTests = a_aSubTests[iFn].cTests; \
4472 PFNIEMAIMPLFPU ## a_UpBits pfn = a_aSubTests[iFn].pfn; \
4473 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
4474 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
4475 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
4476 { \
4477 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
4478 { \
4479 RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
4480 a_Type2 const InVal2 = paTests[iTest].InVal2; \
4481 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
4482 State.FCW = paTests[iTest].fFcw; \
4483 State.FSW = paTests[iTest].fFswIn; \
4484 pfn(&State, &Res, &InVal1, &InVal2); \
4485 if ( Res.FSW != paTests[iTest].fFswOut \
4486 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal)) \
4487 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
4488 "%s -> fsw=%#06x %s\n" \
4489 "%s expected %#06x %s%s%s (%s)\n", \
4490 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
4491 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
4492 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
4493 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal), \
4494 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
4495 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "", \
4496 FormatFcw(paTests[iTest].fFcw) ); \
4497 } \
4498 pfn = a_aSubTests[iFn].pfnNative; \
4499 } \
4500 FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
4501 } \
4502}
4503
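/* Instantiations: r80-by-r64 and r80-by-r32 floating point, plus r80-by-i32 and r80-by-i16 integer variants. */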
4504TEST_FPU_BINARY_SMALL(0, 64, r64, R64, RT_NOTHING, RTFLOAT64U, FPU_BINARY_R64_T, g_aFpuBinaryR64, FPU_BINARY_R64_TEST_T)
4505TEST_FPU_BINARY_SMALL(0, 32, r32, R32, RT_NOTHING, RTFLOAT32U, FPU_BINARY_R32_T, g_aFpuBinaryR32, FPU_BINARY_R32_TEST_T)
4506TEST_FPU_BINARY_SMALL(1, 32, i32, I32, i, int32_t, FPU_BINARY_I32_T, g_aFpuBinaryI32, FPU_BINARY_I32_TEST_T)
4507TEST_FPU_BINARY_SMALL(1, 16, i16, I16, i, int16_t, FPU_BINARY_I16_T, g_aFpuBinaryI16, FPU_BINARY_I16_TEST_T)
4508
4509
4510/*
4511  * Binary operations on an 80-bit floating point value and an 80-, 64- or 32-bit floating point or 32- or 16-bit integer value, only affecting FSW.
4512 */
4513#ifdef TSTIEMAIMPL_WITH_GENERATOR
4514static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryFswR80Specials[] =
4515{
4516 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
4517 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
4518};
4519static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryFswR64Specials[] =
4520{
4521 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
4522 RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
4523};
4524static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryFswR32Specials[] =
4525{
4526 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
4527 RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
4528};
4529static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryFswI32Specials[] =
4530{
4531 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
4532};
4533static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryFswI16Specials[] =
4534{
4535 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
4536};
4537
4538# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
4539static RTEXITCODE FpuBinaryFsw ## a_UpBits ## Generate(uint32_t cTests, const char * const *papszNameFmts) \
4540{ \
4541 cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
4542 \
4543 X86FXSTATE State; \
4544 RT_ZERO(State); \
4545 uint32_t cMinNormalPairs = (cTests - 144) / 4; \
4546 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
4547 { \
4548 IEMBINARYOUTPUT BinOut; \
4549 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, a_aSubTests[iFn]), RTEXITCODE_FAILURE); \
4550 uint32_t cNormalInputPairs = 0; \
4551 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryFsw ## a_UpBits ## Specials); iTest += 1) \
4552 { \
4553 RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
4554 : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val1; \
4555 a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
4556 : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val2; \
4557 if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
4558 cNormalInputPairs++; \
4559 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
4560 { \
4561 iTest -= 1; \
4562 continue; \
4563 } \
4564 \
4565 uint16_t const fFcw = RandFcw(); \
4566 State.FSW = RandFsw(); \
4567 \
4568 /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */ \
4569 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
4570 { \
4571 State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask; \
4572 uint16_t fFswOut = 0; \
4573 a_aSubTests[iFn].pfn(&State, &fFswOut, &InVal1, &InVal2); \
4574 a_TestType const Test = { State.FCW, State.FSW, fFswOut, InVal1, InVal2 }; \
4575 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test)); \
4576 } \
4577 } \
4578 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE); \
4579 } \
4580 return RTEXITCODE_SUCCESS; \
4581} \
4582DUMP_ALL_FN(FpuBinaryFsw ## a_UpBits, a_aSubTests)
4583#else
4584# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
4585#endif
4586
4587#define TEST_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_SubTestType, a_aSubTests, a_TestType, ...) \
4588TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits ## FSW); \
4589\
4590static a_SubTestType a_aSubTests[] = \
4591{ \
4592 __VA_ARGS__ \
4593}; \
4594\
4595GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
4596\
4597static void FpuBinaryFsw ## a_UpBits ## Test(void) \
4598{ \
4599 X86FXSTATE State; \
4600 RT_ZERO(State); \
4601 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
4602 { \
4603 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(a_aSubTests[iFn])) \
4604 continue; \
4605 \
4606 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
4607 uint32_t const cTests = a_aSubTests[iFn].cTests; \
4608 PFNIEMAIMPLFPU ## a_UpBits ## FSW pfn = a_aSubTests[iFn].pfn; \
4609 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
4610 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
4611 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
4612 { \
4613 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
4614 { \
4615 uint16_t fFswOut = 0; \
4616 RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
4617 a_Type2 const InVal2 = paTests[iTest].InVal2; \
4618 State.FCW = paTests[iTest].fFcw; \
4619 State.FSW = paTests[iTest].fFswIn; \
4620 pfn(&State, &fFswOut, &InVal1, &InVal2); \
4621 if (fFswOut != paTests[iTest].fFswOut) \
4622 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
4623 "%s -> fsw=%#06x\n" \
4624 "%s expected %#06x %s (%s)\n", \
4625 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
4626 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
4627 iVar ? " " : "", fFswOut, \
4628 iVar ? " " : "", paTests[iTest].fFswOut, \
4629 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) ); \
4630 } \
4631 pfn = a_aSubTests[iFn].pfnNative; \
4632 } \
4633 FREE_DECOMPRESSED_TESTS(a_aSubTests[iFn]); \
4634 } \
4635}
4636
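/* Instantiations: fcom/fucom style comparisons against r80/r64/r32 and ficom against i32/i16, all returning only FSW. */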
4637TEST_FPU_BINARY_FSW(0, 80, R80, RTFLOAT80U, FPU_BINARY_FSW_R80_T, g_aFpuBinaryFswR80, FPU_BINARY_R80_TEST_T, ENTRY_BIN(fcom_r80_by_r80), ENTRY_BIN(fucom_r80_by_r80))
4638TEST_FPU_BINARY_FSW(0, 64, R64, RTFLOAT64U, FPU_BINARY_FSW_R64_T, g_aFpuBinaryFswR64, FPU_BINARY_R64_TEST_T, ENTRY_BIN(fcom_r80_by_r64))
4639TEST_FPU_BINARY_FSW(0, 32, R32, RTFLOAT32U, FPU_BINARY_FSW_R32_T, g_aFpuBinaryFswR32, FPU_BINARY_R32_TEST_T, ENTRY_BIN(fcom_r80_by_r32))
4640TEST_FPU_BINARY_FSW(1, 32, I32, int32_t, FPU_BINARY_FSW_I32_T, g_aFpuBinaryFswI32, FPU_BINARY_I32_TEST_T, ENTRY_BIN(ficom_r80_by_i32))
4641TEST_FPU_BINARY_FSW(1, 16, I16, int16_t, FPU_BINARY_FSW_I16_T, g_aFpuBinaryFswI16, FPU_BINARY_I16_TEST_T, ENTRY_BIN(ficom_r80_by_i16))
4642
4643
4644/*
4645  * Binary operations on 80-bit floating point values that affect only EFLAGS and possibly FSW.
4646 */
4647TYPEDEF_SUBTEST_TYPE(FPU_BINARY_EFL_R80_T, FPU_BINARY_EFL_R80_TEST_T, PFNIEMAIMPLFPUR80EFL);
4648
4649static FPU_BINARY_EFL_R80_T g_aFpuBinaryEflR80[] =
4650{
4651 ENTRY_BIN(fcomi_r80_by_r80),
4652 ENTRY_BIN(fucomi_r80_by_r80),
4653};
4654
4655#ifdef TSTIEMAIMPL_WITH_GENERATOR
4656static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryEflR80Specials[] =
4657{
4658 { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
4659 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
4660};
4661
4662static RTEXITCODE FpuBinaryEflR80Generate(uint32_t cTests, const char * const *papszNameFmts)
4663{
4664 cTests = RT_MAX(160, cTests); /* there are 144 standard input variations */
4665
4666 X86FXSTATE State;
4667 RT_ZERO(State);
4668 uint32_t cMinNormalPairs = (cTests - 144) / 4;
4669 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
4670 {
4671 IEMBINARYOUTPUT BinOut;
4672 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuBinaryEflR80[iFn]), RTEXITCODE_FAILURE);
4673 uint32_t cNormalInputPairs = 0;
4674 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryEflR80Specials); iTest += 1)
4675 {
4676 RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val1;
4677 RTFLOAT80U const InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val2;
4678 if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
4679 cNormalInputPairs++;
4680 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
4681 {
4682 iTest -= 1;
4683 continue;
4684 }
4685
4686 uint16_t const fFcw = RandFcw();
4687 State.FSW = RandFsw();
4688
4689 /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */
4690 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
4691 {
4692 State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask;
4693 uint16_t uFswOut = 0;
4694 uint32_t fEflOut = g_aFpuBinaryEflR80[iFn].pfn(&State, &uFswOut, &InVal1, &InVal2);
4695 FPU_BINARY_EFL_R80_TEST_T const Test = { State.FCW, State.FSW, uFswOut, InVal1, InVal2, fEflOut, };
4696 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
4697 }
4698 }
4699 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
4700 }
4701 return RTEXITCODE_SUCCESS;
4702}
4703DUMP_ALL_FN(FpuBinaryEflR80, g_aFpuBinaryEflR80)
4704#endif /*TSTIEMAIMPL_WITH_GENERATOR*/
4705
4706static void FpuBinaryEflR80Test(void)
4707{
4708 X86FXSTATE State;
4709 RT_ZERO(State);
4710 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
4711 {
4712 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuBinaryEflR80[iFn]))
4713 continue;
4714
4715 FPU_BINARY_EFL_R80_TEST_T const * const paTests = g_aFpuBinaryEflR80[iFn].paTests;
4716 uint32_t const cTests = g_aFpuBinaryEflR80[iFn].cTests;
4717 PFNIEMAIMPLFPUR80EFL pfn = g_aFpuBinaryEflR80[iFn].pfn;
4718 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryEflR80[iFn]);
4719 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4720 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4721 {
4722 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4723 {
4724 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
4725 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
4726 State.FCW = paTests[iTest].fFcw;
4727 State.FSW = paTests[iTest].fFswIn;
4728 uint16_t uFswOut = 0;
4729 uint32_t fEflOut = pfn(&State, &uFswOut, &InVal1, &InVal2);
4730 if ( uFswOut != paTests[iTest].fFswOut
4731 || fEflOut != paTests[iTest].fEflOut)
4732 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
4733 "%s -> fsw=%#06x efl=%#08x\n"
4734 "%s expected %#06x %#08x %s%s (%s)\n",
4735 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4736 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
4737 iVar ? " " : "", uFswOut, fEflOut,
4738 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].fEflOut,
4739 FswDiff(uFswOut, paTests[iTest].fFswOut), EFlagsDiff(fEflOut, paTests[iTest].fEflOut),
4740 FormatFcw(paTests[iTest].fFcw));
4741 }
4742 pfn = g_aFpuBinaryEflR80[iFn].pfnNative;
4743 }
4744
4745 FREE_DECOMPRESSED_TESTS(g_aFpuBinaryEflR80[iFn]);
4746 }
4747}
4748
4749
4750/*********************************************************************************************************************************
4751* x87 FPU Unary Operations *
4752*********************************************************************************************************************************/
4753
4754/*
4755 * Unary FPU operations on one 80-bit floating point value.
4756 *
4757 * Note! The FCW reserved bit 7 is used to indicate whether a test may produce
4758 * a rounding error or not.
4759 */
4760TYPEDEF_SUBTEST_TYPE(FPU_UNARY_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARY);
4761
4762enum { kUnary_Accurate = 0, kUnary_Accurate_Trigonometry /*probably not accurate, but need impl to know*/, kUnary_Rounding_F2xm1 };
4763static FPU_UNARY_R80_T g_aFpuUnaryR80[] =
4764{
4765 ENTRY_BIN_EX( fabs_r80, kUnary_Accurate),
4766 ENTRY_BIN_EX( fchs_r80, kUnary_Accurate),
4767 ENTRY_BIN_AMD_EX( f2xm1_r80, 0, kUnary_Accurate), // C1 differs for -1m0x3fb263cc2c331e15^-2654 (different ln2 constant?)
4768 ENTRY_BIN_INTEL_EX(f2xm1_r80, 0, kUnary_Rounding_F2xm1),
4769 ENTRY_BIN_EX( fsqrt_r80, kUnary_Accurate),
4770 ENTRY_BIN_EX( frndint_r80, kUnary_Accurate),
4771 ENTRY_BIN_AMD_EX( fsin_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences for pseudo denormals and others (e.g. -1m0x2b1e5683cbca5725^-3485)
4772 ENTRY_BIN_INTEL_EX(fsin_r80, 0, kUnary_Accurate_Trigonometry),
4773 ENTRY_BIN_AMD_EX( fcos_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences
4774 ENTRY_BIN_INTEL_EX(fcos_r80, 0, kUnary_Accurate_Trigonometry),
4775};
4776
4777#ifdef TSTIEMAIMPL_WITH_GENERATOR
4778
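/** Checks whether an f2xm1 test input may legitimately produce a slightly different
 * result than the reference, i.e. a normal input with magnitude in [2^-69, 1)
 * (see the note about FCW bit 7 above). */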
4779static bool FpuUnaryR80MayHaveRoundingError(PCRTFLOAT80U pr80Val, int enmKind)
4780{
4781 if ( enmKind == kUnary_Rounding_F2xm1
4782 && RTFLOAT80U_IS_NORMAL(pr80Val)
4783 && pr80Val->s.uExponent < RTFLOAT80U_EXP_BIAS
4784 && pr80Val->s.uExponent >= RTFLOAT80U_EXP_BIAS - 69)
4785 return true;
4786 return false;
4787}
4788
4789DUMP_ALL_FN(FpuUnaryR80, g_aFpuUnaryR80)
4790static RTEXITCODE FpuUnaryR80Generate(uint32_t cTests, const char * const *papszNameFmts)
4791{
4792 static RTFLOAT80U const s_aSpecials[] =
4793 {
4794 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* 0.5 (for f2xm1) */
4795 RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* -0.5 (for f2xm1) */
4796 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* 1.0 (for f2xm1) */
4797 RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* -1.0 (for f2xm1) */
4798 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0), /* +1.0^-16382 */
4799 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 0), /* -1.0^-16382 */
4800 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 0), /* +1.1^-16382 */
4801 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 0), /* -1.1^-16382 */
4802 RTFLOAT80U_INIT_C(0, 0xc000100000000000, 0), /* +1.1xxx1^-16382 */
4803 RTFLOAT80U_INIT_C(1, 0xc000100000000000, 0), /* -1.1xxx1^-16382 */
4804 };
4805 X86FXSTATE State;
4806 RT_ZERO(State);
4807 uint32_t cMinNormals = cTests / 4;
4808 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
4809 {
4810 PFNIEMAIMPLFPUR80UNARY const pfn = g_aFpuUnaryR80[iFn].pfnNative ? g_aFpuUnaryR80[iFn].pfnNative : g_aFpuUnaryR80[iFn].pfn;
4811 if ( g_aFpuUnaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
4812 && g_aFpuUnaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4813 continue;
4814
4815 IEMBINARYOUTPUT BinOut;
4816 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuUnaryR80[iFn]), RTEXITCODE_FAILURE);
4817 uint32_t cNormalInputs = 0;
4818 uint32_t cTargetRangeInputs = 0;
4819 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4820 {
4821 RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4822 if (RTFLOAT80U_IS_NORMAL(&InVal))
4823 {
4824 if (g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1)
4825 {
4826 unsigned uTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1
4827 ? RTFLOAT80U_EXP_BIAS /* 2^0..2^-69 */ : RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
4828 unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
4829 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
4830 cTargetRangeInputs++;
4831 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
4832 {
4833 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
4834 cTargetRangeInputs++;
4835 }
4836 }
4837 cNormalInputs++;
4838 }
4839 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4840 {
4841 iTest -= 1;
4842 continue;
4843 }
4844
4845 uint16_t const fFcwExtra = FpuUnaryR80MayHaveRoundingError(&InVal, g_aFpuUnaryR80[iFn].uExtra) ? 0x80 : 0;
4846 uint16_t const fFcw = RandFcw();
4847 State.FSW = RandFsw();
4848
4849 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4850 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4851 {
4852 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4853 | (iRounding << X86_FCW_RC_SHIFT)
4854 | (iPrecision << X86_FCW_PC_SHIFT)
4855 | X86_FCW_MASK_ALL;
4856 IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4857 pfn(&State, &ResM, &InVal);
4858 FPU_UNARY_R80_TEST_T const TestM
4859 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResM.FSW, InVal, ResM.r80Result };
4860 GenerateBinaryWrite(&BinOut, &TestM, sizeof(TestM));
4861
4862 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4863 IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4864 pfn(&State, &ResU, &InVal);
4865 FPU_UNARY_R80_TEST_T const TestU
4866 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResU.FSW, InVal, ResU.r80Result };
4867 GenerateBinaryWrite(&BinOut, &TestU, sizeof(TestU));
4868
4869 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4870 if (fXcpt)
4871 {
4872 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4873 IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4874 pfn(&State, &Res1, &InVal);
4875 FPU_UNARY_R80_TEST_T const Test1
4876 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res1.FSW, InVal, Res1.r80Result };
4877 GenerateBinaryWrite(&BinOut, &Test1, sizeof(Test1));
4878 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4879 {
4880 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
4881 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4882 IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4883 pfn(&State, &Res2, &InVal);
4884 FPU_UNARY_R80_TEST_T const Test2
4885 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res2.FSW, InVal, Res2.r80Result };
4886 GenerateBinaryWrite(&BinOut, &Test2, sizeof(Test2));
4887 }
4888 if (!RT_IS_POWER_OF_TWO(fXcpt))
4889 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4890 if (fUnmasked & fXcpt)
4891 {
4892 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
4893 IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4894 pfn(&State, &Res3, &InVal);
4895 FPU_UNARY_R80_TEST_T const Test3
4896 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res3.FSW, InVal, Res3.r80Result };
4897 GenerateBinaryWrite(&BinOut, &Test3, sizeof(Test3));
4898 }
4899 }
4900 }
4901 }
4902 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
4903 }
4904 return RTEXITCODE_SUCCESS;
4905}
4906#endif
4907
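/** Compares two status words, optionally ignoring a C1 difference when rounding
 * errors are acceptable; *pfRndErr is set when that leniency was used. */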
4908static bool FpuIsEqualFcwMaybeIgnoreRoundErr(uint16_t fFcw1, uint16_t fFcw2, bool fRndErrOk, bool *pfRndErr)
4909{
4910 if (fFcw1 == fFcw2)
4911 return true;
4912 if (fRndErrOk && (fFcw1 & ~X86_FSW_C1) == (fFcw2 & ~X86_FSW_C1))
4913 {
4914 *pfRndErr = true;
4915 return true;
4916 }
4917 return false;
4918}
4919
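/** Compares two 80-bit values, optionally accepting a difference of one unit in
 * the last place (also across an exponent step) when rounding errors are
 * acceptable; *pfRndErr is set when that leniency was used. */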
4920static bool FpuIsEqualR80MaybeIgnoreRoundErr(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, bool fRndErrOk, bool *pfRndErr)
4921{
4922 if (RTFLOAT80U_ARE_IDENTICAL(pr80Val1, pr80Val2))
4923 return true;
4924 if ( fRndErrOk
4925 && pr80Val1->s.fSign == pr80Val2->s.fSign)
4926 {
4927 if ( ( pr80Val1->s.uExponent == pr80Val2->s.uExponent
4928 && ( pr80Val1->s.uMantissa > pr80Val2->s.uMantissa
4929 ? pr80Val1->s.uMantissa - pr80Val2->s.uMantissa == 1
4930 : pr80Val2->s.uMantissa - pr80Val1->s.uMantissa == 1))
4931 ||
4932 ( pr80Val1->s.uExponent + 1 == pr80Val2->s.uExponent
4933 && pr80Val1->s.uMantissa == UINT64_MAX
4934 && pr80Val2->s.uMantissa == RT_BIT_64(63))
4935 ||
4936 ( pr80Val1->s.uExponent == pr80Val2->s.uExponent + 1
4937 && pr80Val2->s.uMantissa == UINT64_MAX
4938 && pr80Val1->s.uMantissa == RT_BIT_64(63)) )
4939 {
4940 *pfRndErr = true;
4941 return true;
4942 }
4943 }
4944 return false;
4945}
4946
4947
4948static void FpuUnaryR80Test(void)
4949{
4950 X86FXSTATE State;
4951 RT_ZERO(State);
4952 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
4953 {
4954 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuUnaryR80[iFn]))
4955 continue;
4956
4957 FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryR80[iFn].paTests;
4958 uint32_t const cTests = g_aFpuUnaryR80[iFn].cTests;
4959 PFNIEMAIMPLFPUR80UNARY pfn = g_aFpuUnaryR80[iFn].pfn;
4960 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryR80[iFn]);
4961 uint32_t cRndErrs = 0;
4962 uint32_t cPossibleRndErrs = 0;
4963 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4964 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4965 {
4966 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4967 {
4968 RTFLOAT80U const InVal = paTests[iTest].InVal;
4969 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4970 bool const fRndErrOk = RT_BOOL(paTests[iTest].fFcw & 0x80);
4971 State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80;
4972 State.FSW = paTests[iTest].fFswIn;
4973 pfn(&State, &Res, &InVal);
4974 bool fRndErr = false;
4975 if ( !FpuIsEqualFcwMaybeIgnoreRoundErr(Res.FSW, paTests[iTest].fFswOut, fRndErrOk, &fRndErr)
4976 || !FpuIsEqualR80MaybeIgnoreRoundErr(&Res.r80Result, &paTests[iTest].OutVal, fRndErrOk, &fRndErr))
4977 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4978 "%s -> fsw=%#06x %s\n"
4979 "%s expected %#06x %s%s%s%s (%s)\n",
4980 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4981 FormatR80(&paTests[iTest].InVal),
4982 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
4983 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
4984 FswDiff(Res.FSW, paTests[iTest].fFswOut),
4985 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
4986 fRndErrOk ? " - rounding errors ok" : "", FormatFcw(paTests[iTest].fFcw));
4987 cRndErrs += fRndErr;
4988 cPossibleRndErrs += fRndErrOk;
4989 }
4990 pfn = g_aFpuUnaryR80[iFn].pfnNative;
4991 }
4992 if (cPossibleRndErrs > 0)
4993 RTTestPrintf(g_hTest, RTTESTLVL_ALWAYS, "rounding errors: %u out of %u\n", cRndErrs, cPossibleRndErrs);
4994 FREE_DECOMPRESSED_TESTS(g_aFpuUnaryR80[iFn]);
4995 }
4996}
4997
4998
4999/*
5000  * Unary FPU operations on one 80-bit floating point value, but only affecting the FSW.
5001 */
5002TYPEDEF_SUBTEST_TYPE(FPU_UNARY_FSW_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYFSW);
5003
5004static FPU_UNARY_FSW_R80_T g_aFpuUnaryFswR80[] =
5005{
5006 ENTRY_BIN(ftst_r80),
5007 ENTRY_BIN_EX(fxam_r80, 1),
5008};
5009
5010#ifdef TSTIEMAIMPL_WITH_GENERATOR
5011static RTEXITCODE FpuUnaryFswR80Generate(uint32_t cTests, const char * const *papszNameFmts)
5012{
5013 static RTFLOAT80U const s_aSpecials[] =
5014 {
5015 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
5016 };
5017
5018 X86FXSTATE State;
5019 RT_ZERO(State);
5020 uint32_t cMinNormals = cTests / 4;
5021 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
5022 {
5023 bool const fIsFxam = g_aFpuUnaryFswR80[iFn].uExtra == 1;
5024 PFNIEMAIMPLFPUR80UNARYFSW const pfn = g_aFpuUnaryFswR80[iFn].pfnNative ? g_aFpuUnaryFswR80[iFn].pfnNative : g_aFpuUnaryFswR80[iFn].pfn;
5025 if ( g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
5026 && g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
5027 continue;
5028 State.FTW = 0;
5029
5030 IEMBINARYOUTPUT BinOut;
5031 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuUnaryFswR80[iFn]), RTEXITCODE_FAILURE);
5032 uint32_t cNormalInputs = 0;
5033 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5034 {
5035 RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
5036 if (RTFLOAT80U_IS_NORMAL(&InVal))
5037 cNormalInputs++;
5038 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
5039 {
5040 iTest -= 1;
5041 continue;
5042 }
5043
5044 uint16_t const fFcw = RandFcw();
5045 State.FSW = RandFsw();
5046 if (!fIsFxam)
5047 {
5048 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5049 {
5050 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
5051 {
5052 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
5053 {
5054 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
5055 | (iRounding << X86_FCW_RC_SHIFT)
5056 | (iPrecision << X86_FCW_PC_SHIFT)
5057 | iMask;
5058 uint16_t fFswOut = 0;
5059 pfn(&State, &fFswOut, &InVal);
5060 FPU_UNARY_R80_TEST_T const Test = { State.FCW, State.FSW, fFswOut, InVal };
5061 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
5062 }
5063 }
5064 }
5065 }
5066 else
5067 {
5068 uint16_t fFswOut = 0;
5069 uint16_t const fEmpty = RTRandU32Ex(0, 3) == 3 ? 0x80 : 0; /* Using MBZ bit 7 in FCW to indicate empty tag value. */
5070 State.FTW = !fEmpty ? 1 << X86_FSW_TOP_GET(State.FSW) : 0;
5071 State.FCW = fFcw;
5072 pfn(&State, &fFswOut, &InVal);
5073 FPU_UNARY_R80_TEST_T const Test = { (uint16_t)(fFcw | fEmpty), State.FSW, fFswOut, InVal };
5074 GenerateBinaryWrite(&BinOut, &Test, sizeof(Test));
5075 }
5076 }
5077 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
5078 }
5079 return RTEXITCODE_SUCCESS;
5080}
5081DUMP_ALL_FN(FpuUnaryFswR80, g_aFpuUnaryFswR80)
5082#endif
5083
5084
5085static void FpuUnaryFswR80Test(void)
5086{
5087 X86FXSTATE State;
5088 RT_ZERO(State);
5089 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
5090 {
5091 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuUnaryFswR80[iFn]))
5092 continue;
5093
5094 FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryFswR80[iFn].paTests;
5095 uint32_t const cTests = g_aFpuUnaryFswR80[iFn].cTests;
5096 PFNIEMAIMPLFPUR80UNARYFSW pfn = g_aFpuUnaryFswR80[iFn].pfn;
5097 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryFswR80[iFn]);
5098 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5099 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5100 {
5101 for (uint32_t iTest = 0; iTest < cTests; iTest++)
5102 {
5103 RTFLOAT80U const InVal = paTests[iTest].InVal;
5104 uint16_t fFswOut = 0;
5105 State.FSW = paTests[iTest].fFswIn;
5106 State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80; /* see generator code */
5107 State.FTW = paTests[iTest].fFcw & 0x80 ? 0 : 1 << X86_FSW_TOP_GET(paTests[iTest].fFswIn);
5108 pfn(&State, &fFswOut, &InVal);
5109 if (fFswOut != paTests[iTest].fFswOut)
5110 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
5111 "%s -> fsw=%#06x\n"
5112 "%s expected %#06x %s (%s%s)\n",
5113 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
5114 FormatR80(&paTests[iTest].InVal),
5115 iVar ? " " : "", fFswOut,
5116 iVar ? " " : "", paTests[iTest].fFswOut,
5117 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw),
5118 paTests[iTest].fFcw & 0x80 ? " empty" : "");
5119 }
5120 pfn = g_aFpuUnaryFswR80[iFn].pfnNative;
5121 }
5122
5123 FREE_DECOMPRESSED_TESTS(g_aFpuUnaryFswR80[iFn]);
5124 }
5125}
5126
5127/*
5128 * Unary FPU operations on one 80-bit floating point value, but with two outputs.
5129 */
5130TYPEDEF_SUBTEST_TYPE(FPU_UNARY_TWO_R80_T, FPU_UNARY_TWO_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYTWO);
5131
5132static FPU_UNARY_TWO_R80_T g_aFpuUnaryTwoR80[] =
5133{
5134 ENTRY_BIN(fxtract_r80_r80),
5135 ENTRY_BIN_AMD( fptan_r80_r80, 0), // rounding differences
5136 ENTRY_BIN_INTEL(fptan_r80_r80, 0),
5137 ENTRY_BIN_AMD( fsincos_r80_r80, 0), // C1 differences & value differences (e.g. -1m0x235cf2f580244a27^-1696)
5138 ENTRY_BIN_INTEL(fsincos_r80_r80, 0),
5139};
5140
5141#ifdef TSTIEMAIMPL_WITH_GENERATOR
5142static RTEXITCODE FpuUnaryTwoR80Generate(uint32_t cTests, const char * const *papszNameFmts)
5143{
5144 static RTFLOAT80U const s_aSpecials[] =
5145 {
5146 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
5147 };
5148
5149 X86FXSTATE State;
5150 RT_ZERO(State);
5151 uint32_t cMinNormals = cTests / 4;
5152 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
5153 {
5154 PFNIEMAIMPLFPUR80UNARYTWO const pfn = g_aFpuUnaryTwoR80[iFn].pfnNative ? g_aFpuUnaryTwoR80[iFn].pfnNative : g_aFpuUnaryTwoR80[iFn].pfn;
5155 if ( g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
5156 && g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
5157 continue;
5158
5159 IEMBINARYOUTPUT BinOut;
5160 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aFpuUnaryTwoR80[iFn]), RTEXITCODE_FAILURE);
5161 uint32_t cNormalInputs = 0;
5162 uint32_t cTargetRangeInputs = 0;
5163 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5164 {
5165 RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
5166 if (RTFLOAT80U_IS_NORMAL(&InVal))
5167 {
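                /* Only bias the trigonometric workers (fptan, fsincos) towards the
                   2^64..2^-64 exponent range; fxtract (iFn == 0) takes any input. */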
5168 if (iFn != 0)
5169 {
5170 unsigned uTargetExp = RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
5171                     unsigned cTargetExp = 63*2 + 2; /* none of these workers are f2xm1, so the narrower range doesn't apply */
5172 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
5173 cTargetRangeInputs++;
5174 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
5175 {
5176 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
5177 cTargetRangeInputs++;
5178 }
5179 }
5180 cNormalInputs++;
5181 }
5182 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
5183 {
5184 iTest -= 1;
5185 continue;
5186 }
5187
5188 uint16_t const fFcwExtra = 0; /* for rounding error indication */
5189 uint16_t const fFcw = RandFcw();
5190 State.FSW = RandFsw();
5191
5192 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5193 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
5194 {
5195 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
5196 | (iRounding << X86_FCW_RC_SHIFT)
5197 | (iPrecision << X86_FCW_PC_SHIFT)
5198 | X86_FCW_MASK_ALL;
5199 IEMFPURESULTTWO ResM = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
5200 pfn(&State, &ResM, &InVal);
5201 FPU_UNARY_TWO_R80_TEST_T const TestM
5202 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResM.FSW, InVal, ResM.r80Result1, ResM.r80Result2 };
5203 GenerateBinaryWrite(&BinOut, &TestM, sizeof(TestM));
5204
5205 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
5206 IEMFPURESULTTWO ResU = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
5207 pfn(&State, &ResU, &InVal);
5208 FPU_UNARY_TWO_R80_TEST_T const TestU
5209 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, ResU.FSW, InVal, ResU.r80Result1, ResU.r80Result2 };
5210 GenerateBinaryWrite(&BinOut, &TestU, sizeof(TestU));
5211
5212 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
5213 if (fXcpt)
5214 {
5215 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
5216 IEMFPURESULTTWO Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
5217 pfn(&State, &Res1, &InVal);
5218 FPU_UNARY_TWO_R80_TEST_T const Test1
5219 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res1.FSW, InVal, Res1.r80Result1, Res1.r80Result2 };
5220 GenerateBinaryWrite(&BinOut, &Test1, sizeof(Test1));
5221
5222 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
5223 {
5224 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
5225 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
5226 IEMFPURESULTTWO Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
5227 pfn(&State, &Res2, &InVal);
5228 FPU_UNARY_TWO_R80_TEST_T const Test2
5229 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res2.FSW, InVal, Res2.r80Result1, Res2.r80Result2 };
5230 GenerateBinaryWrite(&BinOut, &Test2, sizeof(Test2));
5231 }
5232 if (!RT_IS_POWER_OF_TWO(fXcpt))
5233 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
5234 if (fUnmasked & fXcpt)
5235 {
5236 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
5237 IEMFPURESULTTWO Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
5238 pfn(&State, &Res3, &InVal);
5239 FPU_UNARY_TWO_R80_TEST_T const Test3
5240 = { (uint16_t)(State.FCW | fFcwExtra), State.FSW, Res3.FSW, InVal, Res3.r80Result1, Res3.r80Result2 };
5241 GenerateBinaryWrite(&BinOut, &Test3, sizeof(Test3));
5242 }
5243 }
5244 }
5245 }
5246 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
5247 }
5248 return RTEXITCODE_SUCCESS;
5249}
5250DUMP_ALL_FN(FpuUnaryTwoR80, g_aFpuUnaryTwoR80)
5251#endif
5252
5253
5254static void FpuUnaryTwoR80Test(void)
5255{
5256 X86FXSTATE State;
5257 RT_ZERO(State);
5258 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
5259 {
5260 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aFpuUnaryTwoR80[iFn]))
5261 continue;
5262
5263 FPU_UNARY_TWO_R80_TEST_T const * const paTests = g_aFpuUnaryTwoR80[iFn].paTests;
5264 uint32_t const cTests = g_aFpuUnaryTwoR80[iFn].cTests;
5265 PFNIEMAIMPLFPUR80UNARYTWO pfn = g_aFpuUnaryTwoR80[iFn].pfn;
5266 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryTwoR80[iFn]);
5267 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5268 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5269 {
5270 for (uint32_t iTest = 0; iTest < cTests; iTest++)
5271 {
5272 IEMFPURESULTTWO Res = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
5273 RTFLOAT80U const InVal = paTests[iTest].InVal;
5274 State.FCW = paTests[iTest].fFcw;
5275 State.FSW = paTests[iTest].fFswIn;
5276 pfn(&State, &Res, &InVal);
5277 if ( Res.FSW != paTests[iTest].fFswOut
5278 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1)
5279 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) )
5280 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
5281 "%s -> fsw=%#06x %s %s\n"
5282 "%s expected %#06x %s %s %s%s%s (%s)\n",
5283 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
5284 FormatR80(&paTests[iTest].InVal),
5285 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result1), FormatR80(&Res.r80Result2),
5286 iVar ? " " : "", paTests[iTest].fFswOut,
5287 FormatR80(&paTests[iTest].OutVal1), FormatR80(&paTests[iTest].OutVal2),
5288 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1) ? " - val1" : "",
5289 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) ? " - val2" : "",
5290 FswDiff(Res.FSW, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) );
5291 }
5292 pfn = g_aFpuUnaryTwoR80[iFn].pfnNative;
5293 }
5294
5295 FREE_DECOMPRESSED_TESTS(g_aFpuUnaryTwoR80[iFn]);
5296 }
5297}
5298
5299
5300/*********************************************************************************************************************************
5301* SSE floating point Binary Operations *
5302*********************************************************************************************************************************/
5303
5304/*
5305 * Binary SSE operations on packed single precision floating point values.
5306 */
5307TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
5308
5309static SSE_BINARY_R32_T g_aSseBinaryR32[] =
5310{
5311 ENTRY_BIN(addps_u128),
5312 ENTRY_BIN(mulps_u128),
5313 ENTRY_BIN(subps_u128),
5314 ENTRY_BIN(minps_u128),
5315 ENTRY_BIN(divps_u128),
5316 ENTRY_BIN(maxps_u128),
5317 ENTRY_BIN(haddps_u128),
5318 ENTRY_BIN(hsubps_u128),
5319 ENTRY_BIN(sqrtps_u128),
5320 ENTRY_BIN(addsubps_u128),
5321 ENTRY_BIN(cvtps2pd_u128),
5322};
5323
5324#ifdef TSTIEMAIMPL_WITH_GENERATOR
5325DUMP_ALL_FN(SseBinaryR32, g_aSseBinaryR32)
5326static RTEXITCODE SseBinaryR32Generate(uint32_t cTests, const char * const *papszNameFmts)
5327{
5328 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
5329
5330 static struct { RTFLOAT32U aVal1[4], aVal2[4]; } const s_aSpecials[] =
5331 {
5332 { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), },
5333 { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) } },
5334 /** @todo More specials. */
5335 };
5336
5337 X86FXSTATE State;
5338 RT_ZERO(State);
5339 uint32_t cMinNormalPairs = (cTests - 144) / 4;
5340 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
5341 {
5342 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR32[iFn].pfnNative ? g_aSseBinaryR32[iFn].pfnNative : g_aSseBinaryR32[iFn].pfn;
5343
5344 IEMBINARYOUTPUT BinOut;
5345 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryR32[iFn]), RTEXITCODE_FAILURE);
5346
5347 uint32_t cNormalInputPairs = 0;
5348 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5349 {
5350 SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);
5351
5352 TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
5353 TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
5354 TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
5355 TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];
5356
5357 TestData.InVal2.ar32[0] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
5358 TestData.InVal2.ar32[1] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[1];
5359 TestData.InVal2.ar32[2] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[2];
5360 TestData.InVal2.ar32[3] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[3];
5361
5362 if ( RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[0])
5363 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[1])
5364 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[2])
5365 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[3]))
5366 cNormalInputPairs++;
5367 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
5368 {
5369 iTest -= 1;
5370 continue;
5371 }
5372
5373 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
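            /* Sweep all rounding modes with and without DAZ and FZ, first with all
               exceptions masked, then unmasked, and re-run when exceptions were
               raised (same pattern as the x87 generators above). */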
5374 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5375 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
5376 for (uint8_t iFz = 0; iFz < 2; iFz++)
5377 {
5378 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
5379 | (iRounding << X86_MXCSR_RC_SHIFT)
5380 | (iDaz ? X86_MXCSR_DAZ : 0)
5381 | (iFz ? X86_MXCSR_FZ : 0)
5382 | X86_MXCSR_XCPT_MASK;
5383 IEMSSERESULT ResM; RT_ZERO(ResM);
5384 pfn(&State, &ResM, &TestData.InVal1, &TestData.InVal2);
5385 TestData.fMxcsrIn = State.MXCSR;
5386 TestData.fMxcsrOut = ResM.MXCSR;
5387 TestData.OutVal = ResM.uResult;
5388 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5389
5390 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
5391 IEMSSERESULT ResU; RT_ZERO(ResU);
5392 pfn(&State, &ResU, &TestData.InVal1, &TestData.InVal2);
5393 TestData.fMxcsrIn = State.MXCSR;
5394 TestData.fMxcsrOut = ResU.MXCSR;
5395 TestData.OutVal = ResU.uResult;
5396 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5397
5398 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
5399 if (fXcpt)
5400 {
5401 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
5402 IEMSSERESULT Res1; RT_ZERO(Res1);
5403 pfn(&State, &Res1, &TestData.InVal1, &TestData.InVal2);
5404 TestData.fMxcsrIn = State.MXCSR;
5405 TestData.fMxcsrOut = Res1.MXCSR;
5406 TestData.OutVal = Res1.uResult;
5407 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5408
5409 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
5410 {
5411 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
5412 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
5413 IEMSSERESULT Res2; RT_ZERO(Res2);
5414 pfn(&State, &Res2, &TestData.InVal1, &TestData.InVal2);
5415 TestData.fMxcsrIn = State.MXCSR;
5416 TestData.fMxcsrOut = Res2.MXCSR;
5417 TestData.OutVal = Res2.uResult;
5418 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5419 }
5420 if (!RT_IS_POWER_OF_TWO(fXcpt))
5421 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
5422 if (fUnmasked & fXcpt)
5423 {
5424 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
5425 IEMSSERESULT Res3; RT_ZERO(Res3);
5426 pfn(&State, &Res3, &TestData.InVal1, &TestData.InVal2);
5427 TestData.fMxcsrIn = State.MXCSR;
5428 TestData.fMxcsrOut = Res3.MXCSR;
5429 TestData.OutVal = Res3.uResult;
5430 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5431 }
5432 }
5433 }
5434 }
5435 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
5436 }
5437
5438 return RTEXITCODE_SUCCESS;
5439}
5440#endif
5441
5442static void SseBinaryR32Test(void)
5443{
5444 X86FXSTATE State;
5445 RT_ZERO(State);
5446 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
5447 {
5448 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR32[iFn]))
5449 continue;
5450
5451 SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR32[iFn].paTests;
5452             uint32_t const                  cTests   = g_aSseBinaryR32[iFn].cTests;
5453 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR32[iFn].pfn;
5454 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32[iFn]);
5455             if (!cTests) RTTestSkipped(g_hTest, "no tests");
5456 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5457 {
5458             for (uint32_t iTest = 0; iTest < cTests; iTest++)
5459 {
5460 IEMSSERESULT Res; RT_ZERO(Res);
5461
5462 State.MXCSR = paTests[iTest].fMxcsrIn;
5463 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
5464 bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
5465 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
5466 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
5467 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]);
5468 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5469 || !fValsIdentical)
5470 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s'%s'%s'%s\n"
5471 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
5472 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
5473 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5474 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
5475 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
5476 FormatR32(&paTests[iTest].InVal2.ar32[0]), FormatR32(&paTests[iTest].InVal2.ar32[1]),
5477 FormatR32(&paTests[iTest].InVal2.ar32[2]), FormatR32(&paTests[iTest].InVal2.ar32[3]),
5478 iVar ? " " : "", Res.MXCSR,
5479 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
5480 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
5481 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5482 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
5483 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
5484 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5485 !fValsIdentical ? " - val" : "",
5486 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5487 }
5488 pfn = g_aSseBinaryR32[iFn].pfnNative;
5489 }
5490
5491 FREE_DECOMPRESSED_TESTS(g_aSseBinaryR32[iFn]);
5492 }
5493}
5494
5495
5496/*
5497  * Binary SSE operations on packed double precision floating point values.
5498 */
5499TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
5500
5501static SSE_BINARY_R64_T g_aSseBinaryR64[] =
5502{
5503 ENTRY_BIN(addpd_u128),
5504 ENTRY_BIN(mulpd_u128),
5505 ENTRY_BIN(subpd_u128),
5506 ENTRY_BIN(minpd_u128),
5507 ENTRY_BIN(divpd_u128),
5508 ENTRY_BIN(maxpd_u128),
5509 ENTRY_BIN(haddpd_u128),
5510 ENTRY_BIN(hsubpd_u128),
5511 ENTRY_BIN(sqrtpd_u128),
5512 ENTRY_BIN(addsubpd_u128),
5513 ENTRY_BIN(cvtpd2ps_u128),
5514};
5515
5516#ifdef TSTIEMAIMPL_WITH_GENERATOR
5517 DUMP_ALL_FN(SseBinaryR64, g_aSseBinaryR64)
5518static RTEXITCODE SseBinaryR64Generate(uint32_t cTests, const char * const *papszNameFmts)
5519{
5520 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
5521
5522 static struct { RTFLOAT64U aVal1[2], aVal2[2]; } const s_aSpecials[] =
5523 {
5524 { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
5525 { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1), RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) } },
5526 /** @todo More specials. */
5527 };
5528
5529 X86FXSTATE State;
5530 RT_ZERO(State);
5531 uint32_t cMinNormalPairs = (cTests - 144) / 4;
5532 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
5533 {
5534 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR64[iFn].pfnNative ? g_aSseBinaryR64[iFn].pfnNative : g_aSseBinaryR64[iFn].pfn;
5535
5536 IEMBINARYOUTPUT BinOut;
5537 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryR64[iFn]), RTEXITCODE_FAILURE);
5538
5539 uint32_t cNormalInputPairs = 0;
5540 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5541 {
5542 SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);
5543
5544 TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
5545             TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
5546 TestData.InVal2.ar64[0] = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
5547             TestData.InVal2.ar64[1] = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[1];
5548
5549 if ( RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
5550 && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[1]))
5551 cNormalInputPairs++;
5552 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
5553 {
5554 iTest -= 1;
5555 continue;
5556 }
5557
5558 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
5559 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5560 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
5561 for (uint8_t iFz = 0; iFz < 2; iFz++)
5562 {
5563 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
5564 | (iRounding << X86_MXCSR_RC_SHIFT)
5565 | (iDaz ? X86_MXCSR_DAZ : 0)
5566 | (iFz ? X86_MXCSR_FZ : 0)
5567 | X86_MXCSR_XCPT_MASK;
5568 IEMSSERESULT ResM; RT_ZERO(ResM);
5569 pfn(&State, &ResM, &TestData.InVal1, &TestData.InVal2);
5570 TestData.fMxcsrIn = State.MXCSR;
5571 TestData.fMxcsrOut = ResM.MXCSR;
5572 TestData.OutVal = ResM.uResult;
5573 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5574
5575 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
5576 IEMSSERESULT ResU; RT_ZERO(ResU);
5577 pfn(&State, &ResU, &TestData.InVal1, &TestData.InVal2);
5578 TestData.fMxcsrIn = State.MXCSR;
5579 TestData.fMxcsrOut = ResU.MXCSR;
5580 TestData.OutVal = ResU.uResult;
5581 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5582
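 /* If either pass raised exception flags, probe further mask combinations: the raised flags
    pre-set as sticky bits with everything unmasked (Res1), the accumulated flag set masked
    again (Res2), and, when more than one flag is involved, each flag unmasked on its own (Res3).
    The mask bits sit X86_MXCSR_XCPT_MASK_SHIFT bits above the corresponding status flags,
    which is why the flag set is simply shifted left to build the mask. */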
5583 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
5584 if (fXcpt)
5585 {
5586 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
5587 IEMSSERESULT Res1; RT_ZERO(Res1);
5588 pfn(&State, &Res1, &TestData.InVal1, &TestData.InVal2);
5589 TestData.fMxcsrIn = State.MXCSR;
5590 TestData.fMxcsrOut = Res1.MXCSR;
5591 TestData.OutVal = Res1.uResult;
5592 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5593
5594 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
5595 {
5596 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
5597 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
5598 IEMSSERESULT Res2; RT_ZERO(Res2);
5599 pfn(&State, &Res2, &TestData.InVal1, &TestData.InVal2);
5600 TestData.fMxcsrIn = State.MXCSR;
5601 TestData.fMxcsrOut = Res2.MXCSR;
5602 TestData.OutVal = Res2.uResult;
5603 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5604 }
5605 if (!RT_IS_POWER_OF_TWO(fXcpt))
5606 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
5607 if (fUnmasked & fXcpt)
5608 {
5609 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
5610 IEMSSERESULT Res3; RT_ZERO(Res3);
5611 pfn(&State, &Res3, &TestData.InVal1, &TestData.InVal2);
5612 TestData.fMxcsrIn = State.MXCSR;
5613 TestData.fMxcsrOut = Res3.MXCSR;
5614 TestData.OutVal = Res3.uResult;
5615 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5616 }
5617 }
5618 }
5619 }
5620 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
5621 }
5622
5623 return RTEXITCODE_SUCCESS;
5624}
5625#endif
5626
5627
5628static void SseBinaryR64Test(void)
5629{
5630 X86FXSTATE State;
5631 RT_ZERO(State);
5632 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
5633 {
5634 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR64[iFn]))
5635 continue;
5636
5637 SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR64[iFn].paTests;
5638 uint32_t const cTests = g_aSseBinaryR64[iFn].cTests;
5639 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR64[iFn].pfn;
5640 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64[iFn]);
5641 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5642 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5643 {
5644 for (uint32_t iTest = 0; iTest < cTests; iTest++)
5645 {
5646 IEMSSERESULT Res; RT_ZERO(Res);
5647
5648 State.MXCSR = paTests[iTest].fMxcsrIn;
5649 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
5650 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5651 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5652 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5653 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s'%s\n"
5654 "%s -> mxcsr=%#08x %s'%s\n"
5655 "%s expected %#08x %s'%s%s%s (%s)\n",
5656 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5657 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
5658 FormatR64(&paTests[iTest].InVal2.ar64[0]), FormatR64(&paTests[iTest].InVal2.ar64[1]),
5659 iVar ? " " : "", Res.MXCSR,
5660 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
5661 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5662 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
5663 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5664 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5665 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5666 ? " - val" : "",
5667 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5668 }
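 /* The second variation, when COUNT_VARIATIONS reports one, replays the same test data
    against the native implementation. */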
5669 pfn = g_aSseBinaryR64[iFn].pfnNative;
5670 }
5671
5672 FREE_DECOMPRESSED_TESTS(g_aSseBinaryR64[iFn]);
5673 }
5674}
5675
5676
5677/*
5678 * Binary SSE operations on single precision floating point values (xxxss xmm1, r/m32).
5679 */
5680TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R32_T, SSE_BINARY_U128_R32_TEST_T, PFNIEMAIMPLFPSSEF2U128R32);
5681
5682static SSE_BINARY_U128_R32_T g_aSseBinaryU128R32[] =
5683{
5684 ENTRY_BIN(addss_u128_r32),
5685 ENTRY_BIN(mulss_u128_r32),
5686 ENTRY_BIN(subss_u128_r32),
5687 ENTRY_BIN(minss_u128_r32),
5688 ENTRY_BIN(divss_u128_r32),
5689 ENTRY_BIN(maxss_u128_r32),
5690 ENTRY_BIN(cvtss2sd_u128_r32),
5691 ENTRY_BIN(sqrtss_u128_r32),
5692};
5693
5694#ifdef TSTIEMAIMPL_WITH_GENERATOR
5695DUMP_ALL_FN(SseBinaryU128R32, g_aSseBinaryU128R32)
5696static RTEXITCODE SseBinaryU128R32Generate(uint32_t cTests, const char * const *papszNameFmts)
5697{
5698 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
5699
5700 static struct { RTFLOAT32U aVal1[4], Val2; } const s_aSpecials[] =
5701 {
5702 { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), }, RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
5703 /** @todo More specials. */
5704 };
5705
5706 X86FXSTATE State;
5707 RT_ZERO(State);
5708 uint32_t cMinNormalPairs = (cTests - 144) / 4;
5709 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
5710 {
5711 PFNIEMAIMPLFPSSEF2U128R32 const pfn = g_aSseBinaryU128R32[iFn].pfnNative ? g_aSseBinaryU128R32[iFn].pfnNative : g_aSseBinaryU128R32[iFn].pfn;
5712
5713 IEMBINARYOUTPUT BinOut;
5714 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryU128R32[iFn]), RTEXITCODE_FAILURE);
5715
5716 uint32_t cNormalInputPairs = 0;
5717 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5718 {
5719 SSE_BINARY_U128_R32_TEST_T TestData; RT_ZERO(TestData);
5720
5721 TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
5722 TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
5723 TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
5724 TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];
5725
5726 TestData.r32Val2 = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
5727
5728 if ( RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0])
5729 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1])
5730 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2])
5731 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3])
5732 && RTFLOAT32U_IS_NORMAL(&TestData.r32Val2))
5733 cNormalInputPairs++;
5734 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
5735 {
5736 iTest -= 1;
5737 continue;
5738 }
5739
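 /* The MXCSR sweep and exception-mask probing below follow the same scheme as SseBinaryR64Generate above. */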
5740 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
5741 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5742 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
5743 for (uint8_t iFz = 0; iFz < 2; iFz++)
5744 {
5745 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
5746 | (iRounding << X86_MXCSR_RC_SHIFT)
5747 | (iDaz ? X86_MXCSR_DAZ : 0)
5748 | (iFz ? X86_MXCSR_FZ : 0)
5749 | X86_MXCSR_XCPT_MASK;
5750 IEMSSERESULT ResM; RT_ZERO(ResM);
5751 pfn(&State, &ResM, &TestData.InVal1, &TestData.r32Val2);
5752 TestData.fMxcsrIn = State.MXCSR;
5753 TestData.fMxcsrOut = ResM.MXCSR;
5754 TestData.OutVal = ResM.uResult;
5755 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5756
5757 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
5758 IEMSSERESULT ResU; RT_ZERO(ResU);
5759 pfn(&State, &ResU, &TestData.InVal1, &TestData.r32Val2);
5760 TestData.fMxcsrIn = State.MXCSR;
5761 TestData.fMxcsrOut = ResU.MXCSR;
5762 TestData.OutVal = ResU.uResult;
5763 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5764
5765 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
5766 if (fXcpt)
5767 {
5768 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
5769 IEMSSERESULT Res1; RT_ZERO(Res1);
5770 pfn(&State, &Res1, &TestData.InVal1, &TestData.r32Val2);
5771 TestData.fMxcsrIn = State.MXCSR;
5772 TestData.fMxcsrOut = Res1.MXCSR;
5773 TestData.OutVal = Res1.uResult;
5774 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5775
5776 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
5777 {
5778 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
5779 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
5780 IEMSSERESULT Res2; RT_ZERO(Res2);
5781 pfn(&State, &Res2, &TestData.InVal1, &TestData.r32Val2);
5782 TestData.fMxcsrIn = State.MXCSR;
5783 TestData.fMxcsrOut = Res2.MXCSR;
5784 TestData.OutVal = Res2.uResult;
5785 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5786 }
5787 if (!RT_IS_POWER_OF_TWO(fXcpt))
5788 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
5789 if (fUnmasked & fXcpt)
5790 {
5791 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
5792 IEMSSERESULT Res3; RT_ZERO(Res3);
5793 pfn(&State, &Res3, &TestData.InVal1, &TestData.r32Val2);
5794 TestData.fMxcsrIn = State.MXCSR;
5795 TestData.fMxcsrOut = Res3.MXCSR;
5796 TestData.OutVal = Res3.uResult;
5797 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5798 }
5799 }
5800 }
5801 }
5802 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
5803 }
5804
5805 return RTEXITCODE_SUCCESS;
5806}
5807#endif
5808
5809static void SseBinaryU128R32Test(void)
5810{
5811 X86FXSTATE State;
5812 RT_ZERO(State);
5813 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
5814 {
5815 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryU128R32[iFn]))
5816 continue;
5817
5818 SSE_BINARY_U128_R32_TEST_T const * const paTests = g_aSseBinaryU128R32[iFn].paTests;
5819 uint32_t const cTests = g_aSseBinaryU128R32[iFn].cTests;
5820 PFNIEMAIMPLFPSSEF2U128R32 pfn = g_aSseBinaryU128R32[iFn].pfn;
5821 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R32[iFn]);
5822 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5823 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5824 {
5825 for (uint32_t iTest = 0; iTest < cTests; iTest++)
5826 {
5827 IEMSSERESULT Res; RT_ZERO(Res);
5828
5829 State.MXCSR = paTests[iTest].fMxcsrIn;
5830 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].r32Val2);
5831 bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
5832 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
5833 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
5834 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]);
5835 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5836 || !fValsIdentical)
5837 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s\n"
5838 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
5839 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
5840 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5841 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
5842 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
5843 FormatR32(&paTests[iTest].r32Val2),
5844 iVar ? " " : "", Res.MXCSR,
5845 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
5846 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
5847 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5848 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
5849 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
5850 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5851 !fValsIdentical ? " - val" : "",
5852 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5853 }
5854 }
5855
5856 FREE_DECOMPRESSED_TESTS(g_aSseBinaryU128R32[iFn]);
5857 }
5858}
5859
5860
5861/*
5862 * Binary SSE operations on double precision floating point values (xxxsd xmm1, r/m64).
5863 */
5864TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R64_T, SSE_BINARY_U128_R64_TEST_T, PFNIEMAIMPLFPSSEF2U128R64);
5865
5866static SSE_BINARY_U128_R64_T g_aSseBinaryU128R64[] =
5867{
5868 ENTRY_BIN(addsd_u128_r64),
5869 ENTRY_BIN(mulsd_u128_r64),
5870 ENTRY_BIN(subsd_u128_r64),
5871 ENTRY_BIN(minsd_u128_r64),
5872 ENTRY_BIN(divsd_u128_r64),
5873 ENTRY_BIN(maxsd_u128_r64),
5874 ENTRY_BIN(cvtsd2ss_u128_r64),
5875 ENTRY_BIN(sqrtsd_u128_r64),
5876};
5877
5878#ifdef TSTIEMAIMPL_WITH_GENERATOR
5879DUMP_ALL_FN(SseBinaryU128R64, g_aSseBinaryU128R64)
5880static RTEXITCODE SseBinaryU128R64Generate(uint32_t cTests, const char * const *papszNameFmts)
5881{
5882 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
5883
5884 static struct { RTFLOAT64U aVal1[2], Val2; } const s_aSpecials[] =
5885 {
5886 { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) }, RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
5887 /** @todo More specials. */
5888 };
5889
5890 X86FXSTATE State;
5891 RT_ZERO(State);
5892 uint32_t cMinNormalPairs = (cTests - 144) / 4;
5893 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
5894 {
5895 PFNIEMAIMPLFPSSEF2U128R64 const pfn = g_aSseBinaryU128R64[iFn].pfnNative ? g_aSseBinaryU128R64[iFn].pfnNative : g_aSseBinaryU128R64[iFn].pfn;
5896
5897 IEMBINARYOUTPUT BinOut;
5898 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryU128R64[iFn]), RTEXITCODE_FAILURE);
5899
5900 uint32_t cNormalInputPairs = 0;
5901 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
5902 {
5903 SSE_BINARY_U128_R64_TEST_T TestData; RT_ZERO(TestData);
5904
5905 TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
5906 TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
5907 TestData.r64Val2 = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
5908
5909 if ( RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
5910 && RTFLOAT64U_IS_NORMAL(&TestData.r64Val2))
5911 cNormalInputPairs++;
5912 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
5913 {
5914 iTest -= 1;
5915 continue;
5916 }
5917
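 /* Same MXCSR sweep and exception probing as above; only the operand types differ (xmm plus a scalar r64). */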
5918 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
5919 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
5920 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
5921 for (uint8_t iFz = 0; iFz < 2; iFz++)
5922 {
5923 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
5924 | (iRounding << X86_MXCSR_RC_SHIFT)
5925 | (iDaz ? X86_MXCSR_DAZ : 0)
5926 | (iFz ? X86_MXCSR_FZ : 0)
5927 | X86_MXCSR_XCPT_MASK;
5928 IEMSSERESULT ResM; RT_ZERO(ResM);
5929 pfn(&State, &ResM, &TestData.InVal1, &TestData.r64Val2);
5930 TestData.fMxcsrIn = State.MXCSR;
5931 TestData.fMxcsrOut = ResM.MXCSR;
5932 TestData.OutVal = ResM.uResult;
5933 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5934
5935 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
5936 IEMSSERESULT ResU; RT_ZERO(ResU);
5937 pfn(&State, &ResU, &TestData.InVal1, &TestData.r64Val2);
5938 TestData.fMxcsrIn = State.MXCSR;
5939 TestData.fMxcsrOut = ResU.MXCSR;
5940 TestData.OutVal = ResU.uResult;
5941 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5942
5943 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
5944 if (fXcpt)
5945 {
5946 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
5947 IEMSSERESULT Res1; RT_ZERO(Res1);
5948 pfn(&State, &Res1, &TestData.InVal1, &TestData.r64Val2);
5949 TestData.fMxcsrIn = State.MXCSR;
5950 TestData.fMxcsrOut = Res1.MXCSR;
5951 TestData.OutVal = Res1.uResult;
5952 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5953
5954 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
5955 {
5956 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
5957 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
5958 IEMSSERESULT Res2; RT_ZERO(Res2);
5959 pfn(&State, &Res2, &TestData.InVal1, &TestData.r64Val2);
5960 TestData.fMxcsrIn = State.MXCSR;
5961 TestData.fMxcsrOut = Res2.MXCSR;
5962 TestData.OutVal = Res2.uResult;
5963 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5964 }
5965 if (!RT_IS_POWER_OF_TWO(fXcpt))
5966 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
5967 if (fUnmasked & fXcpt)
5968 {
5969 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
5970 IEMSSERESULT Res3; RT_ZERO(Res3);
5971 pfn(&State, &Res3, &TestData.InVal1, &TestData.r64Val2);
5972 TestData.fMxcsrIn = State.MXCSR;
5973 TestData.fMxcsrOut = Res3.MXCSR;
5974 TestData.OutVal = Res3.uResult;
5975 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
5976 }
5977 }
5978 }
5979 }
5980 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
5981 }
5982
5983 return RTEXITCODE_SUCCESS;
5984}
5985#endif
5986
5987
5988static void SseBinaryU128R64Test(void)
5989{
5990 X86FXSTATE State;
5991 RT_ZERO(State);
5992 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
5993 {
5994 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryU128R64[iFn]))
5995 continue;
5996
5997 SSE_BINARY_U128_R64_TEST_T const * const paTests = g_aSseBinaryU128R64[iFn].paTests;
5998 uint32_t const cTests = g_aSseBinaryU128R64[iFn].cTests;
5999 PFNIEMAIMPLFPSSEF2U128R64 pfn = g_aSseBinaryU128R64[iFn].pfn;
6000 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R64[iFn]);
6001 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6002 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6003 {
6004 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6005 {
6006 IEMSSERESULT Res; RT_ZERO(Res);
6007
6008 State.MXCSR = paTests[iTest].fMxcsrIn;
6009 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].r64Val2);
6010 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
6011 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
6012 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
6013 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s\n"
6014 "%s -> mxcsr=%#08x %s'%s\n"
6015 "%s expected %#08x %s'%s%s%s (%s)\n",
6016 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6017 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
6018 FormatR64(&paTests[iTest].r64Val2),
6019 iVar ? " " : "", Res.MXCSR,
6020 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
6021 iVar ? " " : "", paTests[iTest].fMxcsrOut,
6022 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
6023 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
6024 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
6025 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
6026 ? " - val" : "",
6027 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6028 }
6029 }
6030
6031 FREE_DECOMPRESSED_TESTS(g_aSseBinaryU128R64[iFn]);
6032 }
6033}
6034
6035
6036/*
6037 * SSE operations converting a scalar double-precision floating point value to a signed double-word integer (cvttsd2si and friends).
6038 */
6039TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I32_R64_T, SSE_BINARY_I32_R64_TEST_T, PFNIEMAIMPLSSEF2I32U64);
6040
6041static SSE_BINARY_I32_R64_T g_aSseBinaryI32R64[] =
6042{
6043 ENTRY_BIN(cvttsd2si_i32_r64),
6044 ENTRY_BIN(cvtsd2si_i32_r64),
6045};
6046
6047#ifdef TSTIEMAIMPL_WITH_GENERATOR
6048DUMP_ALL_FN(SseBinaryI32R64, g_aSseBinaryI32R64)
6049static RTEXITCODE SseBinaryI32R64Generate(uint32_t cTests, const char * const *papszNameFmts)
6050{
6051 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6052
6053 static struct { RTFLOAT64U Val; } const s_aSpecials[] =
6054 {
6055 { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
6056 /** @todo More specials. */
6057 };
6058
6059 X86FXSTATE State;
6060 RT_ZERO(State);
6061 uint32_t cMinNormalPairs = (cTests - 144) / 4;
6062 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R64); iFn++)
6063 {
6064 PFNIEMAIMPLSSEF2I32U64 const pfn = g_aSseBinaryI32R64[iFn].pfnNative ? g_aSseBinaryI32R64[iFn].pfnNative : g_aSseBinaryI32R64[iFn].pfn;
6065
6066 IEMBINARYOUTPUT BinOut;
6067 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryI32R64[iFn]), RTEXITCODE_FAILURE);
6068
6069 uint32_t cNormalInputPairs = 0;
6070 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6071 {
6072 SSE_BINARY_I32_R64_TEST_T TestData; RT_ZERO(TestData);
6073
6074 TestData.r64ValIn = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val;
6075
6076 if (RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn))
6077 cNormalInputPairs++;
6078 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
6079 {
6080 iTest -= 1;
6081 continue;
6082 }
6083
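 /* Same MXCSR sweep / exception probing scheme; the outputs here are an MXCSR value and an i32 result. */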
6084 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6085 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6086 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6087 for (uint8_t iFz = 0; iFz < 2; iFz++)
6088 {
6089 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
6090 | (iRounding << X86_MXCSR_RC_SHIFT)
6091 | (iDaz ? X86_MXCSR_DAZ : 0)
6092 | (iFz ? X86_MXCSR_FZ : 0)
6093 | X86_MXCSR_XCPT_MASK;
6094 uint32_t fMxcsrM; int32_t i32OutM;
6095 pfn(&State, &fMxcsrM, &i32OutM, &TestData.r64ValIn.u);
6096 TestData.fMxcsrIn = State.MXCSR;
6097 TestData.fMxcsrOut = fMxcsrM;
6098 TestData.i32ValOut = i32OutM;
6099 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6100
6101 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
6102 uint32_t fMxcsrU; int32_t i32OutU;
6103 pfn(&State, &fMxcsrU, &i32OutU, &TestData.r64ValIn.u);
6104 TestData.fMxcsrIn = State.MXCSR;
6105 TestData.fMxcsrOut = fMxcsrU;
6106 TestData.i32ValOut = i32OutU;
6107 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6108
6109 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6110 if (fXcpt)
6111 {
6112 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6113 uint32_t fMxcsr1; int32_t i32Out1;
6114 pfn(&State, &fMxcsr1, &i32Out1, &TestData.r64ValIn.u);
6115 TestData.fMxcsrIn = State.MXCSR;
6116 TestData.fMxcsrOut = fMxcsr1;
6117 TestData.i32ValOut = i32Out1;
6118 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6119
6120 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6121 {
6122 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6123 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6124 uint32_t fMxcsr2; int32_t i32Out2;
6125 pfn(&State, &fMxcsr2, &i32Out2, &TestData.r64ValIn.u);
6126 TestData.fMxcsrIn = State.MXCSR;
6127 TestData.fMxcsrOut = fMxcsr2;
6128 TestData.i32ValOut = i32Out2;
6129 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6130 }
6131 if (!RT_IS_POWER_OF_TWO(fXcpt))
6132 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6133 if (fUnmasked & fXcpt)
6134 {
6135 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6136 uint32_t fMxcsr3; int32_t i32Out3;
6137 pfn(&State, &fMxcsr3, &i32Out3, &TestData.r64ValIn.u);
6138 TestData.fMxcsrIn = State.MXCSR;
6139 TestData.fMxcsrOut = fMxcsr3;
6140 TestData.i32ValOut = i32Out3;
6141 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6142 }
6143 }
6144 }
6145 }
6146 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6147 }
6148
6149 return RTEXITCODE_SUCCESS;
6150}
6151#endif
6152
6153
6154static void SseBinaryI32R64Test(void)
6155{
6156 X86FXSTATE State;
6157 RT_ZERO(State);
6158 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R64); iFn++)
6159 {
6160 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryI32R64[iFn]))
6161 continue;
6162
6163 SSE_BINARY_I32_R64_TEST_T const * const paTests = g_aSseBinaryI32R64[iFn].paTests;
6164 uint32_t const cTests = g_aSseBinaryI32R64[iFn].cTests;
6165 PFNIEMAIMPLSSEF2I32U64 pfn = g_aSseBinaryI32R64[iFn].pfn;
6166 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R64[iFn]);
6167 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6168 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6169 {
6170 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6171 {
6172 uint32_t fMxcsr = 0;
6173 int32_t i32Dst = 0;
6174
6175 State.MXCSR = paTests[iTest].fMxcsrIn;
6176 pfn(&State, &fMxcsr, &i32Dst, &paTests[iTest].r64ValIn.u);
6177 if ( fMxcsr != paTests[iTest].fMxcsrOut
6178 || i32Dst != paTests[iTest].i32ValOut)
6179 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
6180 "%s -> mxcsr=%#08x %RI32\n"
6181 "%s expected %#08x %RI32%s%s (%s)\n",
6182 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6183 FormatR64(&paTests[iTest].r64ValIn),
6184 iVar ? " " : "", fMxcsr, i32Dst,
6185 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i32ValOut,
6186 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6187 i32Dst != paTests[iTest].i32ValOut
6188 ? " - val" : "",
6189 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6190 }
6191 }
6192
6193 FREE_DECOMPRESSED_TESTS(g_aSseBinaryI32R64[iFn]);
6194 }
6195}
6196
6197
6198/*
6199 * SSE operations converting a scalar double-precision floating point value to a signed quad-word integer (cvttsd2si and friends).
6200 */
6201TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I64_R64_T, SSE_BINARY_I64_R64_TEST_T, PFNIEMAIMPLSSEF2I64U64);
6202
6203static SSE_BINARY_I64_R64_T g_aSseBinaryI64R64[] =
6204{
6205 ENTRY_BIN(cvttsd2si_i64_r64),
6206 ENTRY_BIN(cvtsd2si_i64_r64),
6207};
6208
6209#ifdef TSTIEMAIMPL_WITH_GENERATOR
6210DUMP_ALL_FN(SseBinaryI64R64, g_aSseBinaryI64R64)
6211static RTEXITCODE SseBinaryI64R64Generate(uint32_t cTests, const char * const *papszNameFmts)
6212{
6213 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6214
6215 static struct { RTFLOAT64U Val; } const s_aSpecials[] =
6216 {
6217 { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
6218 /** @todo More specials. */
6219 };
6220
6221 X86FXSTATE State;
6222 RT_ZERO(State);
6223 uint32_t cMinNormalPairs = (cTests - 144) / 4;
6224 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R64); iFn++)
6225 {
6226 PFNIEMAIMPLSSEF2I64U64 const pfn = g_aSseBinaryI64R64[iFn].pfnNative ? g_aSseBinaryI64R64[iFn].pfnNative : g_aSseBinaryI64R64[iFn].pfn;
6227
6228 IEMBINARYOUTPUT BinOut;
6229 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryI64R64[iFn]), RTEXITCODE_FAILURE);
6230
6231 uint32_t cNormalInputPairs = 0;
6232 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6233 {
6234 SSE_BINARY_I64_R64_TEST_T TestData; RT_ZERO(TestData);
6235
6236 TestData.r64ValIn = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val;
6237
6238 if (RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn))
6239 cNormalInputPairs++;
6240 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
6241 {
6242 iTest -= 1;
6243 continue;
6244 }
6245
6246 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6247 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6248 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6249 for (uint8_t iFz = 0; iFz < 2; iFz++)
6250 {
6251 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
6252 | (iRounding << X86_MXCSR_RC_SHIFT)
6253 | (iDaz ? X86_MXCSR_DAZ : 0)
6254 | (iFz ? X86_MXCSR_FZ : 0)
6255 | X86_MXCSR_XCPT_MASK;
6256 uint32_t fMxcsrM; int64_t i64OutM;
6257 pfn(&State, &fMxcsrM, &i64OutM, &TestData.r64ValIn.u);
6258 TestData.fMxcsrIn = State.MXCSR;
6259 TestData.fMxcsrOut = fMxcsrM;
6260 TestData.i64ValOut = i64OutM;
6261 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6262
6263 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
6264 uint32_t fMxcsrU; int64_t i64OutU;
6265 pfn(&State, &fMxcsrU, &i64OutU, &TestData.r64ValIn.u);
6266 TestData.fMxcsrIn = State.MXCSR;
6267 TestData.fMxcsrOut = fMxcsrU;
6268 TestData.i64ValOut = i64OutU;
6269 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6270
6271 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6272 if (fXcpt)
6273 {
6274 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6275 uint32_t fMxcsr1; int64_t i64Out1;
6276 pfn(&State, &fMxcsr1, &i64Out1, &TestData.r64ValIn.u);
6277 TestData.fMxcsrIn = State.MXCSR;
6278 TestData.fMxcsrOut = fMxcsr1;
6279 TestData.i64ValOut = i64Out1;
6280 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6281
6282 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6283 {
6284 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6285 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6286 uint32_t fMxcsr2; int64_t i64Out2;
6287 pfn(&State, &fMxcsr2, &i64Out2, &TestData.r64ValIn.u);
6288 TestData.fMxcsrIn = State.MXCSR;
6289 TestData.fMxcsrOut = fMxcsr2;
6290 TestData.i64ValOut = i64Out2;
6291 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6292 }
6293 if (!RT_IS_POWER_OF_TWO(fXcpt))
6294 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6295 if (fUnmasked & fXcpt)
6296 {
6297 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6298 uint32_t fMxcsr3; int64_t i64Out3;
6299 pfn(&State, &fMxcsr3, &i64Out3, &TestData.r64ValIn.u);
6300 TestData.fMxcsrIn = State.MXCSR;
6301 TestData.fMxcsrOut = fMxcsr3;
6302 TestData.i64ValOut = i64Out3;
6303 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6304 }
6305 }
6306 }
6307 }
6308 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6309 }
6310
6311 return RTEXITCODE_SUCCESS;
6312}
6313#endif
6314
6315
6316static void SseBinaryI64R64Test(void)
6317{
6318 X86FXSTATE State;
6319 RT_ZERO(State);
6320 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R64); iFn++)
6321 {
6322 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryI64R64[iFn]))
6323 continue;
6324
6325 SSE_BINARY_I64_R64_TEST_T const * const paTests = g_aSseBinaryI64R64[iFn].paTests;
6326 uint32_t const cTests = g_aSseBinaryI64R64[iFn].cTests;
6327 PFNIEMAIMPLSSEF2I64U64 pfn = g_aSseBinaryI64R64[iFn].pfn;
6328 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI64R64[iFn]);
6329 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6330 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6331 {
6332 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6333 {
6334 uint32_t fMxcsr = 0;
6335 int64_t i64Dst = 0;
6336
6337 State.MXCSR = paTests[iTest].fMxcsrIn;
6338 pfn(&State, &fMxcsr, &i64Dst, &paTests[iTest].r64ValIn.u);
6339 if ( fMxcsr != paTests[iTest].fMxcsrOut
6340 || i64Dst != paTests[iTest].i64ValOut)
6341 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
6342 "%s -> mxcsr=%#08x %RI64\n"
6343 "%s expected %#08x %RI64%s%s (%s)\n",
6344 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6345 FormatR64(&paTests[iTest].r64ValIn),
6346 iVar ? " " : "", fMxcsr, i64Dst,
6347 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i64ValOut,
6348 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6349 i64Dst != paTests[iTest].i64ValOut
6350 ? " - val" : "",
6351 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6352 }
6353 }
6354
6355 FREE_DECOMPRESSED_TESTS(g_aSseBinaryI64R64[iFn]);
6356 }
6357}
6358
6359
6360/*
6361 * SSE operations converting a scalar single-precision floating point value to a signed double-word integer (cvttss2si and friends).
6362 */
6363TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I32_R32_T, SSE_BINARY_I32_R32_TEST_T, PFNIEMAIMPLSSEF2I32U32);
6364
6365static SSE_BINARY_I32_R32_T g_aSseBinaryI32R32[] =
6366{
6367 ENTRY_BIN(cvttss2si_i32_r32),
6368 ENTRY_BIN(cvtss2si_i32_r32),
6369};
6370
6371#ifdef TSTIEMAIMPL_WITH_GENERATOR
6372DUMP_ALL_FN(SseBinaryI32R32, g_aSseBinaryI32R32)
6373static RTEXITCODE SseBinaryI32R32Generate(uint32_t cTests, const char * const *papszNameFmts)
6374{
6375 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6376
6377 static struct { RTFLOAT32U Val; } const s_aSpecials[] =
6378 {
6379 { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
6380 /** @todo More specials. */
6381 };
6382
6383 X86FXSTATE State;
6384 RT_ZERO(State);
6385 uint32_t cMinNormalPairs = (cTests - 144) / 4;
6386 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R32); iFn++)
6387 {
6388 PFNIEMAIMPLSSEF2I32U32 const pfn = g_aSseBinaryI32R32[iFn].pfnNative ? g_aSseBinaryI32R32[iFn].pfnNative : g_aSseBinaryI32R32[iFn].pfn;
6389
6390 IEMBINARYOUTPUT BinOut;
6391 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryI32R32[iFn]), RTEXITCODE_FAILURE);
6392
6393 uint32_t cNormalInputPairs = 0;
6394 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6395 {
6396 SSE_BINARY_I32_R32_TEST_T TestData; RT_ZERO(TestData);
6397
6398 TestData.r32ValIn = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val;
6399
6400 if (RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn))
6401 cNormalInputPairs++;
6402 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
6403 {
6404 iTest -= 1;
6405 continue;
6406 }
6407
6408 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6409 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6410 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6411 for (uint8_t iFz = 0; iFz < 2; iFz++)
6412 {
6413 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
6414 | (iRounding << X86_MXCSR_RC_SHIFT)
6415 | (iDaz ? X86_MXCSR_DAZ : 0)
6416 | (iFz ? X86_MXCSR_FZ : 0)
6417 | X86_MXCSR_XCPT_MASK;
6418 uint32_t fMxcsrM; int32_t i32OutM;
6419 pfn(&State, &fMxcsrM, &i32OutM, &TestData.r32ValIn.u);
6420 TestData.fMxcsrIn = State.MXCSR;
6421 TestData.fMxcsrOut = fMxcsrM;
6422 TestData.i32ValOut = i32OutM;
6423 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6424
6425 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
6426 uint32_t fMxcsrU; int32_t i32OutU;
6427 pfn(&State, &fMxcsrU, &i32OutU, &TestData.r32ValIn.u);
6428 TestData.fMxcsrIn = State.MXCSR;
6429 TestData.fMxcsrOut = fMxcsrU;
6430 TestData.i32ValOut = i32OutU;
6431 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6432
6433 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6434 if (fXcpt)
6435 {
6436 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6437 uint32_t fMxcsr1; int32_t i32Out1;
6438 pfn(&State, &fMxcsr1, &i32Out1, &TestData.r32ValIn.u);
6439 TestData.fMxcsrIn = State.MXCSR;
6440 TestData.fMxcsrOut = fMxcsr1;
6441 TestData.i32ValOut = i32Out1;
6442 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6443
6444 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6445 {
6446 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6447 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6448 uint32_t fMxcsr2; int32_t i32Out2;
6449 pfn(&State, &fMxcsr2, &i32Out2, &TestData.r32ValIn.u);
6450 TestData.fMxcsrIn = State.MXCSR;
6451 TestData.fMxcsrOut = fMxcsr2;
6452 TestData.i32ValOut = i32Out2;
6453 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6454 }
6455 if (!RT_IS_POWER_OF_TWO(fXcpt))
6456 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6457 if (fUnmasked & fXcpt)
6458 {
6459 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6460 uint32_t fMxcsr3; int32_t i32Out3;
6461 pfn(&State, &fMxcsr3, &i32Out3, &TestData.r32ValIn.u);
6462 TestData.fMxcsrIn = State.MXCSR;
6463 TestData.fMxcsrOut = fMxcsr3;
6464 TestData.i32ValOut = i32Out3;
6465 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6466 }
6467 }
6468 }
6469 }
6470 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6471 }
6472
6473 return RTEXITCODE_SUCCESS;
6474}
6475#endif
6476
6477
6478static void SseBinaryI32R32Test(void)
6479{
6480 X86FXSTATE State;
6481 RT_ZERO(State);
6482 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R32); iFn++)
6483 {
6484 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryI32R32[iFn]))
6485 continue;
6486
6487 SSE_BINARY_I32_R32_TEST_T const * const paTests = g_aSseBinaryI32R32[iFn].paTests;
6488 uint32_t const cTests = g_aSseBinaryI32R32[iFn].cTests;
6489 PFNIEMAIMPLSSEF2I32U32 pfn = g_aSseBinaryI32R32[iFn].pfn;
6490 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R32[iFn]);
6491 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6492 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6493 {
6494 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6495 {
6496 uint32_t fMxcsr = 0;
6497 int32_t i32Dst = 0;
6498
6499 State.MXCSR = paTests[iTest].fMxcsrIn;
6500 pfn(&State, &fMxcsr, &i32Dst, &paTests[iTest].r32ValIn.u);
6501 if ( fMxcsr != paTests[iTest].fMxcsrOut
6502 || i32Dst != paTests[iTest].i32ValOut)
6503 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
6504 "%s -> mxcsr=%#08x %RI32\n"
6505 "%s expected %#08x %RI32%s%s (%s)\n",
6506 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6507 FormatR32(&paTests[iTest].r32ValIn),
6508 iVar ? " " : "", fMxcsr, i32Dst,
6509 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i32ValOut,
6510 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6511 i32Dst != paTests[iTest].i32ValOut
6512 ? " - val" : "",
6513 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6514 }
6515 }
6516
6517 FREE_DECOMPRESSED_TESTS(g_aSseBinaryI32R32[iFn]);
6518 }
6519}
6520
6521
6522/*
6523 * SSE operations converting a scalar single-precision floating point value to a signed quad-word integer (cvttss2si and friends).
6524 */
6525TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I64_R32_T, SSE_BINARY_I64_R32_TEST_T, PFNIEMAIMPLSSEF2I64U32);
6526
6527static SSE_BINARY_I64_R32_T g_aSseBinaryI64R32[] =
6528{
6529 ENTRY_BIN(cvttss2si_i64_r32),
6530 ENTRY_BIN(cvtss2si_i64_r32),
6531};
6532
6533#ifdef TSTIEMAIMPL_WITH_GENERATOR
6534DUMP_ALL_FN(SseBinaryI64R32, g_aSseBinaryI64R32)
6535static RTEXITCODE SseBinaryI64R32Generate(uint32_t cTests, const char * const *papszNameFmts)
6536{
6537 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6538
6539 static struct { RTFLOAT32U Val; } const s_aSpecials[] =
6540 {
6541 { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
6542 /** @todo More specials. */
6543 };
6544
6545 X86FXSTATE State;
6546 RT_ZERO(State);
6547 uint32_t cMinNormalPairs = (cTests - 144) / 4;
6548 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R32); iFn++)
6549 {
6550 PFNIEMAIMPLSSEF2I64U32 const pfn = g_aSseBinaryI64R32[iFn].pfnNative ? g_aSseBinaryI64R32[iFn].pfnNative : g_aSseBinaryI64R32[iFn].pfn;
6551
6552 IEMBINARYOUTPUT BinOut;
6553 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryI64R32[iFn]), RTEXITCODE_FAILURE);
6554
6555 uint32_t cNormalInputPairs = 0;
6556 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6557 {
6558 SSE_BINARY_I64_R32_TEST_T TestData; RT_ZERO(TestData);
6559
6560 TestData.r32ValIn = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val;
6561
6562 if (RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn))
6563 cNormalInputPairs++;
6564 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
6565 {
6566 iTest -= 1;
6567 continue;
6568 }
6569
6570 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6571 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6572 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6573 for (uint8_t iFz = 0; iFz < 2; iFz++)
6574 {
6575 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
6576 | (iRounding << X86_MXCSR_RC_SHIFT)
6577 | (iDaz ? X86_MXCSR_DAZ : 0)
6578 | (iFz ? X86_MXCSR_FZ : 0)
6579 | X86_MXCSR_XCPT_MASK;
6580 uint32_t fMxcsrM; int64_t i64OutM;
6581 pfn(&State, &fMxcsrM, &i64OutM, &TestData.r32ValIn.u);
6582 TestData.fMxcsrIn = State.MXCSR;
6583 TestData.fMxcsrOut = fMxcsrM;
6584 TestData.i64ValOut = i64OutM;
6585 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6586
6587 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
6588 uint32_t fMxcsrU; int64_t i64OutU;
6589 pfn(&State, &fMxcsrU, &i64OutU, &TestData.r32ValIn.u);
6590 TestData.fMxcsrIn = State.MXCSR;
6591 TestData.fMxcsrOut = fMxcsrU;
6592 TestData.i64ValOut = i64OutU;
6593 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6594
6595 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6596 if (fXcpt)
6597 {
6598 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6599 uint32_t fMxcsr1; int64_t i64Out1;
6600 pfn(&State, &fMxcsr1, &i64Out1, &TestData.r32ValIn.u);
6601 TestData.fMxcsrIn = State.MXCSR;
6602 TestData.fMxcsrOut = fMxcsr1;
6603 TestData.i64ValOut = i64Out1;
6604 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6605
6606 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6607 {
6608 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6609 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6610 uint32_t fMxcsr2; int64_t i64Out2;
6611 pfn(&State, &fMxcsr2, &i64Out2, &TestData.r32ValIn.u);
6612 TestData.fMxcsrIn = State.MXCSR;
6613 TestData.fMxcsrOut = fMxcsr2;
6614 TestData.i64ValOut = i64Out2;
6615 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6616 }
6617 if (!RT_IS_POWER_OF_TWO(fXcpt))
6618 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6619 if (fUnmasked & fXcpt)
6620 {
6621 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6622 uint32_t fMxcsr3; int64_t i64Out3;
6623 pfn(&State, &fMxcsr3, &i64Out3, &TestData.r32ValIn.u);
6624 TestData.fMxcsrIn = State.MXCSR;
6625 TestData.fMxcsrOut = fMxcsr3;
6626 TestData.i64ValOut = i64Out3;
6627 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6628 }
6629 }
6630 }
6631 }
6632 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6633 }
6634
6635 return RTEXITCODE_SUCCESS;
6636}
6637#endif
6638
6639
6640static void SseBinaryI64R32Test(void)
6641{
6642 X86FXSTATE State;
6643 RT_ZERO(State);
6644 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R32); iFn++)
6645 {
6646 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryI64R32[iFn]))
6647 continue;
6648
6649 SSE_BINARY_I64_R32_TEST_T const * const paTests = g_aSseBinaryI64R32[iFn].paTests;
6650 uint32_t const cTests = g_aSseBinaryI64R32[iFn].cTests;
6651 PFNIEMAIMPLSSEF2I64U32 pfn = g_aSseBinaryI64R32[iFn].pfn;
6652 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI64R32[iFn]);
6653 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6654 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6655 {
6656 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6657 {
6658 uint32_t fMxcsr = 0;
6659 int64_t i64Dst = 0;
6660
6661 State.MXCSR = paTests[iTest].fMxcsrIn;
6662 pfn(&State, &fMxcsr, &i64Dst, &paTests[iTest].r32ValIn.u);
6663 if ( fMxcsr != paTests[iTest].fMxcsrOut
6664 || i64Dst != paTests[iTest].i64ValOut)
6665 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
6666 "%s -> mxcsr=%#08x %RI64\n"
6667 "%s expected %#08x %RI64%s%s (%s)\n",
6668 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6669 FormatR32(&paTests[iTest].r32ValIn),
6670 iVar ? " " : "", fMxcsr, i64Dst,
6671 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i64ValOut,
6672 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6673 i64Dst != paTests[iTest].i64ValOut
6674 ? " - val" : "",
6675 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6676 }
6677 }
6678
6679 FREE_DECOMPRESSED_TESTS(g_aSseBinaryI64R32[iFn]);
6680 }
6681}
6682
6683
6684/*
6685 * SSE operations converting a signed double-word integer to a scalar double-precision floating point value (probably only cvtsi2sd).
6686 */
6687TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_I32_T, SSE_BINARY_R64_I32_TEST_T, PFNIEMAIMPLSSEF2R64I32);
6688
6689static SSE_BINARY_R64_I32_T g_aSseBinaryR64I32[] =
6690{
6691 ENTRY_BIN(cvtsi2sd_r64_i32)
6692};
6693
6694#ifdef TSTIEMAIMPL_WITH_GENERATOR
6695DUMP_ALL_FN(SseBinaryR64I32, g_aSseBinaryR64I32)
6696static RTEXITCODE SseBinaryR64I32Generate(uint32_t cTests, const char * const *papszNameFmts)
6697{
6698 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6699
6700 static int32_t const s_aSpecials[] =
6701 {
6702 INT32_MIN,
6703 INT32_MAX,
6704 /** @todo More specials. */
6705 };
6706
6707 X86FXSTATE State;
6708 RT_ZERO(State);
6709 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I32); iFn++)
6710 {
6711 PFNIEMAIMPLSSEF2R64I32 const pfn = g_aSseBinaryR64I32[iFn].pfnNative ? g_aSseBinaryR64I32[iFn].pfnNative : g_aSseBinaryR64I32[iFn].pfn;
6712
6713 IEMBINARYOUTPUT BinOut;
6714 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryR64I32[iFn]), RTEXITCODE_FAILURE);
6715
6716 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6717 {
6718 SSE_BINARY_R64_I32_TEST_T TestData; RT_ZERO(TestData);
6719
6720 TestData.i32ValIn = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
6721
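 /* Integer inputs cannot be non-normal, so there is no normal-input quota here; the MXCSR
    sweep and exception probing match the generators above. */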
6722 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6723 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6724 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6725 for (uint8_t iFz = 0; iFz < 2; iFz++)
6726 {
6727 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
6728 | (iRounding << X86_MXCSR_RC_SHIFT)
6729 | (iDaz ? X86_MXCSR_DAZ : 0)
6730 | (iFz ? X86_MXCSR_FZ : 0)
6731 | X86_MXCSR_XCPT_MASK;
6732 uint32_t fMxcsrM; RTFLOAT64U r64OutM;
6733 pfn(&State, &fMxcsrM, &r64OutM, &TestData.i32ValIn);
6734 TestData.fMxcsrIn = State.MXCSR;
6735 TestData.fMxcsrOut = fMxcsrM;
6736 TestData.r64ValOut = r64OutM;
6737 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6738
6739 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
6740 uint32_t fMxcsrU; RTFLOAT64U r64OutU;
6741 pfn(&State, &fMxcsrU, &r64OutU, &TestData.i32ValIn);
6742 TestData.fMxcsrIn = State.MXCSR;
6743 TestData.fMxcsrOut = fMxcsrU;
6744 TestData.r64ValOut = r64OutU;
6745 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6746
6747 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6748 if (fXcpt)
6749 {
6750 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6751 uint32_t fMxcsr1; RTFLOAT64U r64Out1;
6752 pfn(&State, &fMxcsr1, &r64Out1, &TestData.i32ValIn);
6753 TestData.fMxcsrIn = State.MXCSR;
6754 TestData.fMxcsrOut = fMxcsr1;
6755 TestData.r64ValOut = r64Out1;
6756 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6757
6758 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6759 {
6760 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6761 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6762 uint32_t fMxcsr2; RTFLOAT64U r64Out2;
6763 pfn(&State, &fMxcsr2, &r64Out2, &TestData.i32ValIn);
6764 TestData.fMxcsrIn = State.MXCSR;
6765 TestData.fMxcsrOut = fMxcsr2;
6766 TestData.r64ValOut = r64Out2;
6767 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6768 }
6769 if (!RT_IS_POWER_OF_TWO(fXcpt))
6770 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6771 if (fUnmasked & fXcpt)
6772 {
6773 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6774 uint32_t fMxcsr3; RTFLOAT64U r64Out3;
6775 pfn(&State, &fMxcsr3, &r64Out3, &TestData.i32ValIn);
6776 TestData.fMxcsrIn = State.MXCSR;
6777 TestData.fMxcsrOut = fMxcsr3;
6778 TestData.r64ValOut = r64Out3;
6779 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6780 }
6781 }
6782 }
6783 }
6784 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6785 }
6786
6787 return RTEXITCODE_SUCCESS;
6788}
6789#endif
6790
6791
6792static void SseBinaryR64I32Test(void)
6793{
6794 X86FXSTATE State;
6795 RT_ZERO(State);
6796 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I32); iFn++)
6797 {
6798 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR64I32[iFn]))
6799 continue;
6800
6801 SSE_BINARY_R64_I32_TEST_T const * const paTests = g_aSseBinaryR64I32[iFn].paTests;
6802 uint32_t const cTests = g_aSseBinaryR64I32[iFn].cTests;
6803 PFNIEMAIMPLSSEF2R64I32 pfn = g_aSseBinaryR64I32[iFn].pfn;
6804 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64I32[iFn]);
6805 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6806 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6807 {
6808 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6809 {
6810 uint32_t fMxcsr = 0;
6811 RTFLOAT64U r64Dst; RT_ZERO(r64Dst);
6812
6813 State.MXCSR = paTests[iTest].fMxcsrIn;
6814 pfn(&State, &fMxcsr, &r64Dst, &paTests[iTest].i32ValIn);
6815 if ( fMxcsr != paTests[iTest].fMxcsrOut
6816 || !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut))
6817 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32\n"
6818 "%s -> mxcsr=%#08x %s\n"
6819 "%s expected %#08x %s%s%s (%s)\n",
6820 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6821 paTests[iTest].i32ValIn,
6822 iVar ? " " : "", fMxcsr, FormatR64(&r64Dst),
6823 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR64(&paTests[iTest].r64ValOut),
6824 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6825 !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut)
6826 ? " - val" : "",
6827 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6828 }
6829 }
6830
6831 FREE_DECOMPRESSED_TESTS(g_aSseBinaryR64I32[iFn]);
6832 }
6833}
6834
6835
6836/*
6837 * SSE operations converting a signed quad-word integer to a scalar double-precision floating point value (probably only cvtsi2sd).
6838 */
6839TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_I64_T, SSE_BINARY_R64_I64_TEST_T, PFNIEMAIMPLSSEF2R64I64);
6840
6841static SSE_BINARY_R64_I64_T g_aSseBinaryR64I64[] =
6842{
6843 ENTRY_BIN(cvtsi2sd_r64_i64),
6844};
6845
6846#ifdef TSTIEMAIMPL_WITH_GENERATOR
6847DUMP_ALL_FN(SseBinaryR64I64, g_aSseBinaryR64I64)
6848static RTEXITCODE SseBinaryR64I64Generate(uint32_t cTests, const char * const *papszNameFmts)
6849{
6850 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
6851
6852 static int64_t const s_aSpecials[] =
6853 {
6854 INT64_MIN,
6855 INT64_MAX
6856 /** @todo More specials. */
6857 };
6858
6859 X86FXSTATE State;
6860 RT_ZERO(State);
6861 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I64); iFn++)
6862 {
6863 PFNIEMAIMPLSSEF2R64I64 const pfn = g_aSseBinaryR64I64[iFn].pfnNative ? g_aSseBinaryR64I64[iFn].pfnNative : g_aSseBinaryR64I64[iFn].pfn;
6864
6865 IEMBINARYOUTPUT BinOut;
6866 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryR64I64[iFn]), RTEXITCODE_FAILURE);
6867
6868 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
6869 {
6870 SSE_BINARY_R64_I64_TEST_T TestData; RT_ZERO(TestData);
6871
6872 TestData.i64ValIn = iTest < cTests ? RandI64Src(iTest) : s_aSpecials[iTest - cTests];
6873
6874 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
6875 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
6876 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
6877 for (uint8_t iFz = 0; iFz < 2; iFz++)
6878 {
6879 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
6880 | (iRounding << X86_MXCSR_RC_SHIFT)
6881 | (iDaz ? X86_MXCSR_DAZ : 0)
6882 | (iFz ? X86_MXCSR_FZ : 0)
6883 | X86_MXCSR_XCPT_MASK;
6884 uint32_t fMxcsrM; RTFLOAT64U r64OutM;
6885 pfn(&State, &fMxcsrM, &r64OutM, &TestData.i64ValIn);
6886 TestData.fMxcsrIn = State.MXCSR;
6887 TestData.fMxcsrOut = fMxcsrM;
6888 TestData.r64ValOut = r64OutM;
6889 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6890
6891 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
6892 uint32_t fMxcsrU; RTFLOAT64U r64OutU;
6893 pfn(&State, &fMxcsrU, &r64OutU, &TestData.i64ValIn);
6894 TestData.fMxcsrIn = State.MXCSR;
6895 TestData.fMxcsrOut = fMxcsrU;
6896 TestData.r64ValOut = r64OutU;
6897 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6898
6899 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
6900 if (fXcpt)
6901 {
6902 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
6903 uint32_t fMxcsr1; RTFLOAT64U r64Out1;
6904 pfn(&State, &fMxcsr1, &r64Out1, &TestData.i64ValIn);
6905 TestData.fMxcsrIn = State.MXCSR;
6906 TestData.fMxcsrOut = fMxcsr1;
6907 TestData.r64ValOut = r64Out1;
6908 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6909
6910 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
6911 {
6912 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
6913 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
6914 uint32_t fMxcsr2; RTFLOAT64U r64Out2;
6915 pfn(&State, &fMxcsr2, &r64Out2, &TestData.i64ValIn);
6916 TestData.fMxcsrIn = State.MXCSR;
6917 TestData.fMxcsrOut = fMxcsr2;
6918 TestData.r64ValOut = r64Out2;
6919 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6920 }
6921 if (!RT_IS_POWER_OF_TWO(fXcpt))
6922 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
6923 if (fUnmasked & fXcpt)
6924 {
6925 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
6926 uint32_t fMxcsr3; RTFLOAT64U r64Out3;
6927 pfn(&State, &fMxcsr3, &r64Out3, &TestData.i64ValIn);
6928 TestData.fMxcsrIn = State.MXCSR;
6929 TestData.fMxcsrOut = fMxcsr3;
6930 TestData.r64ValOut = r64Out3;
6931 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
6932 }
6933 }
6934 }
6935 }
6936 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
6937 }
6938
6939 return RTEXITCODE_SUCCESS;
6940}
6941#endif
6942
6943
6944static void SseBinaryR64I64Test(void)
6945{
6946 X86FXSTATE State;
6947 RT_ZERO(State);
6948 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I64); iFn++)
6949 {
6950 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR64I64[iFn]))
6951 continue;
6952
6953 SSE_BINARY_R64_I64_TEST_T const * const paTests = g_aSseBinaryR64I64[iFn].paTests;
6954 uint32_t const cTests = g_aSseBinaryR64I64[iFn].cTests;
6955 PFNIEMAIMPLSSEF2R64I64 pfn = g_aSseBinaryR64I64[iFn].pfn;
6956 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64I64[iFn]);
6957 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6958 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6959 {
6960 for (uint32_t iTest = 0; iTest < cTests; iTest++)
6961 {
6962 uint32_t fMxcsr = 0;
6963 RTFLOAT64U r64Dst; RT_ZERO(r64Dst);
6964
6965 State.MXCSR = paTests[iTest].fMxcsrIn;
6966 pfn(&State, &fMxcsr, &r64Dst, &paTests[iTest].i64ValIn);
6967 if ( fMxcsr != paTests[iTest].fMxcsrOut
6968 || !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut))
6969 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI64\n"
6970 "%s -> mxcsr=%#08x %s\n"
6971 "%s expected %#08x %s%s%s (%s)\n",
6972 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6973 paTests[iTest].i64ValIn,
6974 iVar ? " " : "", fMxcsr, FormatR64(&r64Dst),
6975 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR64(&paTests[iTest].r64ValOut),
6976 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6977 !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut)
6978 ? " - val" : "",
6979 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6980 }
6981 }
6982
6983 FREE_DECOMPRESSED_TESTS(g_aSseBinaryR64I64[iFn]);
6984 }
6985}
6986
6987
6988/*
6989 * SSE operations converting a signed double-word integer to a scalar single-precision floating point value (probably only cvtsi2ss).
6990 */
6991TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_I32_T, SSE_BINARY_R32_I32_TEST_T, PFNIEMAIMPLSSEF2R32I32);
6992
6993static SSE_BINARY_R32_I32_T g_aSseBinaryR32I32[] =
6994{
6995 ENTRY_BIN(cvtsi2ss_r32_i32),
6996};
6997
6998#ifdef TSTIEMAIMPL_WITH_GENERATOR
6999DUMP_ALL_FN(SseBinaryR32I32, g_aSseBinaryR32I32)
7000static RTEXITCODE SseBinaryR32I32Generate(uint32_t cTests, const char * const *papszNameFmts)
7001{
7002 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
7003
7004 static int32_t const s_aSpecials[] =
7005 {
7006 INT32_MIN,
7007 INT32_MAX,
7008 /** @todo More specials. */
7009 };
7010
7011 X86FXSTATE State;
7012 RT_ZERO(State);
7013 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I32); iFn++)
7014 {
7015 PFNIEMAIMPLSSEF2R32I32 const pfn = g_aSseBinaryR32I32[iFn].pfnNative ? g_aSseBinaryR32I32[iFn].pfnNative : g_aSseBinaryR32I32[iFn].pfn;
7016
7017 IEMBINARYOUTPUT BinOut;
7018 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryR32I32[iFn]), RTEXITCODE_FAILURE);
7019
7020 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
7021 {
7022 SSE_BINARY_R32_I32_TEST_T TestData; RT_ZERO(TestData);
7023
7024 TestData.i32ValIn = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
7025
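 /* Same MXCSR sweep as the cvtsi2sd generators above; the result here is a scalar r32. */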
7026 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
7027 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
7028 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
7029 for (uint8_t iFz = 0; iFz < 2; iFz++)
7030 {
7031 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
7032 | (iRounding << X86_MXCSR_RC_SHIFT)
7033 | (iDaz ? X86_MXCSR_DAZ : 0)
7034 | (iFz ? X86_MXCSR_FZ : 0)
7035 | X86_MXCSR_XCPT_MASK;
7036 uint32_t fMxcsrM; RTFLOAT32U r32OutM;
7037 pfn(&State, &fMxcsrM, &r32OutM, &TestData.i32ValIn);
7038 TestData.fMxcsrIn = State.MXCSR;
7039 TestData.fMxcsrOut = fMxcsrM;
7040 TestData.r32ValOut = r32OutM;
7041 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7042
7043 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
7044 uint32_t fMxcsrU; RTFLOAT32U r32OutU;
7045 pfn(&State, &fMxcsrU, &r32OutU, &TestData.i32ValIn);
7046 TestData.fMxcsrIn = State.MXCSR;
7047 TestData.fMxcsrOut = fMxcsrU;
7048 TestData.r32ValOut = r32OutU;
7049 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7050
7051 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
7052 if (fXcpt)
7053 {
7054 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
7055 uint32_t fMxcsr1; RTFLOAT32U r32Out1;
7056 pfn(&State, &fMxcsr1, &r32Out1, &TestData.i32ValIn);
7057 TestData.fMxcsrIn = State.MXCSR;
7058 TestData.fMxcsrOut = fMxcsr1;
7059 TestData.r32ValOut = r32Out1;
7060 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7061
7062 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
7063 {
7064 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
7065 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
7066 uint32_t fMxcsr2; RTFLOAT32U r32Out2;
7067 pfn(&State, &fMxcsr2, &r32Out2, &TestData.i32ValIn);
7068 TestData.fMxcsrIn = State.MXCSR;
7069 TestData.fMxcsrOut = fMxcsr2;
7070 TestData.r32ValOut = r32Out2;
7071 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7072 }
7073 if (!RT_IS_POWER_OF_TWO(fXcpt))
7074 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
7075 if (fUnmasked & fXcpt)
7076 {
7077 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
7078 uint32_t fMxcsr3; RTFLOAT32U r32Out3;
7079 pfn(&State, &fMxcsr3, &r32Out3, &TestData.i32ValIn);
7080 TestData.fMxcsrIn = State.MXCSR;
7081 TestData.fMxcsrOut = fMxcsr3;
7082 TestData.r32ValOut = r32Out3;
7083 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7084 }
7085 }
7086 }
7087 }
7088 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
7089 }
7090
7091 return RTEXITCODE_SUCCESS;
7092}
7093#endif
7094
7095
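#if 0
/* Purely illustrative sketch (not compiled in): replaying a single stored
 * cvtsi2ss_r32_i32 record against the C worker by hand, mirroring the inner
 * loop of SseBinaryR32I32Test() below.  It assumes the data for entry 0 has
 * already been decompressed (cf. SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS). */
static void sketchReplayOneSseBinaryR32I32Record(void)
{
    SSE_BINARY_R32_I32_TEST_T const *pTest = &g_aSseBinaryR32I32[0].paTests[0];

    X86FXSTATE State;
    RT_ZERO(State);
    State.MXCSR = pTest->fMxcsrIn;              /* the exact MXCSR the generator used */

    uint32_t   fMxcsrOut = 0;
    RTFLOAT32U r32Dst;
    RT_ZERO(r32Dst);
    g_aSseBinaryR32I32[0].pfn(&State, &fMxcsrOut, &r32Dst, &pTest->i32ValIn);

    /* Both the returned MXCSR and the converted value must match the record. */
    bool const fMatch = fMxcsrOut == pTest->fMxcsrOut
                     && RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &pTest->r32ValOut);
    RT_NOREF(fMatch);
}
#endif
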
7096static void SseBinaryR32I32Test(void)
7097{
7098 X86FXSTATE State;
7099 RT_ZERO(State);
7100 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I32); iFn++)
7101 {
7102 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR32I32[iFn]))
7103 continue;
7104
7105 SSE_BINARY_R32_I32_TEST_T const * const paTests = g_aSseBinaryR32I32[iFn].paTests;
7106 uint32_t const cTests = g_aSseBinaryR32I32[iFn].cTests;
7107 PFNIEMAIMPLSSEF2R32I32 pfn = g_aSseBinaryR32I32[iFn].pfn;
7108 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32I32[iFn]);
7109 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7110 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7111 {
7112 for (uint32_t iTest = 0; iTest < cTests; iTest++)
7113 {
7114 uint32_t fMxcsr = 0;
7115 RTFLOAT32U r32Dst; RT_ZERO(r32Dst);
7116
7117 State.MXCSR = paTests[iTest].fMxcsrIn;
7118 pfn(&State, &fMxcsr, &r32Dst, &paTests[iTest].i32ValIn);
7119 if ( fMxcsr != paTests[iTest].fMxcsrOut
7120 || !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut))
7121 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32\n"
7122 "%s -> mxcsr=%#08x %RI32\n"
7123 "%s expected %#08x %RI32%s%s (%s)\n",
7124 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7125 &paTests[iTest].i32ValIn,
7126 iVar ? " " : "", fMxcsr, FormatR32(&r32Dst),
7127 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR32(&paTests[iTest].r32ValOut),
7128 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7129 !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut)
7130 ? " - val" : "",
7131 FormatMxcsr(paTests[iTest].fMxcsrIn) );
7132 }
7133 }
7134
7135 FREE_DECOMPRESSED_TESTS(g_aSseBinaryR32I32[iFn]);
7136 }
7137}
7138
7139
7140/*
7141 * SSE operations converting a single signed quad-word integer to a single-precision floating point value (probably only cvtsi2ss).
7142 */
7143TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_I64_T, SSE_BINARY_R32_I64_TEST_T, PFNIEMAIMPLSSEF2R32I64);
7144
7145static SSE_BINARY_R32_I64_T g_aSseBinaryR32I64[] =
7146{
7147 ENTRY_BIN(cvtsi2ss_r32_i64),
7148};
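/*
 * Note: with a 64-bit source operand cvtsi2ss has to fit up to 63 significant
 * bits into a 24-bit single-precision significand, so any input with more than
 * 24 significant bits is rounded and should set the precision flag (PE).
 */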
7149
7150#ifdef TSTIEMAIMPL_WITH_GENERATOR
7151DUMP_ALL_FN(SseBinaryR32I64, g_aSseBinaryR32I64)
7152static RTEXITCODE SseBinaryR32I64Generate(uint32_t cTests, const char * const *papszNameFmts)
7153{
7154 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
7155
7156 static int64_t const s_aSpecials[] =
7157 {
7158 INT64_MIN,
7159 INT64_MAX
7160 /** @todo More specials. */
7161 };
7162
7163 X86FXSTATE State;
7164 RT_ZERO(State);
7165 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I64); iFn++)
7166 {
7167 PFNIEMAIMPLSSEF2R32I64 const pfn = g_aSseBinaryR32I64[iFn].pfnNative ? g_aSseBinaryR32I64[iFn].pfnNative : g_aSseBinaryR32I64[iFn].pfn;
7168
7169 IEMBINARYOUTPUT BinOut;
7170 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseBinaryR32I64[iFn]), RTEXITCODE_FAILURE);
7171
7172 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
7173 {
7174 SSE_BINARY_R32_I64_TEST_T TestData; RT_ZERO(TestData);
7175
7176 TestData.i64ValIn = iTest < cTests ? RandI64Src(iTest) : s_aSpecials[iTest - cTests];
7177
7178 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
7179 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
7180 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
7181 for (uint8_t iFz = 0; iFz < 2; iFz++)
7182 {
7183 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
7184 | (iRounding << X86_MXCSR_RC_SHIFT)
7185 | (iDaz ? X86_MXCSR_DAZ : 0)
7186 | (iFz ? X86_MXCSR_FZ : 0)
7187 | X86_MXCSR_XCPT_MASK;
7188 uint32_t fMxcsrM; RTFLOAT32U r32OutM;
7189 pfn(&State, &fMxcsrM, &r32OutM, &TestData.i64ValIn);
7190 TestData.fMxcsrIn = State.MXCSR;
7191 TestData.fMxcsrOut = fMxcsrM;
7192 TestData.r32ValOut = r32OutM;
7193 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7194
7195 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
7196 uint32_t fMxcsrU; RTFLOAT32U r32OutU;
7197 pfn(&State, &fMxcsrU, &r32OutU, &TestData.i64ValIn);
7198 TestData.fMxcsrIn = State.MXCSR;
7199 TestData.fMxcsrOut = fMxcsrU;
7200 TestData.r32ValOut = r32OutU;
7201 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7202
7203 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
7204 if (fXcpt)
7205 {
7206 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
7207 uint32_t fMxcsr1; RTFLOAT32U r32Out1;
7208 pfn(&State, &fMxcsr1, &r32Out1, &TestData.i64ValIn);
7209 TestData.fMxcsrIn = State.MXCSR;
7210 TestData.fMxcsrOut = fMxcsr1;
7211 TestData.r32ValOut = r32Out1;
7212 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7213
7214 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
7215 {
7216 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
7217 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
7218 uint32_t fMxcsr2; RTFLOAT32U r32Out2;
7219 pfn(&State, &fMxcsr2, &r32Out2, &TestData.i64ValIn);
7220 TestData.fMxcsrIn = State.MXCSR;
7221 TestData.fMxcsrOut = fMxcsr2;
7222 TestData.r32ValOut = r32Out2;
7223 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7224 }
7225 if (!RT_IS_POWER_OF_TWO(fXcpt))
7226 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
7227 if (fUnmasked & fXcpt)
7228 {
7229 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
7230 uint32_t fMxcsr3; RTFLOAT32U r32Out3;
7231 pfn(&State, &fMxcsr3, &r32Out3, &TestData.i64ValIn);
7232 TestData.fMxcsrIn = State.MXCSR;
7233 TestData.fMxcsrOut = fMxcsr3;
7234 TestData.r32ValOut = r32Out3;
7235 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7236 }
7237 }
7238 }
7239 }
7240 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
7241 }
7242
7243 return RTEXITCODE_SUCCESS;
7244}
7245#endif
7246
7247
7248static void SseBinaryR32I64Test(void)
7249{
7250 X86FXSTATE State;
7251 RT_ZERO(State);
7252 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I64); iFn++)
7253 {
7254 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseBinaryR32I64[iFn]))
7255 continue;
7256
7257 SSE_BINARY_R32_I64_TEST_T const * const paTests = g_aSseBinaryR32I64[iFn].paTests;
7258 uint32_t const cTests = g_aSseBinaryR32I64[iFn].cTests;
7259 PFNIEMAIMPLSSEF2R32I64 pfn = g_aSseBinaryR32I64[iFn].pfn;
7260 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32I64[iFn]);
7261 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7262 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7263 {
7264 for (uint32_t iTest = 0; iTest < cTests; iTest++)
7265 {
7266 uint32_t fMxcsr = 0;
7267 RTFLOAT32U r32Dst; RT_ZERO(r32Dst);
7268
7269 State.MXCSR = paTests[iTest].fMxcsrIn;
7270 pfn(&State, &fMxcsr, &r32Dst, &paTests[iTest].i64ValIn);
7271 if ( fMxcsr != paTests[iTest].fMxcsrOut
7272 || !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut))
7273 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI64\n"
7274 "%s -> mxcsr=%#08x %RI32\n"
7275 "%s expected %#08x %RI32%s%s (%s)\n",
7276 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7277 &paTests[iTest].i64ValIn,
7278 iVar ? " " : "", fMxcsr, FormatR32(&r32Dst),
7279 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR32(&paTests[iTest].r32ValOut),
7280 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7281 !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut)
7282 ? " - val" : "",
7283 FormatMxcsr(paTests[iTest].fMxcsrIn) );
7284 }
7285 }
7286
7287 FREE_DECOMPRESSED_TESTS(g_aSseBinaryR32I64[iFn]);
7288 }
7289}
7290
7291
7292/*
7293 * Compare SSE operations on single single-precision floating point values - outputting only EFLAGS.
7294 */
7295TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_EFL_R32_R32_T, SSE_COMPARE_EFL_R32_R32_TEST_T, PFNIEMAIMPLF2EFLMXCSR128);
7296
7297static SSE_COMPARE_EFL_R32_R32_T g_aSseCompareEflR32R32[] =
7298{
7299 ENTRY_BIN(ucomiss_u128),
7300 ENTRY_BIN(comiss_u128),
7301 ENTRY_BIN_AVX(vucomiss_u128),
7302 ENTRY_BIN_AVX(vcomiss_u128),
7303};
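/*
 * Note: (v)ucomiss/(v)comiss report the result in ZF/PF/CF only (unordered
 * 1/1/1, equal 1/0/0, less 0/0/1, greater 0/0/0) and clear OF/AF/SF; the
 * ucomis* variants raise the invalid-operation flag only for SNaN inputs while
 * the comis* variants do so for any NaN.  That is why the test below checks
 * both MxcsrDiff() and EFlagsDiff().
 */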
7304
7305#ifdef TSTIEMAIMPL_WITH_GENERATOR
7306DUMP_ALL_FN(SseCompareEflR32R32, g_aSseCompareEflR32R32)
7307static RTEXITCODE SseCompareEflR32R32Generate(uint32_t cTests, const char * const *papszNameFmts)
7308{
7309 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
7310
7311 static struct { RTFLOAT32U Val1, Val2; } const s_aSpecials[] =
7312 {
7313 { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) },
7314 { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) },
7315 { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(0) },
7316 { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) },
7317 { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0) },
7318 { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(1) },
7319 { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(0) },
7320 { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1) },
7321 /** @todo More specials. */
7322 };
7323
7324 uint32_t cMinNormalPairs = (cTests - 144) / 4;
7325 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR32R32); iFn++)
7326 {
7327 PFNIEMAIMPLF2EFLMXCSR128 const pfn = g_aSseCompareEflR32R32[iFn].pfnNative ? g_aSseCompareEflR32R32[iFn].pfnNative : g_aSseCompareEflR32R32[iFn].pfn;
7328
7329 IEMBINARYOUTPUT BinOut;
7330 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseCompareEflR32R32[iFn]), RTEXITCODE_FAILURE);
7331
7332 uint32_t cNormalInputPairs = 0;
7333 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
7334 {
7335 SSE_COMPARE_EFL_R32_R32_TEST_T TestData; RT_ZERO(TestData);
7336 X86XMMREG ValIn1; RT_ZERO(ValIn1);
7337 X86XMMREG ValIn2; RT_ZERO(ValIn2);
7338
7339 TestData.r32ValIn1 = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7340 TestData.r32ValIn2 = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7341
7342 ValIn1.ar32[0] = TestData.r32ValIn1;
7343 ValIn2.ar32[0] = TestData.r32ValIn2;
7344
7345 if ( RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn1)
7346 && RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn2))
7347 cNormalInputPairs++;
7348 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
7349 {
7350 iTest -= 1;
7351 continue;
7352 }
7353
7354 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
7355 uint32_t const fEFlags = RandEFlags();
7356 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
7357 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
7358 for (uint8_t iFz = 0; iFz < 2; iFz++)
7359 {
7360 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
7361 | (iRounding << X86_MXCSR_RC_SHIFT)
7362 | (iDaz ? X86_MXCSR_DAZ : 0)
7363 | (iFz ? X86_MXCSR_FZ : 0)
7364 | X86_MXCSR_XCPT_MASK;
7365 uint32_t fMxcsrM = fMxcsrIn;
7366 uint32_t fEFlagsM = fEFlags;
7367 pfn(&fMxcsrM, &fEFlagsM, &ValIn1, &ValIn2);
7368 TestData.fMxcsrIn = fMxcsrIn;
7369 TestData.fMxcsrOut = fMxcsrM;
7370 TestData.fEflIn = fEFlags;
7371 TestData.fEflOut = fEFlagsM;
7372 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7373
7374 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
7375 uint32_t fMxcsrU = fMxcsrIn;
7376 uint32_t fEFlagsU = fEFlags;
7377 pfn(&fMxcsrU, &fEFlagsU, &ValIn1, &ValIn2);
7378 TestData.fMxcsrIn = fMxcsrIn;
7379 TestData.fMxcsrOut = fMxcsrU;
7380 TestData.fEflIn = fEFlags;
7381 TestData.fEflOut = fEFlagsU;
7382 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7383
7384 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
7385 if (fXcpt)
7386 {
7387 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
7388 uint32_t fMxcsr1 = fMxcsrIn;
7389 uint32_t fEFlags1 = fEFlags;
7390 pfn(&fMxcsr1, &fEFlags1, &ValIn1, &ValIn2);
7391 TestData.fMxcsrIn = fMxcsrIn;
7392 TestData.fMxcsrOut = fMxcsr1;
7393 TestData.fEflIn = fEFlags;
7394 TestData.fEflOut = fEFlags1;
7395 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7396
7397 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
7398 {
7399 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
7400 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
7401 uint32_t fMxcsr2 = fMxcsrIn;
7402 uint32_t fEFlags2 = fEFlags;
7403 pfn(&fMxcsr2, &fEFlags2, &ValIn1, &ValIn2);
7404 TestData.fMxcsrIn = fMxcsrIn;
7405 TestData.fMxcsrOut = fMxcsr2;
7406 TestData.fEflIn = fEFlags;
7407 TestData.fEflOut = fEFlags2;
7408 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7409 }
7410 if (!RT_IS_POWER_OF_TWO(fXcpt))
7411 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
7412 if (fUnmasked & fXcpt)
7413 {
7414 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
7415 uint32_t fMxcsr3 = fMxcsrIn;
7416 uint32_t fEFlags3 = fEFlags;
7417 pfn(&fMxcsr3, &fEFlags3, &ValIn1, &ValIn2);
7418 TestData.fMxcsrIn = fMxcsrIn;
7419 TestData.fMxcsrOut = fMxcsr3;
7420 TestData.fEflIn = fEFlags;
7421 TestData.fEflOut = fEFlags3;
7422 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7423 }
7424 }
7425 }
7426 }
7427 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
7428 }
7429
7430 return RTEXITCODE_SUCCESS;
7431}
7432#endif
7433
7434static void SseCompareEflR32R32Test(void)
7435{
7436 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR32R32); iFn++)
7437 {
7438 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseCompareEflR32R32[iFn]))
7439 continue;
7440
7441 SSE_COMPARE_EFL_R32_R32_TEST_T const * const paTests = g_aSseCompareEflR32R32[iFn].paTests;
7442 uint32_t const cTests = g_aSseCompareEflR32R32[iFn].cTests;
7443 PFNIEMAIMPLF2EFLMXCSR128 pfn = g_aSseCompareEflR32R32[iFn].pfn;
7444 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareEflR32R32[iFn]);
7445 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7446 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7447 {
7448 for (uint32_t iTest = 0; iTest < cTests; iTest++)
7449 {
7450 X86XMMREG ValIn1; RT_ZERO(ValIn1);
7451 X86XMMREG ValIn2; RT_ZERO(ValIn2);
7452
7453 ValIn1.ar32[0] = paTests[iTest].r32ValIn1;
7454 ValIn2.ar32[0] = paTests[iTest].r32ValIn2;
7455 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
7456 uint32_t fEFlags = paTests[iTest].fEflIn;
7457 pfn(&fMxcsr, &fEFlags, &ValIn1, &ValIn2);
7458 if ( fMxcsr != paTests[iTest].fMxcsrOut
7459 || fEFlags != paTests[iTest].fEflOut)
7460 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x efl=%#08x in1=%s in2=%s\n"
7461 "%s -> mxcsr=%#08x %#08x\n"
7462 "%s expected %#08x %#08x%s (%s) (EFL: %s)\n",
7463 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn, paTests[iTest].fEflIn,
7464 FormatR32(&paTests[iTest].r32ValIn1), FormatR32(&paTests[iTest].r32ValIn2),
7465 iVar ? " " : "", fMxcsr, fEFlags,
7466 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].fEflOut,
7467 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7468 FormatMxcsr(paTests[iTest].fMxcsrIn),
7469 EFlagsDiff(fEFlags, paTests[iTest].fEflOut));
7470 }
7471 }
7472
7473 FREE_DECOMPRESSED_TESTS(g_aSseCompareEflR32R32[iFn]);
7474 }
7475}
7476
7477
7478/*
7479 * Compare SSE operations on single double-precision floating point values - outputting only EFLAGS.
7480 */
7481TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_EFL_R64_R64_T, SSE_COMPARE_EFL_R64_R64_TEST_T, PFNIEMAIMPLF2EFLMXCSR128);
7482
7483static SSE_COMPARE_EFL_R64_R64_T g_aSseCompareEflR64R64[] =
7484{
7485 ENTRY_BIN(ucomisd_u128),
7486 ENTRY_BIN(comisd_u128),
7487 ENTRY_BIN_AVX(vucomisd_u128),
7488 ENTRY_BIN_AVX(vcomisd_u128)
7489};
7490
7491#ifdef TSTIEMAIMPL_WITH_GENERATOR
7492DUMP_ALL_FN(SseCompareEflR64R64, g_aSseCompareEflR64R64)
7493static RTEXITCODE SseCompareEflR64R64Generate(uint32_t cTests, const char * const *papszNameFmts)
7494{
7495 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
7496
7497 static struct { RTFLOAT64U Val1, Val2; } const s_aSpecials[] =
7498 {
7499 { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
7500 { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) },
7501 { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(0) },
7502 { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) },
7503 { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(0) },
7504 { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(1) },
7505 { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(0) },
7506 { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(1) },
7507 /** @todo More specials. */
7508 };
7509
7510 uint32_t cMinNormalPairs = (cTests - 144) / 4;
7511 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR64R64); iFn++)
7512 {
7513 PFNIEMAIMPLF2EFLMXCSR128 const pfn = g_aSseCompareEflR64R64[iFn].pfnNative ? g_aSseCompareEflR64R64[iFn].pfnNative : g_aSseCompareEflR64R64[iFn].pfn;
7514
7515 IEMBINARYOUTPUT BinOut;
7516 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseCompareEflR64R64[iFn]), RTEXITCODE_FAILURE);
7517
7518 uint32_t cNormalInputPairs = 0;
7519 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
7520 {
7521 SSE_COMPARE_EFL_R64_R64_TEST_T TestData; RT_ZERO(TestData);
7522 X86XMMREG ValIn1; RT_ZERO(ValIn1);
7523 X86XMMREG ValIn2; RT_ZERO(ValIn2);
7524
7525 TestData.r64ValIn1 = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7526 TestData.r64ValIn2 = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7527
7528 ValIn1.ar64[0] = TestData.r64ValIn1;
7529 ValIn2.ar64[0] = TestData.r64ValIn2;
7530
7531 if ( RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn1)
7532 && RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn2))
7533 cNormalInputPairs++;
7534 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
7535 {
7536 iTest -= 1;
7537 continue;
7538 }
7539
7540 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
7541 uint32_t const fEFlags = RandEFlags();
7542 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
7543 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
7544 for (uint8_t iFz = 0; iFz < 2; iFz++)
7545 {
7546 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
7547 | (iRounding << X86_MXCSR_RC_SHIFT)
7548 | (iDaz ? X86_MXCSR_DAZ : 0)
7549 | (iFz ? X86_MXCSR_FZ : 0)
7550 | X86_MXCSR_XCPT_MASK;
7551 uint32_t fMxcsrM = fMxcsrIn;
7552 uint32_t fEFlagsM = fEFlags;
7553 pfn(&fMxcsrM, &fEFlagsM, &ValIn1, &ValIn2);
7554 TestData.fMxcsrIn = fMxcsrIn;
7555 TestData.fMxcsrOut = fMxcsrM;
7556 TestData.fEflIn = fEFlags;
7557 TestData.fEflOut = fEFlagsM;
7558 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7559
7560 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
7561 uint32_t fMxcsrU = fMxcsrIn;
7562 uint32_t fEFlagsU = fEFlags;
7563 pfn(&fMxcsrU, &fEFlagsU, &ValIn1, &ValIn2);
7564 TestData.fMxcsrIn = fMxcsrIn;
7565 TestData.fMxcsrOut = fMxcsrU;
7566 TestData.fEflIn = fEFlags;
7567 TestData.fEflOut = fEFlagsU;
7568 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7569
7570 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
7571 if (fXcpt)
7572 {
7573 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
7574 uint32_t fMxcsr1 = fMxcsrIn;
7575 uint32_t fEFlags1 = fEFlags;
7576 pfn(&fMxcsr1, &fEFlags1, &ValIn1, &ValIn2);
7577 TestData.fMxcsrIn = fMxcsrIn;
7578 TestData.fMxcsrOut = fMxcsr1;
7579 TestData.fEflIn = fEFlags;
7580 TestData.fEflOut = fEFlags1;
7581 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7582
7583 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
7584 {
7585 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
7586 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
7587 uint32_t fMxcsr2 = fMxcsrIn;
7588 uint32_t fEFlags2 = fEFlags;
7589 pfn(&fMxcsr2, &fEFlags2, &ValIn1, &ValIn2);
7590 TestData.fMxcsrIn = fMxcsrIn;
7591 TestData.fMxcsrOut = fMxcsr2;
7592 TestData.fEflIn = fEFlags;
7593 TestData.fEflOut = fEFlags2;
7594 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7595 }
7596 if (!RT_IS_POWER_OF_TWO(fXcpt))
7597 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
7598 if (fUnmasked & fXcpt)
7599 {
7600 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
7601 uint32_t fMxcsr3 = fMxcsrIn;
7602 uint32_t fEFlags3 = fEFlags;
7603 pfn(&fMxcsr3, &fEFlags3, &ValIn1, &ValIn2);
7604 TestData.fMxcsrIn = fMxcsrIn;
7605 TestData.fMxcsrOut = fMxcsr3;
7606 TestData.fEflIn = fEFlags;
7607 TestData.fEflOut = fEFlags3;
7608 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7609 }
7610 }
7611 }
7612 }
7613 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
7614 }
7615
7616 return RTEXITCODE_SUCCESS;
7617}
7618#endif
7619
7620static void SseCompareEflR64R64Test(void)
7621{
7622 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR64R64); iFn++)
7623 {
7624 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseCompareEflR64R64[iFn]))
7625 continue;
7626
7627 SSE_COMPARE_EFL_R64_R64_TEST_T const * const paTests = g_aSseCompareEflR64R64[iFn].paTests;
7628 uint32_t const cTests = g_aSseCompareEflR64R64[iFn].cTests;
7629 PFNIEMAIMPLF2EFLMXCSR128 pfn = g_aSseCompareEflR64R64[iFn].pfn;
7630 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareEflR64R64[iFn]);
7631 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7632 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7633 {
7634 for (uint32_t iTest = 0; iTest < cTests; iTest++)
7635 {
7636 X86XMMREG ValIn1; RT_ZERO(ValIn1);
7637 X86XMMREG ValIn2; RT_ZERO(ValIn2);
7638
7639 ValIn1.ar64[0] = paTests[iTest].r64ValIn1;
7640 ValIn2.ar64[0] = paTests[iTest].r64ValIn2;
7641 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
7642 uint32_t fEFlags = paTests[iTest].fEflIn;
7643 pfn(&fMxcsr, &fEFlags, &ValIn1, &ValIn2);
7644 if ( fMxcsr != paTests[iTest].fMxcsrOut
7645 || fEFlags != paTests[iTest].fEflOut)
7646 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x efl=%#08x in1=%s in2=%s\n"
7647 "%s -> mxcsr=%#08x %#08x\n"
7648 "%s expected %#08x %#08x%s (%s) (EFL: %s)\n",
7649 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn, paTests[iTest].fEflIn,
7650 FormatR64(&paTests[iTest].r64ValIn1), FormatR64(&paTests[iTest].r64ValIn2),
7651 iVar ? " " : "", fMxcsr, fEFlags,
7652 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].fEflOut,
7653 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7654 FormatMxcsr(paTests[iTest].fMxcsrIn),
7655 EFlagsDiff(fEFlags, paTests[iTest].fEflOut));
7656 }
7657 }
7658
7659 FREE_DECOMPRESSED_TESTS(g_aSseCompareEflR64R64[iFn]);
7660 }
7661}
7662
7663
7664/*
7665 * Compare SSE operations on packed and single single-precision floating point values - outputting a mask.
7666 */
7667/** Maximum immediate to try, to keep the test data size under control (at least a little bit). */
7668#define SSE_COMPARE_F2_XMM_IMM8_MAX 0x1f
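/* Note: plain SSE cmpps/cmpss only define the low three imm8 bits (predicates
 * 0..7), while the AVX vcmpps/vcmpss encodings extend the predicate field to
 * 0..0x1f, so capping the sweep at 0x1f still covers the full architectural
 * range. */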
7669
7670TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_F2_XMM_IMM8_T, SSE_COMPARE_F2_XMM_IMM8_TEST_T, PFNIEMAIMPLMXCSRF2XMMIMM8);
7671
7672static SSE_COMPARE_F2_XMM_IMM8_T g_aSseCompareF2XmmR32Imm8[] =
7673{
7674 ENTRY_BIN(cmpps_u128),
7675 ENTRY_BIN(cmpss_u128)
7676};
7677
7678#ifdef TSTIEMAIMPL_WITH_GENERATOR
7679DUMP_ALL_FN(SseCompareF2XmmR32Imm8, g_aSseCompareF2XmmR32Imm8)
7680static RTEXITCODE SseCompareF2XmmR32Imm8Generate(uint32_t cTests, const char * const *papszNameFmts)
7681{
7682 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
7683
7684 static struct { RTFLOAT32U Val1, Val2; } const s_aSpecials[] =
7685 {
7686 { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) },
7687 { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) },
7688 { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(0) },
7689 { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) },
7690 { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0) },
7691 { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(1) },
7692 { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(0) },
7693 { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1) },
7694 /** @todo More specials. */
7695 };
7696
7697 uint32_t cMinNormalPairs = (cTests - 144) / 4;
7698 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR32Imm8); iFn++)
7699 {
7700 PFNIEMAIMPLMXCSRF2XMMIMM8 const pfn = g_aSseCompareF2XmmR32Imm8[iFn].pfnNative ? g_aSseCompareF2XmmR32Imm8[iFn].pfnNative : g_aSseCompareF2XmmR32Imm8[iFn].pfn;
7701
7702 IEMBINARYOUTPUT BinOut;
7703 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseCompareF2XmmR32Imm8[iFn]), RTEXITCODE_FAILURE);
7704
7705 uint32_t cNormalInputPairs = 0;
7706 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
7707 {
7708 SSE_COMPARE_F2_XMM_IMM8_TEST_T TestData; RT_ZERO(TestData);
7709
7710 TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7711 TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7712 TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7713 TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7714
7715 TestData.InVal2.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7716 TestData.InVal2.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7717 TestData.InVal2.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7718 TestData.InVal2.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7719
7720 if ( RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0])
7721 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1])
7722 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2])
7723 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3])
7724 && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[0])
7725 && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[1])
7726 && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[2])
7727 && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[3]))
7728 cNormalInputPairs++;
7729 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
7730 {
7731 iTest -= 1;
7732 continue;
7733 }
7734
7735 IEMMEDIAF2XMMSRC Src;
7736 Src.uSrc1 = TestData.InVal1;
7737 Src.uSrc2 = TestData.InVal2;
7738 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
7739 for (uint8_t bImm = 0; bImm <= SSE_COMPARE_F2_XMM_IMM8_MAX; bImm++)
7740 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
7741 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
7742 for (uint8_t iFz = 0; iFz < 2; iFz++)
7743 {
7744 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
7745 | (iRounding << X86_MXCSR_RC_SHIFT)
7746 | (iDaz ? X86_MXCSR_DAZ : 0)
7747 | (iFz ? X86_MXCSR_FZ : 0)
7748 | X86_MXCSR_XCPT_MASK;
7749 uint32_t fMxcsrM = fMxcsrIn;
7750 X86XMMREG ResM;
7751 pfn(&fMxcsrM, &ResM, &Src, bImm);
7752 TestData.fMxcsrIn = fMxcsrIn;
7753 TestData.fMxcsrOut = fMxcsrM;
7754 TestData.bImm = bImm;
7755 TestData.OutVal = ResM;
7756 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7757
7758 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
7759 uint32_t fMxcsrU = fMxcsrIn;
7760 X86XMMREG ResU;
7761 pfn(&fMxcsrU, &ResU, &Src, bImm);
7762 TestData.fMxcsrIn = fMxcsrIn;
7763 TestData.fMxcsrOut = fMxcsrU;
7764 TestData.bImm = bImm;
7765 TestData.OutVal = ResU;
7766 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7767
7768 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
7769 if (fXcpt)
7770 {
7771 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
7772 uint32_t fMxcsr1 = fMxcsrIn;
7773 X86XMMREG Res1;
7774 pfn(&fMxcsr1, &Res1, &Src, bImm);
7775 TestData.fMxcsrIn = fMxcsrIn;
7776 TestData.fMxcsrOut = fMxcsr1;
7777 TestData.bImm = bImm;
7778 TestData.OutVal = Res1;
7779 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7780
7781 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
7782 {
7783 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
7784 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
7785 uint32_t fMxcsr2 = fMxcsrIn;
7786 X86XMMREG Res2;
7787 pfn(&fMxcsr2, &Res2, &Src, bImm);
7788 TestData.fMxcsrIn = fMxcsrIn;
7789 TestData.fMxcsrOut = fMxcsr2;
7790 TestData.bImm = bImm;
7791 TestData.OutVal = Res2;
7792 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7793 }
7794 if (!RT_IS_POWER_OF_TWO(fXcpt))
7795 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
7796 if (fUnmasked & fXcpt)
7797 {
7798 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
7799 uint32_t fMxcsr3 = fMxcsrIn;
7800 X86XMMREG Res3;
7801 pfn(&fMxcsr3, &Res3, &Src, bImm);
7802 TestData.fMxcsrIn = fMxcsrIn;
7803 TestData.fMxcsrOut = fMxcsr3;
7804 TestData.bImm = bImm;
7805 TestData.OutVal = Res3;
7806 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7807 }
7808 }
7809 }
7810 }
7811 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
7812 }
7813
7814 return RTEXITCODE_SUCCESS;
7815}
7816#endif
7817
7818static void SseCompareF2XmmR32Imm8Test(void)
7819{
7820 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR32Imm8); iFn++)
7821 {
7822 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseCompareF2XmmR32Imm8[iFn]))
7823 continue;
7824
7825 SSE_COMPARE_F2_XMM_IMM8_TEST_T const * const paTests = g_aSseCompareF2XmmR32Imm8[iFn].paTests;
7826 uint32_t const cTests = g_aSseCompareF2XmmR32Imm8[iFn].cTests;
7827 PFNIEMAIMPLMXCSRF2XMMIMM8 pfn = g_aSseCompareF2XmmR32Imm8[iFn].pfn;
7828 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareF2XmmR32Imm8[iFn]);
7829 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7830 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7831 {
7832 for (uint32_t iTest = 0; iTest < cTests; iTest++)
7833 {
7834 IEMMEDIAF2XMMSRC Src;
7835 X86XMMREG ValOut;
7836
7837 Src.uSrc1 = paTests[iTest].InVal1;
7838 Src.uSrc2 = paTests[iTest].InVal2;
7839 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
7840 pfn(&fMxcsr, &ValOut, &Src, paTests[iTest].bImm);
7841 if ( fMxcsr != paTests[iTest].fMxcsrOut
7842 || ValOut.au32[0] != paTests[iTest].OutVal.au32[0]
7843 || ValOut.au32[1] != paTests[iTest].OutVal.au32[1]
7844 || ValOut.au32[2] != paTests[iTest].OutVal.au32[2]
7845 || ValOut.au32[3] != paTests[iTest].OutVal.au32[3])
7846 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s'%s'%s'%s imm8=%x\n"
7847 "%s -> mxcsr=%#08x %RX32'%RX32'%RX32'%RX32\n"
7848 "%s expected %#08x %RX32'%RX32'%RX32'%RX32%s%s (%s)\n",
7849 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7850 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
7851 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
7852 FormatR32(&paTests[iTest].InVal2.ar32[0]), FormatR32(&paTests[iTest].InVal2.ar32[1]),
7853 FormatR32(&paTests[iTest].InVal2.ar32[2]), FormatR32(&paTests[iTest].InVal2.ar32[3]),
7854 paTests[iTest].bImm,
7855 iVar ? " " : "", fMxcsr, ValOut.au32[0], ValOut.au32[1], ValOut.au32[2], ValOut.au32[3],
7856 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7857 paTests[iTest].OutVal.au32[0], paTests[iTest].OutVal.au32[1],
7858 paTests[iTest].OutVal.au32[2], paTests[iTest].OutVal.au32[3],
7859 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7860 ( ValOut.au32[0] != paTests[iTest].OutVal.au32[0]
7861 || ValOut.au32[1] != paTests[iTest].OutVal.au32[1]
7862 || ValOut.au32[2] != paTests[iTest].OutVal.au32[2]
7863 || ValOut.au32[3] != paTests[iTest].OutVal.au32[3])
7864 ? " - val" : "",
7865 FormatMxcsr(paTests[iTest].fMxcsrIn));
7866 }
7867 }
7868
7869 FREE_DECOMPRESSED_TESTS(g_aSseCompareF2XmmR32Imm8[iFn]);
7870 }
7871}
7872
7873
7874/*
7875 * Compare SSE operations on packed and single double-precision floating point values - outputting a mask.
7876 */
7877static SSE_COMPARE_F2_XMM_IMM8_T g_aSseCompareF2XmmR64Imm8[] =
7878{
7879 ENTRY_BIN(cmppd_u128),
7880 ENTRY_BIN(cmpsd_u128)
7881};
7882
7883#ifdef TSTIEMAIMPL_WITH_GENERATOR
7884DUMP_ALL_FN(SseCompareF2XmmR64Imm8, g_aSseCompareF2XmmR64Imm8)
7885static RTEXITCODE SseCompareF2XmmR64Imm8Generate(uint32_t cTests, const char * const *papszNameFmts)
7886{
7887 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
7888
7889 static struct { RTFLOAT64U Val1, Val2; } const s_aSpecials[] =
7890 {
7891 { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
7892 { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) },
7893 { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(0) },
7894 { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) },
7895 { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(0) },
7896 { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(1) },
7897 { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(0) },
7898 { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(1) },
7899 /** @todo More specials. */
7900 };
7901
7902 uint32_t cMinNormalPairs = (cTests - 144) / 4;
7903 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR64Imm8); iFn++)
7904 {
7905 PFNIEMAIMPLMXCSRF2XMMIMM8 const pfn = g_aSseCompareF2XmmR64Imm8[iFn].pfnNative ? g_aSseCompareF2XmmR64Imm8[iFn].pfnNative : g_aSseCompareF2XmmR64Imm8[iFn].pfn;
7906
7907 IEMBINARYOUTPUT BinOut;
7908 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseCompareF2XmmR64Imm8[iFn]), RTEXITCODE_FAILURE);
7909
7910 uint32_t cNormalInputPairs = 0;
7911 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
7912 {
7913 SSE_COMPARE_F2_XMM_IMM8_TEST_T TestData; RT_ZERO(TestData);
7914
7915 TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7916 TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val1;
7917
7918 TestData.InVal2.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7919 TestData.InVal2.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val2;
7920
7921 if ( RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0])
7922 && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
7923 && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[0])
7924 && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[1]))
7925 cNormalInputPairs++;
7926 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
7927 {
7928 iTest -= 1;
7929 continue;
7930 }
7931
7932 IEMMEDIAF2XMMSRC Src;
7933 Src.uSrc1 = TestData.InVal1;
7934 Src.uSrc2 = TestData.InVal2;
7935 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
7936 for (uint8_t bImm = 0; bImm <= SSE_COMPARE_F2_XMM_IMM8_MAX; bImm++)
7937 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
7938 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
7939 for (uint8_t iFz = 0; iFz < 2; iFz++)
7940 {
7941 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
7942 | (iRounding << X86_MXCSR_RC_SHIFT)
7943 | (iDaz ? X86_MXCSR_DAZ : 0)
7944 | (iFz ? X86_MXCSR_FZ : 0)
7945 | X86_MXCSR_XCPT_MASK;
7946 uint32_t fMxcsrM = fMxcsrIn;
7947 X86XMMREG ResM;
7948 pfn(&fMxcsrM, &ResM, &Src, bImm);
7949 TestData.fMxcsrIn = fMxcsrIn;
7950 TestData.fMxcsrOut = fMxcsrM;
7951 TestData.bImm = bImm;
7952 TestData.OutVal = ResM;
7953 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7954
7955 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
7956 uint32_t fMxcsrU = fMxcsrIn;
7957 X86XMMREG ResU;
7958 pfn(&fMxcsrU, &ResU, &Src, bImm);
7959 TestData.fMxcsrIn = fMxcsrIn;
7960 TestData.fMxcsrOut = fMxcsrU;
7961 TestData.bImm = bImm;
7962 TestData.OutVal = ResU;
7963 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7964
7965 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
7966 if (fXcpt)
7967 {
7968 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
7969 uint32_t fMxcsr1 = fMxcsrIn;
7970 X86XMMREG Res1;
7971 pfn(&fMxcsr1, &Res1, &Src, bImm);
7972 TestData.fMxcsrIn = fMxcsrIn;
7973 TestData.fMxcsrOut = fMxcsr1;
7974 TestData.bImm = bImm;
7975 TestData.OutVal = Res1;
7976 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7977
7978 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
7979 {
7980 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
7981 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
7982 uint32_t fMxcsr2 = fMxcsrIn;
7983 X86XMMREG Res2;
7984 pfn(&fMxcsr2, &Res2, &Src, bImm);
7985 TestData.fMxcsrIn = fMxcsrIn;
7986 TestData.fMxcsrOut = fMxcsr2;
7987 TestData.bImm = bImm;
7988 TestData.OutVal = Res2;
7989 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
7990 }
7991 if (!RT_IS_POWER_OF_TWO(fXcpt))
7992 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
7993 if (fUnmasked & fXcpt)
7994 {
7995 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
7996 uint32_t fMxcsr3 = fMxcsrIn;
7997 X86XMMREG Res3;
7998 pfn(&fMxcsr3, &Res3, &Src, bImm);
7999 TestData.fMxcsrIn = fMxcsrIn;
8000 TestData.fMxcsrOut = fMxcsr3;
8001 TestData.bImm = bImm;
8002 TestData.OutVal = Res3;
8003 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8004 }
8005 }
8006 }
8007 }
8008 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
8009 }
8010
8011 return RTEXITCODE_SUCCESS;
8012}
8013#endif
8014
8015static void SseCompareF2XmmR64Imm8Test(void)
8016{
8017 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR64Imm8); iFn++)
8018 {
8019 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseCompareF2XmmR64Imm8[iFn]))
8020 continue;
8021
8022 SSE_COMPARE_F2_XMM_IMM8_TEST_T const * const paTests = g_aSseCompareF2XmmR64Imm8[iFn].paTests;
8023 uint32_t const cTests = g_aSseCompareF2XmmR64Imm8[iFn].cTests;
8024 PFNIEMAIMPLMXCSRF2XMMIMM8 pfn = g_aSseCompareF2XmmR64Imm8[iFn].pfn;
8025 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareF2XmmR64Imm8[iFn]);
8026 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8027 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8028 {
8029 for (uint32_t iTest = 0; iTest < cTests; iTest++)
8030 {
8031 IEMMEDIAF2XMMSRC Src;
8032 X86XMMREG ValOut;
8033
8034 Src.uSrc1 = paTests[iTest].InVal1;
8035 Src.uSrc2 = paTests[iTest].InVal2;
8036 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
8037 pfn(&fMxcsr, &ValOut, &Src, paTests[iTest].bImm);
8038 if ( fMxcsr != paTests[iTest].fMxcsrOut
8039 || ValOut.au64[0] != paTests[iTest].OutVal.au64[0]
8040 || ValOut.au64[1] != paTests[iTest].OutVal.au64[1])
8041 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s'%s imm8=%x\n"
8042 "%s -> mxcsr=%#08x %RX64'%RX64\n"
8043 "%s expected %#08x %RX64'%RX64%s%s (%s)\n",
8044 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8045 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
8046 FormatR64(&paTests[iTest].InVal2.ar64[0]), FormatR64(&paTests[iTest].InVal2.ar64[1]),
8047 paTests[iTest].bImm,
8048 iVar ? " " : "", fMxcsr, ValOut.au64[0], ValOut.au64[1],
8049 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8050 paTests[iTest].OutVal.au64[0], paTests[iTest].OutVal.au64[1],
8051 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8052 ( ValOut.au64[0] != paTests[iTest].OutVal.au64[0]
8053 || ValOut.au64[1] != paTests[iTest].OutVal.au64[1])
8054 ? " - val" : "",
8055 FormatMxcsr(paTests[iTest].fMxcsrIn));
8056 }
8057 }
8058
8059 FREE_DECOMPRESSED_TESTS(g_aSseCompareF2XmmR64Imm8[iFn]);
8060 }
8061}
8062
8063
8064/*
8065 * SSE operations converting signed double-words to single-precision floating point values (cvtdq2ps).
8066 */
8067TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_T, SSE_CONVERT_XMM_TEST_T, PFNIEMAIMPLFPSSEF2U128);
8068
8069static SSE_CONVERT_XMM_T g_aSseConvertXmmI32R32[] =
8070{
8071 ENTRY_BIN(cvtdq2ps_u128)
8072};
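/*
 * Note: cvtdq2ps rounds each 32-bit integer according to MXCSR.RC; since a
 * single-precision significand only holds 24 bits, inputs with more than 24
 * significant bits are inexact and should set PE - the only SIMD FP exception
 * this instruction can raise.
 */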
8073
8074#ifdef TSTIEMAIMPL_WITH_GENERATOR
8075DUMP_ALL_FN(SseConvertXmmI32R32, g_aSseConvertXmmI32R32)
8076static RTEXITCODE SseConvertXmmI32R32Generate(uint32_t cTests, const char * const *papszNameFmts)
8077{
8078 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8079
8080 static int32_t const s_aSpecials[] =
8081 {
8082 INT32_MIN,
8083 INT32_MIN / 2,
8084 0,
8085 INT32_MAX / 2,
8086 INT32_MAX,
8087 (int32_t)0x80000000
8088 /** @todo More specials. */
8089 };
8090
8091 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R32); iFn++)
8092 {
8093 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmI32R32[iFn].pfnNative ? g_aSseConvertXmmI32R32[iFn].pfnNative : g_aSseConvertXmmI32R32[iFn].pfn;
8094
8095 IEMBINARYOUTPUT BinOut;
8096 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertXmmI32R32[iFn]), RTEXITCODE_FAILURE);
8097
8098 X86FXSTATE State;
8099 RT_ZERO(State);
8100 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8101 {
8102 SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);
8103
8104 TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
8105 TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
8106 TestData.InVal.ai32[2] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
8107 TestData.InVal.ai32[3] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
8108
8109 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
8110 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
8111 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
8112 for (uint8_t iFz = 0; iFz < 2; iFz++)
8113 {
8114 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
8115 | (iRounding << X86_MXCSR_RC_SHIFT)
8116 | (iDaz ? X86_MXCSR_DAZ : 0)
8117 | (iFz ? X86_MXCSR_FZ : 0)
8118 | X86_MXCSR_XCPT_MASK;
8119 IEMSSERESULT ResM; RT_ZERO(ResM);
8120 pfn(&State, &ResM, &ResM.uResult, &TestData.InVal);
8121 TestData.fMxcsrIn = State.MXCSR;
8122 TestData.fMxcsrOut = ResM.MXCSR;
8123 TestData.OutVal = ResM.uResult;
8124 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8125
8126 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
8127 IEMSSERESULT ResU; RT_ZERO(ResU);
8128 pfn(&State, &ResU, &ResU.uResult, &TestData.InVal);
8129 TestData.fMxcsrIn = State.MXCSR;
8130 TestData.fMxcsrOut = ResU.MXCSR;
8131 TestData.OutVal = ResU.uResult;
8132 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8133
8134 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
8135 if (fXcpt)
8136 {
8137 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8138 IEMSSERESULT Res1; RT_ZERO(Res1);
8139 pfn(&State, &Res1, &Res1.uResult, &TestData.InVal);
8140 TestData.fMxcsrIn = State.MXCSR;
8141 TestData.fMxcsrOut = Res1.MXCSR;
8142 TestData.OutVal = Res1.uResult;
8143 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8144
8145 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
8146 {
8147 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
8148 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8149 IEMSSERESULT Res2; RT_ZERO(Res2);
8150 pfn(&State, &Res2, &Res2.uResult, &TestData.InVal);
8151 TestData.fMxcsrIn = State.MXCSR;
8152 TestData.fMxcsrOut = Res2.MXCSR;
8153 TestData.OutVal = Res2.uResult;
8154 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8155 }
8156 if (!RT_IS_POWER_OF_TWO(fXcpt))
8157 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
8158 if (fUnmasked & fXcpt)
8159 {
8160 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
8161 IEMSSERESULT Res3; RT_ZERO(Res3);
8162 pfn(&State, &Res3, &Res3.uResult, &TestData.InVal);
8163 TestData.fMxcsrIn = State.MXCSR;
8164 TestData.fMxcsrOut = Res3.MXCSR;
8165 TestData.OutVal = Res3.uResult;
8166 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8167 }
8168 }
8169 }
8170 }
8171 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
8172 }
8173
8174 return RTEXITCODE_SUCCESS;
8175}
8176#endif
8177
8178static void SseConvertXmmI32R32Test(void)
8179{
8180 X86FXSTATE State;
8181 RT_ZERO(State);
8182
8183 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R32); iFn++)
8184 {
8185 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmI32R32[iFn]))
8186 continue;
8187
8188 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmI32R32[iFn].paTests;
8189 uint32_t const cTests = g_aSseConvertXmmI32R32[iFn].cTests;
8190 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmI32R32[iFn].pfn;
8191 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmI32R32[iFn]);
8192 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8193 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8194 {
8195 for (uint32_t iTest = 0; iTest < cTests; iTest++)
8196 {
8197 IEMSSERESULT Res; RT_ZERO(Res);
8198
8199 State.MXCSR = paTests[iTest].fMxcsrIn;
8200 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
8201 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
8202 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
8203 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
8204 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
8205 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]))
8206 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32'%RI32'%RI32 \n"
8207 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
8208 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
8209 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8210 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
8211 paTests[iTest].InVal.ai32[2], paTests[iTest].InVal.ai32[3],
8212 iVar ? " " : "", Res.MXCSR,
8213 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
8214 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
8215 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8216 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
8217 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
8218 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
8219 ( !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
8220 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
8221 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
8222 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]))
8223 ? " - val" : "",
8224 FormatMxcsr(paTests[iTest].fMxcsrIn));
8225 }
8226 }
8227
8228 FREE_DECOMPRESSED_TESTS(g_aSseConvertXmmI32R32[iFn]);
8229 }
8230}
8231
8232
8233/*
8234 * SSE operations converting single-precision floating point values to signed double-words (cvtps2dq, cvttps2dq).
8235 */
8236static SSE_CONVERT_XMM_T g_aSseConvertXmmR32I32[] =
8237{
8238 ENTRY_BIN(cvtps2dq_u128),
8239 ENTRY_BIN(cvttps2dq_u128)
8240};
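/*
 * Note: cvtps2dq rounds according to MXCSR.RC while cvttps2dq always truncates;
 * for NaN inputs or results outside the int32 range both raise IE and return
 * the integer indefinite value 0x80000000, which the infinity specials above
 * are meant to provoke.
 */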
8241
8242#ifdef TSTIEMAIMPL_WITH_GENERATOR
8243DUMP_ALL_FN(SseConvertXmmR32I32, g_aSseConvertXmmR32I32)
8244static RTEXITCODE SseConvertXmmR32I32Generate(uint32_t cTests, const char * const *papszNameFmts)
8245{
8246 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8247
8248 static struct { RTFLOAT32U aVal1[4]; } const s_aSpecials[] =
8249 {
8250 { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) } },
8251 { { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) } },
8252 { { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0) } },
8253 { { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1) } }
8254 /** @todo More specials. */
8255 };
8256
8257 X86FXSTATE State;
8258 RT_ZERO(State);
8259 uint32_t cMinNormalPairs = (cTests - 144) / 4;
8260 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32I32); iFn++)
8261 {
8262 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmR32I32[iFn].pfnNative ? g_aSseConvertXmmR32I32[iFn].pfnNative : g_aSseConvertXmmR32I32[iFn].pfn;
8263
8264 IEMBINARYOUTPUT BinOut;
8265 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertXmmR32I32[iFn]), RTEXITCODE_FAILURE);
8266
8267 uint32_t cNormalInputPairs = 0;
8268 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8269 {
8270 SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);
8271
8272 TestData.InVal.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
8273 TestData.InVal.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
8274 TestData.InVal.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
8275 TestData.InVal.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];
8276
8277 if ( RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[0])
8278 && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[1])
8279 && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[2])
8280 && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[3]))
8281 cNormalInputPairs++;
8282 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
8283 {
8284 iTest -= 1;
8285 continue;
8286 }
8287
8288 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
8289 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
8290 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
8291 for (uint8_t iFz = 0; iFz < 2; iFz++)
8292 {
8293 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
8294 | (iRounding << X86_MXCSR_RC_SHIFT)
8295 | (iDaz ? X86_MXCSR_DAZ : 0)
8296 | (iFz ? X86_MXCSR_FZ : 0)
8297 | X86_MXCSR_XCPT_MASK;
8298 IEMSSERESULT ResM; RT_ZERO(ResM);
8299 pfn(&State, &ResM, &ResM.uResult, &TestData.InVal);
8300 TestData.fMxcsrIn = State.MXCSR;
8301 TestData.fMxcsrOut = ResM.MXCSR;
8302 TestData.OutVal = ResM.uResult;
8303 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8304
8305 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
8306 IEMSSERESULT ResU; RT_ZERO(ResU);
8307 pfn(&State, &ResU, &ResU.uResult, &TestData.InVal);
8308 TestData.fMxcsrIn = State.MXCSR;
8309 TestData.fMxcsrOut = ResU.MXCSR;
8310 TestData.OutVal = ResU.uResult;
8311 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8312
8313 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
8314 if (fXcpt)
8315 {
8316 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8317 IEMSSERESULT Res1; RT_ZERO(Res1);
8318 pfn(&State, &Res1, &Res1.uResult, &TestData.InVal);
8319 TestData.fMxcsrIn = State.MXCSR;
8320 TestData.fMxcsrOut = Res1.MXCSR;
8321 TestData.OutVal = Res1.uResult;
8322 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8323
8324 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
8325 {
8326 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
8327 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8328 IEMSSERESULT Res2; RT_ZERO(Res2);
8329 pfn(&State, &Res2, &Res2.uResult, &TestData.InVal);
8330 TestData.fMxcsrIn = State.MXCSR;
8331 TestData.fMxcsrOut = Res2.MXCSR;
8332 TestData.OutVal = Res2.uResult;
8333 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8334 }
8335 if (!RT_IS_POWER_OF_TWO(fXcpt))
8336 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
8337 if (fUnmasked & fXcpt)
8338 {
8339 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
8340 IEMSSERESULT Res3; RT_ZERO(Res3);
8341 pfn(&State, &Res3, &Res3.uResult, &TestData.InVal);
8342 TestData.fMxcsrIn = State.MXCSR;
8343 TestData.fMxcsrOut = Res3.MXCSR;
8344 TestData.OutVal = Res3.uResult;
8345 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8346 }
8347 }
8348 }
8349 }
8350 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
8351 }
8352
8353 return RTEXITCODE_SUCCESS;
8354}
8355#endif
8356
8357static void SseConvertXmmR32I32Test(void)
8358{
8359 X86FXSTATE State;
8360 RT_ZERO(State);
8361
8362 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32I32); iFn++)
8363 {
8364 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmR32I32[iFn]))
8365 continue;
8366
8367 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmR32I32[iFn].paTests;
8368 uint32_t const cTests = g_aSseConvertXmmR32I32[iFn].cTests;
8369 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmR32I32[iFn].pfn;
8370 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR32I32[iFn]);
8371 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8372 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8373 {
8374 for (uint32_t iTest = 0; iTest < cTests; iTest++)
8375 {
8376 IEMSSERESULT Res; RT_ZERO(Res);
8377
8378 State.MXCSR = paTests[iTest].fMxcsrIn;
8379 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
8380 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
8381 || Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
8382 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
8383 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
8384 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
8385 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s \n"
8386 "%s -> mxcsr=%#08x %RI32'%RI32'%RI32'%RI32\n"
8387 "%s expected %#08x %RI32'%RI32'%RI32'%RI32%s%s (%s)\n",
8388 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8389 FormatR32(&paTests[iTest].InVal.ar32[0]), FormatR32(&paTests[iTest].InVal.ar32[1]),
8390 FormatR32(&paTests[iTest].InVal.ar32[2]), FormatR32(&paTests[iTest].InVal.ar32[3]),
8391 iVar ? " " : "", Res.MXCSR,
8392 Res.uResult.ai32[0], Res.uResult.ai32[1],
8393 Res.uResult.ai32[2], Res.uResult.ai32[3],
8394 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8395 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8396 paTests[iTest].OutVal.ai32[2], paTests[iTest].OutVal.ai32[3],
8397 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
8398 ( Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
8399 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
8400 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
8401 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
8402 ? " - val" : "",
8403 FormatMxcsr(paTests[iTest].fMxcsrIn));
8404 }
8405 }
8406
8407 FREE_DECOMPRESSED_TESTS(g_aSseConvertXmmR32I32[iFn]);
8408 }
8409}
8410
8411
8412/*
8413 * SSE operations converting signed double-words to double-precision floating point values (cvtdq2pd).
8414 */
8415static SSE_CONVERT_XMM_T g_aSseConvertXmmI32R64[] =
8416{
8417 ENTRY_BIN(cvtdq2pd_u128)
8418};
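/*
 * Note: cvtdq2pd can neither round nor fault since every 32-bit integer is
 * exactly representable as a double, so the interesting bit here is merely
 * that MXCSR comes back unchanged for all rounding/DAZ/FZ combinations.
 */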
8419
8420#ifdef TSTIEMAIMPL_WITH_GENERATOR
8421DUMP_ALL_FN(SseConvertXmmI32R64, g_aSseConvertXmmI32R64)
8422static RTEXITCODE SseConvertXmmI32R64Generate(uint32_t cTests, const char * const *papszNameFmts)
8423{
8424 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8425
8426 static int32_t const s_aSpecials[] =
8427 {
8428 INT32_MIN,
8429 INT32_MIN / 2,
8430 0,
8431 INT32_MAX / 2,
8432 INT32_MAX,
8433 (int32_t)0x80000000
8434 /** @todo More specials. */
8435 };
8436
8437 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R64); iFn++)
8438 {
8439 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmI32R64[iFn].pfnNative ? g_aSseConvertXmmI32R64[iFn].pfnNative : g_aSseConvertXmmI32R64[iFn].pfn;
8440
8441 IEMBINARYOUTPUT BinOut;
8442 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertXmmI32R64[iFn]), RTEXITCODE_FAILURE);
8443
8444 X86FXSTATE State;
8445 RT_ZERO(State);
8446 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8447 {
8448 SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);
8449
8450 TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
8451 TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
8452 TestData.InVal.ai32[2] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
8453 TestData.InVal.ai32[3] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
8454
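        /*
         * Exercise each input under all rounding mode / DAZ / FZ combinations: first with
         * all exceptions masked, then with all of them unmasked, and, whenever exception
         * flags were raised, once more with the accumulated status flags pre-set and once
         * per raised exception left unmasked on its own.  Every invocation is written out
         * as a separate test record.
         */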
8455 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
8456 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
8457 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
8458 for (uint8_t iFz = 0; iFz < 2; iFz++)
8459 {
8460 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
8461 | (iRounding << X86_MXCSR_RC_SHIFT)
8462 | (iDaz ? X86_MXCSR_DAZ : 0)
8463 | (iFz ? X86_MXCSR_FZ : 0)
8464 | X86_MXCSR_XCPT_MASK;
8465 IEMSSERESULT ResM; RT_ZERO(ResM);
8466 pfn(&State, &ResM, &ResM.uResult, &TestData.InVal);
8467 TestData.fMxcsrIn = State.MXCSR;
8468 TestData.fMxcsrOut = ResM.MXCSR;
8469 TestData.OutVal = ResM.uResult;
8470 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8471
8472 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
8473 IEMSSERESULT ResU; RT_ZERO(ResU);
8474 pfn(&State, &ResU, &ResU.uResult, &TestData.InVal);
8475 TestData.fMxcsrIn = State.MXCSR;
8476 TestData.fMxcsrOut = ResU.MXCSR;
8477 TestData.OutVal = ResU.uResult;
8478 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8479
8480 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
8481 if (fXcpt)
8482 {
8483 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8484 IEMSSERESULT Res1; RT_ZERO(Res1);
8485 pfn(&State, &Res1, &Res1.uResult, &TestData.InVal);
8486 TestData.fMxcsrIn = State.MXCSR;
8487 TestData.fMxcsrOut = Res1.MXCSR;
8488 TestData.OutVal = Res1.uResult;
8489 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8490
8491 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
8492 {
8493 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
8494 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8495 IEMSSERESULT Res2; RT_ZERO(Res2);
8496 pfn(&State, &Res2, &Res2.uResult, &TestData.InVal);
8497 TestData.fMxcsrIn = State.MXCSR;
8498 TestData.fMxcsrOut = Res2.MXCSR;
8499 TestData.OutVal = Res2.uResult;
8500 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8501 }
8502 if (!RT_IS_POWER_OF_TWO(fXcpt))
8503 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
8504 if (fUnmasked & fXcpt)
8505 {
8506 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
8507 IEMSSERESULT Res3; RT_ZERO(Res3);
8508 pfn(&State, &Res3, &Res3.uResult, &TestData.InVal);
8509 TestData.fMxcsrIn = State.MXCSR;
8510 TestData.fMxcsrOut = Res3.MXCSR;
8511 TestData.OutVal = Res3.uResult;
8512 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8513 }
8514 }
8515 }
8516 }
8517 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
8518 }
8519
8520 return RTEXITCODE_SUCCESS;
8521}
8522#endif
8523
8524static void SseConvertXmmI32R64Test(void)
8525{
8526 X86FXSTATE State;
8527 RT_ZERO(State);
8528
8529 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R64); iFn++)
8530 {
8531 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmI32R64[iFn]))
8532 continue;
8533
8534 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmI32R64[iFn].paTests;
8535 uint32_t const cTests = g_aSseConvertXmmI32R64[iFn].cTests;
8536 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmI32R64[iFn].pfn;
8537 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmI32R64[iFn]);
8538 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8539 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8540 {
8541 for (uint32_t iTest = 0; iTest < cTests; iTest++)
8542 {
8543 IEMSSERESULT Res; RT_ZERO(Res);
8544
8545 State.MXCSR = paTests[iTest].fMxcsrIn;
8546 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
8547 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
8548 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
8549 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
8550 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32'%RI32'%RI32 \n"
8551 "%s -> mxcsr=%#08x %s'%s\n"
8552 "%s expected %#08x %s'%s%s%s (%s)\n",
8553 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8554 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
8555 paTests[iTest].InVal.ai32[2], paTests[iTest].InVal.ai32[3],
8556 iVar ? " " : "", Res.MXCSR,
8557 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
8558 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8559 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
8560 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
8561 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
8562 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
8563 ? " - val" : "",
8564 FormatMxcsr(paTests[iTest].fMxcsrIn));
8565 }
8566 }
8567
8568 FREE_DECOMPRESSED_TESTS(g_aSseConvertXmmI32R64[iFn]);
8569 }
8570}
8571
8572
8573/*
8574 * Convert SSE operations converting double-precision floating point values to signed double-word values.
8575 */
8576static SSE_CONVERT_XMM_T g_aSseConvertXmmR64I32[] =
8577{
8578 ENTRY_BIN(cvtpd2dq_u128),
8579 ENTRY_BIN(cvttpd2dq_u128)
8580};
8581
8582#ifdef TSTIEMAIMPL_WITH_GENERATOR
8583DUMP_ALL_FN(SseConvertXmmR64I32, g_aSseConvertXmmR64I32)
8584static RTEXITCODE SseConvertXmmR64I32Generate(uint32_t cTests, const char * const *papszNameFmts)
8585{
8586 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8587
8588 static struct { RTFLOAT64U aVal1[2]; } const s_aSpecials[] =
8589 {
8590 { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
8591 { { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) } },
8592 { { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(0) } },
8593 { { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(1) } }
8594 /** @todo More specials. */
8595 };
8596
8597 X86FXSTATE State;
8598 RT_ZERO(State);
8599 uint32_t cMinNormalPairs = (cTests - 144) / 4;
8600 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64I32); iFn++)
8601 {
8602 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmR64I32[iFn].pfnNative ? g_aSseConvertXmmR64I32[iFn].pfnNative : g_aSseConvertXmmR64I32[iFn].pfn;
8603
8604 IEMBINARYOUTPUT BinOut;
8605 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertXmmR64I32[iFn]), RTEXITCODE_FAILURE);
8606
8607 uint32_t cNormalInputPairs = 0;
8608 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8609 {
8610 SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);
8611
8612 TestData.InVal.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
8613 TestData.InVal.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
8614
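        /* Guarantee a minimum share of all-normal input pairs (cMinNormalPairs): when we are
           running short towards the end of the random range, discard the non-normal input and
           redo the iteration. */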
8615 if ( RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[0])
8616 && RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[1]))
8617 cNormalInputPairs++;
8618 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
8619 {
8620 iTest -= 1;
8621 continue;
8622 }
8623
8624 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
8625 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
8626 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
8627 for (uint8_t iFz = 0; iFz < 2; iFz++)
8628 {
8629 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
8630 | (iRounding << X86_MXCSR_RC_SHIFT)
8631 | (iDaz ? X86_MXCSR_DAZ : 0)
8632 | (iFz ? X86_MXCSR_FZ : 0)
8633 | X86_MXCSR_XCPT_MASK;
8634 IEMSSERESULT ResM; RT_ZERO(ResM);
8635 pfn(&State, &ResM, &ResM.uResult, &TestData.InVal);
8636 TestData.fMxcsrIn = State.MXCSR;
8637 TestData.fMxcsrOut = ResM.MXCSR;
8638 TestData.OutVal = ResM.uResult;
8639 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8640
8641 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
8642 IEMSSERESULT ResU; RT_ZERO(ResU);
8643 pfn(&State, &ResU, &ResU.uResult, &TestData.InVal);
8644 TestData.fMxcsrIn = State.MXCSR;
8645 TestData.fMxcsrOut = ResU.MXCSR;
8646 TestData.OutVal = ResU.uResult;
8647 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8648
8649 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
8650 if (fXcpt)
8651 {
8652 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8653 IEMSSERESULT Res1; RT_ZERO(Res1);
8654 pfn(&State, &Res1, &Res1.uResult, &TestData.InVal);
8655 TestData.fMxcsrIn = State.MXCSR;
8656 TestData.fMxcsrOut = Res1.MXCSR;
8657 TestData.OutVal = Res1.uResult;
8658 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8659
8660 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
8661 {
8662 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
8663 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8664 IEMSSERESULT Res2; RT_ZERO(Res2);
8665 pfn(&State, &Res2, &Res2.uResult, &TestData.InVal);
8666 TestData.fMxcsrIn = State.MXCSR;
8667 TestData.fMxcsrOut = Res2.MXCSR;
8668 TestData.OutVal = Res2.uResult;
8669 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8670 }
8671 if (!RT_IS_POWER_OF_TWO(fXcpt))
8672 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
8673 if (fUnmasked & fXcpt)
8674 {
8675 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
8676 IEMSSERESULT Res3; RT_ZERO(Res3);
8677 pfn(&State, &Res3, &Res3.uResult, &TestData.InVal);
8678 TestData.fMxcsrIn = State.MXCSR;
8679 TestData.fMxcsrOut = Res3.MXCSR;
8680 TestData.OutVal = Res3.uResult;
8681 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8682 }
8683 }
8684 }
8685 }
8686 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
8687 }
8688
8689 return RTEXITCODE_SUCCESS;
8690}
8691#endif
8692
8693static void SseConvertXmmR64I32Test(void)
8694{
8695 X86FXSTATE State;
8696 RT_ZERO(State);
8697
8698 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64I32); iFn++)
8699 {
8700 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmR64I32[iFn]))
8701 continue;
8702
8703 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmR64I32[iFn].paTests;
8704 uint32_t const cTests = g_aSseConvertXmmR64I32[iFn].cTests;
8705 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmR64I32[iFn].pfn;
8706 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR64I32[iFn]);
8707 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8708 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8709 {
8710 for (uint32_t iTest = 0; iTest < cTests; iTest++)
8711 {
8712 IEMSSERESULT Res; RT_ZERO(Res);
8713
8714 State.MXCSR = paTests[iTest].fMxcsrIn;
8715 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
8716 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
8717 || Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
8718 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
8719 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
8720 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
8721 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s \n"
8722 "%s -> mxcsr=%#08x %RI32'%RI32'%RI32'%RI32\n"
8723 "%s expected %#08x %RI32'%RI32'%RI32'%RI32%s%s (%s)\n",
8724 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8725 FormatR64(&paTests[iTest].InVal.ar64[0]), FormatR64(&paTests[iTest].InVal.ar64[1]),
8726 iVar ? " " : "", Res.MXCSR,
8727 Res.uResult.ai32[0], Res.uResult.ai32[1],
8728 Res.uResult.ai32[2], Res.uResult.ai32[3],
8729 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8730 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8731 paTests[iTest].OutVal.ai32[2], paTests[iTest].OutVal.ai32[3],
8732 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
8733 ( Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
8734 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
8735 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
8736 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
8737 ? " - val" : "",
8738 FormatMxcsr(paTests[iTest].fMxcsrIn));
8739 }
8740 }
8741
8742 FREE_DECOMPRESSED_TESTS(g_aSseConvertXmmR64I32[iFn]);
8743 }
8744}
8745
8746
8747/*
8748 * Convert SSE operations converting double-precision floating point values to signed double-word values.
8749 */
8750TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_MM_XMM_T, SSE_CONVERT_MM_XMM_TEST_T, PFNIEMAIMPLMXCSRU64U128);
8751
8752static SSE_CONVERT_MM_XMM_T g_aSseConvertMmXmm[] =
8753{
8754 ENTRY_BIN(cvtpd2pi_u128),
8755 ENTRY_BIN(cvttpd2pi_u128)
8756};
8757
8758#ifdef TSTIEMAIMPL_WITH_GENERATOR
8759DUMP_ALL_FN(SseConvertMmXmm, g_aSseConvertMmXmm)
8760static RTEXITCODE SseConvertMmXmmGenerate(uint32_t cTests, const char * const *papszNameFmts)
8761{
8762 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8763
8764 static struct { RTFLOAT64U aVal1[2]; } const s_aSpecials[] =
8765 {
8766 { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
8767 { { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) } },
8768 { { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(0) } },
8769 { { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(1) } }
8770 /** @todo More specials. */
8771 };
8772
8773 uint32_t cMinNormalPairs = (cTests - 144) / 4;
8774 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmXmm); iFn++)
8775 {
8776 PFNIEMAIMPLMXCSRU64U128 const pfn = g_aSseConvertMmXmm[iFn].pfnNative ? g_aSseConvertMmXmm[iFn].pfnNative : g_aSseConvertMmXmm[iFn].pfn;
8777
8778 IEMBINARYOUTPUT BinOut;
8779 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertMmXmm[iFn]), RTEXITCODE_FAILURE);
8780
8781 uint32_t cNormalInputPairs = 0;
8782 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8783 {
8784 SSE_CONVERT_MM_XMM_TEST_T TestData; RT_ZERO(TestData);
8785
8786 TestData.InVal.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
8787 TestData.InVal.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
8788
8789 if ( RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[0])
8790 && RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[1]))
8791 cNormalInputPairs++;
8792 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
8793 {
8794 iTest -= 1;
8795 continue;
8796 }
8797
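        /* These MM <- XMM workers take the MXCSR by pointer and return the two packed
           double-words as a plain uint64_t, so no X86FXSTATE/IEMSSERESULT is needed here;
           otherwise the exception probing below mirrors the XMM conversion generators above. */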
8798 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
8799 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
8800 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
8801 for (uint8_t iFz = 0; iFz < 2; iFz++)
8802 {
8803 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
8804 | (iRounding << X86_MXCSR_RC_SHIFT)
8805 | (iDaz ? X86_MXCSR_DAZ : 0)
8806 | (iFz ? X86_MXCSR_FZ : 0)
8807 | X86_MXCSR_XCPT_MASK;
8808 uint32_t fMxcsrM = fMxcsrIn;
8809 uint64_t u64ResM;
8810 pfn(&fMxcsrM, &u64ResM, &TestData.InVal);
8811 TestData.fMxcsrIn = fMxcsrIn;
8812 TestData.fMxcsrOut = fMxcsrM;
8813 TestData.OutVal.u = u64ResM;
8814 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8815
8816 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
8817 uint32_t fMxcsrU = fMxcsrIn;
8818 uint64_t u64ResU;
8819 pfn(&fMxcsrU, &u64ResU, &TestData.InVal);
8820 TestData.fMxcsrIn = fMxcsrIn;
8821 TestData.fMxcsrOut = fMxcsrU;
8822 TestData.OutVal.u = u64ResU;
8823 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8824
8825 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
8826 if (fXcpt)
8827 {
8828 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8829 uint32_t fMxcsr1 = fMxcsrIn;
8830 uint64_t u64Res1;
8831 pfn(&fMxcsr1, &u64Res1, &TestData.InVal);
8832 TestData.fMxcsrIn = fMxcsrIn;
8833 TestData.fMxcsrOut = fMxcsr1;
8834 TestData.OutVal.u = u64Res1;
8835 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8836
8837 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
8838 {
8839 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
8840 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8841 uint32_t fMxcsr2 = fMxcsrIn;
8842 uint64_t u64Res2;
8843 pfn(&fMxcsr2, &u64Res2, &TestData.InVal);
8844 TestData.fMxcsrIn = fMxcsrIn;
8845 TestData.fMxcsrOut = fMxcsr2;
8846 TestData.OutVal.u = u64Res2;
8847 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8848 }
8849 if (!RT_IS_POWER_OF_TWO(fXcpt))
8850 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
8851 if (fUnmasked & fXcpt)
8852 {
8853 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
8854 uint32_t fMxcsr3 = fMxcsrIn;
8855 uint64_t u64Res3;
8856 pfn(&fMxcsr3, &u64Res3, &TestData.InVal);
8857 TestData.fMxcsrIn = fMxcsrIn;
8858 TestData.fMxcsrOut = fMxcsr3;
8859 TestData.OutVal.u = u64Res3;
8860 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8861 }
8862 }
8863 }
8864 }
8865 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
8866 }
8867
8868 return RTEXITCODE_SUCCESS;
8869}
8870#endif
8871
8872static void SseConvertMmXmmTest(void)
8873{
8874 X86FXSTATE State;
8875 RT_ZERO(State);
8876
8877 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmXmm); iFn++)
8878 {
8879 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertMmXmm[iFn]))
8880 continue;
8881
8882 SSE_CONVERT_MM_XMM_TEST_T const * const paTests = g_aSseConvertMmXmm[iFn].paTests;
8883 uint32_t const cTests = g_aSseConvertMmXmm[iFn].cTests;
8884 PFNIEMAIMPLMXCSRU64U128 pfn = g_aSseConvertMmXmm[iFn].pfn;
8885 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertMmXmm[iFn]);
8886 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8887 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8888 {
8889 for (uint32_t iTest = 0; iTest < cTests; iTest++)
8890 {
8891 RTUINT64U ValOut;
8892 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
8893 pfn(&fMxcsr, &ValOut.u, &paTests[iTest].InVal);
8894 if ( fMxcsr != paTests[iTest].fMxcsrOut
8895 || ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8896 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8897 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s\n"
8898 "%s -> mxcsr=%#08x %RI32'%RI32\n"
8899 "%s expected %#08x %RI32'%RI32%s%s (%s)\n",
8900 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8901 FormatR64(&paTests[iTest].InVal.ar64[0]), FormatR64(&paTests[iTest].InVal.ar64[1]),
8902 iVar ? " " : "", fMxcsr, ValOut.ai32[0], ValOut.ai32[1],
8903 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8904 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8905 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8906 ( ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8907 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8908 ? " - val" : "",
8909 FormatMxcsr(paTests[iTest].fMxcsrIn));
8910 }
8911 }
8912
8913 FREE_DECOMPRESSED_TESTS(g_aSseConvertMmXmm[iFn]);
8914 }
8915}
8916
8917
8918/*
8919 * Convert SSE operations converting signed double-word values to double precision floating-point values (probably only cvtpi2pd).
8920 */
8921TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_R64_MM_T, SSE_CONVERT_XMM_MM_TEST_T, PFNIEMAIMPLMXCSRU128U64);
8922
8923static SSE_CONVERT_XMM_R64_MM_T g_aSseConvertXmmR64Mm[] =
8924{
8925 ENTRY_BIN(cvtpi2pd_u128)
8926};
8927
8928#ifdef TSTIEMAIMPL_WITH_GENERATOR
8929DUMP_ALL_FN(SseConvertXmmR64Mm, g_aSseConvertXmmR64Mm)
8930static RTEXITCODE SseConvertXmmR64MmGenerate(uint32_t cTests, const char * const *papszNameFmts)
8931{
8932 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8933
8934 static struct { int32_t aVal[2]; } const s_aSpecials[] =
8935 {
8936 { { INT32_MIN, INT32_MIN } },
8937 { { INT32_MAX, INT32_MAX } }
8938 /** @todo More specials. */
8939 };
8940
8941 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64Mm); iFn++)
8942 {
8943 PFNIEMAIMPLMXCSRU128U64 const pfn = g_aSseConvertXmmR64Mm[iFn].pfnNative ? g_aSseConvertXmmR64Mm[iFn].pfnNative : g_aSseConvertXmmR64Mm[iFn].pfn;
8944
8945 IEMBINARYOUTPUT BinOut;
8946 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertXmmR64Mm[iFn]), RTEXITCODE_FAILURE);
8947
8948 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8949 {
8950 SSE_CONVERT_XMM_MM_TEST_T TestData; RT_ZERO(TestData);
8951
8952 TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[0];
8953 TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[1];
8954
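        /* The worker takes the 64-bit MMX source (InVal.u) holding the two packed signed
           double-words assigned above and widens them to two double-precision values. */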
8955 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
8956 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
8957 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
8958 for (uint8_t iFz = 0; iFz < 2; iFz++)
8959 {
8960 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
8961 | (iRounding << X86_MXCSR_RC_SHIFT)
8962 | (iDaz ? X86_MXCSR_DAZ : 0)
8963 | (iFz ? X86_MXCSR_FZ : 0)
8964 | X86_MXCSR_XCPT_MASK;
8965 uint32_t fMxcsrM = fMxcsrIn;
8966 pfn(&fMxcsrM, &TestData.OutVal, TestData.InVal.u);
8967 TestData.fMxcsrIn = fMxcsrIn;
8968 TestData.fMxcsrOut = fMxcsrM;
8969 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8970
8971 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
8972 uint32_t fMxcsrU = fMxcsrIn;
8973 pfn(&fMxcsrU, &TestData.OutVal, TestData.InVal.u);
8974 TestData.fMxcsrIn = fMxcsrIn;
8975 TestData.fMxcsrOut = fMxcsrU;
8976 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8977
8978 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
8979 if (fXcpt)
8980 {
8981 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8982 uint32_t fMxcsr1 = fMxcsrIn;
8983 pfn(&fMxcsr1, &TestData.OutVal, TestData.InVal.u);
8984 TestData.fMxcsrIn = fMxcsrIn;
8985 TestData.fMxcsrOut = fMxcsr1;
8986 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8987
8988 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
8989 {
8990 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
8991 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8992 uint32_t fMxcsr2 = fMxcsrIn;
8993 pfn(&fMxcsr2, &TestData.OutVal, TestData.InVal.u);
8994 TestData.fMxcsrIn = fMxcsrIn;
8995 TestData.fMxcsrOut = fMxcsr2;
8996 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
8997 }
8998 if (!RT_IS_POWER_OF_TWO(fXcpt))
8999 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
9000 if (fUnmasked & fXcpt)
9001 {
9002 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
9003 uint32_t fMxcsr3 = fMxcsrIn;
9004 pfn(&fMxcsr3, &TestData.OutVal, TestData.InVal.u);
9005 TestData.fMxcsrIn = fMxcsrIn;
9006 TestData.fMxcsrOut = fMxcsr3;
9007 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9008 }
9009 }
9010 }
9011 }
9012 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
9013 }
9014
9015 return RTEXITCODE_SUCCESS;
9016}
9017#endif
9018
9019static void SseConvertXmmR64MmTest(void)
9020{
9021 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64Mm); iFn++)
9022 {
9023 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmR64Mm[iFn]))
9024 continue;
9025
9026 SSE_CONVERT_XMM_MM_TEST_T const * const paTests = g_aSseConvertXmmR64Mm[iFn].paTests;
9027 uint32_t const cTests = g_aSseConvertXmmR64Mm[iFn].cTests;
9028 PFNIEMAIMPLMXCSRU128U64 pfn = g_aSseConvertXmmR64Mm[iFn].pfn;
9029 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR64Mm[iFn]);
9030 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9031 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9032 {
9033 for (uint32_t iTest = 0; iTest < cTests; iTest++)
9034 {
9035 X86XMMREG ValOut;
9036 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
9037 pfn(&fMxcsr, &ValOut, paTests[iTest].InVal.u);
9038 if ( fMxcsr != paTests[iTest].fMxcsrOut
9039 || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[0], &paTests[iTest].OutVal.ar64[0])
9040 || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[1], &paTests[iTest].OutVal.ar64[1]))
9041 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32\n"
9042 "%s -> mxcsr=%#08x %s'%s\n"
9043 "%s expected %#08x %s'%s%s%s (%s)\n",
9044 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
9045 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
9046 iVar ? " " : "", fMxcsr,
9047 FormatR64(&ValOut.ar64[0]), FormatR64(&ValOut.ar64[1]),
9048 iVar ? " " : "", paTests[iTest].fMxcsrOut,
9049 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
9050 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
9051 ( !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[0], &paTests[iTest].OutVal.ar64[0])
9052 || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[1], &paTests[iTest].OutVal.ar64[1]))
9053 ? " - val" : "",
9054 FormatMxcsr(paTests[iTest].fMxcsrIn));
9055 }
9056 }
9057
9058 FREE_DECOMPRESSED_TESTS(g_aSseConvertXmmR64Mm[iFn]);
9059 }
9060}
9061
9062
9063/*
9064 * Convert SSE operations converting signed double-word values to single precision floating-point values (probably only cvtpi2ps).
9065 */
9066TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_R32_MM_T, SSE_CONVERT_XMM_MM_TEST_T, PFNIEMAIMPLMXCSRU128U64);
9067
9068static SSE_CONVERT_XMM_R32_MM_T g_aSseConvertXmmR32Mm[] =
9069{
9070 ENTRY_BIN(cvtpi2ps_u128)
9071};
9072
9073#ifdef TSTIEMAIMPL_WITH_GENERATOR
9074DUMP_ALL_FN(SseConvertXmmR32Mm, g_aSseConvertXmmR32Mm)
9075static RTEXITCODE SseConvertXmmR32MmGenerate(uint32_t cTests, const char * const *papszNameFmts)
9076{
9077 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
9078
9079 static struct { int32_t aVal[2]; } const s_aSpecials[] =
9080 {
9081 { { INT32_MIN, INT32_MIN } },
9082 { { INT32_MAX, INT32_MAX } }
9083 /** @todo More specials. */
9084 };
9085
9086 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32Mm); iFn++)
9087 {
9088 PFNIEMAIMPLMXCSRU128U64 const pfn = g_aSseConvertXmmR32Mm[iFn].pfnNative ? g_aSseConvertXmmR32Mm[iFn].pfnNative : g_aSseConvertXmmR32Mm[iFn].pfn;
9089
9090 IEMBINARYOUTPUT BinOut;
9091 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertXmmR32Mm[iFn]), RTEXITCODE_FAILURE);
9092
9093 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
9094 {
9095 SSE_CONVERT_XMM_MM_TEST_T TestData; RT_ZERO(TestData);
9096
9097 TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[0];
9098 TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[1];
9099
9100 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
9101 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
9102 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
9103 for (uint8_t iFz = 0; iFz < 2; iFz++)
9104 {
9105 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
9106 | (iRounding << X86_MXCSR_RC_SHIFT)
9107 | (iDaz ? X86_MXCSR_DAZ : 0)
9108 | (iFz ? X86_MXCSR_FZ : 0)
9109 | X86_MXCSR_XCPT_MASK;
9110 uint32_t fMxcsrM = fMxcsrIn;
9111 pfn(&fMxcsrM, &TestData.OutVal, TestData.InVal.u);
9112 TestData.fMxcsrIn = fMxcsrIn;
9113 TestData.fMxcsrOut = fMxcsrM;
9114 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9115
9116 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
9117 uint32_t fMxcsrU = fMxcsrIn;
9118 pfn(&fMxcsrU, &TestData.OutVal, TestData.InVal.u);
9119 TestData.fMxcsrIn = fMxcsrIn;
9120 TestData.fMxcsrOut = fMxcsrU;
9121 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9122
9123 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
9124 if (fXcpt)
9125 {
9126 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
9127 uint32_t fMxcsr1 = fMxcsrIn;
9128 pfn(&fMxcsr1, &TestData.OutVal, TestData.InVal.u);
9129 TestData.fMxcsrIn = fMxcsrIn;
9130 TestData.fMxcsrOut = fMxcsr1;
9131 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9132
9133 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
9134 {
9135 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
9136 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
9137 uint32_t fMxcsr2 = fMxcsrIn;
9138 pfn(&fMxcsr2, &TestData.OutVal, TestData.InVal.u);
9139 TestData.fMxcsrIn = fMxcsrIn;
9140 TestData.fMxcsrOut = fMxcsr2;
9141 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9142 }
9143 if (!RT_IS_POWER_OF_TWO(fXcpt))
9144 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
9145 if (fUnmasked & fXcpt)
9146 {
9147 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
9148 uint32_t fMxcsr3 = fMxcsrIn;
9149 pfn(&fMxcsr3, &TestData.OutVal, TestData.InVal.u);
9150 TestData.fMxcsrIn = fMxcsrIn;
9151 TestData.fMxcsrOut = fMxcsr3;
9152 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9153 }
9154 }
9155 }
9156 }
9157 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
9158 }
9159
9160 return RTEXITCODE_SUCCESS;
9161}
9162#endif
9163
9164static void SseConvertXmmR32MmTest(void)
9165{
9166 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32Mm); iFn++)
9167 {
9168 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertXmmR32Mm[iFn]))
9169 continue;
9170
9171 SSE_CONVERT_XMM_MM_TEST_T const * const paTests = g_aSseConvertXmmR32Mm[iFn].paTests;
9172 uint32_t const cTests = g_aSseConvertXmmR32Mm[iFn].cTests;
9173 PFNIEMAIMPLMXCSRU128U64 pfn = g_aSseConvertXmmR32Mm[iFn].pfn;
9174 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR32Mm[iFn]);
9175 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9176 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9177 {
9178 for (uint32_t iTest = 0; iTest < cTests; iTest++)
9179 {
9180 X86XMMREG ValOut;
9181 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
9182 pfn(&fMxcsr, &ValOut, paTests[iTest].InVal.u);
9183 if ( fMxcsr != paTests[iTest].fMxcsrOut
9184 || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[0], &paTests[iTest].OutVal.ar32[0])
9185 || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[1], &paTests[iTest].OutVal.ar32[1]))
9186 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32\n"
9187 "%s -> mxcsr=%#08x %s'%s\n"
9188 "%s expected %#08x %s'%s%s%s (%s)\n",
9189 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
9190 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
9191 iVar ? " " : "", fMxcsr,
9192 FormatR32(&ValOut.ar32[0]), FormatR32(&ValOut.ar32[1]),
9193 iVar ? " " : "", paTests[iTest].fMxcsrOut,
9194 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
9195 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
9196 ( !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[0], &paTests[iTest].OutVal.ar32[0])
9197 || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[1], &paTests[iTest].OutVal.ar32[1]))
9198 ? " - val" : "",
9199 FormatMxcsr(paTests[iTest].fMxcsrIn));
9200 }
9201 }
9202
9203 FREE_DECOMPRESSED_TESTS(g_aSseConvertXmmR32Mm[iFn]);
9204 }
9205}
9206
9207
9208/*
9209 * Convert SSE operations converting single-precision floating point values to signed double-word values.
9210 */
9211TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_MM_I32_XMM_R32_T, SSE_CONVERT_MM_R32_TEST_T, PFNIEMAIMPLMXCSRU64U64);
9212
9213static SSE_CONVERT_MM_I32_XMM_R32_T g_aSseConvertMmI32XmmR32[] =
9214{
9215 ENTRY_BIN(cvtps2pi_u128),
9216 ENTRY_BIN(cvttps2pi_u128)
9217};
9218
9219#ifdef TSTIEMAIMPL_WITH_GENERATOR
9220DUMP_ALL_FN(SseConvertMmI32XmmR32, g_aSseConvertMmI32XmmR32)
9221static RTEXITCODE SseConvertMmI32XmmR32Generate(uint32_t cTests, const char * const *papszNameFmts)
9222{
9223 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
9224
9225 static struct { RTFLOAT32U aVal1[2]; } const s_aSpecials[] =
9226 {
9227 { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) } },
9228 { { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) } },
9229 { { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0) } },
9230 { { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1) } }
9231 /** @todo More specials. */
9232 };
9233
9234 uint32_t cMinNormalPairs = (cTests - 144) / 4;
9235 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmI32XmmR32); iFn++)
9236 {
9237 PFNIEMAIMPLMXCSRU64U64 const pfn = g_aSseConvertMmI32XmmR32[iFn].pfnNative ? g_aSseConvertMmI32XmmR32[iFn].pfnNative : g_aSseConvertMmI32XmmR32[iFn].pfn;
9238
9239 IEMBINARYOUTPUT BinOut;
9240 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSseConvertMmI32XmmR32[iFn]), RTEXITCODE_FAILURE);
9241
9242 uint32_t cNormalInputPairs = 0;
9243 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
9244 {
9245 SSE_CONVERT_MM_R32_TEST_T TestData; RT_ZERO(TestData);
9246
9247 TestData.ar32InVal[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
9248 TestData.ar32InVal[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
9249
9250 if ( RTFLOAT32U_IS_NORMAL(&TestData.ar32InVal[0])
9251 && RTFLOAT32U_IS_NORMAL(&TestData.ar32InVal[1]))
9252 cNormalInputPairs++;
9253 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
9254 {
9255 iTest -= 1;
9256 continue;
9257 }
9258
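        /* The worker takes the low 64 bits of the source XMM register as a uint64_t, so the
           two single-precision inputs are packed into an RTFLOAT64U and passed as TestVal.u. */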
9259 RTFLOAT64U TestVal;
9260 TestVal.au32[0] = TestData.ar32InVal[0].u;
9261 TestVal.au32[1] = TestData.ar32InVal[1].u;
9262
9263 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
9264 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
9265 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
9266 for (uint8_t iFz = 0; iFz < 2; iFz++)
9267 {
9268 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
9269 | (iRounding << X86_MXCSR_RC_SHIFT)
9270 | (iDaz ? X86_MXCSR_DAZ : 0)
9271 | (iFz ? X86_MXCSR_FZ : 0)
9272 | X86_MXCSR_XCPT_MASK;
9273 uint32_t fMxcsrM = fMxcsrIn;
9274 uint64_t u64ResM;
9275 pfn(&fMxcsrM, &u64ResM, TestVal.u);
9276 TestData.fMxcsrIn = fMxcsrIn;
9277 TestData.fMxcsrOut = fMxcsrM;
9278 TestData.OutVal.u = u64ResM;
9279 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9280
9281 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
9282 uint32_t fMxcsrU = fMxcsrIn;
9283 uint64_t u64ResU;
9284 pfn(&fMxcsrU, &u64ResU, TestVal.u);
9285 TestData.fMxcsrIn = fMxcsrIn;
9286 TestData.fMxcsrOut = fMxcsrU;
9287 TestData.OutVal.u = u64ResU;
9288 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9289
9290 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
9291 if (fXcpt)
9292 {
9293 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
9294 uint32_t fMxcsr1 = fMxcsrIn;
9295 uint64_t u64Res1;
9296 pfn(&fMxcsr1, &u64Res1, TestVal.u);
9297 TestData.fMxcsrIn = fMxcsrIn;
9298 TestData.fMxcsrOut = fMxcsr1;
9299 TestData.OutVal.u = u64Res1;
9300 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9301
9302 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
9303 {
9304 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
9305 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
9306 uint32_t fMxcsr2 = fMxcsrIn;
9307 uint64_t u64Res2;
9308 pfn(&fMxcsr2, &u64Res2, TestVal.u);
9309 TestData.fMxcsrIn = fMxcsrIn;
9310 TestData.fMxcsrOut = fMxcsr2;
9311 TestData.OutVal.u = u64Res2;
9312 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9313 }
9314 if (!RT_IS_POWER_OF_TWO(fXcpt))
9315 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
9316 if (fUnmasked & fXcpt)
9317 {
9318 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
9319 uint32_t fMxcsr3 = fMxcsrIn;
9320 uint64_t u64Res3;
9321 pfn(&fMxcsr3, &u64Res3, TestVal.u);
9322 TestData.fMxcsrIn = fMxcsrIn;
9323 TestData.fMxcsrOut = fMxcsr3;
9324 TestData.OutVal.u = u64Res3;
9325 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9326 }
9327 }
9328 }
9329 }
9330 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
9331 }
9332
9333 return RTEXITCODE_SUCCESS;
9334}
9335#endif
9336
9337static void SseConvertMmI32XmmR32Test(void)
9338{
9339 X86FXSTATE State;
9340 RT_ZERO(State);
9341
9342 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmI32XmmR32); iFn++)
9343 {
9344 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSseConvertMmI32XmmR32[iFn]))
9345 continue;
9346
9347 SSE_CONVERT_MM_R32_TEST_T const * const paTests = g_aSseConvertMmI32XmmR32[iFn].paTests;
9348 uint32_t const cTests = g_aSseConvertMmI32XmmR32[iFn].cTests;
9349 PFNIEMAIMPLMXCSRU64U64 pfn = g_aSseConvertMmI32XmmR32[iFn].pfn;
9350 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertMmI32XmmR32[iFn]);
9351 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9352 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9353 {
9354 for (uint32_t iTest = 0; iTest < cTests; iTest++)
9355 {
9356 RTUINT64U ValOut;
9357 RTUINT64U ValIn;
9358
9359 ValIn.au32[0] = paTests[iTest].ar32InVal[0].u;
9360 ValIn.au32[1] = paTests[iTest].ar32InVal[1].u;
9361
9362 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
9363 pfn(&fMxcsr, &ValOut.u, ValIn.u);
9364 if ( fMxcsr != paTests[iTest].fMxcsrOut
9365 || ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
9366 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
9367 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s \n"
9368 "%s -> mxcsr=%#08x %RI32'%RI32\n"
9369 "%s expected %#08x %RI32'%RI32%s%s (%s)\n",
9370 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
9371 FormatR32(&paTests[iTest].ar32InVal[0]), FormatR32(&paTests[iTest].ar32InVal[1]),
9372 iVar ? " " : "", fMxcsr,
9373 ValOut.ai32[0], ValOut.ai32[1],
9374 iVar ? " " : "", paTests[iTest].fMxcsrOut,
9375 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
9376 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
9377 ( ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
9378 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
9379 ? " - val" : "",
9380 FormatMxcsr(paTests[iTest].fMxcsrIn));
9381 }
9382 }
9383
9384 FREE_DECOMPRESSED_TESTS(g_aSseConvertMmI32XmmR32[iFn]);
9385 }
9386}
9387
9388
9389/*
9390 * SSE 4.2 pcmpxstrx instructions.
9391 */
9392TYPEDEF_SUBTEST_TYPE(SSE_PCMPISTRI_T, SSE_PCMPISTRI_TEST_T, PFNIEMAIMPLPCMPISTRIU128IMM8);
9393
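/* pcmpistri: implicit-length string compare returning an index in ECX.  The worker takes the
   ECX output, EFLAGS (in/out), the two XMM source operands and the imm8 control byte. */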
9394static SSE_PCMPISTRI_T g_aSsePcmpistri[] =
9395{
9396 ENTRY_BIN_SSE_OPT(pcmpistri_u128),
9397};
9398
9399#ifdef TSTIEMAIMPL_WITH_GENERATOR
9400DUMP_ALL_FN(SseComparePcmpistri, g_aSsePcmpistri)
9401static RTEXITCODE SseComparePcmpistriGenerate(uint32_t cTests, const char * const *papszNameFmts)
9402{
9403 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
9404
9405 static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
9406 {
9407 { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
9408 /** @todo More specials. */
9409 };
9410
9411 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistri); iFn++)
9412 {
9413 PFNIEMAIMPLPCMPISTRIU128IMM8 const pfn = g_aSsePcmpistri[iFn].pfnNative ? g_aSsePcmpistri[iFn].pfnNative : g_aSsePcmpistri[iFn].pfn;
9414
9415 IEMBINARYOUTPUT BinOut;
9416 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSsePcmpistri[iFn]), RTEXITCODE_FAILURE);
9417
9418 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
9419 {
9420 SSE_PCMPISTRI_TEST_T TestData; RT_ZERO(TestData);
9421
9422 TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
9423 TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;
9424
9425 IEMPCMPISTRXSRC TestVal;
9426 TestVal.uSrc1 = TestData.InVal1.uXmm;
9427 TestVal.uSrc2 = TestData.InVal2.uXmm;
9428
9429 uint32_t const fEFlagsIn = RandEFlags();
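        /* Sweep all 256 imm8 control bytes with the same EFLAGS input for this source pair. */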
9430 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9431 {
9432 uint32_t fEFlagsOut = fEFlagsIn;
9433 pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9434 TestData.fEFlagsIn = fEFlagsIn;
9435 TestData.fEFlagsOut = fEFlagsOut;
9436 TestData.bImm = (uint8_t)u16Imm;
9437 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9438 }
9439
9440            /* Repeat the test with the second input identical to the first. */
9441 TestData.InVal2.uXmm = TestData.InVal1.uXmm;
9442 TestVal.uSrc1 = TestData.InVal1.uXmm;
9443 TestVal.uSrc2 = TestData.InVal2.uXmm;
9444
9445 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9446 {
9447 uint32_t fEFlagsOut = fEFlagsIn;
9448 pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9449 TestData.fEFlagsIn = fEFlagsIn;
9450 TestData.fEFlagsOut = fEFlagsOut;
9451 TestData.bImm = (uint8_t)u16Imm;
9452 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9453 }
9454 }
9455 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
9456 }
9457
9458 return RTEXITCODE_SUCCESS;
9459}
9460#endif
9461
9462static void SseComparePcmpistriTest(void)
9463{
9464 X86FXSTATE State;
9465 RT_ZERO(State);
9466
9467 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistri); iFn++)
9468 {
9469 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSsePcmpistri[iFn]))
9470 continue;
9471
9472 SSE_PCMPISTRI_TEST_T const * const paTests = g_aSsePcmpistri[iFn].paTests;
9473 uint32_t const cTests = g_aSsePcmpistri[iFn].cTests;
9474 PFNIEMAIMPLPCMPISTRIU128IMM8 pfn = g_aSsePcmpistri[iFn].pfn;
9475 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpistri[iFn]);
9476 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9477 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9478 {
9479 for (uint32_t iTest = 0; iTest < cTests; iTest++)
9480 {
9481 IEMPCMPISTRXSRC TestVal;
9482 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9483 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9484
9485 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9486 uint32_t u32EcxOut = 0;
9487 pfn(&u32EcxOut, &fEFlags, &TestVal, paTests[iTest].bImm);
9488 if ( fEFlags != paTests[iTest].fEFlagsOut
9489 || u32EcxOut != paTests[iTest].u32EcxOut)
9490 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s in2=%s bImm=%#x\n"
9491 "%s -> efl=%#08x %RU32\n"
9492 "%s expected %#08x %RU32%s%s\n",
9493 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9494 FormatU128(&paTests[iTest].InVal1.uXmm), FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].bImm,
9495 iVar ? " " : "", fEFlags, u32EcxOut,
9496 iVar ? " " : "", paTests[iTest].fEFlagsOut, paTests[iTest].u32EcxOut,
9497 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9498 (u32EcxOut != paTests[iTest].u32EcxOut) ? " - val" : "");
9499 }
9500 }
9501
9502 FREE_DECOMPRESSED_TESTS(g_aSsePcmpistri[iFn]);
9503 }
9504}
9505
9506
9507TYPEDEF_SUBTEST_TYPE(SSE_PCMPISTRM_T, SSE_PCMPISTRM_TEST_T, PFNIEMAIMPLPCMPISTRMU128IMM8);
9508
9509static SSE_PCMPISTRM_T g_aSsePcmpistrm[] =
9510{
9511 ENTRY_BIN_SSE_OPT(pcmpistrm_u128),
9512};
9513
9514#ifdef TSTIEMAIMPL_WITH_GENERATOR
9515DUMP_ALL_FN(SseComparePcmpistrm, g_aSsePcmpistrm)
9516static RTEXITCODE SseComparePcmpistrmGenerate(uint32_t cTests, const char * const *papszNameFmts)
9517{
9518 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
9519
9520 static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
9521 {
9522 { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
9523 /** @todo More specials. */
9524 };
9525
9526 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistrm); iFn++)
9527 {
9528 PFNIEMAIMPLPCMPISTRMU128IMM8 const pfn = g_aSsePcmpistrm[iFn].pfnNative ? g_aSsePcmpistrm[iFn].pfnNative : g_aSsePcmpistrm[iFn].pfn;
9529
9530 IEMBINARYOUTPUT BinOut;
9531 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSsePcmpistrm[iFn]), RTEXITCODE_FAILURE);
9532
9533 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
9534 {
9535 SSE_PCMPISTRM_TEST_T TestData; RT_ZERO(TestData);
9536
9537 TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
9538 TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;
9539
9540 IEMPCMPISTRXSRC TestVal;
9541 TestVal.uSrc1 = TestData.InVal1.uXmm;
9542 TestVal.uSrc2 = TestData.InVal2.uXmm;
9543
9544 uint32_t const fEFlagsIn = RandEFlags();
9545 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9546 {
9547 uint32_t fEFlagsOut = fEFlagsIn;
9548 pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9549 TestData.fEFlagsIn = fEFlagsIn;
9550 TestData.fEFlagsOut = fEFlagsOut;
9551 TestData.bImm = (uint8_t)u16Imm;
9552 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9553 }
9554
9555            /* Repeat the test with the second input identical to the first. */
9556 TestData.InVal2.uXmm = TestData.InVal1.uXmm;
9557 TestVal.uSrc1 = TestData.InVal1.uXmm;
9558 TestVal.uSrc2 = TestData.InVal2.uXmm;
9559
9560 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9561 {
9562 uint32_t fEFlagsOut = fEFlagsIn;
9563 pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9564 TestData.fEFlagsIn = fEFlagsIn;
9565 TestData.fEFlagsOut = fEFlagsOut;
9566 TestData.bImm = (uint8_t)u16Imm;
9567 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9568 }
9569 }
9570 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
9571 }
9572
9573 return RTEXITCODE_SUCCESS;
9574}
9575#endif
9576
9577static void SseComparePcmpistrmTest(void)
9578{
9579 X86FXSTATE State;
9580 RT_ZERO(State);
9581
9582 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistrm); iFn++)
9583 {
9584 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSsePcmpistrm[iFn]))
9585 continue;
9586
9587 SSE_PCMPISTRM_TEST_T const * const paTests = g_aSsePcmpistrm[iFn].paTests;
9588 uint32_t const cTests = g_aSsePcmpistrm[iFn].cTests;
9589 PFNIEMAIMPLPCMPISTRMU128IMM8 pfn = g_aSsePcmpistrm[iFn].pfn;
9590 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpistrm[iFn]);
9591 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9592 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9593 {
9594 for (uint32_t iTest = 0; iTest < cTests; iTest++)
9595 {
9596 IEMPCMPISTRXSRC TestVal;
9597 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9598 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9599
9600 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9601 RTUINT128U OutVal;
9602 pfn(&OutVal, &fEFlags, &TestVal, paTests[iTest].bImm);
9603 if ( fEFlags != paTests[iTest].fEFlagsOut
9604 || OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9605 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo)
9606 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s in2=%s bImm=%#x\n"
9607 "%s -> efl=%#08x %s\n"
9608 "%s expected %#08x %s%s%s\n",
9609 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9610 FormatU128(&paTests[iTest].InVal1.uXmm), FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].bImm,
9611 iVar ? " " : "", fEFlags, FormatU128(&OutVal),
9612 iVar ? " " : "", paTests[iTest].fEFlagsOut, FormatU128(&paTests[iTest].OutVal.uXmm),
9613 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9614 ( OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9615 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo) ? " - val" : "");
9616 }
9617 }
9618
9619 FREE_DECOMPRESSED_TESTS(g_aSsePcmpistrm[iFn]);
9620 }
9621}
9622
9623
9624TYPEDEF_SUBTEST_TYPE(SSE_PCMPESTRI_T, SSE_PCMPESTRI_TEST_T, PFNIEMAIMPLPCMPESTRIU128IMM8);
9625
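/* pcmpestri: explicit-length string compare returning an index in ECX; the string lengths come
   from RAX/RDX, hence the u64Rax/u64Rdx members in the source structure. */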
9626static SSE_PCMPESTRI_T g_aSsePcmpestri[] =
9627{
9628 ENTRY_BIN_SSE_OPT(pcmpestri_u128),
9629};
9630
9631#ifdef TSTIEMAIMPL_WITH_GENERATOR
9632DUMP_ALL_FN(SseComparePcmpestri, g_aSsePcmpestri)
9633static RTEXITCODE SseComparePcmpestriGenerate(uint32_t cTests, const char * const *papszNameFmts)
9634{
9635 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
9636
9637 static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
9638 {
9639 { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
9640 /** @todo More specials. */
9641 };
9642
9643 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestri); iFn++)
9644 {
9645 PFNIEMAIMPLPCMPESTRIU128IMM8 const pfn = g_aSsePcmpestri[iFn].pfnNative ? g_aSsePcmpestri[iFn].pfnNative : g_aSsePcmpestri[iFn].pfn;
9646
9647 IEMBINARYOUTPUT BinOut;
9648 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSsePcmpestri[iFn]), RTEXITCODE_FAILURE);
9649
9650 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
9651 {
9652 SSE_PCMPESTRI_TEST_T TestData; RT_ZERO(TestData);
9653
9654 TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
9655 TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;
9656
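        /* The explicit-length forms take the string lengths from RAX/RDX; stepping by 20 from
           -20 covers a negative and a zero length for each register. */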
9657 for (int64_t i64Rax = -20; i64Rax < 20; i64Rax += 20)
9658 for (int64_t i64Rdx = -20; i64Rdx < 20; i64Rdx += 20)
9659 {
9660 TestData.u64Rax = (uint64_t)i64Rax;
9661 TestData.u64Rdx = (uint64_t)i64Rdx;
9662
9663 IEMPCMPESTRXSRC TestVal;
9664 TestVal.uSrc1 = TestData.InVal1.uXmm;
9665 TestVal.uSrc2 = TestData.InVal2.uXmm;
9666 TestVal.u64Rax = TestData.u64Rax;
9667 TestVal.u64Rdx = TestData.u64Rdx;
9668
9669 uint32_t const fEFlagsIn = RandEFlags();
9670 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9671 {
9672 uint32_t fEFlagsOut = fEFlagsIn;
9673 pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9674 TestData.fEFlagsIn = fEFlagsIn;
9675 TestData.fEFlagsOut = fEFlagsOut;
9676 TestData.bImm = (uint8_t)u16Imm;
9677 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9678 }
9679
9680                /* Repeat the test with the second input identical to the first. */
9681 TestData.InVal2.uXmm = TestData.InVal1.uXmm;
9682 TestVal.uSrc1 = TestData.InVal1.uXmm;
9683 TestVal.uSrc2 = TestData.InVal2.uXmm;
9684
9685 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9686 {
9687 uint32_t fEFlagsOut = fEFlagsIn;
9688 pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9689 TestData.fEFlagsIn = fEFlagsIn;
9690 TestData.fEFlagsOut = fEFlagsOut;
9691 TestData.bImm = (uint8_t)u16Imm;
9692 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9693 }
9694 }
9695 }
9696 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
9697 }
9698
9699 return RTEXITCODE_SUCCESS;
9700}
9701#endif
9702
9703static void SseComparePcmpestriTest(void)
9704{
9705 X86FXSTATE State;
9706 RT_ZERO(State);
9707
9708 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestri); iFn++)
9709 {
9710 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSsePcmpestri[iFn]))
9711 continue;
9712
9713 SSE_PCMPESTRI_TEST_T const * const paTests = g_aSsePcmpestri[iFn].paTests;
9714 uint32_t const cTests = g_aSsePcmpestri[iFn].cTests;
9715 PFNIEMAIMPLPCMPESTRIU128IMM8 pfn = g_aSsePcmpestri[iFn].pfn;
9716 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpestri[iFn]);
9717 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9718 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9719 {
9720 for (uint32_t iTest = 0; iTest < cTests; iTest++)
9721 {
9722 IEMPCMPESTRXSRC TestVal;
9723 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9724 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9725 TestVal.u64Rax = paTests[iTest].u64Rax;
9726 TestVal.u64Rdx = paTests[iTest].u64Rdx;
9727
9728 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9729 uint32_t u32EcxOut = 0;
9730 pfn(&u32EcxOut, &fEFlags, &TestVal, paTests[iTest].bImm);
9731 if ( fEFlags != paTests[iTest].fEFlagsOut
9732 || u32EcxOut != paTests[iTest].u32EcxOut)
9733 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s rax1=%RI64 in2=%s rdx2=%RI64 bImm=%#x\n"
9734 "%s -> efl=%#08x %RU32\n"
9735 "%s expected %#08x %RU32%s%s\n",
9736 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9737 FormatU128(&paTests[iTest].InVal1.uXmm), paTests[iTest].u64Rax,
9738 FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].u64Rdx,
9739 paTests[iTest].bImm,
9740 iVar ? " " : "", fEFlags, u32EcxOut,
9741 iVar ? " " : "", paTests[iTest].fEFlagsOut, paTests[iTest].u32EcxOut,
9742 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9743 (u32EcxOut != paTests[iTest].u32EcxOut) ? " - val" : "");
9744 }
9745 }
9746
9747 FREE_DECOMPRESSED_TESTS(g_aSsePcmpestri[iFn]);
9748 }
9749}
9750
9751
9752TYPEDEF_SUBTEST_TYPE(SSE_PCMPESTRM_T, SSE_PCMPESTRM_TEST_T, PFNIEMAIMPLPCMPESTRMU128IMM8);
9753
9754static SSE_PCMPESTRM_T g_aSsePcmpestrm[] =
9755{
9756 ENTRY_BIN_SSE_OPT(pcmpestrm_u128),
9757};
9758
9759#ifdef TSTIEMAIMPL_WITH_GENERATOR
9760DUMP_ALL_FN(SseComparePcmpestrm, g_aSsePcmpestrm)
9761static RTEXITCODE SseComparePcmpestrmGenerate(uint32_t cTests, const char * const *papszNameFmts)
9762{
9763 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
9764
9765 static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
9766 {
9767 { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
9768 /** @todo More specials. */
9769 };
9770
9771 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestrm); iFn++)
9772 {
9773 PFNIEMAIMPLPCMPESTRMU128IMM8 const pfn = g_aSsePcmpestrm[iFn].pfnNative ? g_aSsePcmpestrm[iFn].pfnNative : g_aSsePcmpestrm[iFn].pfn;
9774
9775 IEMBINARYOUTPUT BinOut;
9776 AssertReturn(GENERATE_BINARY_OPEN(&BinOut, papszNameFmts, g_aSsePcmpestrm[iFn]), RTEXITCODE_FAILURE);
9777
9778 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
9779 {
9780 SSE_PCMPESTRM_TEST_T TestData; RT_ZERO(TestData);
9781
9782 TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
9783 TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;
9784
9785 for (int64_t i64Rax = -20; i64Rax < 20; i64Rax += 20)
9786 for (int64_t i64Rdx = -20; i64Rdx < 20; i64Rdx += 20)
9787 {
9788 TestData.u64Rax = (uint64_t)i64Rax;
9789 TestData.u64Rdx = (uint64_t)i64Rdx;
9790
9791 IEMPCMPESTRXSRC TestVal;
9792 TestVal.uSrc1 = TestData.InVal1.uXmm;
9793 TestVal.uSrc2 = TestData.InVal2.uXmm;
9794 TestVal.u64Rax = TestData.u64Rax;
9795 TestVal.u64Rdx = TestData.u64Rdx;
9796
9797 uint32_t const fEFlagsIn = RandEFlags();
9798 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9799 {
9800 uint32_t fEFlagsOut = fEFlagsIn;
9801 pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9802 TestData.fEFlagsIn = fEFlagsIn;
9803 TestData.fEFlagsOut = fEFlagsOut;
9804 TestData.bImm = (uint8_t)u16Imm;
9805 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9806 }
9807
9808                /* Repeat the test with the second input identical to the first. */
9809 TestData.InVal2.uXmm = TestData.InVal1.uXmm;
9810 TestVal.uSrc1 = TestData.InVal1.uXmm;
9811 TestVal.uSrc2 = TestData.InVal2.uXmm;
9812
9813 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9814 {
9815 uint32_t fEFlagsOut = fEFlagsIn;
9816 pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9817 TestData.fEFlagsIn = fEFlagsIn;
9818 TestData.fEFlagsOut = fEFlagsOut;
9819 TestData.bImm = (uint8_t)u16Imm;
9820 GenerateBinaryWrite(&BinOut, &TestData, sizeof(TestData));
9821 }
9822 }
9823 }
9824 AssertReturn(GenerateBinaryClose(&BinOut), RTEXITCODE_FAILURE);
9825 }
9826
9827 return RTEXITCODE_SUCCESS;
9828}
9829#endif
9830
9831static void SseComparePcmpestrmTest(void)
9832{
9833 X86FXSTATE State;
9834 RT_ZERO(State);
9835
9836 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestrm); iFn++)
9837 {
9838 if (!SUBTEST_CHECK_IF_ENABLED_AND_DECOMPRESS(g_aSsePcmpestrm[iFn]))
9839 continue;
9840
9841 SSE_PCMPESTRM_TEST_T const * const paTests = g_aSsePcmpestrm[iFn].paTests;
9842 uint32_t const cTests = g_aSsePcmpestrm[iFn].cTests;
9843 PFNIEMAIMPLPCMPESTRMU128IMM8 pfn = g_aSsePcmpestrm[iFn].pfn;
9844 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpestrm[iFn]);
9845 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9846 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9847 {
9848 for (uint32_t iTest = 0; iTest < cTests; iTest++)
9849 {
9850 IEMPCMPESTRXSRC TestVal;
9851 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9852 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9853 TestVal.u64Rax = paTests[iTest].u64Rax;
9854 TestVal.u64Rdx = paTests[iTest].u64Rdx;
9855
9856 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9857 RTUINT128U OutVal;
9858 pfn(&OutVal, &fEFlags, &TestVal, paTests[iTest].bImm);
9859 if ( fEFlags != paTests[iTest].fEFlagsOut
9860 || OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9861 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo)
9862 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s rax1=%RI64 in2=%s rdx2=%RI64 bImm=%#x\n"
9863 "%s -> efl=%#08x %s\n"
9864 "%s expected %#08x %s%s%s\n",
9865 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9866 FormatU128(&paTests[iTest].InVal1.uXmm), paTests[iTest].u64Rax,
9867 FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].u64Rdx,
9868 paTests[iTest].bImm,
9869 iVar ? " " : "", fEFlags, FormatU128(&OutVal),
9870 iVar ? " " : "", paTests[iTest].fEFlagsOut, FormatU128(&paTests[iTest].OutVal.uXmm),
9871 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9872 ( OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9873 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo) ? " - val" : "");
9874 }
9875 }
9876
9877 FREE_DECOMPRESSED_TESTS(g_aSsePcmpestrm[iFn]);
9878 }
9879}
9880
9881
9882
9883int main(int argc, char **argv)
9884{
9885 int rc = RTR3InitExe(argc, &argv, 0);
9886 if (RT_FAILURE(rc))
9887 return RTMsgInitFailure(rc);
9888
9889 /*
9890     * Determine the host CPU.
9891 * If not using the IEMAllAImpl.asm code, this will be set to Intel.
9892 */
9893#if (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && !defined(IEM_WITHOUT_ASSEMBLY)
9894 g_idxCpuEflFlavour = ASMIsAmdCpu() || ASMIsHygonCpu()
9895 ? IEMTARGETCPU_EFL_BEHAVIOR_AMD
9896 : IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
9897#else
9898 g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
9899#endif
9900
9901 /*
9902 * Parse arguments.
9903 */
9904 enum { kModeNotSet, kModeTest, kModeGenerate, kModeDump }
9905 enmMode = kModeNotSet;
9906#define CATEGORY_INT RT_BIT_32(0)
9907#define CATEGORY_FPU_LD_ST RT_BIT_32(1)
9908#define CATEGORY_FPU_BINARY_1 RT_BIT_32(2)
9909#define CATEGORY_FPU_BINARY_2 RT_BIT_32(3)
9910#define CATEGORY_FPU_OTHER RT_BIT_32(4)
9911#define CATEGORY_SSE_FP_BINARY RT_BIT_32(5)
9912#define CATEGORY_SSE_FP_OTHER RT_BIT_32(6)
9913#define CATEGORY_SSE_PCMPXSTRX RT_BIT_32(7)
9914 uint32_t fCategories = UINT32_MAX;
9915 bool fCpuData = true;
9916 bool fCommonData = true;
9917 uint32_t const cDefaultTests = 96;
9918 uint32_t cTests = cDefaultTests;
9919 RTGETOPTDEF const s_aOptions[] =
9920 {
9921 // mode:
9922 { "--generate", 'g', RTGETOPT_REQ_NOTHING },
9923 { "--dump", 'G', RTGETOPT_REQ_NOTHING },
9924 { "--test", 't', RTGETOPT_REQ_NOTHING },
9925 { "--benchmark", 'b', RTGETOPT_REQ_NOTHING },
9926 // test selection (both)
9927 { "--all", 'a', RTGETOPT_REQ_NOTHING },
9928 { "--none", 'z', RTGETOPT_REQ_NOTHING },
9929 { "--zap", 'z', RTGETOPT_REQ_NOTHING },
9930 { "--fpu-ld-st", 'F', RTGETOPT_REQ_NOTHING }, /* FPU stuff is upper case */
9931 { "--fpu-load-store", 'F', RTGETOPT_REQ_NOTHING },
9932 { "--fpu-binary-1", 'B', RTGETOPT_REQ_NOTHING },
9933 { "--fpu-binary-2", 'P', RTGETOPT_REQ_NOTHING },
9934 { "--fpu-other", 'O', RTGETOPT_REQ_NOTHING },
9935 { "--sse-fp-binary", 'S', RTGETOPT_REQ_NOTHING },
9936 { "--sse-fp-other", 'T', RTGETOPT_REQ_NOTHING },
9937 { "--sse-pcmpxstrx", 'C', RTGETOPT_REQ_NOTHING },
9938 { "--int", 'i', RTGETOPT_REQ_NOTHING },
9939 { "--include", 'I', RTGETOPT_REQ_STRING },
9940 { "--exclude", 'X', RTGETOPT_REQ_STRING },
9941 // generation parameters
9942 { "--common", 'm', RTGETOPT_REQ_NOTHING },
9943 { "--cpu", 'c', RTGETOPT_REQ_NOTHING },
9944 { "--number-of-tests", 'n', RTGETOPT_REQ_UINT32 },
9945 { "--verbose", 'v', RTGETOPT_REQ_NOTHING },
9946 { "--quiet", 'q', RTGETOPT_REQ_NOTHING },
9947 };
9948
9949 RTGETOPTSTATE State;
9950 rc = RTGetOptInit(&State, argc, argv, s_aOptions, RT_ELEMENTS(s_aOptions), 1, 0);
9951 AssertRCReturn(rc, RTEXITCODE_FAILURE);
9952
9953 RTGETOPTUNION ValueUnion;
9954 while ((rc = RTGetOpt(&State, &ValueUnion)))
9955 {
9956 switch (rc)
9957 {
9958 case 'g':
9959 enmMode = kModeGenerate;
9960 g_cPicoSecBenchmark = 0;
9961 break;
9962 case 'G':
9963 enmMode = kModeDump;
9964 g_cPicoSecBenchmark = 0;
9965 break;
9966 case 't':
9967 enmMode = kModeTest;
9968 g_cPicoSecBenchmark = 0;
9969 break;
9970 case 'b':
9971 enmMode = kModeTest;
9972 g_cPicoSecBenchmark += RT_NS_1SEC / 2 * UINT64_C(1000); /* half a second in pico seconds */
9973 break;
9974
9975 case 'a':
9976 fCpuData = true;
9977 fCommonData = true;
9978 fCategories = UINT32_MAX;
9979 break;
9980 case 'z':
9981 fCpuData = false;
9982 fCommonData = false;
9983 fCategories = 0;
9984 break;
9985
9986 case 'F':
9987 fCategories |= CATEGORY_FPU_LD_ST;
9988 break;
9989 case 'O':
9990 fCategories |= CATEGORY_FPU_OTHER;
9991 break;
9992 case 'B':
9993 fCategories |= CATEGORY_FPU_BINARY_1;
9994 break;
9995 case 'P':
9996 fCategories |= CATEGORY_FPU_BINARY_2;
9997 break;
9998 case 'S':
9999 fCategories |= CATEGORY_SSE_FP_BINARY;
10000 break;
10001 case 'T':
10002 fCategories |= CATEGORY_SSE_FP_OTHER;
10003 break;
10004 case 'C':
10005 fCategories |= CATEGORY_SSE_PCMPXSTRX;
10006 break;
10007 case 'i':
10008 fCategories |= CATEGORY_INT;
10009 break;
10010
10011 case 'I':
10012 if (g_cIncludeTestPatterns >= RT_ELEMENTS(g_apszIncludeTestPatterns))
10013 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many include patterns (max %zu)",
10014 RT_ELEMENTS(g_apszIncludeTestPatterns));
10015 g_apszIncludeTestPatterns[g_cIncludeTestPatterns++] = ValueUnion.psz;
10016 break;
10017 case 'X':
10018 if (g_cExcludeTestPatterns >= RT_ELEMENTS(g_apszExcludeTestPatterns))
10019 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many exclude patterns (max %zu)",
10020 RT_ELEMENTS(g_apszExcludeTestPatterns));
10021 g_apszExcludeTestPatterns[g_cExcludeTestPatterns++] = ValueUnion.psz;
10022 break;
10023
10024 case 'm':
10025 fCommonData = true;
10026 break;
10027 case 'c':
10028 fCpuData = true;
10029 break;
10030 case 'n':
10031 cTests = ValueUnion.u32;
10032 break;
10033
10034 case 'q':
10035 g_cVerbosity = 0;
10036 break;
10037 case 'v':
10038 g_cVerbosity++;
10039 break;
10040
10041 case 'h':
10042 RTPrintf("usage: %s <-g|-t> [options]\n"
10043 "\n"
10044 "Mode:\n"
10045 " -g, --generate\n"
10046 " Generate test data.\n"
10047 " -t, --test\n"
10048 " Execute tests.\n"
10049 " -b, --benchmark\n"
10050                         "    Execute tests and do 0.5 seconds of benchmarking.\n"
10051 " Repeating the option increases the benchmark duration by 0.5 seconds.\n"
10052 "\n"
10053 "Test selection (both modes):\n"
10054 " -a, --all\n"
10055 " Enable all tests and generated test data. (default)\n"
10056 " -z, --zap, --none\n"
10057 " Disable all tests and test data types.\n"
10058 " -i, --int\n"
10059 " Enable non-FPU tests.\n"
10060 " -F, --fpu-ld-st\n"
10061 " Enable FPU load and store tests.\n"
10062 " -B, --fpu-binary-1\n"
10063 " Enable FPU binary 80-bit FP tests.\n"
10064 " -P, --fpu-binary-2\n"
10065 " Enable FPU binary 64- and 32-bit FP tests.\n"
10066 " -O, --fpu-other\n"
10067                         "    Enable other FPU tests (unary 80-bit FP operations etc).\n"
10068 " -S, --sse-fp-binary\n"
10069 " Enable SSE binary 64- and 32-bit FP tests.\n"
10070 " -T, --sse-fp-other\n"
10071 " Enable misc SSE 64- and 32-bit FP tests.\n"
10072 " -C, --sse-pcmpxstrx\n"
10073 " Enable SSE pcmpxstrx tests.\n"
10074                         "  -I, --include=<test-pattern>\n"
10075 " Enable tests matching the given pattern.\n"
10076                         "  -X, --exclude=<test-pattern>\n"
10077 " Skip tests matching the given pattern (overrides --include).\n"
10078 "\n"
10079 "Generation:\n"
10080 " -m, --common\n"
10081 " Enable generating common test data.\n"
10082                         "  -c, --cpu\n"
10083 " Enable generating CPU specific test data.\n"
10084                         "  -n, --number-of-tests <count>\n"
10085 " Number of tests to generate. Default: %u\n"
10086 "\n"
10087 "Other:\n"
10088 " -v, --verbose\n"
10089 " -q, --quiet\n"
10090 " Noise level. Default: --quiet\n"
10091 , argv[0], cDefaultTests);
10092 return RTEXITCODE_SUCCESS;
10093 default:
10094 return RTGetOptPrintError(rc, &ValueUnion);
10095 }
10096 }
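    /* Example invocations:
     *   tstIEMAImpl -t -z -C     - run only the SSE pcmpxstrx tests.
     *   tstIEMAImpl -g -n 1024   - regenerate all test data with 1024 entries per test. */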
10097
10098 static const struct
10099 {
10100 uint32_t fCategory;
10101 void (*pfnTest)(void);
10102#ifdef TSTIEMAIMPL_WITH_GENERATOR
10103 const char *pszFilenameFmt;
10104 RTEXITCODE (*pfnGenerate)(uint32_t cTests, const char * const *papszNameFmts);
10105 RTEXITCODE (*pfnDumpAll)(const char * const *papszNameFmts);
10106 uint32_t cMinTests;
10107# define GROUP_ENTRY(a_fCategory, a_BaseNm, a_szFilenameFmt, a_cMinTests) \
10108 { a_fCategory, a_BaseNm ## Test, a_szFilenameFmt, a_BaseNm ## Generate, a_BaseNm ## DumpAll, a_cMinTests }
10109#else
10110# define GROUP_ENTRY(a_fCategory, a_BaseNm, a_szFilenameFmt, a_cMinTests) \
10111 { a_fCategory, a_BaseNm ## Test }
10112#endif
10113#define GROUP_ENTRY_MANUAL(a_fCategory, a_BaseNm) \
10114 { a_fCategory, a_BaseNm ## Test }
10115 } s_aGroups[] =
10116 {
10117 GROUP_ENTRY(CATEGORY_INT, BinU8, "tstIEMAImplDataInt-%s.bin.gz", 0),
10118 GROUP_ENTRY(CATEGORY_INT, BinU16, "tstIEMAImplDataInt-%s.bin.gz", 0),
10119 GROUP_ENTRY(CATEGORY_INT, BinU32, "tstIEMAImplDataInt-%s.bin.gz", 0),
10120 GROUP_ENTRY(CATEGORY_INT, BinU64, "tstIEMAImplDataInt-%s.bin.gz", 0),
10121 GROUP_ENTRY(CATEGORY_INT, ShiftDbl, "tstIEMAImplDataInt-%s.bin.gz", 128),
10122 GROUP_ENTRY(CATEGORY_INT, Unary, "tstIEMAImplDataInt-%s.bin.gz", 0),
10123 GROUP_ENTRY(CATEGORY_INT, Shift, "tstIEMAImplDataInt-%s.bin.gz", 0),
10124 GROUP_ENTRY(CATEGORY_INT, MulDiv, "tstIEMAImplDataInt-%s.bin.gz", 0),
10125 GROUP_ENTRY_MANUAL(CATEGORY_INT, Xchg),
10126 GROUP_ENTRY_MANUAL(CATEGORY_INT, Xadd),
10127 GROUP_ENTRY_MANUAL(CATEGORY_INT, CmpXchg),
10128 GROUP_ENTRY_MANUAL(CATEGORY_INT, CmpXchg8b),
10129 GROUP_ENTRY_MANUAL(CATEGORY_INT, CmpXchg16b),
10130 GROUP_ENTRY_MANUAL(CATEGORY_INT, Bswap),
10131
10132 GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuLdConst, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 0),
10133 GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuLdInt, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 0),
10134 GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuLdD80, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 0),
10135 GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuLdMem, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 384), /* needs better coverage */
10136
10137 GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuStInt, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 0),
10138 GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuStD80, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 0),
10139 GROUP_ENTRY(CATEGORY_FPU_LD_ST, FpuStMem, "tstIEMAImplDataFpuLdSt-%s.bin.gz", 384), /* needs better coverage */
10140
10141 GROUP_ENTRY(CATEGORY_FPU_BINARY_1, FpuBinaryR80, "tstIEMAImplDataFpuBinary1-%s.bin.gz", 0),
10142 GROUP_ENTRY(CATEGORY_FPU_BINARY_1, FpuBinaryFswR80, "tstIEMAImplDataFpuBinary1-%s.bin.gz", 0),
10143 GROUP_ENTRY(CATEGORY_FPU_BINARY_1, FpuBinaryEflR80, "tstIEMAImplDataFpuBinary1-%s.bin.gz", 0),
10144
10145 GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryR64, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10146 GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryR32, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10147 GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryI32, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10148 GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryI16, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10149
10150 GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryFswR64, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10151 GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryFswR32, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10152 GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryFswI32, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10153 GROUP_ENTRY(CATEGORY_FPU_BINARY_2, FpuBinaryFswI16, "tstIEMAImplDataFpuBinary2-%s.bin.gz", 0),
10154
10155 GROUP_ENTRY(CATEGORY_FPU_OTHER, FpuUnaryR80, "tstIEMAImplDataFpuOther-%s.bin.gz", 0),
10156 GROUP_ENTRY(CATEGORY_FPU_OTHER, FpuUnaryFswR80, "tstIEMAImplDataFpuOther-%s.bin.gz", 0),
10157 GROUP_ENTRY(CATEGORY_FPU_OTHER, FpuUnaryTwoR80, "tstIEMAImplDataFpuOther-%s.bin.gz", 0),
10158
10159 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryR32, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10160 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryR64, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10161 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryU128R32, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10162 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryU128R64, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10163
10164 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryI32R64, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10165 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryI64R64, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10166 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryI32R32, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10167 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryI64R32, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10168
10169 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryR64I32, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10170 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryR64I64, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10171 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryR32I32, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10172 GROUP_ENTRY(CATEGORY_SSE_FP_BINARY, SseBinaryR32I64, "tstIEMAImplDataSseBinary-%s.bin.gz", 0),
10173
10174 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseCompareEflR32R32, "tstIEMAImplDataSseCompare-%s.bin.gz", 0),
10175 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseCompareEflR64R64, "tstIEMAImplDataSseCompare-%s.bin.gz", 0),
10176 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseCompareF2XmmR32Imm8, "tstIEMAImplDataSseCompare-%s.bin.gz", 0),
10177 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseCompareF2XmmR64Imm8, "tstIEMAImplDataSseCompare-%s.bin.gz", 0),
10178
10179 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertXmmI32R32, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10180 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertXmmR32I32, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10181 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertXmmI32R64, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10182 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertXmmR64I32, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10183 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertMmXmm, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10184 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertXmmR32Mm, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10185 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertXmmR64Mm, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10186 GROUP_ENTRY(CATEGORY_SSE_FP_OTHER, SseConvertMmI32XmmR32, "tstIEMAImplDataSseConvert-%s.bin.gz", 0),
10187
10188 GROUP_ENTRY(CATEGORY_SSE_PCMPXSTRX, SseComparePcmpistri, "tstIEMAImplDataSsePcmpxstrx-%s.bin.gz", 0),
10189 GROUP_ENTRY(CATEGORY_SSE_PCMPXSTRX, SseComparePcmpistrm, "tstIEMAImplDataSsePcmpxstrx-%s.bin.gz", 0),
10190 GROUP_ENTRY(CATEGORY_SSE_PCMPXSTRX, SseComparePcmpestri, "tstIEMAImplDataSsePcmpxstrx-%s.bin.gz", 0),
10191 GROUP_ENTRY(CATEGORY_SSE_PCMPXSTRX, SseComparePcmpestrm, "tstIEMAImplDataSsePcmpxstrx-%s.bin.gz", 0),
10192 };
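    /* With TSTIEMAIMPL_WITH_GENERATOR defined, an entry such as
     *   GROUP_ENTRY(CATEGORY_INT, BinU8, "tstIEMAImplDataInt-%s.bin.gz", 0)
     * expands to
     *   { CATEGORY_INT, BinU8Test, "tstIEMAImplDataInt-%s.bin.gz", BinU8Generate, BinU8DumpAll, 0 },
     * whereas without the generator it only yields { CATEGORY_INT, BinU8Test }. */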
10193
10194 /*
10195 * Generate data?
10196 */
10197 if (enmMode == kModeGenerate)
10198 {
10199#ifdef TSTIEMAIMPL_WITH_GENERATOR
10200 if (cTests == 0)
10201 cTests = cDefaultTests;
10202 g_cZeroDstTests = RT_MIN(cTests / 16, 32);
10203 g_cZeroSrcTests = g_cZeroDstTests * 2;
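        /* With the default of 96 tests this comes to 6 zero-destination and 12 zero-source cases. */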
10204
10205 RTMpGetDescription(NIL_RTCPUID, g_szCpuDesc, sizeof(g_szCpuDesc));
10206
10207 /* For the revision, use the highest for this file and VBoxRT. */
10208 static const char s_szRev[] = "$Revision: 103099 $";
10209 const char *pszRev = s_szRev;
10210 while (*pszRev && !RT_C_IS_DIGIT(*pszRev))
10211 pszRev++;
10212 g_uSvnRev = RTStrToUInt32(pszRev);
10213 g_uSvnRev = RT_MAX(g_uSvnRev, RTBldCfgRevision());
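        /* For this copy of the file that yields 103099, unless the VBoxRT build revision is higher. */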
10214
10215        /* Loop thru the groups and call the generator for any that's enabled. */
10216 for (size_t i = 0; i < RT_ELEMENTS(s_aGroups); i++)
10217 if ((s_aGroups[i].fCategory & fCategories) && s_aGroups[i].pfnGenerate)
10218 {
10219 const char * const apszNameFmts[] =
10220 {
10221 /*[IEMTARGETCPU_EFL_BEHAVIOR_NATIVE] =*/ fCommonData ? s_aGroups[i].pszFilenameFmt : NULL,
10222 /*[IEMTARGETCPU_EFL_BEHAVIOR_INTEL] =*/ fCpuData ? s_aGroups[i].pszFilenameFmt : NULL,
10223 /*[IEMTARGETCPU_EFL_BEHAVIOR_AMD] =*/ fCpuData ? s_aGroups[i].pszFilenameFmt : NULL,
10224 };
10225 RTEXITCODE rcExit = s_aGroups[i].pfnGenerate(RT_MAX(cTests, s_aGroups[i].cMinTests), apszNameFmts);
10226 if (rcExit != RTEXITCODE_SUCCESS)
10227 return rcExit;
10228 }
10229 return RTEXITCODE_SUCCESS;
10230#else
10231 return RTMsgErrorExitFailure("Test data generator not compiled in!");
10232#endif
10233 }
10234
10235 /*
10236 * Dump tables (used for the conversion, mostly useless now).
10237 */
10238 if (enmMode == kModeDump)
10239 {
10240#ifdef TSTIEMAIMPL_WITH_GENERATOR
10241        /* Loop thru the groups and call the dump function for any that's enabled. */
10242 for (size_t i = 0; i < RT_ELEMENTS(s_aGroups); i++)
10243 if ((s_aGroups[i].fCategory & fCategories) && s_aGroups[i].pfnDumpAll)
10244 {
10245 const char * const apszNameFmts[] =
10246 {
10247 /*[IEMTARGETCPU_EFL_BEHAVIOR_NATIVE] =*/ fCommonData ? s_aGroups[i].pszFilenameFmt : NULL,
10248 /*[IEMTARGETCPU_EFL_BEHAVIOR_INTEL] =*/ fCpuData ? s_aGroups[i].pszFilenameFmt : NULL,
10249 /*[IEMTARGETCPU_EFL_BEHAVIOR_AMD] =*/ fCpuData ? s_aGroups[i].pszFilenameFmt : NULL,
10250 };
10251                RTEXITCODE rcExit = s_aGroups[i].pfnDumpAll(apszNameFmts);
10252 if (rcExit != RTEXITCODE_SUCCESS)
10253 return rcExit;
10254 }
10255 return RTEXITCODE_SUCCESS;
10256#else
10257 return RTMsgErrorExitFailure("Test data generator not compiled in!");
10258#endif
10259 }
10260
10261
10262 /*
10263     * Do testing. Currently disabled by default as data needs to be checked
10264     * on both Intel and AMD systems first.
10265 */
10266 rc = RTTestCreate("tstIEMAImpl", &g_hTest);
10267 AssertRCReturn(rc, RTEXITCODE_FAILURE);
10268 if (enmMode == kModeTest)
10269 {
10270 RTTestBanner(g_hTest);
10271
10272 /* Allocate guarded memory for use in the tests. */
10273#define ALLOC_GUARDED_VAR(a_puVar) do { \
10274 rc = RTTestGuardedAlloc(g_hTest, sizeof(*a_puVar), sizeof(*a_puVar), false /*fHead*/, (void **)&a_puVar); \
10275 if (RT_FAILURE(rc)) RTTestFailed(g_hTest, "Failed to allocate guarded mem: " #a_puVar); \
10276 } while (0)
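        /* Guarded allocations are used so that out-of-bounds accesses by the tested
           workers hit a guard page and fault instead of silently corrupting memory. */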
10277 ALLOC_GUARDED_VAR(g_pu8);
10278 ALLOC_GUARDED_VAR(g_pu16);
10279 ALLOC_GUARDED_VAR(g_pu32);
10280 ALLOC_GUARDED_VAR(g_pu64);
10281 ALLOC_GUARDED_VAR(g_pu128);
10282 ALLOC_GUARDED_VAR(g_pu8Two);
10283 ALLOC_GUARDED_VAR(g_pu16Two);
10284 ALLOC_GUARDED_VAR(g_pu32Two);
10285 ALLOC_GUARDED_VAR(g_pu64Two);
10286 ALLOC_GUARDED_VAR(g_pu128Two);
10287 ALLOC_GUARDED_VAR(g_pfEfl);
10288 if (RTTestErrorCount(g_hTest) == 0)
10289 {
10290            /* Loop thru the groups and call the test function for anything that's enabled. */
10291 for (size_t i = 0; i < RT_ELEMENTS(s_aGroups); i++)
10292 if ((s_aGroups[i].fCategory & fCategories))
10293 s_aGroups[i].pfnTest();
10294 }
10295 return RTTestSummaryAndDestroy(g_hTest);
10296 }
10297 return RTTestSkipAndDestroy(g_hTest, "unfinished testcase");
10298}
10299