VirtualBox

source: vbox/trunk/src/VBox/VMM/testcase/tstIEMAImpl.cpp@ 103025

Last change on this file since 103025 was 103003, checked in by vboxsync, 10 months ago

VMM/IEM: Assembly version of iemAImpl_sub_*. bugref:10376

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 465.7 KB
Line 
1/* $Id: tstIEMAImpl.cpp 103003 2024-01-23 16:19:17Z vboxsync $ */
2/** @file
3 * IEM Assembly Instruction Helper Testcase.
4 */
5
6/*
7 * Copyright (C) 2022-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#include "../include/IEMInternal.h"
33
34#include <iprt/errcore.h>
35#include <VBox/log.h>
36#include <iprt/assert.h>
37#include <iprt/ctype.h>
38#include <iprt/getopt.h>
39#include <iprt/initterm.h>
40#include <iprt/message.h>
41#include <iprt/mp.h>
42#include <iprt/rand.h>
43#include <iprt/stream.h>
44#include <iprt/string.h>
45#include <iprt/test.h>
46#include <iprt/time.h>
47#include <iprt/thread.h>
48#include <VBox/version.h>
49
50#include "tstIEMAImpl.h"
51
52
53/*********************************************************************************************************************************
54* Defined Constants And Macros *
55*********************************************************************************************************************************/
/*
 * ENTRY / ENTRY_EX: brace initializer for one sub-test table entry.
 *
 * The initializer lists, in order: the stringified name, the worker
 * iemAImpl_<name>, NULL, the test array g_aTests_<name>, the address of its
 * element count g_cTests_<name>, the extra value and
 * IEMTARGETCPU_EFL_BEHAVIOR_NATIVE.  ENTRY passes 0 as the extra value.
 * Presumably this initializes a structure produced by TYPEDEF_SUBTEST_TYPE
 * (pszName, pfn, pfnNative, paTests, pcTests, uExtra, idxCpuEflFlavour).
 */
56#define ENTRY(a_Name) ENTRY_EX(a_Name, 0)
57#define ENTRY_EX(a_Name, a_uExtra) \
58 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
59 g_aTests_ ## a_Name, &g_cTests_ ## a_Name, \
60 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
61
/*
 * ENTRY_FIX / ENTRY_FIX_EX: like ENTRY / ENTRY_EX, but when
 * TSTIEMAIMPL_WITH_GENERATOR is defined the initializer additionally carries
 * the element count of g_aFixedTests_<name> and the array itself.  Without
 * the generator the macro simply expands to ENTRY_EX.
 */
62#define ENTRY_FIX(a_Name) ENTRY_FIX_EX(a_Name, 0)
63#ifdef TSTIEMAIMPL_WITH_GENERATOR
64# define ENTRY_FIX_EX(a_Name, a_uExtra) \
65 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
66 g_aTests_ ## a_Name, &g_cTests_ ## a_Name, \
67 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */, \
68 RT_ELEMENTS(g_aFixedTests_ ## a_Name), g_aFixedTests_ ## a_Name }
69#else
70# define ENTRY_FIX_EX(a_Name, a_uExtra) ENTRY_EX(a_Name, a_uExtra)
71#endif
72
/*
 * ENTRY_PFN_CAST / ENTRY_PFN_CAST_EX: same layout as ENTRY_EX, except that
 * the worker iemAImpl_<name> is cast to a_pfnType before being stored.
 * ENTRY_PFN_CAST passes 0 as the extra value.
 */
73#define ENTRY_PFN_CAST(a_Name, a_pfnType) ENTRY_PFN_CAST_EX(a_Name, a_pfnType, 0)
74#define ENTRY_PFN_CAST_EX(a_Name, a_pfnType, a_uExtra) \
75 { RT_XSTR(a_Name), (a_pfnType)iemAImpl_ ## a_Name, NULL, \
76 g_aTests_ ## a_Name, &g_cTests_ ## a_Name, \
77 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
78
/*
 * ENTRY_BIN / ENTRY_EX_BIN: same layout as ENTRY_EX, except that the fifth
 * initializer is the address of g_cbTests_<name> rather than g_cTests_<name>
 * (the "cb" prefix suggests a byte count - TODO confirm against the users).
 * ENTRY_BIN passes 0 as the extra value.
 */
79#define ENTRY_BIN(a_Name) ENTRY_EX_BIN(a_Name, 0)
80#define ENTRY_EX_BIN(a_Name, a_uExtra) \
81 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
82 g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
83 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
84
/*
 * ENTRY_BIN_AVX / ENTRY_BIN_AVX_EX: binary-data entry (uses g_cbTests_<name>)
 * whose worker depends on the build: iemAImpl_<name> normally, or
 * iemAImpl_<name>_fallback when IEM_WITHOUT_ASSEMBLY is defined.
 */
85#define ENTRY_BIN_AVX(a_Name) ENTRY_BIN_AVX_EX(a_Name, 0)
86#ifndef IEM_WITHOUT_ASSEMBLY
87# define ENTRY_BIN_AVX_EX(a_Name, a_uExtra) \
88 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
89 g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
90 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
91#else
92# define ENTRY_BIN_AVX_EX(a_Name, a_uExtra) \
93 { RT_XSTR(a_Name), iemAImpl_ ## a_Name ## _fallback, NULL, \
94 g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
95 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
96#endif
97
/*
 * ENTRY_BIN_SSE_OPT / ENTRY_BIN_SSE_OPT_EX: identical in expansion to the
 * ENTRY_BIN_AVX pair - iemAImpl_<name> is used as the worker unless
 * IEM_WITHOUT_ASSEMBLY is defined, in which case iemAImpl_<name>_fallback is
 * used instead.
 */
98#define ENTRY_BIN_SSE_OPT(a_Name) ENTRY_BIN_SSE_OPT_EX(a_Name, 0)
99#ifndef IEM_WITHOUT_ASSEMBLY
100# define ENTRY_BIN_SSE_OPT_EX(a_Name, a_uExtra) \
101 { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
102 g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
103 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
104#else
105# define ENTRY_BIN_SSE_OPT_EX(a_Name, a_uExtra) \
106 { RT_XSTR(a_Name), iemAImpl_ ## a_Name ## _fallback, NULL, \
107 g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
108 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }
109#endif
110
111
/*
 * ENTRY_INTEL / ENTRY_INTEL_EX: entry for the Intel flavour of a worker.
 *
 * The name gets an "_intel" suffix, the worker is iemAImpl_<name>_intel, the
 * third initializer is the unsuffixed iemAImpl_<name>, the test data comes
 * from g_aTests_<name>_intel / g_cTests_<name>_intel and the flavour is
 * IEMTARGETCPU_EFL_BEHAVIOR_INTEL.  Note that a_fEflUndef is accepted but
 * does not appear in the expansion.
 */
112#define ENTRY_INTEL(a_Name, a_fEflUndef) ENTRY_INTEL_EX(a_Name, a_fEflUndef, 0)
113#define ENTRY_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
114 { RT_XSTR(a_Name) "_intel", iemAImpl_ ## a_Name ## _intel, iemAImpl_ ## a_Name, \
115 g_aTests_ ## a_Name ## _intel, &g_cTests_ ## a_Name ## _intel, \
116 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_INTEL }
117
/*
 * ENTRY_AMD / ENTRY_AMD_EX: entry for the AMD flavour of a worker.
 *
 * Mirrors ENTRY_INTEL_EX with an "_amd" suffix: worker iemAImpl_<name>_amd,
 * third initializer iemAImpl_<name>, test data g_aTests_<name>_amd /
 * g_cTests_<name>_amd and flavour IEMTARGETCPU_EFL_BEHAVIOR_AMD.  Here too
 * a_fEflUndef is accepted but does not appear in the expansion.
 */
118#define ENTRY_AMD(a_Name, a_fEflUndef) ENTRY_AMD_EX(a_Name, a_fEflUndef, 0)
119#define ENTRY_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
120 { RT_XSTR(a_Name) "_amd", iemAImpl_ ## a_Name ## _amd, iemAImpl_ ## a_Name, \
121 g_aTests_ ## a_Name ## _amd, &g_cTests_ ## a_Name ## _amd, \
122 a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_AMD }
123
/*
 * TYPEDEF_SUBTEST_TYPE: declares a sub-test descriptor structure type.
 *
 * a_TypeName is the name of the new structure/typedef, a_TestType the element
 * type of the test data arrays and a_FunctionPtrType the type of the two
 * worker pointers.  The member order (pszName, pfn, pfnNative, paTests,
 * pcTests, uExtra, idxCpuEflFlavour, cFixedTests, paFixedTests) is the order
 * in which the ENTRY* initializers list their values.
 */
124#define TYPEDEF_SUBTEST_TYPE(a_TypeName, a_TestType, a_FunctionPtrType) \
125 typedef struct a_TypeName \
126 { \
127 const char *pszName; \
128 a_FunctionPtrType pfn; \
129 a_FunctionPtrType pfnNative; \
130 a_TestType const *paTests; \
131 uint32_t const *pcTests; \
132 uint32_t uExtra; \
133 uint8_t idxCpuEflFlavour; \
134 uint16_t cFixedTests; \
135 a_TestType const *paFixedTests; \
136 } a_TypeName
137
/*
 * COUNT_VARIATIONS: evaluates to 2 when the sub-test's idxCpuEflFlavour
 * equals g_idxCpuEflFlavour and its pfnNative member is non-NULL, and to 1
 * otherwise.
 */
138#define COUNT_VARIATIONS(a_SubTest) \
139 (1 + ((a_SubTest).idxCpuEflFlavour == g_idxCpuEflFlavour && (a_SubTest).pfnNative) )
140
141
142/*********************************************************************************************************************************
143* Global Variables *
144*********************************************************************************************************************************/
/** The test handle passed to RTTestSub and RTTestSkipped. */
145static RTTEST g_hTest;
/** EFLAGS flavour compared against each sub-test by COUNT_VARIATIONS; defaults to Intel. */
146static uint8_t g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
147#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Tests with an index below this get a zero destination from the Rand*Dst helpers. */
148static uint32_t g_cZeroDstTests = 2;
/** Tests with an index below this get a zero source from the Rand*Src helpers. */
149static uint32_t g_cZeroSrcTests = 4;
150#endif
/* NOTE(review): the pointers below are not referenced in this part of the
   file; presumably they are operand scratch buffers - confirm at the users. */
151static uint8_t *g_pu8, *g_pu8Two;
152static uint16_t *g_pu16, *g_pu16Two;
153static uint32_t *g_pu32, *g_pu32Two, *g_pfEfl;
154static uint64_t *g_pu64, *g_pu64Two;
155static RTUINT128U *g_pu128, *g_pu128Two;
156
/** Pool of formatting buffers handed out round-robin by the formatting helpers. */
157static char g_aszBuf[32][256];
/** Running counter selecting the next g_aszBuf entry (used modulo the entry count). */
158static unsigned g_idxBuf = 0;
159
/** Number of valid entries in g_apszIncludeTestPatterns. */
160static uint32_t g_cIncludeTestPatterns;
/** Number of valid entries in g_apszExcludeTestPatterns. */
161static uint32_t g_cExcludeTestPatterns;
/** Patterns a test name must match to be run; consulted by IsTestEnabled. */
162static const char *g_apszIncludeTestPatterns[64];
/** Patterns that exclude a test name; consulted first by IsTestEnabled. */
163static const char *g_apszExcludeTestPatterns[64];
164
165/** Higher value, means longer benchmarking. */
166static uint64_t g_cPicoSecBenchmark = 0;
167
/** Verbosity level; when above zero, skipped sub-tests are reported as "excluded". */
168static unsigned g_cVerbosity = 0;
169
170
171/*********************************************************************************************************************************
172* Internal Functions *
173*********************************************************************************************************************************/
174static const char *FormatR80(PCRTFLOAT80U pr80);
175static const char *FormatR64(PCRTFLOAT64U pr64);
176static const char *FormatR32(PCRTFLOAT32U pr32);
177
178
179/*
180 * Random helpers.
181 */
182
183static uint32_t RandEFlags(void)
184{
185 uint32_t fEfl = RTRandU32();
186 return (fEfl & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK;
187}
188
189#ifdef TSTIEMAIMPL_WITH_GENERATOR
190
191static uint8_t RandU8(void)
192{
193 return RTRandU32Ex(0, 0xff);
194}
195
196
197static uint16_t RandU16(void)
198{
199 return RTRandU32Ex(0, 0xffff);
200}
201
202
203static uint32_t RandU32(void)
204{
205 return RTRandU32();
206}
207
208#endif
209
210static uint64_t RandU64(void)
211{
212 return RTRandU64();
213}
214
215
216static RTUINT128U RandU128(void)
217{
218 RTUINT128U Ret;
219 Ret.s.Hi = RTRandU64();
220 Ret.s.Lo = RTRandU64();
221 return Ret;
222}
223
224#ifdef TSTIEMAIMPL_WITH_GENERATOR
225
226static uint8_t RandU8Dst(uint32_t iTest)
227{
228 if (iTest < g_cZeroDstTests)
229 return 0;
230 return RandU8();
231}
232
233
234static uint8_t RandU8Src(uint32_t iTest)
235{
236 if (iTest < g_cZeroSrcTests)
237 return 0;
238 return RandU8();
239}
240
241
242static uint16_t RandU16Dst(uint32_t iTest)
243{
244 if (iTest < g_cZeroDstTests)
245 return 0;
246 return RandU16();
247}
248
249
250static uint16_t RandU16Src(uint32_t iTest)
251{
252 if (iTest < g_cZeroSrcTests)
253 return 0;
254 return RandU16();
255}
256
257
258static uint32_t RandU32Dst(uint32_t iTest)
259{
260 if (iTest < g_cZeroDstTests)
261 return 0;
262 return RandU32();
263}
264
265
266static uint32_t RandU32Src(uint32_t iTest)
267{
268 if (iTest < g_cZeroSrcTests)
269 return 0;
270 return RandU32();
271}
272
273
274static uint64_t RandU64Dst(uint32_t iTest)
275{
276 if (iTest < g_cZeroDstTests)
277 return 0;
278 return RandU64();
279}
280
281
282static uint64_t RandU64Src(uint32_t iTest)
283{
284 if (iTest < g_cZeroSrcTests)
285 return 0;
286 return RandU64();
287}
288
289
290/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
291static int16_t RandI16Src2(uint32_t iTest)
292{
293 if (iTest < 18 * 4)
294 switch (iTest % 4)
295 {
296 case 0: return 0;
297 case 1: return INT16_MAX;
298 case 2: return INT16_MIN;
299 case 3: break;
300 }
301 return (int16_t)RandU16();
302}
303
304
305/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
306static int32_t RandI32Src2(uint32_t iTest)
307{
308 if (iTest < 18 * 4)
309 switch (iTest % 4)
310 {
311 case 0: return 0;
312 case 1: return INT32_MAX;
313 case 2: return INT32_MIN;
314 case 3: break;
315 }
316 return (int32_t)RandU32();
317}
318
319
320static int64_t RandI64Src(uint32_t iTest)
321{
322 RT_NOREF(iTest);
323 return (int64_t)RandU64();
324}
325
326
327static uint16_t RandFcw(void)
328{
329 return RandU16() & ~X86_FCW_ZERO_MASK;
330}
331
332
333static uint16_t RandFsw(void)
334{
335 AssertCompile((X86_FSW_C_MASK | X86_FSW_XCPT_ES_MASK | X86_FSW_TOP_MASK | X86_FSW_B) == 0xffff);
336 return RandU16();
337}
338
339
340static uint32_t RandMxcsr(void)
341{
342 return RandU32() & ~X86_MXCSR_ZERO_MASK;
343}
344
345
346static void SafeR80FractionShift(PRTFLOAT80U pr80, uint8_t cShift)
347{
348 if (pr80->sj64.uFraction >= RT_BIT_64(cShift))
349 pr80->sj64.uFraction >>= cShift;
350 else
351 pr80->sj64.uFraction = (cShift % 19) + 1;
352}
353
354
355
/**
 * Generates a random 80-bit floating point value of a selected category.
 *
 * All 80 bits are first filled with random data and then adjusted according
 * to the category, so the sign (and whatever the category does not dictate)
 * stays random.
 *
 * @returns The generated value.
 * @param   bType       Category selector, only the low five bits are used:
 *                      0 zero, 1 pseudo-infinity, 2 infinity, 3 indefinite,
 *                      4-5 denormal, 6-7 pseudo-denormal, 8-9 pseudo-NaN,
 *                      10-11 quiet NaN, 12-13 signalling NaN, 14-15 unnormal,
 *                      16-25 normal (16 uses an all-ones fraction), and 26-31
 *                      leave the random bits as they are.  For 4 thru 15 the
 *                      odd values additionally run the fraction thru
 *                      SafeR80FractionShift.
 * @param   cTarget     Bit width of the conversion target; the assertion at
 *                      the top lists the accepted combinations.  Only
 *                      consulted for the normal categories (16-25).
 * @param   fIntTarget  Whether the conversion target is an integer; selects
 *                      which exponent clamping is used for normal values.
 */
356static RTFLOAT80U RandR80Ex(uint8_t bType, unsigned cTarget = 80, bool fIntTarget = false)
357{
358 Assert(cTarget == (!fIntTarget ? 80U : 16U) || cTarget == 64U || cTarget == 32U || (cTarget == 59U && fIntTarget));
359
360 RTFLOAT80U r80;
361 r80.au64[0] = RandU64();
362 r80.au16[4] = RandU16();
363
364 /*
365 * Adjust the random stuff according to bType.
366 */
367 bType &= 0x1f;
368 if (bType == 0 || bType == 1 || bType == 2 || bType == 3)
369 {
370 /* Zero (0), Pseudo-Infinity (1), Infinity (2), Indefinite (3). We only keep fSign here. */
371 r80.sj64.uExponent = bType == 0 ? 0 : 0x7fff;
372 r80.sj64.uFraction = bType <= 2 ? 0 : RT_BIT_64(62);
373 r80.sj64.fInteger = bType >= 2 ? 1 : 0;
374 AssertMsg(bType != 0 || RTFLOAT80U_IS_ZERO(&r80), ("%s\n", FormatR80(&r80)));
375 AssertMsg(bType != 1 || RTFLOAT80U_IS_PSEUDO_INF(&r80), ("%s\n", FormatR80(&r80)));
376 Assert( bType != 1 || RTFLOAT80U_IS_387_INVALID(&r80));
377 AssertMsg(bType != 2 || RTFLOAT80U_IS_INF(&r80), ("%s\n", FormatR80(&r80)));
378 AssertMsg(bType != 3 || RTFLOAT80U_IS_INDEFINITE(&r80), ("%s\n", FormatR80(&r80)));
379 }
380 else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
381 {
382 /* Denormals (4,5) and Pseudo denormals (6,7) */
 /* The still random exponent is used as the shift count before it is zeroed below. */
383 if (bType & 1)
384 SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
385 else if (r80.sj64.uFraction == 0 && bType < 6)
386 r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
387 r80.sj64.uExponent = 0;
388 r80.sj64.fInteger = bType >= 6;
389 AssertMsg(bType >= 6 || RTFLOAT80U_IS_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
390 AssertMsg(bType < 6 || RTFLOAT80U_IS_PSEUDO_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
391 }
392 else if (bType == 8 || bType == 9)
393 {
394 /* Pseudo NaN. */
395 if (bType & 1)
396 SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
397 else if (r80.sj64.uFraction == 0 && !r80.sj64.fInteger)
398 r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
399 r80.sj64.uExponent = 0x7fff;
 /* The random integer bit is moved into fraction bit 62 before being cleared. */
400 if (r80.sj64.fInteger)
401 r80.sj64.uFraction |= RT_BIT_64(62);
402 else
403 r80.sj64.uFraction &= ~RT_BIT_64(62);
404 r80.sj64.fInteger = 0;
405 AssertMsg(RTFLOAT80U_IS_PSEUDO_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
406 AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
407 Assert(RTFLOAT80U_IS_387_INVALID(&r80));
408 }
409 else if (bType == 10 || bType == 11 || bType == 12 || bType == 13)
410 {
411 /* Quiet and signalling NaNs. */
412 if (bType & 1)
413 SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
414 else if (r80.sj64.uFraction == 0)
415 r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
416 r80.sj64.uExponent = 0x7fff;
417 if (bType < 12)
418 r80.sj64.uFraction |= RT_BIT_64(62); /* quiet */
419 else
420 r80.sj64.uFraction &= ~RT_BIT_64(62); /* signaling */
421 r80.sj64.fInteger = 1;
422 AssertMsg(bType >= 12 || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
423 AssertMsg(bType < 12 || RTFLOAT80U_IS_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
424 AssertMsg(RTFLOAT80U_IS_SIGNALLING_NAN(&r80) || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
425 AssertMsg(RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
426 AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s\n", FormatR80(&r80)));
427 }
428 else if (bType == 14 || bType == 15)
429 {
430 /* Unnormals */
431 if (bType & 1)
432 SafeR80FractionShift(&r80, RandU8() % 62);
433 r80.sj64.fInteger = 0;
434 if (r80.sj64.uExponent == RTFLOAT80U_EXP_MAX || r80.sj64.uExponent == 0)
435 r80.sj64.uExponent = (uint16_t)RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 1);
436 AssertMsg(RTFLOAT80U_IS_UNNORMAL(&r80), ("%s\n", FormatR80(&r80)));
437 Assert(RTFLOAT80U_IS_387_INVALID(&r80));
438 }
439 else if (bType < 26)
440 {
441 /* Make sure we have lots of normalized values. */
442 if (!fIntTarget)
443 {
 /* Exponent window of the 64-bit or 32-bit target format, rebased onto the
    80-bit bias; for any other cTarget it is the full 80-bit range. */
444 const unsigned uMinExp = cTarget == 64 ? RTFLOAT80U_EXP_BIAS - RTFLOAT64U_EXP_BIAS
445 : cTarget == 32 ? RTFLOAT80U_EXP_BIAS - RTFLOAT32U_EXP_BIAS : 0;
446 const unsigned uMaxExp = cTarget == 64 ? uMinExp + RTFLOAT64U_EXP_MAX
447 : cTarget == 32 ? uMinExp + RTFLOAT32U_EXP_MAX : RTFLOAT80U_EXP_MAX;
448 r80.sj64.fInteger = 1;
449 if (r80.sj64.uExponent <= uMinExp)
450 r80.sj64.uExponent = uMinExp + 1;
451 else if (r80.sj64.uExponent >= uMaxExp)
452 r80.sj64.uExponent = uMaxExp - 1;
453
454 if (bType == 16)
455 { /* All 1s is useful to testing rounding. Also try trigger special
456 behaviour by sometimes rounding out of range, while we're at it. */
457 r80.sj64.uFraction = RT_BIT_64(63) - 1;
458 uint8_t bExp = RandU8();
459 if ((bExp & 3) == 0)
460 r80.sj64.uExponent = uMaxExp - 1;
461 else if ((bExp & 3) == 1)
462 r80.sj64.uExponent = uMinExp + 1;
 /* NOTE(review): when cTarget is neither 64 nor 32, uMinExp is 0 and the
    unsigned subtraction below wraps before being stored in the exponent
    field - confirm that this is intended. */
463 else if ((bExp & 3) == 2)
464 r80.sj64.uExponent = uMinExp - (bExp & 15); /* (small numbers are mapped to subnormal values) */
465 }
466 }
467 else
468 {
469 /* integer target: */
470 const unsigned uMinExp = RTFLOAT80U_EXP_BIAS;
471 const unsigned uMaxExp = RTFLOAT80U_EXP_BIAS + cTarget - 2;
472 r80.sj64.fInteger = 1;
473 if (r80.sj64.uExponent < uMinExp)
474 r80.sj64.uExponent = uMinExp;
475 else if (r80.sj64.uExponent > uMaxExp)
476 r80.sj64.uExponent = uMaxExp;
477
478 if (bType == 16)
479 { /* All 1s is useful to testing rounding. Also try trigger special
480 behaviour by sometimes rounding out of range, while we're at it. */
481 r80.sj64.uFraction = RT_BIT_64(63) - 1;
482 uint8_t bExp = RandU8();
483 if ((bExp & 3) == 0)
484 r80.sj64.uExponent = uMaxExp;
485 else if ((bExp & 3) == 1)
 /* NOTE(review): uExponent was clamped to at least RTFLOAT80U_EXP_BIAS
    above, so assuming the bias exceeds cTarget the shift count below wraps
    around to a huge unsigned value - confirm the intended mask. */
486 r80.sj64.uFraction &= ~(RT_BIT_64(cTarget - 1 - r80.sj64.uExponent) - 1); /* no rounding */
487 }
488 }
489
490 AssertMsg(RTFLOAT80U_IS_NORMAL(&r80), ("%s\n", FormatR80(&r80)));
491 }
492 return r80;
493}
494
495
496static RTFLOAT80U RandR80(unsigned cTarget = 80, bool fIntTarget = false)
497{
498 /*
499 * Make it more likely that we get a good selection of special values.
500 */
501 return RandR80Ex(RandU8(), cTarget, fIntTarget);
502
503}
504
505
506static RTFLOAT80U RandR80Src(uint32_t iTest, unsigned cTarget = 80, bool fIntTarget = false)
507{
508 /* Make sure we cover all the basic types first before going for random selection: */
509 if (iTest <= 18)
510 return RandR80Ex(18 - iTest, cTarget, fIntTarget); /* Starting with 3 normals. */
511 return RandR80(cTarget, fIntTarget);
512}
513
514
515/**
516 * Helper for RandR80Src1 and RandR80Src2 that converts bType from a 0..11 range
517 * to a 0..17, covering all basic value types.
518 */
519static uint8_t RandR80Src12RemapType(uint8_t bType)
520{
521 switch (bType)
522 {
523 case 0: return 18; /* normal */
524 case 1: return 16; /* normal extreme rounding */
525 case 2: return 14; /* unnormal */
526 case 3: return 12; /* Signalling NaN */
527 case 4: return 10; /* Quiet NaN */
528 case 5: return 8; /* PseudoNaN */
529 case 6: return 6; /* Pseudo Denormal */
530 case 7: return 4; /* Denormal */
531 case 8: return 3; /* Indefinite */
532 case 9: return 2; /* Infinity */
533 case 10: return 1; /* Pseudo-Infinity */
534 case 11: return 0; /* Zero */
535 default: AssertFailedReturn(18);
536 }
537}
538
539
540/**
541 * This works in tandem with RandR80Src2 to make sure we cover all operand
542 * type mixes first before we venture into regular random testing.
543 *
544 * There are 11 basic variations, when we leave out the five odd ones using
545 * SafeR80FractionShift. Because of the special normalized value targetting at
546 * rounding, we make it an even 12. So 144 combinations for two operands.
547 */
548static RTFLOAT80U RandR80Src1(uint32_t iTest, unsigned cPartnerBits = 80, bool fPartnerInt = false)
549{
550 if (cPartnerBits == 80)
551 {
552 Assert(!fPartnerInt);
553 if (iTest < 12 * 12)
554 return RandR80Ex(RandR80Src12RemapType(iTest / 12));
555 }
556 else if ((cPartnerBits == 64 || cPartnerBits == 32) && !fPartnerInt)
557 {
558 if (iTest < 12 * 10)
559 return RandR80Ex(RandR80Src12RemapType(iTest / 10));
560 }
561 else if (iTest < 18 * 4 && fPartnerInt)
562 return RandR80Ex(iTest / 4);
563 return RandR80();
564}
565
566
567/** Partner to RandR80Src1. */
568static RTFLOAT80U RandR80Src2(uint32_t iTest)
569{
570 if (iTest < 12 * 12)
571 return RandR80Ex(RandR80Src12RemapType(iTest % 12));
572 return RandR80();
573}
574
575
576static void SafeR64FractionShift(PRTFLOAT64U pr64, uint8_t cShift)
577{
578 if (pr64->s64.uFraction >= RT_BIT_64(cShift))
579 pr64->s64.uFraction >>= cShift;
580 else
581 pr64->s64.uFraction = (cShift % 19) + 1;
582}
583
584
585static RTFLOAT64U RandR64Ex(uint8_t bType)
586{
587 RTFLOAT64U r64;
588 r64.u = RandU64();
589
590 /*
591 * Make it more likely that we get a good selection of special values.
592 * On average 6 out of 16 calls should return a special value.
593 */
594 bType &= 0xf;
595 if (bType == 0 || bType == 1)
596 {
597 /* 0 or Infinity. We only keep fSign here. */
598 r64.s.uExponent = bType == 0 ? 0 : 0x7ff;
599 r64.s.uFractionHigh = 0;
600 r64.s.uFractionLow = 0;
601 AssertMsg(bType != 0 || RTFLOAT64U_IS_ZERO(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
602 AssertMsg(bType != 1 || RTFLOAT64U_IS_INF(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
603 }
604 else if (bType == 2 || bType == 3)
605 {
606 /* Subnormals */
607 if (bType == 3)
608 SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
609 else if (r64.s64.uFraction == 0)
610 r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
611 r64.s64.uExponent = 0;
612 AssertMsg(RTFLOAT64U_IS_SUBNORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
613 }
614 else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
615 {
616 /* NaNs */
617 if (bType & 1)
618 SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
619 else if (r64.s64.uFraction == 0)
620 r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
621 r64.s64.uExponent = 0x7ff;
622 if (bType < 6)
623 r64.s64.uFraction |= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* quiet */
624 else
625 r64.s64.uFraction &= ~RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* signalling */
626 AssertMsg(bType >= 6 || RTFLOAT64U_IS_QUIET_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
627 AssertMsg(bType < 6 || RTFLOAT64U_IS_SIGNALLING_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
628 AssertMsg(RTFLOAT64U_IS_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
629 }
630 else if (bType < 12)
631 {
632 /* Make sure we have lots of normalized values. */
633 if (r64.s.uExponent == 0)
634 r64.s.uExponent = 1;
635 else if (r64.s.uExponent == 0x7ff)
636 r64.s.uExponent = 0x7fe;
637 AssertMsg(RTFLOAT64U_IS_NORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
638 }
639 return r64;
640}
641
642
643static RTFLOAT64U RandR64Src(uint32_t iTest)
644{
645 if (iTest < 16)
646 return RandR64Ex(iTest);
647 return RandR64Ex(RandU8());
648}
649
650
651/** Pairing with a 80-bit floating point arg. */
652static RTFLOAT64U RandR64Src2(uint32_t iTest)
653{
654 if (iTest < 12 * 10)
655 return RandR64Ex(9 - iTest % 10); /* start with normal values */
656 return RandR64Ex(RandU8());
657}
658
659
660static void SafeR32FractionShift(PRTFLOAT32U pr32, uint8_t cShift)
661{
662 if (pr32->s.uFraction >= RT_BIT_32(cShift))
663 pr32->s.uFraction >>= cShift;
664 else
665 pr32->s.uFraction = (cShift % 19) + 1;
666}
667
668
669static RTFLOAT32U RandR32Ex(uint8_t bType)
670{
671 RTFLOAT32U r32;
672 r32.u = RandU32();
673
674 /*
675 * Make it more likely that we get a good selection of special values.
676 * On average 6 out of 16 calls should return a special value.
677 */
678 bType &= 0xf;
679 if (bType == 0 || bType == 1)
680 {
681 /* 0 or Infinity. We only keep fSign here. */
682 r32.s.uExponent = bType == 0 ? 0 : 0xff;
683 r32.s.uFraction = 0;
684 AssertMsg(bType != 0 || RTFLOAT32U_IS_ZERO(&r32), ("%s\n", FormatR32(&r32)));
685 AssertMsg(bType != 1 || RTFLOAT32U_IS_INF(&r32), ("%s\n", FormatR32(&r32)));
686 }
687 else if (bType == 2 || bType == 3)
688 {
689 /* Subnormals */
690 if (bType == 3)
691 SafeR32FractionShift(&r32, r32.s.uExponent % 22);
692 else if (r32.s.uFraction == 0)
693 r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
694 r32.s.uExponent = 0;
695 AssertMsg(RTFLOAT32U_IS_SUBNORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
696 }
697 else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
698 {
699 /* NaNs */
700 if (bType & 1)
701 SafeR32FractionShift(&r32, r32.s.uExponent % 22);
702 else if (r32.s.uFraction == 0)
703 r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
704 r32.s.uExponent = 0xff;
705 if (bType < 6)
706 r32.s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* quiet */
707 else
708 r32.s.uFraction &= ~RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* signalling */
709 AssertMsg(bType >= 6 || RTFLOAT32U_IS_QUIET_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
710 AssertMsg(bType < 6 || RTFLOAT32U_IS_SIGNALLING_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
711 AssertMsg(RTFLOAT32U_IS_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
712 }
713 else if (bType < 12)
714 {
715 /* Make sure we have lots of normalized values. */
716 if (r32.s.uExponent == 0)
717 r32.s.uExponent = 1;
718 else if (r32.s.uExponent == 0xff)
719 r32.s.uExponent = 0xfe;
720 AssertMsg(RTFLOAT32U_IS_NORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
721 }
722 return r32;
723}
724
725
726static RTFLOAT32U RandR32Src(uint32_t iTest)
727{
728 if (iTest < 16)
729 return RandR32Ex(iTest);
730 return RandR32Ex(RandU8());
731}
732
733
734/** Pairing with a 80-bit floating point arg. */
735static RTFLOAT32U RandR32Src2(uint32_t iTest)
736{
737 if (iTest < 12 * 10)
738 return RandR32Ex(9 - iTest % 10); /* start with normal values */
739 return RandR32Ex(RandU8());
740}
741
742
743static RTPBCD80U RandD80Src(uint32_t iTest)
744{
745 if (iTest < 3)
746 {
747 RTPBCD80U d80Zero = RTPBCD80U_INIT_ZERO(!(iTest & 1));
748 return d80Zero;
749 }
750 if (iTest < 5)
751 {
752 RTPBCD80U d80Ind = RTPBCD80U_INIT_INDEFINITE();
753 return d80Ind;
754 }
755
756 RTPBCD80U d80;
757 uint8_t b = RandU8();
758 d80.s.fSign = b & 1;
759
760 if ((iTest & 7) >= 6)
761 {
762 /* Illegal */
763 d80.s.uPad = (iTest & 7) == 7 ? b >> 1 : 0;
764 for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
765 d80.s.abPairs[iPair] = RandU8();
766 }
767 else
768 {
769 /* Normal */
770 d80.s.uPad = 0;
771 for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
772 {
773 uint8_t const uLo = (uint8_t)RTRandU32Ex(0, 9);
774 uint8_t const uHi = (uint8_t)RTRandU32Ex(0, 9);
775 d80.s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(uHi, uLo);
776 }
777 }
778 return d80;
779}
780
781
782static const char *GenFormatR80(PCRTFLOAT80U plrd)
783{
784 if (RTFLOAT80U_IS_ZERO(plrd))
785 return plrd->s.fSign ? "RTFLOAT80U_INIT_ZERO(1)" : "RTFLOAT80U_INIT_ZERO(0)";
786 if (RTFLOAT80U_IS_INF(plrd))
787 return plrd->s.fSign ? "RTFLOAT80U_INIT_INF(1)" : "RTFLOAT80U_INIT_INF(0)";
788 if (RTFLOAT80U_IS_INDEFINITE(plrd))
789 return plrd->s.fSign ? "RTFLOAT80U_INIT_IND(1)" : "RTFLOAT80U_INIT_IND(0)";
790 if (RTFLOAT80U_IS_QUIET_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
791 return plrd->s.fSign ? "RTFLOAT80U_INIT_QNAN(1)" : "RTFLOAT80U_INIT_QNAN(0)";
792 if (RTFLOAT80U_IS_SIGNALLING_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
793 return plrd->s.fSign ? "RTFLOAT80U_INIT_SNAN(1)" : "RTFLOAT80U_INIT_SNAN(0)";
794
795 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
796 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT80U_INIT_C(%d,%#RX64,%u)",
797 plrd->s.fSign, plrd->s.uMantissa, plrd->s.uExponent);
798 return pszBuf;
799}
800
801static const char *GenFormatR64(PCRTFLOAT64U prd)
802{
803 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
804 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT64U_INIT_C(%d,%#RX64,%u)",
805 prd->s.fSign, RT_MAKE_U64(prd->s.uFractionLow, prd->s.uFractionHigh), prd->s.uExponent);
806 return pszBuf;
807}
808
809
810static const char *GenFormatR32(PCRTFLOAT32U pr)
811{
812 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
813 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT32U_INIT_C(%d,%#RX32,%u)", pr->s.fSign, pr->s.uFraction, pr->s.uExponent);
814 return pszBuf;
815}
816
817
818static const char *GenFormatD80(PCRTPBCD80U pd80)
819{
820 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
821 size_t off;
822 if (pd80->s.uPad == 0)
823 off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_C(%d", pd80->s.fSign);
824 else
825 off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_EX_C(%#x,%d", pd80->s.uPad, pd80->s.fSign);
826 size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
827 while (iPair-- > 0)
828 off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, ",%d,%d",
829 RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair]),
830 RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair]));
831 pszBuf[off++] = ')';
832 pszBuf[off++] = '\0';
833 return pszBuf;
834}
835
836
837static const char *GenFormatI64(int64_t i64)
838{
839 if (i64 == INT64_MIN) /* This one is problematic */
840 return "INT64_MIN";
841 if (i64 == INT64_MAX)
842 return "INT64_MAX";
843 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
844 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT64_C(%RI64)", i64);
845 return pszBuf;
846}
847
848#if 0 /* unused */
849static const char *GenFormatI64(int64_t const *pi64)
850{
851 return GenFormatI64(*pi64);
852}
853#endif
854
855static const char *GenFormatI32(int32_t i32)
856{
857 if (i32 == INT32_MIN) /* This one is problematic */
858 return "INT32_MIN";
859 if (i32 == INT32_MAX)
860 return "INT32_MAX";
861 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
862 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT32_C(%RI32)", i32);
863 return pszBuf;
864}
865
866
867const char *GenFormatI32(int32_t const *pi32)
868{
869 return GenFormatI32(*pi32);
870}
871
872
873const char *GenFormatI16(int16_t i16)
874{
875 if (i16 == INT16_MIN) /* This one is problematic */
876 return "INT16_MIN";
877 if (i16 == INT16_MAX)
878 return "INT16_MAX";
879 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
880 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT16_C(%RI16)", i16);
881 return pszBuf;
882}
883
884
885const char *GenFormatI16(int16_t const *pi16)
886{
887 return GenFormatI16(*pi16);
888}
889
890
891static void GenerateHeader(PRTSTREAM pOut, const char *pszCpuDesc, const char *pszCpuType)
892{
893 /* We want to tag the generated source code with the revision that produced it. */
894 static char s_szRev[] = "$Revision: 103003 $";
895 const char *pszRev = RTStrStripL(strchr(s_szRev, ':') + 1);
896 size_t cchRev = 0;
897 while (RT_C_IS_DIGIT(pszRev[cchRev]))
898 cchRev++;
899
900 RTStrmPrintf(pOut,
901 "/* $Id: tstIEMAImpl.cpp 103003 2024-01-23 16:19:17Z vboxsync $ */\n"
902 "/** @file\n"
903 " * IEM Assembly Instruction Helper Testcase Data%s%s - r%.*s on %s.\n"
904 " */\n"
905 "\n"
906 "/*\n"
907 " * Copyright (C) 2022-" VBOX_C_YEAR " Oracle and/or its affiliates.\n"
908 " *\n"
909 " * This file is part of VirtualBox base platform packages, as\n"
910 " * available from https://www.virtualbox.org.\n"
911 " *\n"
912 " * This program is free software; you can redistribute it and/or\n"
913 " * modify it under the terms of the GNU General Public License\n"
914 " * as published by the Free Software Foundation, in version 3 of the\n"
915 " * License.\n"
916 " *\n"
917 " * This program is distributed in the hope that it will be useful, but\n"
918 " * WITHOUT ANY WARRANTY; without even the implied warranty of\n"
919 " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n"
920 " * General Public License for more details.\n"
921 " *\n"
922 " * You should have received a copy of the GNU General Public License\n"
923 " * along with this program; if not, see <https://www.gnu.org/licenses>.\n"
924 " *\n"
925 " * SPDX-License-Identifier: GPL-3.0-only\n"
926 " */\n"
927 "\n"
928 "#include \"tstIEMAImpl.h\"\n"
929 "\n"
930 ,
931 pszCpuType ? " " : "", pszCpuType ? pszCpuType : "", cchRev, pszRev, pszCpuDesc);
932}
933
934
935static PRTSTREAM GenerateOpenWithHdr(const char *pszFilename, const char *pszCpuDesc, const char *pszCpuType)
936{
937 PRTSTREAM pOut = NULL;
938 int rc = RTStrmOpen(pszFilename, "w", &pOut);
939 if (RT_SUCCESS(rc))
940 {
941 GenerateHeader(pOut, pszCpuDesc, pszCpuType);
942 return pOut;
943 }
944 RTMsgError("Failed to open %s for writing: %Rrc", pszFilename, rc);
945 return NULL;
946}
947
948
949static RTEXITCODE GenerateFooterAndClose(PRTSTREAM pOut, const char *pszFilename, RTEXITCODE rcExit)
950{
951 RTStrmPrintf(pOut,
952 "\n"
953 "/* end of file */\n");
954 int rc = RTStrmClose(pOut);
955 if (RT_SUCCESS(rc))
956 return rcExit;
957 return RTMsgErrorExitFailure("RTStrmClose failed on %s: %Rrc", pszFilename, rc);
958}
959
960
961static void GenerateArrayStart(PRTSTREAM pOut, const char *pszName, const char *pszType)
962{
963 RTStrmPrintf(pOut, "%s const g_aTests_%s[] =\n{\n", pszType, pszName);
964}
965
966
967static void GenerateArrayEnd(PRTSTREAM pOut, const char *pszName)
968{
969 RTStrmPrintf(pOut,
970 "};\n"
971 "uint32_t const g_cTests_%s = RT_ELEMENTS(g_aTests_%s);\n"
972 "\n",
973 pszName, pszName);
974}
975
976#endif /* TSTIEMAIMPL_WITH_GENERATOR */
977
978
979/*
980 * Test helpers.
981 */
982static bool IsTestEnabled(const char *pszName)
983{
984 /* Process excludes first: */
985 uint32_t i = g_cExcludeTestPatterns;
986 while (i-- > 0)
987 if (RTStrSimplePatternMultiMatch(g_apszExcludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
988 return false;
989
990 /* If no include patterns, everything is included: */
991 i = g_cIncludeTestPatterns;
992 if (!i)
993 return true;
994
995 /* Otherwise only tests in the include patters gets tested: */
996 while (i-- > 0)
997 if (RTStrSimplePatternMultiMatch(g_apszIncludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
998 return true;
999
1000 return false;
1001}
1002
1003
1004static bool SubTestAndCheckIfEnabled(const char *pszName)
1005{
1006 RTTestSub(g_hTest, pszName);
1007 if (IsTestEnabled(pszName))
1008 return true;
1009 RTTestSkipped(g_hTest, g_cVerbosity > 0 ? "excluded" : NULL);
1010 return false;
1011}
1012
1013
1014static const char *EFlagsDiff(uint32_t fActual, uint32_t fExpected)
1015{
1016 if (fActual == fExpected)
1017 return "";
1018
1019 uint32_t const fXor = fActual ^ fExpected;
1020 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1021 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1022
1023 static struct
1024 {
1025 const char *pszName;
1026 uint32_t fFlag;
1027 } const s_aFlags[] =
1028 {
1029#define EFL_ENTRY(a_Flags) { #a_Flags, X86_EFL_ ## a_Flags }
1030 EFL_ENTRY(CF),
1031 EFL_ENTRY(PF),
1032 EFL_ENTRY(AF),
1033 EFL_ENTRY(ZF),
1034 EFL_ENTRY(SF),
1035 EFL_ENTRY(TF),
1036 EFL_ENTRY(IF),
1037 EFL_ENTRY(DF),
1038 EFL_ENTRY(OF),
1039 EFL_ENTRY(IOPL),
1040 EFL_ENTRY(NT),
1041 EFL_ENTRY(RF),
1042 EFL_ENTRY(VM),
1043 EFL_ENTRY(AC),
1044 EFL_ENTRY(VIF),
1045 EFL_ENTRY(VIP),
1046 EFL_ENTRY(ID),
1047 };
1048 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1049 if (s_aFlags[i].fFlag & fXor)
1050 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1051 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1052 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1053 return pszBuf;
1054}
1055
1056
1057static const char *FswDiff(uint16_t fActual, uint16_t fExpected)
1058{
1059 if (fActual == fExpected)
1060 return "";
1061
1062 uint16_t const fXor = fActual ^ fExpected;
1063 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1064 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1065
1066 static struct
1067 {
1068 const char *pszName;
1069 uint32_t fFlag;
1070 } const s_aFlags[] =
1071 {
1072#define FSW_ENTRY(a_Flags) { #a_Flags, X86_FSW_ ## a_Flags }
1073 FSW_ENTRY(IE),
1074 FSW_ENTRY(DE),
1075 FSW_ENTRY(ZE),
1076 FSW_ENTRY(OE),
1077 FSW_ENTRY(UE),
1078 FSW_ENTRY(PE),
1079 FSW_ENTRY(SF),
1080 FSW_ENTRY(ES),
1081 FSW_ENTRY(C0),
1082 FSW_ENTRY(C1),
1083 FSW_ENTRY(C2),
1084 FSW_ENTRY(C3),
1085 FSW_ENTRY(B),
1086 };
1087 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1088 if (s_aFlags[i].fFlag & fXor)
1089 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1090 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1091 if (fXor & X86_FSW_TOP_MASK)
1092 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "/TOP%u!%u",
1093 X86_FSW_TOP_GET(fActual), X86_FSW_TOP_GET(fExpected));
1094#if 0 /* For debugging fprem & fprem1 */
1095 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " - Q=%d (vs %d)",
1096 X86_FSW_CX_TO_QUOTIENT(fActual), X86_FSW_CX_TO_QUOTIENT(fExpected));
1097#endif
1098 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1099 return pszBuf;
1100}
1101
1102
1103static const char *MxcsrDiff(uint32_t fActual, uint32_t fExpected)
1104{
1105 if (fActual == fExpected)
1106 return "";
1107
1108 uint16_t const fXor = fActual ^ fExpected;
1109 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1110 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1111
1112 static struct
1113 {
1114 const char *pszName;
1115 uint32_t fFlag;
1116 } const s_aFlags[] =
1117 {
1118#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1119 MXCSR_ENTRY(IE),
1120 MXCSR_ENTRY(DE),
1121 MXCSR_ENTRY(ZE),
1122 MXCSR_ENTRY(OE),
1123 MXCSR_ENTRY(UE),
1124 MXCSR_ENTRY(PE),
1125
1126 MXCSR_ENTRY(IM),
1127 MXCSR_ENTRY(DM),
1128 MXCSR_ENTRY(ZM),
1129 MXCSR_ENTRY(OM),
1130 MXCSR_ENTRY(UM),
1131 MXCSR_ENTRY(PM),
1132
1133 MXCSR_ENTRY(DAZ),
1134 MXCSR_ENTRY(FZ),
1135#undef MXCSR_ENTRY
1136 };
1137 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1138 if (s_aFlags[i].fFlag & fXor)
1139 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1140 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1141 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1142 return pszBuf;
1143}
1144
1145
1146static const char *FormatFcw(uint16_t fFcw)
1147{
1148 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1149
1150 const char *pszPC = NULL; /* (msc+gcc are too stupid) */
1151 switch (fFcw & X86_FCW_PC_MASK)
1152 {
1153 case X86_FCW_PC_24: pszPC = "PC24"; break;
1154 case X86_FCW_PC_RSVD: pszPC = "PCRSVD!"; break;
1155 case X86_FCW_PC_53: pszPC = "PC53"; break;
1156 case X86_FCW_PC_64: pszPC = "PC64"; break;
1157 }
1158
1159 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1160 switch (fFcw & X86_FCW_RC_MASK)
1161 {
1162 case X86_FCW_RC_NEAREST: pszRC = "NEAR"; break;
1163 case X86_FCW_RC_DOWN: pszRC = "DOWN"; break;
1164 case X86_FCW_RC_UP: pszRC = "UP"; break;
1165 case X86_FCW_RC_ZERO: pszRC = "ZERO"; break;
1166 }
1167 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s %s", pszPC, pszRC);
1168
1169 static struct
1170 {
1171 const char *pszName;
1172 uint32_t fFlag;
1173 } const s_aFlags[] =
1174 {
1175#define FCW_ENTRY(a_Flags) { #a_Flags, X86_FCW_ ## a_Flags }
1176 FCW_ENTRY(IM),
1177 FCW_ENTRY(DM),
1178 FCW_ENTRY(ZM),
1179 FCW_ENTRY(OM),
1180 FCW_ENTRY(UM),
1181 FCW_ENTRY(PM),
1182 { "6M", 64 },
1183 };
1184 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1185 if (fFcw & s_aFlags[i].fFlag)
1186 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1187
1188 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1189 return pszBuf;
1190}
1191
1192
1193static const char *FormatMxcsr(uint32_t fMxcsr)
1194{
1195 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1196
1197 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1198 switch (fMxcsr & X86_MXCSR_RC_MASK)
1199 {
1200 case X86_MXCSR_RC_NEAREST: pszRC = "NEAR"; break;
1201 case X86_MXCSR_RC_DOWN: pszRC = "DOWN"; break;
1202 case X86_MXCSR_RC_UP: pszRC = "UP"; break;
1203 case X86_MXCSR_RC_ZERO: pszRC = "ZERO"; break;
1204 }
1205
1206 const char *pszDAZ = fMxcsr & X86_MXCSR_DAZ ? " DAZ" : "";
1207 const char *pszFZ = fMxcsr & X86_MXCSR_FZ ? " FZ" : "";
1208 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s%s%s", pszRC, pszDAZ, pszFZ);
1209
1210 static struct
1211 {
1212 const char *pszName;
1213 uint32_t fFlag;
1214 } const s_aFlags[] =
1215 {
1216#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1217 MXCSR_ENTRY(IE),
1218 MXCSR_ENTRY(DE),
1219 MXCSR_ENTRY(ZE),
1220 MXCSR_ENTRY(OE),
1221 MXCSR_ENTRY(UE),
1222 MXCSR_ENTRY(PE),
1223
1224 MXCSR_ENTRY(IM),
1225 MXCSR_ENTRY(DM),
1226 MXCSR_ENTRY(ZM),
1227 MXCSR_ENTRY(OM),
1228 MXCSR_ENTRY(UM),
1229 MXCSR_ENTRY(PM),
1230 { "6M", 64 },
1231 };
1232 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1233 if (fMxcsr & s_aFlags[i].fFlag)
1234 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1235
1236 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1237 return pszBuf;
1238}
1239
1240
1241static const char *FormatR80(PCRTFLOAT80U pr80)
1242{
1243 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1244 RTStrFormatR80(pszBuf, sizeof(g_aszBuf[0]), pr80, 0, 0, RTSTR_F_SPECIAL);
1245 return pszBuf;
1246}
1247
1248
1249static const char *FormatR64(PCRTFLOAT64U pr64)
1250{
1251 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1252 RTStrFormatR64(pszBuf, sizeof(g_aszBuf[0]), pr64, 0, 0, RTSTR_F_SPECIAL);
1253 return pszBuf;
1254}
1255
1256
1257static const char *FormatR32(PCRTFLOAT32U pr32)
1258{
1259 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1260 RTStrFormatR32(pszBuf, sizeof(g_aszBuf[0]), pr32, 0, 0, RTSTR_F_SPECIAL);
1261 return pszBuf;
1262}
1263
1264
1265static const char *FormatD80(PCRTPBCD80U pd80)
1266{
1267 /* There is only one indefinite endcoding (same as for 80-bit
1268 floating point), so get it out of the way first: */
1269 if (RTPBCD80U_IS_INDEFINITE(pd80))
1270 return "Ind";
1271
1272 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1273 size_t off = 0;
1274 pszBuf[off++] = pd80->s.fSign ? '-' : '+';
1275 unsigned cBadDigits = 0;
1276 size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
1277 while (iPair-- > 0)
1278 {
1279 static const char s_szDigits[] = "0123456789abcdef";
1280 static const uint8_t s_bBadDigits[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
1281 pszBuf[off++] = s_szDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])];
1282 pszBuf[off++] = s_szDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1283 cBadDigits += s_bBadDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])]
1284 + s_bBadDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1285 }
1286 if (cBadDigits || pd80->s.uPad != 0)
1287 off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, "[%u,%#x]", cBadDigits, pd80->s.uPad);
1288 pszBuf[off] = '\0';
1289 return pszBuf;
1290}
1291
1292
#if 0
/**
 * Formats a signed 64-bit integer as hex into the next slot of the rotating
 * g_aszBuf scratch buffers.  (Currently compiled out.)
 *
 * @returns Pointer to the formatted string.
 * @param   piVal   Pointer to the value to format.
 */
static const char *FormatI64(int64_t const *piVal)
{
    size_t const iSlot = g_idxBuf++ % RT_ELEMENTS(g_aszBuf);
    RTStrFormatU64(g_aszBuf[iSlot], sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
    return g_aszBuf[iSlot];
}
#endif
1301
1302
1303static const char *FormatI32(int32_t const *piVal)
1304{
1305 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1306 RTStrFormatU32(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1307 return pszBuf;
1308}
1309
1310
1311static const char *FormatI16(int16_t const *piVal)
1312{
1313 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1314 RTStrFormatU16(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1315 return pszBuf;
1316}
1317
1318
1319static const char *FormatU128(PCRTUINT128U puVal)
1320{
1321 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1322 RTStrFormatU128(pszBuf, sizeof(g_aszBuf[0]), puVal, 16, 0, 0, RTSTR_F_SPECIAL);
1323 return pszBuf;
1324}
1325
1326
1327/*
1328 * Binary operations.
1329 */
1330TYPEDEF_SUBTEST_TYPE(BINU8_T, BINU8_TEST_T, PFNIEMAIMPLBINU8);
1331TYPEDEF_SUBTEST_TYPE(BINU16_T, BINU16_TEST_T, PFNIEMAIMPLBINU16);
1332TYPEDEF_SUBTEST_TYPE(BINU32_T, BINU32_TEST_T, PFNIEMAIMPLBINU32);
1333TYPEDEF_SUBTEST_TYPE(BINU64_T, BINU64_TEST_T, PFNIEMAIMPLBINU64);
1334
/*
 * Test data generator for the binary operations.
 *
 * With TSTIEMAIMPL_WITH_GENERATOR defined, GEN_BINARY_TESTS expands to a
 * BinU<bits>Generate() function.  For every entry in g_aBinU<bits> it writes
 * a g_aTests_<name> array consisting of cTests randomized entries followed by
 * the entry's fixed tests, with the expected output obtained by calling the
 * worker.  Without the generator the macro expands to nothing.
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
static void BinU ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aBinU ## a_cBits); iFn++) \
    { \
        /* Use the native worker for producing the expected results when there is one. */ \
        PFNIEMAIMPLBINU ## a_cBits const pfn = g_aBinU ## a_cBits[iFn].pfnNative \
                                             ? g_aBinU ## a_cBits[iFn].pfnNative : g_aBinU ## a_cBits[iFn].pfn; \
        /* Entries tied to an EFLAGS flavour are only generated for the current \
           flavour and go to the CPU specific output stream. */ \
        PRTSTREAM pOutFn = pOut; \
        if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
        { \
            if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
                continue; \
            pOutFn = pOutCpu; \
        } \
        \
        GenerateArrayStart(pOutFn, g_aBinU ## a_cBits[iFn].pszName, #a_TestType); \
        /* Randomized tests. */ \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = RandU ## a_cBits ## Src(iTest); \
            if (g_aBinU ## a_cBits[iFn].uExtra) \
                Test.uSrcIn &= a_cBits - 1; /* Restrict bit index according to operand width */ \
            Test.uMisc     = 0; \
            pfn(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut); \
            RTStrmPrintf(pOutFn, "    { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %#x }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
        } \
        /* Fixed tests; an input EFLAGS value of UINT32_MAX requests random flags. */ \
        for (uint32_t iTest = 0; iTest < g_aBinU ## a_cBits[iFn].cFixedTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].fEflIn == UINT32_MAX ? RandEFlags() \
                           : g_aBinU ## a_cBits[iFn].paFixedTests[iTest].fEflIn; \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].uDstIn; \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].uSrcIn; \
            Test.uMisc     = g_aBinU ## a_cBits[iFn].paFixedTests[iTest].uMisc; \
            pfn(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut); \
            RTStrmPrintf(pOutFn, "    { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %#x }, /* fixed #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOutFn, g_aBinU ## a_cBits[iFn].pszName); \
    } \
}
#else
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType)
#endif
1387
1388
1389/** Based on a quick probe run, guess how long to run the benchmark. */
1390static uint32_t EstimateIterations(uint32_t cProbeIterations, uint64_t cNsProbe)
1391{
1392 uint64_t cPicoSecPerIteration = cNsProbe * 1000 / cProbeIterations;
1393 uint64_t cIterations = g_cPicoSecBenchmark / cPicoSecPerIteration;
1394 if (cIterations > _2G)
1395 return _2G;
1396 if (cIterations < _4K)
1397 return _4K;
1398 return RT_ALIGN_32((uint32_t)cIterations, _4K);
1399}
1400
1401
/*
 * Instantiates the generator (if enabled), benchmark and test functions for
 * one operand width of the binary operations.
 *
 * BinU<bits>Bench() times calls to a worker using the inputs of a single test
 * entry and returns the elapsed nanoseconds.  The loop body contains four
 * calls, which is why the iteration count is divided by four first.
 *
 * BinU<bits>Test() runs every enabled sub-test in a_aSubTests over its test
 * array, once per variation (the count comes from COUNT_VARIATIONS; the
 * worker is switched to pfnNative after the first pass).  Each entry is first
 * checked using local variables; if that passes the call is repeated via the
 * g_pu<bits> and g_pfEfl pointers.  When benchmarking is enabled and the
 * sub-test has no errors, the worker is benchmarked using the middle test
 * entry.
 */
#define TEST_BINARY_OPS(a_cBits, a_uType, a_Fmt, a_TestType, a_aSubTests) \
GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
\
static uint64_t BinU ## a_cBits ## Bench(uint32_t cIterations, PFNIEMAIMPLBINU ## a_cBits pfn, a_TestType const *pEntry) \
{ \
    uint32_t const fEflIn = pEntry->fEflIn; \
    a_uType const  uDstIn = pEntry->uDstIn; \
    a_uType const  uSrcIn = pEntry->uSrcIn; \
    cIterations /= 4; /* four calls per loop iteration below */ \
    RTThreadYield(); \
    uint64_t const nsStart = RTTimeNanoTS(); \
    for (uint32_t i = 0; i < cIterations; i++) \
    { \
        uint32_t fBenchEfl = fEflIn; \
        a_uType  uBenchDst = uDstIn; \
        pfn(&uBenchDst, uSrcIn, &fBenchEfl); \
        \
        fBenchEfl = fEflIn; \
        uBenchDst = uDstIn; \
        pfn(&uBenchDst, uSrcIn, &fBenchEfl); \
        \
        fBenchEfl = fEflIn; \
        uBenchDst = uDstIn; \
        pfn(&uBenchDst, uSrcIn, &fBenchEfl); \
        \
        fBenchEfl = fEflIn; \
        uBenchDst = uDstIn; \
        pfn(&uBenchDst, uSrcIn, &fBenchEfl); \
    } \
    return RTTimeNanoTS() - nsStart; \
} \
\
static void BinU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const           cTests  = *a_aSubTests[iFn].pcTests; \
        PFNIEMAIMPLBINU ## a_cBits pfn   = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) { RTTestSkipped(g_hTest, "no tests"); continue; } \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                /* First pass: operate on local variables and report mismatches in detail. */ \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_uType  uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uSrcIn, &fEfl); \
                if (   uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut ) \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s - %s\n", \
                                 iTest, !iVar ? "" : "/n", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), \
                                 uDst == paTests[iTest].uDstOut ? "eflags" : fEfl == paTests[iTest].fEflOut ? "dst" : "both"); \
                else \
                { \
                     /* Second pass: same call, but through the global operand and flag pointers. */ \
                     *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                     *g_pfEfl         = paTests[iTest].fEflIn; \
                     pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, g_pfEfl); \
                     RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                     RTTEST_CHECK(g_hTest, *g_pfEfl         == paTests[iTest].fEflOut); \
                } \
            } \
            \
            /* Benchmark if all succeeded. */ \
            if (g_cPicoSecBenchmark && RTTestSubErrorCount(g_hTest) == 0) \
            { \
                uint32_t const iTest       = cTests / 2; \
                uint32_t const cIterations = EstimateIterations(_64K, BinU ## a_cBits ## Bench(_64K, pfn, &paTests[iTest])); \
                uint64_t const cNsRealRun  = BinU ## a_cBits ## Bench(cIterations, pfn, &paTests[iTest]); \
                RTTestValueF(g_hTest, cNsRealRun * 1000 / cIterations, RTTESTUNIT_PS_PER_CALL, \
                             "%s%s", a_aSubTests[iFn].pszName, iVar ? "-native" : ""); \
            } \
            \
            /* Next variation is native. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
1483
1484
1485/*
1486 * 8-bit binary operations.
1487 */
1488static const BINU8_T g_aBinU8[] =
1489{
1490 ENTRY(add_u8),
1491 ENTRY(add_u8_locked),
1492 ENTRY(adc_u8),
1493 ENTRY(adc_u8_locked),
1494 ENTRY(sub_u8),
1495 ENTRY(sub_u8_locked),
1496 ENTRY(sbb_u8),
1497 ENTRY(sbb_u8_locked),
1498 ENTRY(or_u8),
1499 ENTRY(or_u8_locked),
1500 ENTRY(xor_u8),
1501 ENTRY(xor_u8_locked),
1502 ENTRY(and_u8),
1503 ENTRY(and_u8_locked),
1504 ENTRY_PFN_CAST(cmp_u8, PFNIEMAIMPLBINU8),
1505 ENTRY_PFN_CAST(test_u8, PFNIEMAIMPLBINU8),
1506};
1507TEST_BINARY_OPS(8, uint8_t, "%#04x", BINU8_TEST_T, g_aBinU8)
1508
1509
1510/*
1511 * 16-bit binary operations.
1512 */
1513#ifdef TSTIEMAIMPL_WITH_GENERATOR
1514static const BINU16_TEST_T g_aFixedTests_add_u16[] =
1515{
1516 /* efl in, efl out, uDstIn, uDstOut, uSrc, uExtra */
1517 { UINT32_MAX, 0, 1, 0, UINT16_MAX, 0 },
1518};
1519#endif
1520static const BINU16_T g_aBinU16[] =
1521{
1522 ENTRY_FIX(add_u16),
1523 ENTRY(add_u16_locked),
1524 ENTRY(adc_u16),
1525 ENTRY(adc_u16_locked),
1526 ENTRY(sub_u16),
1527 ENTRY(sub_u16_locked),
1528 ENTRY(sbb_u16),
1529 ENTRY(sbb_u16_locked),
1530 ENTRY(or_u16),
1531 ENTRY(or_u16_locked),
1532 ENTRY(xor_u16),
1533 ENTRY(xor_u16_locked),
1534 ENTRY(and_u16),
1535 ENTRY(and_u16_locked),
1536 ENTRY_PFN_CAST(cmp_u16, PFNIEMAIMPLBINU16),
1537 ENTRY_PFN_CAST(test_u16, PFNIEMAIMPLBINU16),
1538 ENTRY_PFN_CAST_EX(bt_u16, PFNIEMAIMPLBINU16, 1),
1539 ENTRY_EX(btc_u16, 1),
1540 ENTRY_EX(btc_u16_locked, 1),
1541 ENTRY_EX(btr_u16, 1),
1542 ENTRY_EX(btr_u16_locked, 1),
1543 ENTRY_EX(bts_u16, 1),
1544 ENTRY_EX(bts_u16_locked, 1),
1545 ENTRY_AMD( bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1546 ENTRY_INTEL(bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1547 ENTRY_AMD( bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1548 ENTRY_INTEL(bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1549 ENTRY_AMD( imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1550 ENTRY_INTEL(imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1551 ENTRY(arpl),
1552};
1553TEST_BINARY_OPS(16, uint16_t, "%#06x", BINU16_TEST_T, g_aBinU16)
1554
1555
1556/*
1557 * 32-bit binary operations.
1558 */
1559#ifdef TSTIEMAIMPL_WITH_GENERATOR
1560static const BINU32_TEST_T g_aFixedTests_add_u32[] =
1561{
1562 /* efl in, efl out, uDstIn, uDstOut, uSrc, uExtra */
1563 { UINT32_MAX, 0, 1, 0, UINT32_MAX, 0 },
1564};
1565#endif
1566static const BINU32_T g_aBinU32[] =
1567{
1568 ENTRY_FIX(add_u32),
1569 ENTRY(add_u32_locked),
1570 ENTRY(adc_u32),
1571 ENTRY(adc_u32_locked),
1572 ENTRY(sub_u32),
1573 ENTRY(sub_u32_locked),
1574 ENTRY(sbb_u32),
1575 ENTRY(sbb_u32_locked),
1576 ENTRY(or_u32),
1577 ENTRY(or_u32_locked),
1578 ENTRY(xor_u32),
1579 ENTRY(xor_u32_locked),
1580 ENTRY(and_u32),
1581 ENTRY(and_u32_locked),
1582 ENTRY_PFN_CAST(cmp_u32, PFNIEMAIMPLBINU32),
1583 ENTRY_PFN_CAST(test_u32, PFNIEMAIMPLBINU32),
1584 ENTRY_PFN_CAST_EX(bt_u32, PFNIEMAIMPLBINU32, 1),
1585 ENTRY_EX(btc_u32, 1),
1586 ENTRY_EX(btc_u32_locked, 1),
1587 ENTRY_EX(btr_u32, 1),
1588 ENTRY_EX(btr_u32_locked, 1),
1589 ENTRY_EX(bts_u32, 1),
1590 ENTRY_EX(bts_u32_locked, 1),
1591 ENTRY_AMD( bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1592 ENTRY_INTEL(bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1593 ENTRY_AMD( bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1594 ENTRY_INTEL(bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1595 ENTRY_AMD( imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1596 ENTRY_INTEL(imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1597 ENTRY(adcx_u32),
1598 ENTRY(adox_u32),
1599};
1600TEST_BINARY_OPS(32, uint32_t, "%#010RX32", BINU32_TEST_T, g_aBinU32)
1601
1602
1603/*
1604 * 64-bit binary operations.
1605 */
1606#ifdef TSTIEMAIMPL_WITH_GENERATOR
1607static const BINU64_TEST_T g_aFixedTests_add_u64[] =
1608{
1609 /* efl in, efl out, uDstIn, uDstOut, uSrc, uExtra */
1610 { UINT32_MAX, 0, 1, 0, UINT64_MAX, 0 },
1611};
1612#endif
1613static const BINU64_T g_aBinU64[] =
1614{
1615 ENTRY_FIX(add_u64),
1616 ENTRY(add_u64_locked),
1617 ENTRY(adc_u64),
1618 ENTRY(adc_u64_locked),
1619 ENTRY(sub_u64),
1620 ENTRY(sub_u64_locked),
1621 ENTRY(sbb_u64),
1622 ENTRY(sbb_u64_locked),
1623 ENTRY(or_u64),
1624 ENTRY(or_u64_locked),
1625 ENTRY(xor_u64),
1626 ENTRY(xor_u64_locked),
1627 ENTRY(and_u64),
1628 ENTRY(and_u64_locked),
1629 ENTRY_PFN_CAST(cmp_u64, PFNIEMAIMPLBINU64),
1630 ENTRY_PFN_CAST(test_u64, PFNIEMAIMPLBINU64),
1631 ENTRY_PFN_CAST_EX(bt_u64, PFNIEMAIMPLBINU64, 1),
1632 ENTRY_EX(btc_u64, 1),
1633 ENTRY_EX(btc_u64_locked, 1),
1634 ENTRY_EX(btr_u64, 1),
1635 ENTRY_EX(btr_u64_locked, 1),
1636 ENTRY_EX(bts_u64, 1),
1637 ENTRY_EX(bts_u64_locked, 1),
1638 ENTRY_AMD( bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1639 ENTRY_INTEL(bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1640 ENTRY_AMD( bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1641 ENTRY_INTEL(bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1642 ENTRY_AMD( imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1643 ENTRY_INTEL(imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1644 ENTRY(adcx_u64),
1645 ENTRY(adox_u64),
1646};
1647TEST_BINARY_OPS(64, uint64_t, "%#018RX64", BINU64_TEST_T, g_aBinU64)
1648
1649
1650/*
1651 * XCHG
1652 */
1653static void XchgTest(void)
1654{
1655 if (!SubTestAndCheckIfEnabled("xchg"))
1656 return;
1657 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU8, (uint8_t *pu8Mem, uint8_t *pu8Reg));
1658 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU16,(uint16_t *pu16Mem, uint16_t *pu16Reg));
1659 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU32,(uint32_t *pu32Mem, uint32_t *pu32Reg));
1660 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU64,(uint64_t *pu64Mem, uint64_t *pu64Reg));
1661
1662 static struct
1663 {
1664 uint8_t cb; uint64_t fMask;
1665 union
1666 {
1667 uintptr_t pfn;
1668 FNIEMAIMPLXCHGU8 *pfnU8;
1669 FNIEMAIMPLXCHGU16 *pfnU16;
1670 FNIEMAIMPLXCHGU32 *pfnU32;
1671 FNIEMAIMPLXCHGU64 *pfnU64;
1672 } u;
1673 }
1674 s_aXchgWorkers[] =
1675 {
1676 { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_locked } },
1677 { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_locked } },
1678 { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_locked } },
1679 { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_locked } },
1680 { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_unlocked } },
1681 { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_unlocked } },
1682 { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_unlocked } },
1683 { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_unlocked } },
1684 };
1685 for (size_t i = 0; i < RT_ELEMENTS(s_aXchgWorkers); i++)
1686 {
1687 RTUINT64U uIn1, uIn2, uMem, uDst;
1688 uMem.u = uIn1.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
1689 uDst.u = uIn2.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
1690 if (uIn1.u == uIn2.u)
1691 uDst.u = uIn2.u = ~uIn2.u;
1692
1693 switch (s_aXchgWorkers[i].cb)
1694 {
1695 case 1:
1696 s_aXchgWorkers[i].u.pfnU8(g_pu8, g_pu8Two);
1697 s_aXchgWorkers[i].u.pfnU8(&uMem.au8[0], &uDst.au8[0]);
1698 break;
1699 case 2:
1700 s_aXchgWorkers[i].u.pfnU16(g_pu16, g_pu16Two);
1701 s_aXchgWorkers[i].u.pfnU16(&uMem.Words.w0, &uDst.Words.w0);
1702 break;
1703 case 4:
1704 s_aXchgWorkers[i].u.pfnU32(g_pu32, g_pu32Two);
1705 s_aXchgWorkers[i].u.pfnU32(&uMem.DWords.dw0, &uDst.DWords.dw0);
1706 break;
1707 case 8:
1708 s_aXchgWorkers[i].u.pfnU64(g_pu64, g_pu64Two);
1709 s_aXchgWorkers[i].u.pfnU64(&uMem.u, &uDst.u);
1710 break;
1711 default: RTTestFailed(g_hTest, "%d\n", s_aXchgWorkers[i].cb); break;
1712 }
1713
1714 if (uMem.u != uIn2.u || uDst.u != uIn1.u)
1715 RTTestFailed(g_hTest, "i=%u: %#RX64, %#RX64 -> %#RX64, %#RX64\n", i, uIn1.u, uIn2.u, uMem.u, uDst.u);
1716 }
1717}
1718
1719
1720/*
1721 * XADD
1722 */
1723static void XaddTest(void)
1724{
1725#define TEST_XADD(a_cBits, a_Type, a_Fmt) do { \
1726 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXADDU ## a_cBits, (a_Type *, a_Type *, uint32_t *)); \
1727 static struct \
1728 { \
1729 const char *pszName; \
1730 FNIEMAIMPLXADDU ## a_cBits *pfn; \
1731 BINU ## a_cBits ## _TEST_T const *paTests; \
1732 uint32_t const *pcTests; \
1733 } const s_aFuncs[] = \
1734 { \
1735 { "xadd_u" # a_cBits, iemAImpl_xadd_u ## a_cBits, \
1736 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1737 { "xadd_u" # a_cBits "8_locked", iemAImpl_xadd_u ## a_cBits ## _locked, \
1738 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1739 }; \
1740 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1741 { \
1742 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
1743 uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1744 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1745 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1746 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1747 { \
1748 uint32_t fEfl = paTests[iTest].fEflIn; \
1749 a_Type uSrc = paTests[iTest].uSrcIn; \
1750 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1751 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uSrc, &fEfl); \
1752 if ( fEfl != paTests[iTest].fEflOut \
1753 || *g_pu ## a_cBits != paTests[iTest].uDstOut \
1754 || uSrc != paTests[iTest].uDstIn) \
1755 RTTestFailed(g_hTest, "%s/#%u: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt " src=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1756 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
1757 fEfl, *g_pu ## a_cBits, uSrc, paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].uDstIn, \
1758 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1759 } \
1760 } \
1761 } while(0)
1762 TEST_XADD(8, uint8_t, "%#04x");
1763 TEST_XADD(16, uint16_t, "%#06x");
1764 TEST_XADD(32, uint32_t, "%#010RX32");
1765 TEST_XADD(64, uint64_t, "%#010RX64");
1766}
1767
1768
1769/*
1770 * CMPXCHG
1771 */
1772
1773static void CmpXchgTest(void)
1774{
1775#define TEST_CMPXCHG(a_cBits, a_Type, a_Fmt) do {\
1776 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHGU ## a_cBits, (a_Type *, a_Type *, a_Type, uint32_t *)); \
1777 static struct \
1778 { \
1779 const char *pszName; \
1780 FNIEMAIMPLCMPXCHGU ## a_cBits *pfn; \
1781 PFNIEMAIMPLBINU ## a_cBits pfnSub; \
1782 BINU ## a_cBits ## _TEST_T const *paTests; \
1783 uint32_t const *pcTests; \
1784 } const s_aFuncs[] = \
1785 { \
1786 { "cmpxchg_u" # a_cBits, iemAImpl_cmpxchg_u ## a_cBits, iemAImpl_sub_u ## a_cBits, \
1787 g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
1788 { "cmpxchg_u" # a_cBits "_locked", iemAImpl_cmpxchg_u ## a_cBits ## _locked, iemAImpl_sub_u ## a_cBits, \
1789 g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
1790 }; \
1791 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1792 { \
1793 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
1794 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1795 uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1796 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1797 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1798 { \
1799 /* as is (99% likely to be negative). */ \
1800 uint32_t fEfl = paTests[iTest].fEflIn; \
1801 a_Type const uNew = paTests[iTest].uSrcIn + 0x42; \
1802 a_Type uA = paTests[iTest].uDstIn; \
1803 *g_pu ## a_cBits = paTests[iTest].uSrcIn; \
1804 a_Type const uExpect = uA != paTests[iTest].uSrcIn ? paTests[iTest].uSrcIn : uNew; \
1805 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
1806 if ( fEfl != paTests[iTest].fEflOut \
1807 || *g_pu ## a_cBits != uExpect \
1808 || uA != paTests[iTest].uSrcIn) \
1809 RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1810 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uSrcIn, paTests[iTest].uDstIn, \
1811 uNew, fEfl, *g_pu ## a_cBits, uA, paTests[iTest].fEflOut, uExpect, paTests[iTest].uSrcIn, \
1812 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1813 /* positive */ \
1814 uint32_t fEflExpect = paTests[iTest].fEflIn; \
1815 uA = paTests[iTest].uDstIn; \
1816 s_aFuncs[iFn].pfnSub(&uA, uA, &fEflExpect); \
1817 fEfl = paTests[iTest].fEflIn; \
1818 uA = paTests[iTest].uDstIn; \
1819 *g_pu ## a_cBits = uA; \
1820 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
1821 if ( fEfl != fEflExpect \
1822 || *g_pu ## a_cBits != uNew \
1823 || uA != paTests[iTest].uDstIn) \
1824 RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1825 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uDstIn, \
1826 uNew, fEfl, *g_pu ## a_cBits, uA, fEflExpect, uNew, paTests[iTest].uDstIn, \
1827 EFlagsDiff(fEfl, fEflExpect)); \
1828 } \
1829 } \
1830 } while(0)
1831 TEST_CMPXCHG(8, uint8_t, "%#04RX8");
1832 TEST_CMPXCHG(16, uint16_t, "%#06x");
1833 TEST_CMPXCHG(32, uint32_t, "%#010RX32");
1834#if ARCH_BITS != 32 /* calling convension issue, skipping as it's an unsupported host */
1835 TEST_CMPXCHG(64, uint64_t, "%#010RX64");
1836#endif
1837}
1838
1839static void CmpXchg8bTest(void)
1840{
1841 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG8B,(uint64_t *, PRTUINT64U, PRTUINT64U, uint32_t *));
1842 static struct
1843 {
1844 const char *pszName;
1845 FNIEMAIMPLCMPXCHG8B *pfn;
1846 } const s_aFuncs[] =
1847 {
1848 { "cmpxchg8b", iemAImpl_cmpxchg8b },
1849 { "cmpxchg8b_locked", iemAImpl_cmpxchg8b_locked },
1850 };
1851 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
1852 {
1853 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
1854 continue;
1855 for (uint32_t iTest = 0; iTest < 4; iTest += 2)
1856 {
1857 uint64_t const uOldValue = RandU64();
1858 uint64_t const uNewValue = RandU64();
1859
1860 /* positive test. */
1861 RTUINT64U uA, uB;
1862 uB.u = uNewValue;
1863 uA.u = uOldValue;
1864 *g_pu64 = uOldValue;
1865 uint32_t fEflIn = RandEFlags();
1866 uint32_t fEfl = fEflIn;
1867 s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
1868 if ( fEfl != (fEflIn | X86_EFL_ZF)
1869 || *g_pu64 != uNewValue
1870 || uA.u != uOldValue)
1871 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
1872 iTest, fEflIn, uOldValue, uOldValue, uNewValue,
1873 fEfl, *g_pu64, uA.u,
1874 (fEflIn | X86_EFL_ZF), uNewValue, uOldValue, EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
1875 RTTEST_CHECK(g_hTest, uB.u == uNewValue);
1876
1877 /* negative */
1878 uint64_t const uExpect = ~uOldValue;
1879 *g_pu64 = uExpect;
1880 uA.u = uOldValue;
1881 uB.u = uNewValue;
1882 fEfl = fEflIn = RandEFlags();
1883 s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
1884 if ( fEfl != (fEflIn & ~X86_EFL_ZF)
1885 || *g_pu64 != uExpect
1886 || uA.u != uExpect)
1887 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
1888 iTest + 1, fEflIn, uExpect, uOldValue, uNewValue,
1889 fEfl, *g_pu64, uA.u,
1890 (fEflIn & ~X86_EFL_ZF), uExpect, uExpect, EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
1891 RTTEST_CHECK(g_hTest, uB.u == uNewValue);
1892 }
1893 }
1894}
1895
1896static void CmpXchg16bTest(void)
1897{
1898 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG16B,(PRTUINT128U, PRTUINT128U, PRTUINT128U, uint32_t *));
1899 static struct
1900 {
1901 const char *pszName;
1902 FNIEMAIMPLCMPXCHG16B *pfn;
1903 } const s_aFuncs[] =
1904 {
1905 { "cmpxchg16b", iemAImpl_cmpxchg16b },
1906 { "cmpxchg16b_locked", iemAImpl_cmpxchg16b_locked },
1907#if !defined(RT_ARCH_ARM64)
1908 { "cmpxchg16b_fallback", iemAImpl_cmpxchg16b_fallback },
1909#endif
1910 };
1911 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
1912 {
1913 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
1914 continue;
1915#if !defined(IEM_WITHOUT_ASSEMBLY) && defined(RT_ARCH_AMD64)
1916 if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_CX16))
1917 {
1918 RTTestSkipped(g_hTest, "no hardware cmpxchg16b");
1919 continue;
1920 }
1921#endif
1922 for (uint32_t iTest = 0; iTest < 4; iTest += 2)
1923 {
1924 RTUINT128U const uOldValue = RandU128();
1925 RTUINT128U const uNewValue = RandU128();
1926
1927 /* positive test. */
1928 RTUINT128U uA, uB;
1929 uB = uNewValue;
1930 uA = uOldValue;
1931 *g_pu128 = uOldValue;
1932 uint32_t fEflIn = RandEFlags();
1933 uint32_t fEfl = fEflIn;
1934 s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
1935 if ( fEfl != (fEflIn | X86_EFL_ZF)
1936 || g_pu128->s.Lo != uNewValue.s.Lo
1937 || g_pu128->s.Hi != uNewValue.s.Hi
1938 || uA.s.Lo != uOldValue.s.Lo
1939 || uA.s.Hi != uOldValue.s.Hi)
1940 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
1941 " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
1942 " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
1943 iTest, fEflIn, uOldValue.s.Hi, uOldValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
1944 fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
1945 (fEflIn | X86_EFL_ZF), uNewValue.s.Hi, uNewValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo,
1946 EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
1947 RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
1948
1949 /* negative */
1950 RTUINT128U const uExpect = RTUINT128_INIT(~uOldValue.s.Hi, ~uOldValue.s.Lo);
1951 *g_pu128 = uExpect;
1952 uA = uOldValue;
1953 uB = uNewValue;
1954 fEfl = fEflIn = RandEFlags();
1955 s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
1956 if ( fEfl != (fEflIn & ~X86_EFL_ZF)
1957 || g_pu128->s.Lo != uExpect.s.Lo
1958 || g_pu128->s.Hi != uExpect.s.Hi
1959 || uA.s.Lo != uExpect.s.Lo
1960 || uA.s.Hi != uExpect.s.Hi)
1961 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
1962 " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
1963 " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
1964 iTest + 1, fEflIn, uExpect.s.Hi, uExpect.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
1965 fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
1966 (fEflIn & ~X86_EFL_ZF), uExpect.s.Hi, uExpect.s.Lo, uExpect.s.Hi, uExpect.s.Lo,
1967 EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
1968 RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
1969 }
1970 }
1971}
1972
1973
1974/*
1975 * Double shifts.
1976 *
1977 * Note! We use BINUxx_TEST_T with the shift value in the uMisc field.
1978 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/*
 * Defines ShiftDblU<a_cBits>Generate(pOut, cTests), which for every entry of
 * a_aSubTests matching the native or currently selected EFLAGS flavour writes
 * cTests random test vectors to pOut as a_TestType initializers.
 *
 * The expected outputs are produced by calling the entry's pfnNative worker.
 * The shift count is stored in uMisc and deliberately ranges up to
 * a_cBits * 4 - 1.
 *
 * uMisc is cast to unsigned for printing so the argument matches the %u
 * conversion regardless of the field's width in a_TestType (the test macro
 * for double shifts does the same).
 */
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
static void ShiftDblU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = RandU ## a_cBits ## Src(iTest); \
            Test.uMisc     = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %2u }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, (unsigned)Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
2008
2009#define TEST_SHIFT_DBL(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2010TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTDBLU ## a_cBits); \
2011\
2012static a_SubTestType const a_aSubTests[] = \
2013{ \
2014 ENTRY_AMD(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
2015 ENTRY_INTEL(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
2016 ENTRY_AMD(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
2017 ENTRY_INTEL(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
2018}; \
2019\
2020GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2021\
2022static void ShiftDblU ## a_cBits ## Test(void) \
2023{ \
2024 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2025 { \
2026 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2027 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2028 PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2029 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2030 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2031 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2032 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2033 { \
2034 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2035 { \
2036 uint32_t fEfl = paTests[iTest].fEflIn; \
2037 a_Type uDst = paTests[iTest].uDstIn; \
2038 pfn(&uDst, paTests[iTest].uSrcIn, paTests[iTest].uMisc, &fEfl); \
2039 if ( uDst != paTests[iTest].uDstOut \
2040 || fEfl != paTests[iTest].fEflOut) \
2041 RTTestFailed(g_hTest, "#%03u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " shift=%-2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s%s\n", \
2042 iTest, iVar == 0 ? "" : "/n", paTests[iTest].fEflIn, \
2043 paTests[iTest].uDstIn, paTests[iTest].uSrcIn, (unsigned)paTests[iTest].uMisc, \
2044 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2045 EFlagsDiff(fEfl, paTests[iTest].fEflOut), uDst == paTests[iTest].uDstOut ? "" : " dst!"); \
2046 else \
2047 { \
2048 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2049 *g_pfEfl = paTests[iTest].fEflIn; \
2050 pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, paTests[iTest].uMisc, g_pfEfl); \
2051 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2052 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2053 } \
2054 } \
2055 pfn = a_aSubTests[iFn].pfnNative; \
2056 } \
2057 } \
2058}
2059TEST_SHIFT_DBL(16, uint16_t, "%#06RX16", BINU16_TEST_T, SHIFT_DBL_U16_T, g_aShiftDblU16)
2060TEST_SHIFT_DBL(32, uint32_t, "%#010RX32", BINU32_TEST_T, SHIFT_DBL_U32_T, g_aShiftDblU32)
2061TEST_SHIFT_DBL(64, uint64_t, "%#018RX64", BINU64_TEST_T, SHIFT_DBL_U64_T, g_aShiftDblU64)
2062
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates double shift test data for all operand widths (16, 32 and 64 bits).
 *
 * @param   pOut    The output stream, handed on to the per-width generators.
 * @param   cTests  The test count, handed on to the per-width generators.
 */
static void ShiftDblGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    ShiftDblU16Generate(pOut, cTests);
    ShiftDblU32Generate(pOut, cTests);
    ShiftDblU64Generate(pOut, cTests);
}
#endif
2071
2072static void ShiftDblTest(void)
2073{
2074 ShiftDblU16Test();
2075 ShiftDblU32Test();
2076 ShiftDblU64Test();
2077}
2078
2079
/*
 * Unary operators.
 *
 * Note! We use BINUxx_TEST_T, ignoring uSrcIn and uMisc.
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/*
 * Defines UnaryU<a_cBits>Generate(pOut, cTests), which writes cTests random
 * test vectors per entry of g_aUnaryU<a_cBits> to pOut as a_TestType
 * initializers.  Expected outputs come from calling the entry's pfn worker;
 * the uSrcIn and uMisc fields are always emitted as zero.
 */
# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
static void UnaryU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
    { \
        GenerateArrayStart(pOut, g_aUnaryU ## a_cBits[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn  = RandEFlags(); \
            Test.fEflOut = Test.fEflIn; \
            Test.uDstIn  = RandU ## a_cBits(); \
            Test.uDstOut = Test.uDstIn; \
            Test.uSrcIn  = 0; \
            Test.uMisc   = 0; \
            g_aUnaryU ## a_cBits[iFn].pfn(&Test.uDstOut, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, 0 }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, iTest); \
        } \
        GenerateArrayEnd(pOut, g_aUnaryU ## a_cBits[iFn].pszName); \
    } \
}
#else
# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType)
#endif
2111
2112#define TEST_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
2113TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLUNARYU ## a_cBits); \
2114static a_SubTestType const g_aUnaryU ## a_cBits [] = \
2115{ \
2116 ENTRY(inc_u ## a_cBits), \
2117 ENTRY(inc_u ## a_cBits ## _locked), \
2118 ENTRY(dec_u ## a_cBits), \
2119 ENTRY(dec_u ## a_cBits ## _locked), \
2120 ENTRY(not_u ## a_cBits), \
2121 ENTRY(not_u ## a_cBits ## _locked), \
2122 ENTRY(neg_u ## a_cBits), \
2123 ENTRY(neg_u ## a_cBits ## _locked), \
2124}; \
2125\
2126GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
2127\
2128static void UnaryU ## a_cBits ## Test(void) \
2129{ \
2130 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
2131 { \
2132 if (!SubTestAndCheckIfEnabled(g_aUnaryU ## a_cBits[iFn].pszName)) continue; \
2133 a_TestType const * const paTests = g_aUnaryU ## a_cBits[iFn].paTests; \
2134 uint32_t const cTests = *g_aUnaryU ## a_cBits[iFn].pcTests; \
2135 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2136 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2137 { \
2138 uint32_t fEfl = paTests[iTest].fEflIn; \
2139 a_Type uDst = paTests[iTest].uDstIn; \
2140 g_aUnaryU ## a_cBits[iFn].pfn(&uDst, &fEfl); \
2141 if ( uDst != paTests[iTest].uDstOut \
2142 || fEfl != paTests[iTest].fEflOut) \
2143 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
2144 iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, \
2145 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2146 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2147 else \
2148 { \
2149 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2150 *g_pfEfl = paTests[iTest].fEflIn; \
2151 g_aUnaryU ## a_cBits[iFn].pfn(g_pu ## a_cBits, g_pfEfl); \
2152 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2153 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2154 } \
2155 } \
2156 } \
2157}
2158TEST_UNARY(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_UNARY_U8_T)
2159TEST_UNARY(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_UNARY_U16_T)
2160TEST_UNARY(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_UNARY_U32_T)
2161TEST_UNARY(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_UNARY_U64_T)
2162
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates unary operator test data for all operand widths (8 to 64 bits).
 *
 * @param   pOut    The output stream, handed on to the per-width generators.
 * @param   cTests  The test count, handed on to the per-width generators.
 */
static void UnaryGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    UnaryU8Generate(pOut, cTests);
    UnaryU16Generate(pOut, cTests);
    UnaryU32Generate(pOut, cTests);
    UnaryU64Generate(pOut, cTests);
}
#endif
2172
2173static void UnaryTest(void)
2174{
2175 UnaryU8Test();
2176 UnaryU16Test();
2177 UnaryU32Test();
2178 UnaryU64Test();
2179}
2180
2181
2182/*
2183 * Shifts.
2184 *
2185 * Note! We use BINUxx_TEST_T with the shift count in uMisc and uSrcIn unused.
2186 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/*
 * Defines ShiftU<a_cBits>Generate(pOut, cTests), which for every entry of
 * a_aSubTests matching the native or currently selected EFLAGS flavour writes
 * test vectors to pOut as a_TestType initializers.
 *
 * Two vectors are emitted per iteration: one with random input EFLAGS and a
 * second ("b") with the same destination and shift count but the live EFLAGS
 * bits inverted.  Expected outputs come from the entry's pfnNative worker.
 * The shift count is stored in uMisc and deliberately ranges up to
 * a_cBits * 4 - 1; uSrcIn is unused and set to zero.
 *
 * uMisc is cast to unsigned for printing so the argument matches the %u
 * conversion regardless of the field's width in a_TestType.
 */
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
static void ShiftU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstIn    = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut   = Test.uDstIn; \
            Test.uSrcIn    = 0; \
            Test.uMisc     = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, (unsigned)Test.uMisc, iTest); \
            \
            /* Same operands again with the live EFLAGS bits flipped. */ \
            Test.fEflIn    = (~Test.fEflIn & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK; \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDstOut   = Test.uDstIn; \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u b */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, (unsigned)Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
2223
2224#define TEST_SHIFT(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2225TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTU ## a_cBits); \
2226static a_SubTestType const a_aSubTests[] = \
2227{ \
2228 ENTRY_AMD( rol_u ## a_cBits, X86_EFL_OF), \
2229 ENTRY_INTEL(rol_u ## a_cBits, X86_EFL_OF), \
2230 ENTRY_AMD( ror_u ## a_cBits, X86_EFL_OF), \
2231 ENTRY_INTEL(ror_u ## a_cBits, X86_EFL_OF), \
2232 ENTRY_AMD( rcl_u ## a_cBits, X86_EFL_OF), \
2233 ENTRY_INTEL(rcl_u ## a_cBits, X86_EFL_OF), \
2234 ENTRY_AMD( rcr_u ## a_cBits, X86_EFL_OF), \
2235 ENTRY_INTEL(rcr_u ## a_cBits, X86_EFL_OF), \
2236 ENTRY_AMD( shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2237 ENTRY_INTEL(shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2238 ENTRY_AMD( shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2239 ENTRY_INTEL(shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2240 ENTRY_AMD( sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2241 ENTRY_INTEL(sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2242}; \
2243\
2244GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2245\
2246static void ShiftU ## a_cBits ## Test(void) \
2247{ \
2248 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2249 { \
2250 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2251 PFNIEMAIMPLSHIFTU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2252 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2253 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2254 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2255 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2256 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2257 { \
2258 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2259 { \
2260 uint32_t fEfl = paTests[iTest].fEflIn; \
2261 a_Type uDst = paTests[iTest].uDstIn; \
2262 pfn(&uDst, paTests[iTest].uMisc, &fEfl); \
2263 if ( uDst != paTests[iTest].uDstOut \
2264 || fEfl != paTests[iTest].fEflOut ) \
2265 RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " shift=%2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
2266 iTest, iVar == 0 ? "" : "/n", \
2267 paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uMisc, \
2268 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2269 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2270 else \
2271 { \
2272 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2273 *g_pfEfl = paTests[iTest].fEflIn; \
2274 pfn(g_pu ## a_cBits, paTests[iTest].uMisc, g_pfEfl); \
2275 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2276 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2277 } \
2278 } \
2279 pfn = a_aSubTests[iFn].pfnNative; \
2280 } \
2281 } \
2282}
2283TEST_SHIFT(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_BINARY_U8_T, g_aShiftU8)
2284TEST_SHIFT(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_BINARY_U16_T, g_aShiftU16)
2285TEST_SHIFT(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_BINARY_U32_T, g_aShiftU32)
2286TEST_SHIFT(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_BINARY_U64_T, g_aShiftU64)
2287
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates shift and rotate test data for all operand widths (8 to 64 bits).
 *
 * @param   pOut    The output stream, handed on to the per-width generators.
 * @param   cTests  The test count, handed on to the per-width generators.
 */
static void ShiftGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    ShiftU8Generate(pOut, cTests);
    ShiftU16Generate(pOut, cTests);
    ShiftU32Generate(pOut, cTests);
    ShiftU64Generate(pOut, cTests);
}
#endif
2297
2298static void ShiftTest(void)
2299{
2300 ShiftU8Test();
2301 ShiftU16Test();
2302 ShiftU32Test();
2303 ShiftU64Test();
2304}
2305
2306
/*
 * Multiplication and division.
 *
 * Note! The 8-bit functions have a different format, so we need to duplicate things.
 * Note! Currently ignoring undefined bits.
 */
2313
2314/* U8 */
2315TYPEDEF_SUBTEST_TYPE(INT_MULDIV_U8_T, MULDIVU8_TEST_T, PFNIEMAIMPLMULDIVU8);
2316static INT_MULDIV_U8_T const g_aMulDivU8[] =
2317{
2318 ENTRY_AMD_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
2319 X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
2320 ENTRY_INTEL_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
2321 ENTRY_AMD_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
2322 X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
2323 ENTRY_INTEL_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
2324 ENTRY_AMD_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2325 ENTRY_INTEL_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2326 ENTRY_AMD_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2327 ENTRY_INTEL_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2328};
2329
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the 8-bit multiplication and division workers.
 *
 * Only entries whose EFLAGS flavour is native or equals the currently
 * selected flavour are generated.  Expected outputs (including the return
 * code) come from calling the entry's pfnNative worker on random input.
 *
 * @param   pOut    The output stream.
 * @param   cTests  Number of test vectors to emit per sub-test.
 */
static void MulDivU8Generate(PRTSTREAM pOut, uint32_t cTests)
{
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
    {
        bool const fWantedFlavour = g_aMulDivU8[iFn].idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
                                 || g_aMulDivU8[iFn].idxCpuEflFlavour == g_idxCpuEflFlavour;
        if (fWantedFlavour)
        {
            GenerateArrayStart(pOut, g_aMulDivU8[iFn].pszName, "MULDIVU8_TEST_T");
            for (uint32_t iTest = 0; iTest < cTests; iTest++ )
            {
                MULDIVU8_TEST_T Test;
                Test.fEflIn    = RandEFlags();
                Test.fEflOut   = Test.fEflIn;
                Test.uDstIn    = RandU16Dst(iTest);
                Test.uDstOut   = Test.uDstIn;
                Test.uSrcIn    = RandU8Src(iTest);
                Test.rc        = g_aMulDivU8[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut);
                RTStrmPrintf(pOut, " { %#08x, %#08x, %#06RX16, %#06RX16, %#04RX8, %d }, /* #%u */\n",
                             Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.rc, iTest);
            }
            GenerateArrayEnd(pOut, g_aMulDivU8[iFn].pszName);
        }
    }
}
#endif
2355
2356static void MulDivU8Test(void)
2357{
2358 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2359 {
2360 if (!SubTestAndCheckIfEnabled(g_aMulDivU8[iFn].pszName)) continue; \
2361 MULDIVU8_TEST_T const * const paTests = g_aMulDivU8[iFn].paTests;
2362 uint32_t const cTests = *g_aMulDivU8[iFn].pcTests;
2363 uint32_t const fEflIgn = g_aMulDivU8[iFn].uExtra;
2364 PFNIEMAIMPLMULDIVU8 pfn = g_aMulDivU8[iFn].pfn;
2365 uint32_t const cVars = COUNT_VARIATIONS(g_aMulDivU8[iFn]); \
2366 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2367 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2368 {
2369 for (uint32_t iTest = 0; iTest < cTests; iTest++ )
2370 {
2371 uint32_t fEfl = paTests[iTest].fEflIn;
2372 uint16_t uDst = paTests[iTest].uDstIn;
2373 int rc = g_aMulDivU8[iFn].pfn(&uDst, paTests[iTest].uSrcIn, &fEfl);
2374 if ( uDst != paTests[iTest].uDstOut
2375 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)
2376 || rc != paTests[iTest].rc)
2377 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst=%#06RX16 src=%#04RX8\n"
2378 " %s-> efl=%#08x dst=%#06RX16 rc=%d\n"
2379 "%sexpected %#08x %#06RX16 %d%s\n",
2380 iTest, iVar ? "/n" : "", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn,
2381 iVar ? " " : "", fEfl, uDst, rc,
2382 iVar ? " " : "", paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].rc,
2383 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn));
2384 else
2385 {
2386 *g_pu16 = paTests[iTest].uDstIn;
2387 *g_pfEfl = paTests[iTest].fEflIn;
2388 rc = g_aMulDivU8[iFn].pfn(g_pu16, paTests[iTest].uSrcIn, g_pfEfl);
2389 RTTEST_CHECK(g_hTest, *g_pu16 == paTests[iTest].uDstOut);
2390 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn));
2391 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc);
2392 }
2393 }
2394 pfn = g_aMulDivU8[iFn].pfnNative;
2395 }
2396 }
2397}
2398
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/*
 * Defines MulDivU<a_cBits>Generate(pOut, cTests), which for every entry of
 * a_aSubTests matching the native or currently selected EFLAGS flavour writes
 * cTests random test vectors to pOut as a_TestType initializers.  Expected
 * outputs (both destinations, EFLAGS and return code) come from calling the
 * entry's pfnNative worker.
 *
 * The generated function is static like the other generators in this file;
 * it is only called from MulDivGenerate() below.
 */
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
static void MulDivU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn    = RandEFlags(); \
            Test.fEflOut   = Test.fEflIn; \
            Test.uDst1In   = RandU ## a_cBits ## Dst(iTest); \
            Test.uDst1Out  = Test.uDst1In; \
            Test.uDst2In   = RandU ## a_cBits ## Dst(iTest); \
            Test.uDst2Out  = Test.uDst2In; \
            Test.uSrcIn    = RandU ## a_cBits ## Src(iTest); \
            Test.rc        = a_aSubTests[iFn].pfnNative(&Test.uDst1Out, &Test.uDst2Out, Test.uSrcIn, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", %d }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDst1In, Test.uDst1Out, Test.uDst2In, Test.uDst2Out, Test.uSrcIn, \
                         Test.rc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
2430
2431#define TEST_MULDIV(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2432TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLMULDIVU ## a_cBits); \
2433static a_SubTestType const a_aSubTests [] = \
2434{ \
2435 ENTRY_AMD_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2436 ENTRY_INTEL_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2437 ENTRY_AMD_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2438 ENTRY_INTEL_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2439 ENTRY_AMD_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2440 ENTRY_INTEL_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2441 ENTRY_AMD_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2442 ENTRY_INTEL_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2443}; \
2444\
2445GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2446\
2447static void MulDivU ## a_cBits ## Test(void) \
2448{ \
2449 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2450 { \
2451 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2452 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2453 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2454 uint32_t const fEflIgn = a_aSubTests[iFn].uExtra; \
2455 PFNIEMAIMPLMULDIVU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2456 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2457 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2458 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2459 { \
2460 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2461 { \
2462 uint32_t fEfl = paTests[iTest].fEflIn; \
2463 a_Type uDst1 = paTests[iTest].uDst1In; \
2464 a_Type uDst2 = paTests[iTest].uDst2In; \
2465 int rc = pfn(&uDst1, &uDst2, paTests[iTest].uSrcIn, &fEfl); \
2466 if ( uDst1 != paTests[iTest].uDst1Out \
2467 || uDst2 != paTests[iTest].uDst2Out \
2468 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)\
2469 || rc != paTests[iTest].rc) \
2470 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " src=" a_Fmt "\n" \
2471 " -> efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " rc=%d\n" \
2472 "expected %#08x " a_Fmt " " a_Fmt " %d%s -%s%s%s\n", \
2473 iTest, iVar == 0 ? "" : "/n", \
2474 paTests[iTest].fEflIn, paTests[iTest].uDst1In, paTests[iTest].uDst2In, paTests[iTest].uSrcIn, \
2475 fEfl, uDst1, uDst2, rc, \
2476 paTests[iTest].fEflOut, paTests[iTest].uDst1Out, paTests[iTest].uDst2Out, paTests[iTest].rc, \
2477 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn), \
2478 uDst1 != paTests[iTest].uDst1Out ? " dst1" : "", uDst2 != paTests[iTest].uDst2Out ? " dst2" : "", \
2479 (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn) ? " eflags" : ""); \
2480 else \
2481 { \
2482 *g_pu ## a_cBits = paTests[iTest].uDst1In; \
2483 *g_pu ## a_cBits ## Two = paTests[iTest].uDst2In; \
2484 *g_pfEfl = paTests[iTest].fEflIn; \
2485 rc = pfn(g_pu ## a_cBits, g_pu ## a_cBits ## Two, paTests[iTest].uSrcIn, g_pfEfl); \
2486 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDst1Out); \
2487 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits ## Two == paTests[iTest].uDst2Out); \
2488 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn)); \
2489 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc); \
2490 } \
2491 } \
2492 pfn = a_aSubTests[iFn].pfnNative; \
2493 } \
2494 } \
2495}
2496TEST_MULDIV(16, uint16_t, "%#06RX16", MULDIVU16_TEST_T, INT_MULDIV_U16_T, g_aMulDivU16)
2497TEST_MULDIV(32, uint32_t, "%#010RX32", MULDIVU32_TEST_T, INT_MULDIV_U32_T, g_aMulDivU32)
2498TEST_MULDIV(64, uint64_t, "%#018RX64", MULDIVU64_TEST_T, INT_MULDIV_U64_T, g_aMulDivU64)
2499
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates multiplication and division test data for all operand widths
 * (8 to 64 bits).
 *
 * @param   pOut    The output stream, handed on to the per-width generators.
 * @param   cTests  The test count, handed on to the per-width generators.
 */
static void MulDivGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    MulDivU8Generate(pOut, cTests);
    MulDivU16Generate(pOut, cTests);
    MulDivU32Generate(pOut, cTests);
    MulDivU64Generate(pOut, cTests);
}
#endif
2509
2510static void MulDivTest(void)
2511{
2512 MulDivU8Test();
2513 MulDivU16Test();
2514 MulDivU32Test();
2515 MulDivU64Test();
2516}
2517
2518
2519/*
2520 * BSWAP
2521 */
2522static void BswapTest(void)
2523{
2524 if (SubTestAndCheckIfEnabled("bswap_u16"))
2525 {
2526 *g_pu32 = UINT32_C(0x12345678);
2527 iemAImpl_bswap_u16(g_pu32);
2528#if 0
2529 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12347856), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2530#else
2531 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12340000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2532#endif
2533 *g_pu32 = UINT32_C(0xffff1122);
2534 iemAImpl_bswap_u16(g_pu32);
2535#if 0
2536 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff2211), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2537#else
2538 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff0000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2539#endif
2540 }
2541
2542 if (SubTestAndCheckIfEnabled("bswap_u32"))
2543 {
2544 *g_pu32 = UINT32_C(0x12345678);
2545 iemAImpl_bswap_u32(g_pu32);
2546 RTTEST_CHECK(g_hTest, *g_pu32 == UINT32_C(0x78563412));
2547 }
2548
2549 if (SubTestAndCheckIfEnabled("bswap_u64"))
2550 {
2551 *g_pu64 = UINT64_C(0x0123456789abcdef);
2552 iemAImpl_bswap_u64(g_pu64);
2553 RTTEST_CHECK(g_hTest, *g_pu64 == UINT64_C(0xefcdab8967452301));
2554 }
2555}
2556
2557
2558
2559/*********************************************************************************************************************************
2560* Floating point (x87 style) *
2561*********************************************************************************************************************************/
2562
2563/*
2564 * FPU constant loading.
2565 */
2566TYPEDEF_SUBTEST_TYPE(FPU_LD_CONST_T, FPU_LD_CONST_TEST_T, PFNIEMAIMPLFPUR80LDCONST);
2567
2568static const FPU_LD_CONST_T g_aFpuLdConst[] =
2569{
2570 ENTRY(fld1),
2571 ENTRY(fldl2t),
2572 ENTRY(fldl2e),
2573 ENTRY(fldpi),
2574 ENTRY(fldlg2),
2575 ENTRY(fldln2),
2576 ENTRY(fldz),
2577};
2578
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the FPU constant loading workers.
 *
 * For each step a random FCW/FSW pair is picked and the worker is then run
 * once for each of the four rounding control settings, so vectors are emitted
 * in groups of four.
 *
 * @param   pOut    The output stream.
 * @param   cTests  Number of test vectors per sub-test; the loop advances in
 *                  steps of four.
 */
static void FpuLdConstGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    X86FXSTATE State;
    RT_ZERO(State);

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuLdConst[iFn].pszName, "FPU_LD_CONST_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest += 4)
        {
            State.FCW = RandFcw();
            State.FSW = RandFsw();

            /* Same FCW/FSW, all four rounding modes. */
            uint16_t iRounding = 0;
            while (iRounding < 4)
            {
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
                g_aFpuLdConst[iFn].pfn(&State, &Res);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s }, /* #%u */\n",
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), iTest + iRounding);
                iRounding++;
            }
        }
        GenerateArrayEnd(pOut, g_aFpuLdConst[iFn].pszName);
    }
}
#endif
2605
2606static void FpuLoadConstTest(void)
2607{
2608 /*
2609 * Inputs:
2610 * - FSW: C0, C1, C2, C3
2611 * - FCW: Exception masks, Precision control, Rounding control.
2612 *
2613 * C1 set to 1 on stack overflow, zero otherwise. C0, C2, and C3 are "undefined".
2614 */
2615 X86FXSTATE State;
2616 RT_ZERO(State);
2617 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2618 {
2619 if (!SubTestAndCheckIfEnabled(g_aFpuLdConst[iFn].pszName))
2620 continue;
2621
2622 uint32_t const cTests = *g_aFpuLdConst[iFn].pcTests;
2623 FPU_LD_CONST_TEST_T const *paTests = g_aFpuLdConst[iFn].paTests;
2624 PFNIEMAIMPLFPUR80LDCONST pfn = g_aFpuLdConst[iFn].pfn;
2625 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdConst[iFn]); \
2626 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2627 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2628 {
2629 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2630 {
2631 State.FCW = paTests[iTest].fFcw;
2632 State.FSW = paTests[iTest].fFswIn;
2633 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2634 pfn(&State, &Res);
2635 if ( Res.FSW != paTests[iTest].fFswOut
2636 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2637 RTTestFailed(g_hTest, "#%u%s: fcw=%#06x fsw=%#06x -> fsw=%#06x %s, expected %#06x %s%s%s (%s)\n",
2638 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2639 Res.FSW, FormatR80(&Res.r80Result),
2640 paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2641 FswDiff(Res.FSW, paTests[iTest].fFswOut),
2642 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2643 FormatFcw(paTests[iTest].fFcw) );
2644 }
2645 pfn = g_aFpuLdConst[iFn].pfnNative;
2646 }
2647 }
2648}
2649
2650
2651/*
2652 * Load floating point values from memory.
2653 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Defines FpuLdR<a_cBits>Generate(), which writes one test-data array per
 * entry of @a a_aSubTests to @a pOut.
 *
 * Each of the cTests iterations draws a random FCW, FSW and source value, then
 * runs the worker once for each of the four rounding-control settings and
 * prints FCW, input FSW, output FSW, result and input as one array row.
 */
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
static void FpuLdR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE FxState; \
    RT_ZERO(FxState); \
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(a_aSubTests); idxFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[idxFn].pszName, #a_TestType); \
        for (uint32_t idxTest = 0; idxTest < cTests; idxTest++) \
        { \
            FxState.FCW = RandFcw(); \
            FxState.FSW = RandFsw(); \
            a_rdTypeIn SrcVal = RandR ## a_cBits ## Src(idxTest); \
            \
            /* One output row per rounding-control value. */ \
            for (uint16_t iRc = 0; iRc < 4; iRc++) \
            { \
                IEMFPURESULT FpuRes = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                FxState.FCW = (FxState.FCW & ~X86_FCW_RC_MASK) | (iRc << X86_FCW_RC_SHIFT); \
                a_aSubTests[idxFn].pfn(&FxState, &FpuRes, &SrcVal); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n", \
                             FxState.FCW, FxState.FSW, FpuRes.FSW, GenFormatR80(&FpuRes.r80Result), \
                             GenFormatR ## a_cBits(&SrcVal), idxTest, iRc); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[idxFn].pszName); \
    } \
}
#else
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType)
#endif
2685
2686#define TEST_FPU_LOAD(a_cBits, a_rdTypeIn, a_SubTestType, a_aSubTests, a_TestType) \
2687typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROM ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, PC ## a_rdTypeIn)); \
2688typedef FNIEMAIMPLFPULDR80FROM ## a_cBits *PFNIEMAIMPLFPULDR80FROM ## a_cBits; \
2689TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROM ## a_cBits); \
2690\
2691static const a_SubTestType a_aSubTests[] = \
2692{ \
2693 ENTRY(RT_CONCAT(fld_r80_from_r,a_cBits)) \
2694}; \
2695GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
2696\
2697static void FpuLdR ## a_cBits ## Test(void) \
2698{ \
2699 X86FXSTATE State; \
2700 RT_ZERO(State); \
2701 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2702 { \
2703 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2704 \
2705 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2706 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2707 PFNIEMAIMPLFPULDR80FROM ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2708 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2709 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2710 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2711 { \
2712 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2713 { \
2714 a_rdTypeIn const InVal = paTests[iTest].InVal; \
2715 State.FCW = paTests[iTest].fFcw; \
2716 State.FSW = paTests[iTest].fFswIn; \
2717 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
2718 pfn(&State, &Res, &InVal); \
2719 if ( Res.FSW != paTests[iTest].fFswOut \
2720 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
2721 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n" \
2722 "%s -> fsw=%#06x %s\n" \
2723 "%s expected %#06x %s%s%s (%s)\n", \
2724 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
2725 FormatR ## a_cBits(&paTests[iTest].InVal), \
2726 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
2727 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
2728 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
2729 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
2730 FormatFcw(paTests[iTest].fFcw) ); \
2731 } \
2732 pfn = a_aSubTests[iFn].pfnNative; \
2733 } \
2734 } \
2735}
2736
2737TEST_FPU_LOAD(80, RTFLOAT80U, FPU_LD_R80_T, g_aFpuLdR80, FPU_R80_IN_TEST_T)
2738TEST_FPU_LOAD(64, RTFLOAT64U, FPU_LD_R64_T, g_aFpuLdR64, FPU_R64_IN_TEST_T)
2739TEST_FPU_LOAD(32, RTFLOAT32U, FPU_LD_R32_T, g_aFpuLdR32, FPU_R32_IN_TEST_T)
2740
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes the test data for the 80-, 64- and 32-bit floating point load
 * workers to @a pOut, in that order, requesting @a cTests tests from each.
 */
static void FpuLdMemGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    typedef void FNLDMEMGENERATE(PRTSTREAM, uint32_t);
    static FNLDMEMGENERATE * const s_apfnGenerators[] =
    {
        FpuLdR80Generate,
        FpuLdR64Generate,
        FpuLdR32Generate,
    };
    for (size_t idx = 0; idx < RT_ELEMENTS(s_apfnGenerators); idx++)
        s_apfnGenerators[idx](pOut, cTests);
}
#endif
2749
2750static void FpuLdMemTest(void)
2751{
2752 FpuLdR80Test();
2753 FpuLdR64Test();
2754 FpuLdR32Test();
2755}
2756
2757
2758/*
2759 * Load integer values from memory.
2760 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Defines FpuLdI<a_cBits>Generate(), which writes one test-data array per
 * entry of @a a_aSubTests to @a pOut.
 *
 * Each of the cTests iterations draws a random FCW, FSW and integer input,
 * then runs the worker once for each of the four rounding-control settings.
 * The input is printed using the @a a_szFmtIn format.
 */
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
static void FpuLdI ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE FxState; \
    RT_ZERO(FxState); \
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(a_aSubTests); idxFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[idxFn].pszName, #a_TestType); \
        for (uint32_t idxTest = 0; idxTest < cTests; idxTest++) \
        { \
            FxState.FCW = RandFcw(); \
            FxState.FSW = RandFsw(); \
            a_iTypeIn iSrcVal = (a_iTypeIn)RandU ## a_cBits ## Src(idxTest); \
            \
            /* One output row per rounding-control value. */ \
            for (uint16_t iRc = 0; iRc < 4; iRc++) \
            { \
                IEMFPURESULT FpuRes = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                FxState.FCW = (FxState.FCW & ~X86_FCW_RC_MASK) | (iRc << X86_FCW_RC_SHIFT); \
                a_aSubTests[idxFn].pfn(&FxState, &FpuRes, &iSrcVal); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, " a_szFmtIn " }, /* #%u/%u */\n", \
                             FxState.FCW, FxState.FSW, FpuRes.FSW, GenFormatR80(&FpuRes.r80Result), \
                             iSrcVal, idxTest, iRc); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[idxFn].pszName); \
    } \
}
#else
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType)
#endif
2791
2792#define TEST_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_SubTestType, a_aSubTests, a_TestType) \
2793typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMI ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, a_iTypeIn const *)); \
2794typedef FNIEMAIMPLFPULDR80FROMI ## a_cBits *PFNIEMAIMPLFPULDR80FROMI ## a_cBits; \
2795TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROMI ## a_cBits); \
2796\
2797static const a_SubTestType a_aSubTests[] = \
2798{ \
2799 ENTRY(RT_CONCAT(fild_r80_from_i,a_cBits)) \
2800}; \
2801GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
2802\
2803static void FpuLdI ## a_cBits ## Test(void) \
2804{ \
2805 X86FXSTATE State; \
2806 RT_ZERO(State); \
2807 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2808 { \
2809 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2810 \
2811 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2812 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2813 PFNIEMAIMPLFPULDR80FROMI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2814 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2815 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2816 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2817 { \
2818 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2819 { \
2820 a_iTypeIn const iInVal = paTests[iTest].iInVal; \
2821 State.FCW = paTests[iTest].fFcw; \
2822 State.FSW = paTests[iTest].fFswIn; \
2823 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
2824 pfn(&State, &Res, &iInVal); \
2825 if ( Res.FSW != paTests[iTest].fFswOut \
2826 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
2827 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=" a_szFmtIn "\n" \
2828 "%s -> fsw=%#06x %s\n" \
2829 "%s expected %#06x %s%s%s (%s)\n", \
2830 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, paTests[iTest].iInVal, \
2831 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
2832 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
2833 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
2834 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
2835 FormatFcw(paTests[iTest].fFcw) ); \
2836 } \
2837 pfn = a_aSubTests[iFn].pfnNative; \
2838 } \
2839 } \
2840}
2841
2842TEST_FPU_LOAD_INT(64, int64_t, "%RI64", FPU_LD_I64_T, g_aFpuLdU64, FPU_I64_IN_TEST_T)
2843TEST_FPU_LOAD_INT(32, int32_t, "%RI32", FPU_LD_I32_T, g_aFpuLdU32, FPU_I32_IN_TEST_T)
2844TEST_FPU_LOAD_INT(16, int16_t, "%RI16", FPU_LD_I16_T, g_aFpuLdU16, FPU_I16_IN_TEST_T)
2845
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes the test data for the 64-, 32- and 16-bit integer load workers to
 * @a pOut, in that order, requesting @a cTests tests from each.
 */
static void FpuLdIntGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    typedef void FNLDINTGENERATE(PRTSTREAM, uint32_t);
    static FNLDINTGENERATE * const s_apfnGenerators[] =
    {
        FpuLdI64Generate,
        FpuLdI32Generate,
        FpuLdI16Generate,
    };
    for (size_t idx = 0; idx < RT_ELEMENTS(s_apfnGenerators); idx++)
        s_apfnGenerators[idx](pOut, cTests);
}
#endif
2854
2855static void FpuLdIntTest(void)
2856{
2857 FpuLdI64Test();
2858 FpuLdI32Test();
2859 FpuLdI16Test();
2860}
2861
2862
2863/*
2864 * Load binary coded decimal values from memory.
2865 */
2866typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMD80,(PCX86FXSTATE, PIEMFPURESULT, PCRTPBCD80U));
2867typedef FNIEMAIMPLFPULDR80FROMD80 *PFNIEMAIMPLFPULDR80FROMD80;
2868TYPEDEF_SUBTEST_TYPE(FPU_LD_D80_T, FPU_D80_IN_TEST_T, PFNIEMAIMPLFPULDR80FROMD80);
2869
2870static const FPU_LD_D80_T g_aFpuLdD80[] =
2871{
2872 ENTRY(fld_r80_from_d80)
2873};
2874
2875#ifdef TSTIEMAIMPL_WITH_GENERATOR
2876static void FpuLdD80Generate(PRTSTREAM pOut, uint32_t cTests)
2877{
2878 X86FXSTATE State;
2879 RT_ZERO(State);
2880 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
2881 {
2882 GenerateArrayStart(pOut, g_aFpuLdD80[iFn].pszName, "FPU_D80_IN_TEST_T");
2883 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2884 {
2885 State.FCW = RandFcw();
2886 State.FSW = RandFsw();
2887 RTPBCD80U InVal = RandD80Src(iTest);
2888
2889 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
2890 {
2891 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2892 State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
2893 g_aFpuLdD80[iFn].pfn(&State, &Res, &InVal);
2894 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n",
2895 State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), GenFormatD80(&InVal),
2896 iTest, iRounding);
2897 }
2898 }
2899 GenerateArrayEnd(pOut, g_aFpuLdD80[iFn].pszName);
2900 }
2901}
2902#endif
2903
2904static void FpuLdD80Test(void)
2905{
2906 X86FXSTATE State;
2907 RT_ZERO(State);
2908 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
2909 {
2910 if (!SubTestAndCheckIfEnabled(g_aFpuLdD80[iFn].pszName))
2911 continue;
2912
2913 uint32_t const cTests = *g_aFpuLdD80[iFn].pcTests;
2914 FPU_D80_IN_TEST_T const * const paTests = g_aFpuLdD80[iFn].paTests;
2915 PFNIEMAIMPLFPULDR80FROMD80 pfn = g_aFpuLdD80[iFn].pfn;
2916 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdD80[iFn]);
2917 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2918 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2919 {
2920 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2921 {
2922 RTPBCD80U const InVal = paTests[iTest].InVal;
2923 State.FCW = paTests[iTest].fFcw;
2924 State.FSW = paTests[iTest].fFswIn;
2925 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2926 pfn(&State, &Res, &InVal);
2927 if ( Res.FSW != paTests[iTest].fFswOut
2928 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2929 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n"
2930 "%s -> fsw=%#06x %s\n"
2931 "%s expected %#06x %s%s%s (%s)\n",
2932 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2933 FormatD80(&paTests[iTest].InVal),
2934 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
2935 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2936 FswDiff(Res.FSW, paTests[iTest].fFswOut),
2937 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2938 FormatFcw(paTests[iTest].fFcw) );
2939 }
2940 pfn = g_aFpuLdD80[iFn].pfnNative;
2941 }
2942 }
2943}
2944
2945
2946/*
2947 * Store values floating point values to memory.
2948 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Extra inputs appended to the random ones when generating 32-bit store tests. */
static const RTFLOAT80U g_aFpuStR32Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(1, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(0, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(1, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
};
/** Extra inputs appended to the random ones when generating 64-bit store tests. */
static const RTFLOAT80U g_aFpuStR64Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(0, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(0, 0xd0b9e6fdda887400, 687 + RTFLOAT80U_EXP_BIAS), /* random example for this */
};
/** Extra inputs appended to the random ones when generating 80-bit store tests. */
static const RTFLOAT80U g_aFpuStR80Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* placeholder */
};

/**
 * Defines FpuStR<a_cBits>Generate(), which writes one test-data array per
 * entry of @a a_aSubTests to @a pOut.
 *
 * cTests random inputs are followed by the g_aFpuStR<a_cBits>Specials entries.
 * Every input is run with each of the four rounding-control settings combined
 * with each of the eight OM/UM/PM mask combinations.
 */
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
static void FpuStR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    uint32_t const cAllTests = cTests + RT_ELEMENTS(g_aFpuStR ## a_cBits ## Specials); \
    X86FXSTATE FxState; \
    RT_ZERO(FxState); \
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(a_aSubTests); idxFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[idxFn].pszName, #a_TestType); \
        for (uint32_t idxTest = 0; idxTest < cAllTests; idxTest++) \
        { \
            uint16_t const fFcwRand = RandFcw(); \
            FxState.FSW = RandFsw(); \
            RTFLOAT80U const SrcVal = idxTest < cTests ? RandR80Src(idxTest, a_cBits) \
                                    : g_aFpuStR ## a_cBits ## Specials[idxTest - cTests]; \
            \
            for (uint16_t iRc = 0; iRc < 4; iRc++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t fFswResult = 0; \
                    a_rdType DstVal; \
                    RT_ZERO(DstVal); \
                    /* Pre-fill the output with a non-zero byte pattern. */ \
                    memset(&DstVal, 0xfe, sizeof(DstVal)); \
                    FxState.FCW = (fFcwRand & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                                | (iRc << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) FxState.FCW ^= X86_FCW_MASK_ALL;*/ \
                    FxState.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    a_aSubTests[idxFn].pfn(&FxState, &fFswResult, &DstVal, &SrcVal); \
                    RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
                                 FxState.FCW, FxState.FSW, fFswResult, GenFormatR80(&SrcVal), \
                                 GenFormatR ## a_cBits(&DstVal), idxTest, iRc, iMask); \
                } \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[idxFn].pszName); \
    } \
}
#else
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType)
#endif
3012
3013#define TEST_FPU_STORE(a_cBits, a_rdType, a_SubTestType, a_aSubTests, a_TestType) \
3014typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOR ## a_cBits,(PCX86FXSTATE, uint16_t *, \
3015 PRTFLOAT ## a_cBits ## U, PCRTFLOAT80U)); \
3016typedef FNIEMAIMPLFPUSTR80TOR ## a_cBits *PFNIEMAIMPLFPUSTR80TOR ## a_cBits; \
3017TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPUSTR80TOR ## a_cBits); \
3018\
3019static const a_SubTestType a_aSubTests[] = \
3020{ \
3021 ENTRY(RT_CONCAT(fst_r80_to_r,a_cBits)) \
3022}; \
3023GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
3024\
3025static void FpuStR ## a_cBits ## Test(void) \
3026{ \
3027 X86FXSTATE State; \
3028 RT_ZERO(State); \
3029 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3030 { \
3031 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
3032 \
3033 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
3034 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
3035 PFNIEMAIMPLFPUSTR80TOR ## a_cBits pfn = a_aSubTests[iFn].pfn; \
3036 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
3037 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
3038 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
3039 { \
3040 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3041 { \
3042 RTFLOAT80U const InVal = paTests[iTest].InVal; \
3043 uint16_t uFswOut = 0; \
3044 a_rdType OutVal; \
3045 RT_ZERO(OutVal); \
3046 memset(&OutVal, 0xfe, sizeof(OutVal)); \
3047 State.FCW = paTests[iTest].fFcw; \
3048 State.FSW = paTests[iTest].fFswIn; \
3049 pfn(&State, &uFswOut, &OutVal, &InVal); \
3050 if ( uFswOut != paTests[iTest].fFswOut \
3051 || !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal)) \
3052 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
3053 "%s -> fsw=%#06x %s\n" \
3054 "%s expected %#06x %s%s%s (%s)\n", \
3055 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
3056 FormatR80(&paTests[iTest].InVal), \
3057 iVar ? " " : "", uFswOut, FormatR ## a_cBits(&OutVal), \
3058 iVar ? " " : "", paTests[iTest].fFswOut, FormatR ## a_cBits(&paTests[iTest].OutVal), \
3059 FswDiff(uFswOut, paTests[iTest].fFswOut), \
3060 !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "", \
3061 FormatFcw(paTests[iTest].fFcw) ); \
3062 } \
3063 pfn = a_aSubTests[iFn].pfnNative; \
3064 } \
3065 } \
3066}
3067
3068TEST_FPU_STORE(80, RTFLOAT80U, FPU_ST_R80_T, g_aFpuStR80, FPU_ST_R80_TEST_T)
3069TEST_FPU_STORE(64, RTFLOAT64U, FPU_ST_R64_T, g_aFpuStR64, FPU_ST_R64_TEST_T)
3070TEST_FPU_STORE(32, RTFLOAT32U, FPU_ST_R32_T, g_aFpuStR32, FPU_ST_R32_TEST_T)
3071
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes the test data for the 80-, 64- and 32-bit floating point store
 * workers to @a pOut, in that order, requesting @a cTests tests from each.
 */
static void FpuStMemGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    typedef void FNSTMEMGENERATE(PRTSTREAM, uint32_t);
    static FNSTMEMGENERATE * const s_apfnGenerators[] =
    {
        FpuStR80Generate,
        FpuStR64Generate,
        FpuStR32Generate,
    };
    for (size_t idx = 0; idx < RT_ELEMENTS(s_apfnGenerators); idx++)
        s_apfnGenerators[idx](pOut, cTests);
}
#endif
3080
3081static void FpuStMemTest(void)
3082{
3083 FpuStR80Test();
3084 FpuStR64Test();
3085 FpuStR32Test();
3086}
3087
3088
3089/*
3090 * Store integer values to memory or register.
3091 */
3092TYPEDEF_SUBTEST_TYPE(FPU_ST_I16_T, FPU_ST_I16_TEST_T, PFNIEMAIMPLFPUSTR80TOI16);
3093TYPEDEF_SUBTEST_TYPE(FPU_ST_I32_T, FPU_ST_I32_TEST_T, PFNIEMAIMPLFPUSTR80TOI32);
3094TYPEDEF_SUBTEST_TYPE(FPU_ST_I64_T, FPU_ST_I64_TEST_T, PFNIEMAIMPLFPUSTR80TOI64);
3095
3096static const FPU_ST_I16_T g_aFpuStI16[] =
3097{
3098 ENTRY(fist_r80_to_i16),
3099 ENTRY_AMD( fistt_r80_to_i16, 0),
3100 ENTRY_INTEL(fistt_r80_to_i16, 0),
3101};
3102static const FPU_ST_I32_T g_aFpuStI32[] =
3103{
3104 ENTRY(fist_r80_to_i32),
3105 ENTRY(fistt_r80_to_i32),
3106};
3107static const FPU_ST_I64_T g_aFpuStI64[] =
3108{
3109 ENTRY(fist_r80_to_i64),
3110 ENTRY(fistt_r80_to_i64),
3111};
3112
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Extra inputs for the 16-bit integer store generator.
 * 16-bit variant borrows properties from the 32-bit one, thus all this stuff. */
static const RTFLOAT80U g_aFpuStI16Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 13 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 13 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(0, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(1, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 15 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 17 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 28 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 32 + RTFLOAT80U_EXP_BIAS),
};
/** Extra inputs for the 32-bit integer store generator. */
static const RTFLOAT80U g_aFpuStI32Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
};
/** Extra inputs for the 64-bit integer store generator. */
static const RTFLOAT80U g_aFpuStI64Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 61 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 61 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 63 + RTFLOAT80U_EXP_BIAS),
};

/**
 * Defines FpuStI<a_cBits>Generate(), which writes one test-data array per
 * applicable entry of @a a_aSubTests.
 *
 * Entries whose idxCpuEflFlavour is IEMTARGETCPU_EFL_BEHAVIOR_NATIVE go to
 * @a pOut; entries matching g_idxCpuEflFlavour go to @a pOutCpu; all other
 * entries are skipped.  The native worker is used when the entry has one,
 * otherwise pfn.  cTests random inputs are followed by the
 * g_aFpuStI<a_cBits>Specials entries, each run with four rounding-control
 * settings times eight OM/UM/PM mask combinations.
 */
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
static void FpuStI ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    X86FXSTATE FxState; \
    RT_ZERO(FxState); \
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(a_aSubTests); idxFn++) \
    { \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits pfnWorker = a_aSubTests[idxFn].pfnNative; \
        if (!pfnWorker) \
            pfnWorker = a_aSubTests[idxFn].pfn; \
        \
        PRTSTREAM pOutFn; \
        if (a_aSubTests[idxFn].idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
            pOutFn = pOut; \
        else if (a_aSubTests[idxFn].idxCpuEflFlavour == g_idxCpuEflFlavour) \
            pOutFn = pOutCpu; \
        else \
            continue; \
        \
        GenerateArrayStart(pOutFn, a_aSubTests[idxFn].pszName, #a_TestType); \
        uint32_t const cAllTests = cTests + RT_ELEMENTS(g_aFpuStI ## a_cBits ## Specials); \
        for (uint32_t idxTest = 0; idxTest < cAllTests; idxTest++) \
        { \
            uint16_t const fFcwRand = RandFcw(); \
            FxState.FSW = RandFsw(); \
            RTFLOAT80U const SrcVal = idxTest < cTests ? RandR80Src(idxTest, a_cBits, true) \
                                    : g_aFpuStI ## a_cBits ## Specials[idxTest - cTests]; \
            \
            for (uint16_t iRc = 0; iRc < 4; iRc++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t fFswResult = 0; \
                    a_iType iDstVal = ~(a_iType)2; \
                    FxState.FCW = (fFcwRand & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                                | (iRc << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) FxState.FCW ^= X86_FCW_MASK_ALL;*/ \
                    FxState.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    pfnWorker(&FxState, &fFswResult, &iDstVal, &SrcVal); \
                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
                                 FxState.FCW, FxState.FSW, fFswResult, GenFormatR80(&SrcVal), \
                                 GenFormatI ## a_cBits(iDstVal), idxTest, iRc, iMask); \
                } \
            } \
        } \
        GenerateArrayEnd(pOutFn, a_aSubTests[idxFn].pszName); \
    } \
}
#else
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType)
#endif
3255
3256#define TEST_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_SubTestType, a_aSubTests, a_TestType) \
3257GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
3258\
3259static void FpuStI ## a_cBits ## Test(void) \
3260{ \
3261 X86FXSTATE State; \
3262 RT_ZERO(State); \
3263 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3264 { \
3265 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
3266 \
3267 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
3268 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
3269 PFNIEMAIMPLFPUSTR80TOI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
3270 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
3271 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
3272 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
3273 { \
3274 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3275 { \
3276 RTFLOAT80U const InVal = paTests[iTest].InVal; \
3277 uint16_t uFswOut = 0; \
3278 a_iType iOutVal = ~(a_iType)2; \
3279 State.FCW = paTests[iTest].fFcw; \
3280 State.FSW = paTests[iTest].fFswIn; \
3281 pfn(&State, &uFswOut, &iOutVal, &InVal); \
3282 if ( uFswOut != paTests[iTest].fFswOut \
3283 || iOutVal != paTests[iTest].iOutVal) \
3284 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
3285 "%s -> fsw=%#06x " a_szFmt "\n" \
3286 "%s expected %#06x " a_szFmt "%s%s (%s)\n", \
3287 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
3288 FormatR80(&paTests[iTest].InVal), \
3289 iVar ? " " : "", uFswOut, iOutVal, \
3290 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].iOutVal, \
3291 FswDiff(uFswOut, paTests[iTest].fFswOut), \
3292 iOutVal != paTests[iTest].iOutVal ? " - val" : "", FormatFcw(paTests[iTest].fFcw) ); \
3293 } \
3294 pfn = a_aSubTests[iFn].pfnNative; \
3295 } \
3296 } \
3297}
3298
3299//fistt_r80_to_i16 diffs for AMD, of course :-)
3300
3301TEST_FPU_STORE_INT(64, int64_t, "%RI64", FPU_ST_I64_T, g_aFpuStI64, FPU_ST_I64_TEST_T)
3302TEST_FPU_STORE_INT(32, int32_t, "%RI32", FPU_ST_I32_T, g_aFpuStI32, FPU_ST_I32_TEST_T)
3303TEST_FPU_STORE_INT(16, int16_t, "%RI16", FPU_ST_I16_T, g_aFpuStI16, FPU_ST_I16_TEST_T)
3304
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the 64-, 32- and 16-bit integer store generators, in that order,
 * handing each the same @a pOut, @a pOutCpu and @a cTests.
 */
static void FpuStIntGenerate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    typedef void FNSTINTGENERATE(PRTSTREAM, PRTSTREAM, uint32_t);
    static FNSTINTGENERATE * const s_apfnGenerators[] =
    {
        FpuStI64Generate,
        FpuStI32Generate,
        FpuStI16Generate,
    };
    for (size_t idx = 0; idx < RT_ELEMENTS(s_apfnGenerators); idx++)
        s_apfnGenerators[idx](pOut, pOutCpu, cTests);
}
#endif
3313
3314static void FpuStIntTest(void)
3315{
3316 FpuStI64Test();
3317 FpuStI32Test();
3318 FpuStI16Test();
3319}
3320
3321
3322/*
3323 * Store as packed BCD value (memory).
3324 */
3325typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOD80,(PCX86FXSTATE, uint16_t *, PRTPBCD80U, PCRTFLOAT80U));
3326typedef FNIEMAIMPLFPUSTR80TOD80 *PFNIEMAIMPLFPUSTR80TOD80;
3327TYPEDEF_SUBTEST_TYPE(FPU_ST_D80_T, FPU_ST_D80_TEST_T, PFNIEMAIMPLFPUSTR80TOD80);
3328
3329static const FPU_ST_D80_T g_aFpuStD80[] =
3330{
3331 ENTRY(fst_r80_to_d80),
3332};
3333
3334#ifdef TSTIEMAIMPL_WITH_GENERATOR
3335static void FpuStD80Generate(PRTSTREAM pOut, uint32_t cTests)
3336{
3337 static RTFLOAT80U const s_aSpecials[] =
3338 {
3339 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 below max */
3340 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 above min */
3341 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact max */
3342 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact min */
3343 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* max & all rounded off bits set */
3344 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* min & all rounded off bits set */
3345 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* max & some rounded off bits set */
3346 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* min & some rounded off bits set */
3347 RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* max & some other rounded off bits set */
3348 RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* min & some other rounded off bits set */
3349 RTFLOAT80U_INIT_C(0, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 above max */
3350 RTFLOAT80U_INIT_C(1, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 below min */
3351 };
3352
3353 X86FXSTATE State;
3354 RT_ZERO(State);
3355 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
3356 {
3357 GenerateArrayStart(pOut, g_aFpuStD80[iFn].pszName, "FPU_ST_D80_TEST_T");
3358 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
3359 {
3360 uint16_t const fFcw = RandFcw();
3361 State.FSW = RandFsw();
3362 RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, 59, true) : s_aSpecials[iTest - cTests];
3363
3364 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3365 {
3366 /* PC doesn't influence these, so leave as is. */
3367 AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT);
3368 for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/)
3369 {
3370 uint16_t uFswOut = 0;
3371 RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
3372 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM))
3373 | (iRounding << X86_FCW_RC_SHIFT);
3374 /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/
3375 State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT;
3376 g_aFpuStD80[iFn].pfn(&State, &uFswOut, &OutVal, &InVal);
3377 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n",
3378 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal),
3379 GenFormatD80(&OutVal), iTest, iRounding, iMask);
3380 }
3381 }
3382 }
3383 GenerateArrayEnd(pOut, g_aFpuStD80[iFn].pszName);
3384 }
3385}
3386#endif
3387
3388
3389static void FpuStD80Test(void)
3390{
3391 X86FXSTATE State;
3392 RT_ZERO(State);
3393 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
3394 {
3395 if (!SubTestAndCheckIfEnabled(g_aFpuStD80[iFn].pszName))
3396 continue;
3397
3398 uint32_t const cTests = *g_aFpuStD80[iFn].pcTests;
3399 FPU_ST_D80_TEST_T const * const paTests = g_aFpuStD80[iFn].paTests;
3400 PFNIEMAIMPLFPUSTR80TOD80 pfn = g_aFpuStD80[iFn].pfn;
3401 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuStD80[iFn]);
3402 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3403 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3404 {
3405 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3406 {
3407 RTFLOAT80U const InVal = paTests[iTest].InVal;
3408 uint16_t uFswOut = 0;
3409 RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
3410 State.FCW = paTests[iTest].fFcw;
3411 State.FSW = paTests[iTest].fFswIn;
3412 pfn(&State, &uFswOut, &OutVal, &InVal);
3413 if ( uFswOut != paTests[iTest].fFswOut
3414 || !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal))
3415 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
3416 "%s -> fsw=%#06x %s\n"
3417 "%s expected %#06x %s%s%s (%s)\n",
3418 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3419 FormatR80(&paTests[iTest].InVal),
3420 iVar ? " " : "", uFswOut, FormatD80(&OutVal),
3421 iVar ? " " : "", paTests[iTest].fFswOut, FormatD80(&paTests[iTest].OutVal),
3422 FswDiff(uFswOut, paTests[iTest].fFswOut),
3423 RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "",
3424 FormatFcw(paTests[iTest].fFcw) );
3425 }
3426 pfn = g_aFpuStD80[iFn].pfnNative;
3427 }
3428 }
3429}
3430
3431
3432
3433/*********************************************************************************************************************************
3434* x87 FPU Binary Operations *
3435*********************************************************************************************************************************/
3436
3437/*
3438 * Binary FPU operations on two 80-bit floating point values.
3439 */
TYPEDEF_SUBTEST_TYPE(FPU_BINARY_R80_T, FPU_BINARY_R80_TEST_T, PFNIEMAIMPLFPUR80);
/* Value for the uExtra field: makes the generator produce exponent ranges and partial remainder sequences for fprem/fprem1. */
enum { kFpuBinaryHint_fprem = 1, };

/* The workers covered by FpuBinaryR80Generate() and FpuBinaryR80Test(). */
static const FPU_BINARY_R80_T g_aFpuBinaryR80[] =
{
    ENTRY(fadd_r80_by_r80),
    ENTRY(fsub_r80_by_r80),
    ENTRY(fsubr_r80_by_r80),
    ENTRY(fmul_r80_by_r80),
    ENTRY(fdiv_r80_by_r80),
    ENTRY(fdivr_r80_by_r80),
    ENTRY_EX(fprem_r80_by_r80, kFpuBinaryHint_fprem),
    ENTRY_EX(fprem1_r80_by_r80, kFpuBinaryHint_fprem),
    ENTRY(fscale_r80_by_r80),
    ENTRY_AMD( fpatan_r80_by_r80, 0), // C1 and rounding differ between AMD and Intel, hence separate entries
    ENTRY_INTEL(fpatan_r80_by_r80, 0), // C1 and rounding differ between AMD and Intel, hence separate entries
    ENTRY_AMD( fyl2x_r80_by_r80, 0), // C1 and rounding differ between AMD and Intel, hence separate entries
    ENTRY_INTEL(fyl2x_r80_by_r80, 0), // C1 and rounding differ between AMD and Intel, hence separate entries
    ENTRY_AMD( fyl2xp1_r80_by_r80, 0), // C1 and rounding differ between AMD and Intel, hence separate entries
    ENTRY_INTEL(fyl2xp1_r80_by_r80, 0), // C1 and rounding differ between AMD and Intel, hence separate entries
};
3461
3462#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes the test data arrays for the g_aFpuBinaryR80 workers.
 *
 * Entries whose idxCpuEflFlavour is IEMTARGETCPU_EFL_BEHAVIOR_NATIVE go to
 * @a pOut.  Other entries are skipped unless their flavour matches
 * g_idxCpuEflFlavour, in which case they go to @a pOutCpu.  The native worker
 * (pfnNative) is preferred over pfn when present.
 *
 * For each input pair and each rounding/precision combination the worker is
 * run with all exceptions masked, with all unmasked, and - if any exception
 * was raised - with further mask combinations derived from the raised ones.
 * For fprem/fprem1 entries the partial remainder sequence is recorded too.
 *
 * @param   pOut     Output stream for the common test data.
 * @param   pOutCpu  Output stream for CPU flavour specific test data.
 * @param   cTests   Number of random input pairs (raised to at least 192);
 *                   the s_aSpecials pairs are appended after them.
 */
static void FpuBinaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT80U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT80U_INIT_C(1, 0xdd762f07f2e80eef, 30142), /* causes weird overflows with DOWN and NEAR rounding. */
          RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
        { RTFLOAT80U_INIT_ZERO(0), /* causes weird overflows with UP and NEAR rounding when precision is lower than 64. */
          RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
        { RTFLOAT80U_INIT_ZERO(0), /* minus variant */
          RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
        { RTFLOAT80U_INIT_C(0, 0xcef238bb9a0afd86, 577 + RTFLOAT80U_EXP_BIAS), /* for fprem and fprem1, max sequence length */
          RTFLOAT80U_INIT_C(0, 0xf11684ec0beaad94, 1 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, -13396 + RTFLOAT80U_EXP_BIAS), /* for fdiv. We missed PE. */
          RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 16383 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
          RTFLOAT80U_INIT_C(0, 0xe000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
        /* fscale: This may seriously increase the exponent, and it turns out overflow and underflow behaviour changes
                   once RTFLOAT80U_EXP_BIAS_ADJUST is exceeded. */
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^64 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 6 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1024 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 10 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^4096 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 12 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16384 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 49150 */
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
          RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
          RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^32768 - result is within range on 10980XE */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 65534 */
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^65536 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1048576 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16777216 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
          RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24575 - within 10980XE range */
        { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: min * 2^-24577 */
          RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24576 - outside 10980XE range, behaviour changes! */
        /* fscale: Negative variants for the essentials of the above. */
        { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: -max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
          RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
        { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: -max * 2^24577 */
          RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
        { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: -min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
          RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24575 - within 10980XE range */
        { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: -min * 2^-24577 */
          RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24576 - outside 10980XE range, behaviour changes! */
        /* fscale: Some fun with denormals and pseudo-denormals. */
        { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: denormal * 2^-4 */
          RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: denormal * 2^+1 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), RTFLOAT80U_INIT_ZERO(0) }, /* for fscale: denormal * 2^+0 */
        { RTFLOAT80U_INIT_C(0, 0x0000000000000008, 0), /* for fscale: tiny denormal * 2^-4 => underflow */
          RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-denormal number * 2^+0. */
        { RTFLOAT80U_INIT_C(1, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* negative pseudo-denormal number * 2^+0. */
        { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-denormal number * 2^-4 */
          RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-denormal number * 2^+1 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-denormal number * 2^+2 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS) },
    };

    X86FXSTATE State;
    RT_ZERO(State);
    /* Lower bounds on how many random pairs must be normal/normal, and (fprem only) in the target exponent range. */
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    uint32_t cMinTargetRangeInputs = cMinNormalPairs / 2;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
    {
        PFNIEMAIMPLFPUR80 const pfn = g_aFpuBinaryR80[iFn].pfnNative ? g_aFpuBinaryR80[iFn].pfnNative : g_aFpuBinaryR80[iFn].pfn;
        PRTSTREAM pOutFn = pOut;
        if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
        {
            /* CPU flavour specific entry: only generate it for the current flavour, into the CPU specific stream. */
            if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
                continue;
            pOutFn = pOutCpu;
        }

        GenerateArrayStart(pOutFn, g_aFpuBinaryR80[iFn].pszName, "FPU_BINARY_R80_TEST_T");
        uint32_t iTestOutput = 0;
        uint32_t cNormalInputPairs = 0;
        uint32_t cTargetRangeInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aSpecials[iTest - cTests].Val1;
            RTFLOAT80U InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
            bool fTargetRange = false;
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
            {
                cNormalInputPairs++;
                /* The unsigned subtraction makes this a single range check: 64 <= exponent difference <= 576. */
                if (   g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem
                    && (uint32_t)InVal1.s.uExponent - (uint32_t)InVal2.s.uExponent - (uint32_t)64 <= (uint32_t)512)
                    cTargetRangeInputs += fTargetRange = true;
                else if (cTargetRangeInputs < cMinTargetRangeInputs && iTest < cTests)
                    if (g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
                    { /* The aim is two values with an exponent difference between 64 and 512 so we can do the whole sequence. */
                        InVal2.s.uExponent = RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 66);
                        InVal1.s.uExponent = RTRandU32Ex(InVal2.s.uExponent + 64, RT_MIN(InVal2.s.uExponent + 512, RTFLOAT80U_EXP_MAX - 1));
                        cTargetRangeInputs += fTargetRange = true;
                    }
            }
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Short on normal pairs near the end of the random range: redo this index with new random values. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcwExtra = 0;
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                {
                    /* First run: all exceptions masked. */
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                              | (iRounding << X86_FCW_RC_SHIFT)
                              | (iPrecision << X86_FCW_PC_SHIFT)
                              | X86_FCW_MASK_ALL;
                    IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResM, &InVal1, &InVal2);
                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
                                 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
                                 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);

                    /* Second run: all exceptions unmasked. */
                    State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
                    IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResU, &InVal1, &InVal2);
                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
                                 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
                                 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);

                    /* Exceptions raised by either run (SF excluded); used below as FCW mask bits. */
                    uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
                    if (fXcpt)
                    {
                        /* Third run: mask exactly the exceptions seen so far. */
                        State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                        IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                        pfn(&State, &Res1, &InVal1, &InVal2);
                        RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
                                     State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
                                     GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
                        /* If that raised exceptions not seen before, add them to the mask and run once more. */
                        if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
                        {
                            fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
                            State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                            IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                            pfn(&State, &Res2, &InVal1, &InVal2);
                            RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
                                         State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
                                         GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
                        }
                        /* With more than one exception involved, also unmask them one at a time. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
                                    IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                                    pfn(&State, &Res3, &InVal1, &InVal2);
                                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
                                                 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
                                                 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
                                }
                    }

                    /* If the values are in range and caused no exceptions, do the whole series of
                       partial remainders till we get the non-partial one or run into an exception. */
                    if (fTargetRange && fXcpt == 0 && g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
                    {
                        IEMFPURESULT ResPrev = ResM;
                        /* Continue while C2 is set and no exception is flagged, at most 32 steps. */
                        for (unsigned i = 0; i < 32 && (ResPrev.FSW & (X86_FSW_C2 | X86_FSW_XCPT_MASK)) == X86_FSW_C2; i++)
                        {
                            State.FCW = State.FCW | X86_FCW_MASK_ALL;
                            State.FSW = ResPrev.FSW;
                            IEMFPURESULT ResSeq = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                            pfn(&State, &ResSeq, &ResPrev.r80Result, &InVal2);
                            RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/seq%u = #%u */\n",
                                         State.FCW | fFcwExtra, State.FSW, ResSeq.FSW, GenFormatR80(&ResPrev.r80Result),
                                         GenFormatR80(&InVal2), GenFormatR80(&ResSeq.r80Result),
                                         iTest, iRounding, iPrecision, i + 1, iTestOutput++);
                            ResPrev = ResSeq;
                        }
                    }
                }
        }
        GenerateArrayEnd(pOutFn, g_aFpuBinaryR80[iFn].pszName);
    }
}
3661#endif
3662
3663
3664static void FpuBinaryR80Test(void)
3665{
3666 X86FXSTATE State;
3667 RT_ZERO(State);
3668 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
3669 {
3670 if (!SubTestAndCheckIfEnabled(g_aFpuBinaryR80[iFn].pszName))
3671 continue;
3672
3673 uint32_t const cTests = *g_aFpuBinaryR80[iFn].pcTests;
3674 FPU_BINARY_R80_TEST_T const * const paTests = g_aFpuBinaryR80[iFn].paTests;
3675 PFNIEMAIMPLFPUR80 pfn = g_aFpuBinaryR80[iFn].pfn;
3676 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryR80[iFn]);
3677 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3678 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3679 {
3680 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3681 {
3682 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3683 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3684 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3685 State.FCW = paTests[iTest].fFcw;
3686 State.FSW = paTests[iTest].fFswIn;
3687 pfn(&State, &Res, &InVal1, &InVal2);
3688 if ( Res.FSW != paTests[iTest].fFswOut
3689 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal))
3690 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
3691 "%s -> fsw=%#06x %s\n"
3692 "%s expected %#06x %s%s%s (%s)\n",
3693 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3694 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
3695 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
3696 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
3697 FswDiff(Res.FSW, paTests[iTest].fFswOut),
3698 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
3699 FormatFcw(paTests[iTest].fFcw) );
3700 }
3701 pfn = g_aFpuBinaryR80[iFn].pfnNative;
3702 }
3703 }
3704}
3705
3706
/*
 * Binary FPU operations on one 80-bit floating point value and a second operand
 * that is a 64-bit or 32-bit floating point value, or a 32-bit or 16-bit integer.
 */
/* Integer counterparts of the RTFLOATxxU_IS_NORMAL macros so that 'a_Type2 ## _IS_NORMAL'
   in the generator macros also resolves for integer operands; they always count as normal. */
#define int64_t_IS_NORMAL(a) 1
#define int32_t_IS_NORMAL(a) 1
#define int16_t_IS_NORMAL(a) 1
3713
3714#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Fixed r80/r64 input pairs appended after the random inputs by FpuBinaryR64Generate(). */
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryR64Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
};
/* Fixed r80/r32 input pairs appended after the random inputs by FpuBinaryR32Generate(). */
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryR32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
};
/* Fixed r80/i32 input pairs appended after the random inputs by FpuBinaryI32Generate(). */
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryI32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
};
/* Fixed r80/i16 input pairs appended after the random inputs by FpuBinaryI16Generate(). */
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryI16Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
};
3733
/*
 * Defines FpuBinary<a_UpBits>Generate(), which writes the test data arrays for
 * all a_aSubTests entries to pOut.
 *
 * Each input pair is run with every rounding and precision control value, once
 * with the exception mask bits clear (tagged 'u' in the output comment) and once
 * with all of them set (tagged 'c').  The s_aFpuBinary<a_UpBits>Specials pairs
 * are appended after the random ones.
 *
 *  a_fIntType, a_cBits - passed on to RandR80Src1().
 *  a_LoBits            - not used by the generator.
 *  a_UpBits            - operand tag pasted into the function, table and helper names.
 *  a_Type2             - type of the second operand.
 *  a_aSubTests         - the sub-test table to generate data for.
 *  a_TestType          - test record type; stringified for GenerateArrayStart().
 */
# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static void FpuBinary ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
    \
    X86FXSTATE State; \
    RT_ZERO(State); \
    uint32_t cMinNormalPairs = (cTests - 144) / 4; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinary ## a_UpBits ## Specials); iTest += 1) \
        { \
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
                                    : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
                                 : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
            { \
                /* Short on normal pairs: redo this index with new random values. */ \
                iTest -= 1; \
                continue; \
            } \
            \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++) \
                { \
                    for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
                    { \
                        State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL)) \
                                  | (iRounding << X86_FCW_RC_SHIFT) \
                                  | (iPrecision << X86_FCW_PC_SHIFT) \
                                  | iMask; \
                        IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                        a_aSubTests[iFn].pfn(&State, &Res, &InVal1, &InVal2); \
                        RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%c */\n", \
                                     State.FCW, State.FSW, Res.FSW, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
                                     GenFormatR80(&Res.r80Result), iTest, iRounding, iPrecision, iMask ? 'c' : 'u'); \
                    } \
                } \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
3785#else
3786# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
3787#endif
3788
/*
 * Instantiates everything needed for one second-operand type:
 *   - the a_SubTestType descriptor type (workers of type PFNIEMAIMPLFPU<a_UpBits>),
 *   - the a_aSubTests table with the add, mul, sub, subr, div and divr workers named
 *     f<a_I><op>_r80_by_<a_LoBits>,
 *   - the generator via GEN_FPU_BINARY_SMALL (expands to nothing unless
 *     TSTIEMAIMPL_WITH_GENERATOR is defined),
 *   - FpuBinary<a_UpBits>Test(), which replays the recorded data and reports a failure
 *     when the output FSW or the 80-bit result differs from the recording.
 */
#define TEST_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_I, a_Type2, a_SubTestType, a_aSubTests, a_TestType) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT4(f, a_I, add_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, mul_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, sub_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, subr_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, div_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, divr_r80_by_, a_LoBits)), \
}; \
\
GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinary ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPU ## a_UpBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
                a_Type2 const InVal2 = paTests[iTest].InVal2; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &Res, &InVal1, &InVal2); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            /* Any further variation runs the native worker. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
3844
/* One instantiation per second-operand type: r64, r32 (floating point) and i32, i16 (integer, 'fi' prefixed workers). */
TEST_FPU_BINARY_SMALL(0, 64, r64, R64, RT_NOTHING, RTFLOAT64U, FPU_BINARY_R64_T, g_aFpuBinaryR64, FPU_BINARY_R64_TEST_T)
TEST_FPU_BINARY_SMALL(0, 32, r32, R32, RT_NOTHING, RTFLOAT32U, FPU_BINARY_R32_T, g_aFpuBinaryR32, FPU_BINARY_R32_TEST_T)
TEST_FPU_BINARY_SMALL(1, 32, i32, I32, i, int32_t, FPU_BINARY_I32_T, g_aFpuBinaryI32, FPU_BINARY_I32_TEST_T)
TEST_FPU_BINARY_SMALL(1, 16, i16, I16, i, int16_t, FPU_BINARY_I16_T, g_aFpuBinaryI16, FPU_BINARY_I16_TEST_T)
3849
3850
3851/*
3852 * Binary operations on 80-, 64- and 32-bit floating point only affecting FSW.
3853 */
3854#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Fixed r80/r80 input pairs appended after the random inputs by FpuBinaryFswR80Generate(). */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryFswR80Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};
/* Fixed r80/r64 input pairs appended after the random inputs by FpuBinaryFswR64Generate(). */
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryFswR64Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
};
/* Fixed r80/r32 input pairs appended after the random inputs by FpuBinaryFswR32Generate(). */
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryFswR32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
};
/* Fixed r80/i32 input pairs appended after the random inputs by FpuBinaryFswI32Generate(). */
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryFswI32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
};
/* Fixed r80/i16 input pairs appended after the random inputs by FpuBinaryFswI16Generate(). */
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryFswI16Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
};
3878
/*
 * Defines FpuBinaryFsw<a_UpBits>Generate(), which writes the test data arrays for
 * all a_aSubTests entries to pOut.
 *
 * The workers only produce an output FSW.  Each input pair is run twice: with the
 * exception mask bits clear (tagged 'u' in the output comment) and with all of
 * them set (tagged 'c').  The s_aFpuBinaryFsw<a_UpBits>Specials pairs are appended
 * after the random ones.
 *
 *  a_fIntType, a_cBits - passed on to RandR80Src1().
 *  a_UpBits            - operand tag pasted into the function, table and helper names.
 *  a_Type2             - type of the second operand.
 *  a_aSubTests         - the sub-test table to generate data for.
 *  a_TestType          - test record type; stringified for GenerateArrayStart().
 */
# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static void FpuBinaryFsw ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
    \
    X86FXSTATE State; \
    RT_ZERO(State); \
    uint32_t cMinNormalPairs = (cTests - 144) / 4; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryFsw ## a_UpBits ## Specials); iTest += 1) \
        { \
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
                                    : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
                                 : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
            { \
                /* Short on normal pairs: redo this index with new random values. */ \
                iTest -= 1; \
                continue; \
            } \
            \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            \
            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */ \
            for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
            { \
                State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask; \
                uint16_t fFswOut = 0; \
                a_aSubTests[iFn].pfn(&State, &fFswOut, &InVal1, &InVal2); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%c */\n", \
                             State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
                             iTest, iMask ? 'c' : 'u'); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
3922#else
3923# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
3924#endif
3925
/*
 * Instantiates everything needed for one FSW-only second-operand type:
 *   - the a_SubTestType descriptor type (workers of type PFNIEMAIMPLFPU<a_UpBits>FSW),
 *   - the a_aSubTests table, whose entries are given as the variadic arguments,
 *   - the generator via GEN_FPU_BINARY_FSW (expands to nothing unless
 *     TSTIEMAIMPL_WITH_GENERATOR is defined),
 *   - FpuBinaryFsw<a_UpBits>Test(), which replays the recorded data and reports a
 *     failure when the output FSW differs from the recording.
 */
#define TEST_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_SubTestType, a_aSubTests, a_TestType, ...) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits ## FSW); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    __VA_ARGS__ \
}; \
\
GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinaryFsw ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPU ## a_UpBits ## FSW pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                uint16_t fFswOut = 0; \
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
                a_Type2 const InVal2 = paTests[iTest].InVal2; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &fFswOut, &InVal1, &InVal2); \
                if (fFswOut != paTests[iTest].fFswOut) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                          "%s -> fsw=%#06x\n" \
                                          "%s expected %#06x %s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? " " : "", fFswOut, \
                                 iVar ? " " : "", paTests[iTest].fFswOut, \
                                 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) ); \
            } \
            /* Any further variation runs the native worker. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
3973
/* One instantiation per second-operand type; the trailing ENTRY() arguments become the sub-test table. */
TEST_FPU_BINARY_FSW(0, 80, R80, RTFLOAT80U, FPU_BINARY_FSW_R80_T, g_aFpuBinaryFswR80, FPU_BINARY_R80_TEST_T, ENTRY(fcom_r80_by_r80), ENTRY(fucom_r80_by_r80))
TEST_FPU_BINARY_FSW(0, 64, R64, RTFLOAT64U, FPU_BINARY_FSW_R64_T, g_aFpuBinaryFswR64, FPU_BINARY_R64_TEST_T, ENTRY(fcom_r80_by_r64))
TEST_FPU_BINARY_FSW(0, 32, R32, RTFLOAT32U, FPU_BINARY_FSW_R32_T, g_aFpuBinaryFswR32, FPU_BINARY_R32_TEST_T, ENTRY(fcom_r80_by_r32))
TEST_FPU_BINARY_FSW(1, 32, I32, int32_t, FPU_BINARY_FSW_I32_T, g_aFpuBinaryFswI32, FPU_BINARY_I32_TEST_T, ENTRY(ficom_r80_by_i32))
TEST_FPU_BINARY_FSW(1, 16, I16, int16_t, FPU_BINARY_FSW_I16_T, g_aFpuBinaryFswI16, FPU_BINARY_I16_TEST_T, ENTRY(ficom_r80_by_i16))
3979
3980
/*
 * Binary operations on 80-bit floating point that affect only EFLAGS and possibly FSW.
 */
TYPEDEF_SUBTEST_TYPE(FPU_BINARY_EFL_R80_T, FPU_BINARY_EFL_R80_TEST_T, PFNIEMAIMPLFPUR80EFL);

/* The workers covered by FpuBinaryEflR80Generate(). */
static const FPU_BINARY_EFL_R80_T g_aFpuBinaryEflR80[] =
{
    ENTRY(fcomi_r80_by_r80),
    ENTRY(fucomi_r80_by_r80),
};
3991
3992#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Fixed r80/r80 input pairs appended after the random inputs by FpuBinaryEflR80Generate(). */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryEflR80Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};
3998
3999static void FpuBinaryEflR80Generate(PRTSTREAM pOut, uint32_t cTests)
4000{
4001 cTests = RT_MAX(160, cTests); /* there are 144 standard input variations */
4002
4003 X86FXSTATE State;
4004 RT_ZERO(State);
4005 uint32_t cMinNormalPairs = (cTests - 144) / 4;
4006 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
4007 {
4008 GenerateArrayStart(pOut, g_aFpuBinaryEflR80[iFn].pszName, "FPU_BINARY_EFL_R80_TEST_T");
4009 uint32_t cNormalInputPairs = 0;
4010 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryEflR80Specials); iTest += 1)
4011 {
4012 RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val1;
4013 RTFLOAT80U const InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val2;
4014 if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
4015 cNormalInputPairs++;
4016 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
4017 {
4018 iTest -= 1;
4019 continue;
4020 }
4021
4022 uint16_t const fFcw = RandFcw();
4023 State.FSW = RandFsw();
4024
4025 /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */
4026 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
4027 {
4028 State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask;
4029 uint16_t uFswOut = 0;
4030 uint32_t fEflOut = g_aFpuBinaryEflR80[iFn].pfn(&State, &uFswOut, &InVal1, &InVal2);
4031 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %#08x }, /* #%u/%c */\n",
4032 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal1), GenFormatR80(&InVal2), fEflOut,
4033 iTest, iMask ? 'c' : 'u');
4034 }
4035 }
4036 GenerateArrayEnd(pOut, g_aFpuBinaryEflR80[iFn].pszName);
4037 }
4038}
4039#endif /*TSTIEMAIMPL_WITH_GENERATOR*/
4040
4041static void FpuBinaryEflR80Test(void)
4042{
4043 X86FXSTATE State;
4044 RT_ZERO(State);
4045 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
4046 {
4047 if (!SubTestAndCheckIfEnabled(g_aFpuBinaryEflR80[iFn].pszName))
4048 continue;
4049
4050 uint32_t const cTests = *g_aFpuBinaryEflR80[iFn].pcTests;
4051 FPU_BINARY_EFL_R80_TEST_T const * const paTests = g_aFpuBinaryEflR80[iFn].paTests;
4052 PFNIEMAIMPLFPUR80EFL pfn = g_aFpuBinaryEflR80[iFn].pfn;
4053 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryEflR80[iFn]);
4054 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4055 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4056 {
4057 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4058 {
4059 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
4060 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
4061 State.FCW = paTests[iTest].fFcw;
4062 State.FSW = paTests[iTest].fFswIn;
4063 uint16_t uFswOut = 0;
4064 uint32_t fEflOut = pfn(&State, &uFswOut, &InVal1, &InVal2);
4065 if ( uFswOut != paTests[iTest].fFswOut
4066 || fEflOut != paTests[iTest].fEflOut)
4067 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
4068 "%s -> fsw=%#06x efl=%#08x\n"
4069 "%s expected %#06x %#08x %s%s (%s)\n",
4070 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4071 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
4072 iVar ? " " : "", uFswOut, fEflOut,
4073 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].fEflOut,
4074 FswDiff(uFswOut, paTests[iTest].fFswOut), EFlagsDiff(fEflOut, paTests[iTest].fEflOut),
4075 FormatFcw(paTests[iTest].fFcw));
4076 }
4077 pfn = g_aFpuBinaryEflR80[iFn].pfnNative;
4078 }
4079 }
4080}
4081
4082
4083/*********************************************************************************************************************************
4084* x87 FPU Unary Operations *
4085*********************************************************************************************************************************/
4086
4087/*
4088 * Unary FPU operations on one 80-bit floating point value.
4089 *
4090 * Note! The FCW reserved bit 7 is used to indicate whether a test may produce
4091 * a rounding error or not.
4092 */
4093TYPEDEF_SUBTEST_TYPE(FPU_UNARY_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARY);
4094
4095enum { kUnary_Accurate = 0, kUnary_Accurate_Trigonometry /*probably not accurate, but need impl to know*/, kUnary_Rounding_F2xm1 };
4096static const FPU_UNARY_R80_T g_aFpuUnaryR80[] =
4097{
4098 ENTRY_EX( fabs_r80, kUnary_Accurate),
4099 ENTRY_EX( fchs_r80, kUnary_Accurate),
4100 ENTRY_AMD_EX( f2xm1_r80, 0, kUnary_Accurate), // C1 differs for -1m0x3fb263cc2c331e15^-2654 (different ln2 constant?)
4101 ENTRY_INTEL_EX(f2xm1_r80, 0, kUnary_Rounding_F2xm1),
4102 ENTRY_EX( fsqrt_r80, kUnary_Accurate),
4103 ENTRY_EX( frndint_r80, kUnary_Accurate),
4104 ENTRY_AMD_EX( fsin_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences for pseudo denormals and others (e.g. -1m0x2b1e5683cbca5725^-3485)
4105 ENTRY_INTEL_EX(fsin_r80, 0, kUnary_Accurate_Trigonometry),
4106 ENTRY_AMD_EX( fcos_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences
4107 ENTRY_INTEL_EX(fcos_r80, 0, kUnary_Accurate_Trigonometry),
4108};
4109
4110#ifdef TSTIEMAIMPL_WITH_GENERATOR
4111
4112static bool FpuUnaryR80MayHaveRoundingError(PCRTFLOAT80U pr80Val, int enmKind)
4113{
4114 if ( enmKind == kUnary_Rounding_F2xm1
4115 && RTFLOAT80U_IS_NORMAL(pr80Val)
4116 && pr80Val->s.uExponent < RTFLOAT80U_EXP_BIAS
4117 && pr80Val->s.uExponent >= RTFLOAT80U_EXP_BIAS - 69)
4118 return true;
4119 return false;
4120}
4121
4122static void FpuUnaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
4123{
4124 static RTFLOAT80U const s_aSpecials[] =
4125 {
4126 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* 0.5 (for f2xm1) */
4127 RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* -0.5 (for f2xm1) */
4128 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* 1.0 (for f2xm1) */
4129 RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* -1.0 (for f2xm1) */
4130 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0), /* +1.0^-16382 */
4131 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 0), /* -1.0^-16382 */
4132 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 0), /* +1.1^-16382 */
4133 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 0), /* -1.1^-16382 */
4134 RTFLOAT80U_INIT_C(0, 0xc000100000000000, 0), /* +1.1xxx1^-16382 */
4135 RTFLOAT80U_INIT_C(1, 0xc000100000000000, 0), /* -1.1xxx1^-16382 */
4136 };
4137 X86FXSTATE State;
4138 RT_ZERO(State);
4139 uint32_t cMinNormals = cTests / 4;
4140 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
4141 {
4142 PFNIEMAIMPLFPUR80UNARY const pfn = g_aFpuUnaryR80[iFn].pfnNative ? g_aFpuUnaryR80[iFn].pfnNative : g_aFpuUnaryR80[iFn].pfn;
4143 PRTSTREAM pOutFn = pOut;
4144 if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
4145 {
4146 if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4147 continue;
4148 pOutFn = pOutCpu;
4149 }
4150
4151 GenerateArrayStart(pOutFn, g_aFpuUnaryR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
4152 uint32_t iTestOutput = 0;
4153 uint32_t cNormalInputs = 0;
4154 uint32_t cTargetRangeInputs = 0;
4155 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4156 {
4157 RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4158 if (RTFLOAT80U_IS_NORMAL(&InVal))
4159 {
4160 if (g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1)
4161 {
4162 unsigned uTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1
4163 ? RTFLOAT80U_EXP_BIAS /* 2^0..2^-69 */ : RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
4164 unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
4165 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
4166 cTargetRangeInputs++;
4167 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
4168 {
4169 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
4170 cTargetRangeInputs++;
4171 }
4172 }
4173 cNormalInputs++;
4174 }
4175 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4176 {
4177 iTest -= 1;
4178 continue;
4179 }
4180
4181 uint16_t const fFcwExtra = FpuUnaryR80MayHaveRoundingError(&InVal, g_aFpuUnaryR80[iFn].uExtra) ? 0x80 : 0;
4182 uint16_t const fFcw = RandFcw();
4183 State.FSW = RandFsw();
4184
4185 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4186 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4187 {
4188 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4189 | (iRounding << X86_FCW_RC_SHIFT)
4190 | (iPrecision << X86_FCW_PC_SHIFT)
4191 | X86_FCW_MASK_ALL;
4192 IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4193 pfn(&State, &ResM, &InVal);
4194 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
4195 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal),
4196 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
4197
4198 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4199 IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4200 pfn(&State, &ResU, &InVal);
4201 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
4202 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal),
4203 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
4204
4205 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4206 if (fXcpt)
4207 {
4208 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4209 IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4210 pfn(&State, &Res1, &InVal);
4211 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
4212 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal),
4213 GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4214 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4215 {
4216 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
4217 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4218 IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4219 pfn(&State, &Res2, &InVal);
4220 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
4221 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal),
4222 GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4223 }
4224 if (!RT_IS_POWER_OF_TWO(fXcpt))
4225 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4226 if (fUnmasked & fXcpt)
4227 {
4228 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
4229 IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4230 pfn(&State, &Res3, &InVal);
4231 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
4232 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal),
4233 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
4234 }
4235 }
4236 }
4237 }
4238 GenerateArrayEnd(pOutFn, g_aFpuUnaryR80[iFn].pszName);
4239 }
4240}
4241#endif
4242
4243static bool FpuIsEqualFcwMaybeIgnoreRoundErr(uint16_t fFcw1, uint16_t fFcw2, bool fRndErrOk, bool *pfRndErr)
4244{
4245 if (fFcw1 == fFcw2)
4246 return true;
4247 if (fRndErrOk && (fFcw1 & ~X86_FSW_C1) == (fFcw2 & ~X86_FSW_C1))
4248 {
4249 *pfRndErr = true;
4250 return true;
4251 }
4252 return false;
4253}
4254
4255static bool FpuIsEqualR80MaybeIgnoreRoundErr(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, bool fRndErrOk, bool *pfRndErr)
4256{
4257 if (RTFLOAT80U_ARE_IDENTICAL(pr80Val1, pr80Val2))
4258 return true;
4259 if ( fRndErrOk
4260 && pr80Val1->s.fSign == pr80Val2->s.fSign)
4261 {
4262 if ( ( pr80Val1->s.uExponent == pr80Val2->s.uExponent
4263 && ( pr80Val1->s.uMantissa > pr80Val2->s.uMantissa
4264 ? pr80Val1->s.uMantissa - pr80Val2->s.uMantissa == 1
4265 : pr80Val2->s.uMantissa - pr80Val1->s.uMantissa == 1))
4266 ||
4267 ( pr80Val1->s.uExponent + 1 == pr80Val2->s.uExponent
4268 && pr80Val1->s.uMantissa == UINT64_MAX
4269 && pr80Val2->s.uMantissa == RT_BIT_64(63))
4270 ||
4271 ( pr80Val1->s.uExponent == pr80Val2->s.uExponent + 1
4272 && pr80Val2->s.uMantissa == UINT64_MAX
4273 && pr80Val1->s.uMantissa == RT_BIT_64(63)) )
4274 {
4275 *pfRndErr = true;
4276 return true;
4277 }
4278 }
4279 return false;
4280}
4281
4282
4283static void FpuUnaryR80Test(void)
4284{
4285 X86FXSTATE State;
4286 RT_ZERO(State);
4287 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
4288 {
4289 if (!SubTestAndCheckIfEnabled(g_aFpuUnaryR80[iFn].pszName))
4290 continue;
4291
4292 uint32_t const cTests = *g_aFpuUnaryR80[iFn].pcTests;
4293 FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryR80[iFn].paTests;
4294 PFNIEMAIMPLFPUR80UNARY pfn = g_aFpuUnaryR80[iFn].pfn;
4295 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryR80[iFn]);
4296 uint32_t cRndErrs = 0;
4297 uint32_t cPossibleRndErrs = 0;
4298 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4299 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4300 {
4301 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4302 {
4303 RTFLOAT80U const InVal = paTests[iTest].InVal;
4304 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4305 bool const fRndErrOk = RT_BOOL(paTests[iTest].fFcw & 0x80);
4306 State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80;
4307 State.FSW = paTests[iTest].fFswIn;
4308 pfn(&State, &Res, &InVal);
4309 bool fRndErr = false;
4310 if ( !FpuIsEqualFcwMaybeIgnoreRoundErr(Res.FSW, paTests[iTest].fFswOut, fRndErrOk, &fRndErr)
4311 || !FpuIsEqualR80MaybeIgnoreRoundErr(&Res.r80Result, &paTests[iTest].OutVal, fRndErrOk, &fRndErr))
4312 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4313 "%s -> fsw=%#06x %s\n"
4314 "%s expected %#06x %s%s%s%s (%s)\n",
4315 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4316 FormatR80(&paTests[iTest].InVal),
4317 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
4318 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
4319 FswDiff(Res.FSW, paTests[iTest].fFswOut),
4320 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
4321 fRndErrOk ? " - rounding errors ok" : "", FormatFcw(paTests[iTest].fFcw));
4322 cRndErrs += fRndErr;
4323 cPossibleRndErrs += fRndErrOk;
4324 }
4325 pfn = g_aFpuUnaryR80[iFn].pfnNative;
4326 }
4327 if (cPossibleRndErrs > 0)
4328 RTTestPrintf(g_hTest, RTTESTLVL_ALWAYS, "rounding errors: %u out of %u\n", cRndErrs, cPossibleRndErrs);
4329 }
4330}
4331
4332
4333/*
4334 * Unary FPU operations on one 80-bit floating point value, but only affects the FSW.
4335 */
4336TYPEDEF_SUBTEST_TYPE(FPU_UNARY_FSW_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYFSW);
4337
4338static const FPU_UNARY_FSW_R80_T g_aFpuUnaryFswR80[] =
4339{
4340 ENTRY(ftst_r80),
4341 ENTRY_EX(fxam_r80, 1),
4342};
4343
4344#ifdef TSTIEMAIMPL_WITH_GENERATOR
4345static void FpuUnaryFswR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
4346{
4347 static RTFLOAT80U const s_aSpecials[] =
4348 {
4349 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
4350 };
4351
4352 X86FXSTATE State;
4353 RT_ZERO(State);
4354 uint32_t cMinNormals = cTests / 4;
4355 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
4356 {
4357 bool const fIsFxam = g_aFpuUnaryFswR80[iFn].uExtra == 1;
4358 PFNIEMAIMPLFPUR80UNARYFSW const pfn = g_aFpuUnaryFswR80[iFn].pfnNative ? g_aFpuUnaryFswR80[iFn].pfnNative : g_aFpuUnaryFswR80[iFn].pfn;
4359 PRTSTREAM pOutFn = pOut;
4360 if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
4361 {
4362 if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4363 continue;
4364 pOutFn = pOutCpu;
4365 }
4366 State.FTW = 0;
4367
4368 GenerateArrayStart(pOutFn, g_aFpuUnaryFswR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
4369 uint32_t cNormalInputs = 0;
4370 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4371 {
4372 RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4373 if (RTFLOAT80U_IS_NORMAL(&InVal))
4374 cNormalInputs++;
4375 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4376 {
4377 iTest -= 1;
4378 continue;
4379 }
4380
4381 uint16_t const fFcw = RandFcw();
4382 State.FSW = RandFsw();
4383 if (!fIsFxam)
4384 {
4385 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4386 {
4387 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4388 {
4389 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
4390 {
4391 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4392 | (iRounding << X86_FCW_RC_SHIFT)
4393 | (iPrecision << X86_FCW_PC_SHIFT)
4394 | iMask;
4395 uint16_t fFswOut = 0;
4396 pfn(&State, &fFswOut, &InVal);
4397 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u/%u/%u/%c */\n",
4398 State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal),
4399 iTest, iRounding, iPrecision, iMask ? 'c' : 'u');
4400 }
4401 }
4402 }
4403 }
4404 else
4405 {
4406 uint16_t fFswOut = 0;
4407 uint16_t const fEmpty = RTRandU32Ex(0, 3) == 3 ? 0x80 : 0; /* Using MBZ bit 7 in FCW to indicate empty tag value. */
4408 State.FTW = !fEmpty ? 1 << X86_FSW_TOP_GET(State.FSW) : 0;
4409 State.FCW = fFcw;
4410 pfn(&State, &fFswOut, &InVal);
4411 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u%s */\n",
4412 fFcw | fEmpty, State.FSW, fFswOut, GenFormatR80(&InVal), iTest, fEmpty ? "/empty" : "");
4413 }
4414 }
4415 GenerateArrayEnd(pOutFn, g_aFpuUnaryFswR80[iFn].pszName);
4416 }
4417}
4418#endif
4419
4420
4421static void FpuUnaryFswR80Test(void)
4422{
4423 X86FXSTATE State;
4424 RT_ZERO(State);
4425 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
4426 {
4427 if (!SubTestAndCheckIfEnabled(g_aFpuUnaryFswR80[iFn].pszName))
4428 continue;
4429
4430 uint32_t const cTests = *g_aFpuUnaryFswR80[iFn].pcTests;
4431 FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryFswR80[iFn].paTests;
4432 PFNIEMAIMPLFPUR80UNARYFSW pfn = g_aFpuUnaryFswR80[iFn].pfn;
4433 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryFswR80[iFn]);
4434 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4435 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4436 {
4437 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4438 {
4439 RTFLOAT80U const InVal = paTests[iTest].InVal;
4440 uint16_t fFswOut = 0;
4441 State.FSW = paTests[iTest].fFswIn;
4442 State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80; /* see generator code */
4443 State.FTW = paTests[iTest].fFcw & 0x80 ? 0 : 1 << X86_FSW_TOP_GET(paTests[iTest].fFswIn);
4444 pfn(&State, &fFswOut, &InVal);
4445 if (fFswOut != paTests[iTest].fFswOut)
4446 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4447 "%s -> fsw=%#06x\n"
4448 "%s expected %#06x %s (%s%s)\n",
4449 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4450 FormatR80(&paTests[iTest].InVal),
4451 iVar ? " " : "", fFswOut,
4452 iVar ? " " : "", paTests[iTest].fFswOut,
4453 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw),
4454 paTests[iTest].fFcw & 0x80 ? " empty" : "");
4455 }
4456 pfn = g_aFpuUnaryFswR80[iFn].pfnNative;
4457 }
4458 }
4459}
4460
4461/*
4462 * Unary FPU operations on one 80-bit floating point value, but with two outputs.
4463 */
4464TYPEDEF_SUBTEST_TYPE(FPU_UNARY_TWO_R80_T, FPU_UNARY_TWO_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYTWO);
4465
4466static const FPU_UNARY_TWO_R80_T g_aFpuUnaryTwoR80[] =
4467{
4468 ENTRY(fxtract_r80_r80),
4469 ENTRY_AMD( fptan_r80_r80, 0), // rounding differences
4470 ENTRY_INTEL(fptan_r80_r80, 0),
4471 ENTRY_AMD( fsincos_r80_r80, 0), // C1 differences & value differences (e.g. -1m0x235cf2f580244a27^-1696)
4472 ENTRY_INTEL(fsincos_r80_r80, 0),
4473};
4474
4475#ifdef TSTIEMAIMPL_WITH_GENERATOR
4476static void FpuUnaryTwoR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
4477{
4478 static RTFLOAT80U const s_aSpecials[] =
4479 {
4480 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
4481 };
4482
4483 X86FXSTATE State;
4484 RT_ZERO(State);
4485 uint32_t cMinNormals = cTests / 4;
4486 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
4487 {
4488 PFNIEMAIMPLFPUR80UNARYTWO const pfn = g_aFpuUnaryTwoR80[iFn].pfnNative ? g_aFpuUnaryTwoR80[iFn].pfnNative : g_aFpuUnaryTwoR80[iFn].pfn;
4489 PRTSTREAM pOutFn = pOut;
4490 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
4491 {
4492 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4493 continue;
4494 pOutFn = pOutCpu;
4495 }
4496
4497 GenerateArrayStart(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName, "FPU_UNARY_TWO_R80_TEST_T");
4498 uint32_t iTestOutput = 0;
4499 uint32_t cNormalInputs = 0;
4500 uint32_t cTargetRangeInputs = 0;
4501 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4502 {
4503 RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4504 if (RTFLOAT80U_IS_NORMAL(&InVal))
4505 {
4506 if (iFn != 0)
4507 {
4508 unsigned uTargetExp = RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
4509 unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
4510 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
4511 cTargetRangeInputs++;
4512 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
4513 {
4514 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
4515 cTargetRangeInputs++;
4516 }
4517 }
4518 cNormalInputs++;
4519 }
4520 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4521 {
4522 iTest -= 1;
4523 continue;
4524 }
4525
4526 uint16_t const fFcwExtra = 0; /* for rounding error indication */
4527 uint16_t const fFcw = RandFcw();
4528 State.FSW = RandFsw();
4529
4530 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4531 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4532 {
4533 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4534 | (iRounding << X86_FCW_RC_SHIFT)
4535 | (iPrecision << X86_FCW_PC_SHIFT)
4536 | X86_FCW_MASK_ALL;
4537 IEMFPURESULTTWO ResM = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4538 pfn(&State, &ResM, &InVal);
4539 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
4540 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal), GenFormatR80(&ResM.r80Result1),
4541 GenFormatR80(&ResM.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4542
4543 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4544 IEMFPURESULTTWO ResU = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4545 pfn(&State, &ResU, &InVal);
4546 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
4547 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal), GenFormatR80(&ResU.r80Result1),
4548 GenFormatR80(&ResU.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4549
4550 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4551 if (fXcpt)
4552 {
4553 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4554 IEMFPURESULTTWO Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4555 pfn(&State, &Res1, &InVal);
4556 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
4557 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal), GenFormatR80(&Res1.r80Result1),
4558 GenFormatR80(&Res1.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4559 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4560 {
4561 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
4562 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4563 IEMFPURESULTTWO Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4564 pfn(&State, &Res2, &InVal);
4565 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
4566 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal), GenFormatR80(&Res2.r80Result1),
4567 GenFormatR80(&Res2.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4568 }
4569 if (!RT_IS_POWER_OF_TWO(fXcpt))
4570 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4571 if (fUnmasked & fXcpt)
4572 {
4573 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
4574 IEMFPURESULTTWO Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4575 pfn(&State, &Res3, &InVal);
4576 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
4577 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal), GenFormatR80(&Res3.r80Result1),
4578 GenFormatR80(&Res3.r80Result2), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
4579 }
4580 }
4581 }
4582 }
4583 GenerateArrayEnd(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName);
4584 }
4585}
4586#endif
4587
4588
4589static void FpuUnaryTwoR80Test(void)
4590{
4591 X86FXSTATE State;
4592 RT_ZERO(State);
4593 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
4594 {
4595 if (!SubTestAndCheckIfEnabled(g_aFpuUnaryTwoR80[iFn].pszName))
4596 continue;
4597
4598 uint32_t const cTests = *g_aFpuUnaryTwoR80[iFn].pcTests;
4599 FPU_UNARY_TWO_R80_TEST_T const * const paTests = g_aFpuUnaryTwoR80[iFn].paTests;
4600 PFNIEMAIMPLFPUR80UNARYTWO pfn = g_aFpuUnaryTwoR80[iFn].pfn;
4601 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryTwoR80[iFn]);
4602 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4603 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4604 {
4605 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4606 {
4607 IEMFPURESULTTWO Res = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4608 RTFLOAT80U const InVal = paTests[iTest].InVal;
4609 State.FCW = paTests[iTest].fFcw;
4610 State.FSW = paTests[iTest].fFswIn;
4611 pfn(&State, &Res, &InVal);
4612 if ( Res.FSW != paTests[iTest].fFswOut
4613 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1)
4614 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) )
4615 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4616 "%s -> fsw=%#06x %s %s\n"
4617 "%s expected %#06x %s %s %s%s%s (%s)\n",
4618 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4619 FormatR80(&paTests[iTest].InVal),
4620 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result1), FormatR80(&Res.r80Result2),
4621 iVar ? " " : "", paTests[iTest].fFswOut,
4622 FormatR80(&paTests[iTest].OutVal1), FormatR80(&paTests[iTest].OutVal2),
4623 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1) ? " - val1" : "",
4624 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) ? " - val2" : "",
4625 FswDiff(Res.FSW, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) );
4626 }
4627 pfn = g_aFpuUnaryTwoR80[iFn].pfnNative;
4628 }
4629 }
4630}
4631
4632
4633/*********************************************************************************************************************************
4634* SSE floating point Binary Operations *
4635*********************************************************************************************************************************/
4636
4637/*
4638 * Binary SSE operations on packed single precision floating point values.
4639 */
4640TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
4641
4642static const SSE_BINARY_R32_T g_aSseBinaryR32[] =
4643{
4644 ENTRY_BIN(addps_u128),
4645 ENTRY_BIN(mulps_u128),
4646 ENTRY_BIN(subps_u128),
4647 ENTRY_BIN(minps_u128),
4648 ENTRY_BIN(divps_u128),
4649 ENTRY_BIN(maxps_u128),
4650 ENTRY_BIN(haddps_u128),
4651 ENTRY_BIN(hsubps_u128),
4652 ENTRY_BIN(sqrtps_u128),
4653 ENTRY_BIN(addsubps_u128),
4654 ENTRY_BIN(cvtps2pd_u128),
4655};
4656
4657#ifdef TSTIEMAIMPL_WITH_GENERATOR
4658static RTEXITCODE SseBinaryR32Generate(const char *pszDataFileFmt, uint32_t cTests)
4659{
4660 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
4661
4662 static struct { RTFLOAT32U aVal1[4], aVal2[4]; } const s_aSpecials[] =
4663 {
4664 { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), },
4665 { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) } },
4666 /** @todo More specials. */
4667 };
4668
4669 X86FXSTATE State;
4670 RT_ZERO(State);
4671 uint32_t cMinNormalPairs = (cTests - 144) / 4;
4672 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
4673 {
4674 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR32[iFn].pfnNative ? g_aSseBinaryR32[iFn].pfnNative : g_aSseBinaryR32[iFn].pfn;
4675
4676 PRTSTREAM pStrmOut = NULL;
4677 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR32[iFn].pszName);
4678 if (RT_FAILURE(rc))
4679 {
4680 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR32[iFn].pszName, rc);
4681 return RTEXITCODE_FAILURE;
4682 }
4683
4684 uint32_t cNormalInputPairs = 0;
4685 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4686 {
4687 SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);
4688
4689 TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
4690 TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
4691 TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
4692 TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];
4693
4694 TestData.InVal2.ar32[0] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
4695 TestData.InVal2.ar32[1] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[1];
4696 TestData.InVal2.ar32[2] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[2];
4697 TestData.InVal2.ar32[3] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[3];
4698
4699 if ( RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[0])
4700 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[1])
4701 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[2])
4702 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[3]))
4703 cNormalInputPairs++;
4704 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
4705 {
4706 iTest -= 1;
4707 continue;
4708 }
4709
4710 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
4711 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4712 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
4713 for (uint8_t iFz = 0; iFz < 2; iFz++)
4714 {
4715 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
4716 | (iRounding << X86_MXCSR_RC_SHIFT)
4717 | (iDaz ? X86_MXCSR_DAZ : 0)
4718 | (iFz ? X86_MXCSR_FZ : 0)
4719 | X86_MXCSR_XCPT_MASK;
4720 IEMSSERESULT ResM; RT_ZERO(ResM);
4721 pfn(&State, &ResM, &TestData.InVal1, &TestData.InVal2);
4722 TestData.fMxcsrIn = State.MXCSR;
4723 TestData.fMxcsrOut = ResM.MXCSR;
4724 TestData.OutVal = ResM.uResult;
4725 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4726
4727 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
4728 IEMSSERESULT ResU; RT_ZERO(ResU);
4729 pfn(&State, &ResU, &TestData.InVal1, &TestData.InVal2);
4730 TestData.fMxcsrIn = State.MXCSR;
4731 TestData.fMxcsrOut = ResU.MXCSR;
4732 TestData.OutVal = ResU.uResult;
4733 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4734
4735 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
4736 if (fXcpt)
4737 {
4738 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
4739 IEMSSERESULT Res1; RT_ZERO(Res1);
4740 pfn(&State, &Res1, &TestData.InVal1, &TestData.InVal2);
4741 TestData.fMxcsrIn = State.MXCSR;
4742 TestData.fMxcsrOut = Res1.MXCSR;
4743 TestData.OutVal = Res1.uResult;
4744 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4745
4746 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
4747 {
4748 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
4749 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
4750 IEMSSERESULT Res2; RT_ZERO(Res2);
4751 pfn(&State, &Res2, &TestData.InVal1, &TestData.InVal2);
4752 TestData.fMxcsrIn = State.MXCSR;
4753 TestData.fMxcsrOut = Res2.MXCSR;
4754 TestData.OutVal = Res2.uResult;
4755 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4756 }
4757 if (!RT_IS_POWER_OF_TWO(fXcpt))
4758 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
4759 if (fUnmasked & fXcpt)
4760 {
4761 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
4762 IEMSSERESULT Res3; RT_ZERO(Res3);
4763 pfn(&State, &Res3, &TestData.InVal1, &TestData.InVal2);
4764 TestData.fMxcsrIn = State.MXCSR;
4765 TestData.fMxcsrOut = Res3.MXCSR;
4766 TestData.OutVal = Res3.uResult;
4767 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4768 }
4769 }
4770 }
4771 }
4772 rc = RTStrmClose(pStrmOut);
4773 if (RT_FAILURE(rc))
4774 {
4775 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR32[iFn].pszName, rc);
4776 return RTEXITCODE_FAILURE;
4777 }
4778 }
4779
4780 return RTEXITCODE_SUCCESS;
4781}
4782#endif
4783
4784static void SseBinaryR32Test(void)
4785{
4786 X86FXSTATE State;
4787 RT_ZERO(State);
4788 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
4789 {
4790 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR32[iFn].pszName))
4791 continue;
4792
4793 uint32_t const cTests = *g_aSseBinaryR32[iFn].pcTests;
4794 SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR32[iFn].paTests;
4795 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR32[iFn].pfn;
4796 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32[iFn]);
4797 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4798 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4799 {
4800 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_TEST_T); iTest++)
4801 {
4802 IEMSSERESULT Res; RT_ZERO(Res);
4803
4804 State.MXCSR = paTests[iTest].fMxcsrIn;
4805 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
4806 bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
4807 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
4808 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
4809 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]);
4810 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
4811 || !fValsIdentical)
4812 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s'%s'%s'%s\n"
4813 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
4814 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
4815 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
4816 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
4817 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
4818 FormatR32(&paTests[iTest].InVal2.ar32[0]), FormatR32(&paTests[iTest].InVal2.ar32[1]),
4819 FormatR32(&paTests[iTest].InVal2.ar32[2]), FormatR32(&paTests[iTest].InVal2.ar32[3]),
4820 iVar ? " " : "", Res.MXCSR,
4821 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
4822 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
4823 iVar ? " " : "", paTests[iTest].fMxcsrOut,
4824 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
4825 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
4826 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
4827 !fValsIdentical ? " - val" : "",
4828 FormatMxcsr(paTests[iTest].fMxcsrIn) );
4829 }
4830 pfn = g_aSseBinaryR32[iFn].pfnNative;
4831 }
4832 }
4833}
4834
4835
4836/*
4837 * Binary SSE operations on packed double precision floating point values.
4838 */
/* Sub-test entry type for the table below: test records are SSE_BINARY_TEST_T and
   workers are PFNIEMAIMPLFPSSEF2U128.  NOTE(review): the exact member layout comes
   from TYPEDEF_SUBTEST_TYPE, which is defined elsewhere in this file. */
4839 TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);
4840
/* Sub-tests walked by SseBinaryR64Generate() and SseBinaryR64Test().  Both index this
   table in order, so the entry order determines the order sub-tests are run in. */
4841 static const SSE_BINARY_R64_T g_aSseBinaryR64[] =
4842 {
4843 ENTRY_BIN(addpd_u128),
4844 ENTRY_BIN(mulpd_u128),
4845 ENTRY_BIN(subpd_u128),
4846 ENTRY_BIN(minpd_u128),
4847 ENTRY_BIN(divpd_u128),
4848 ENTRY_BIN(maxpd_u128),
4849 ENTRY_BIN(haddpd_u128),
4850 ENTRY_BIN(hsubpd_u128),
4851 ENTRY_BIN(sqrtpd_u128),
4852 ENTRY_BIN(addsubpd_u128),
4853 ENTRY_BIN(cvtpd2ps_u128),
4854 };
4855
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Calls @a pfn once with the MXCSR currently held in @a pState, stores the
 * input/output MXCSR and the result in @a pTestData and appends the record to
 * @a pStrmOut.
 *
 * @returns The MXCSR value the worker reported.
 * @param   pStrmOut    The data file stream to append the record to.
 * @param   pfn         The worker to call.
 * @param   pState      The FPU state; MXCSR must be set up by the caller.
 * @param   pTestData   The record; InVal1 and InVal2 must be set by the caller.
 */
static uint32_t SseBinaryR64GenerateOne(PRTSTREAM pStrmOut, PFNIEMAIMPLFPSSEF2U128 pfn, X86FXSTATE *pState,
                                        SSE_BINARY_TEST_T *pTestData)
{
    IEMSSERESULT Res;
    RT_ZERO(Res);
    pfn(pState, &Res, &pTestData->InVal1, &pTestData->InVal2);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = Res.MXCSR;
    pTestData->OutVal    = Res.uResult;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return Res.MXCSR;
}

/**
 * Generates the test data files for all g_aSseBinaryR64 sub-tests.
 *
 * For every input pair the worker is run for each rounding mode / DAZ / FZ
 * combination with different exception flag and mask settings, and each run
 * is written to the data file as one SSE_BINARY_TEST_T record.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; it is passed
 *                          to RTStrmOpenF together with the sub-test name.
 * @param   cTests          Number of generated input pairs (raised to at
 *                          least 192); the special inputs are added on top.
 */
static RTEXITCODE SseBinaryR64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U aVal1[2], aVal2[2]; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
          { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1), RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) } },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
    {
        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR64[iFn].pfnNative ? g_aSseBinaryR64[iFn].pfnNative : g_aSseBinaryR64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR64[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);

            if (iTest < cTests)
            {
                TestData.InVal1.ar64[0] = RandR64Src(iTest);
                TestData.InVal1.ar64[1] = RandR64Src(iTest);
                TestData.InVal2.ar64[0] = RandR64Src2(iTest);
                TestData.InVal2.ar64[1] = RandR64Src2(iTest);
            }
            else
            {
                /* Each lane takes its own special value; lane 1 used to be
                   loaded from index 0 of the specials by mistake. */
                TestData.InVal1.ar64[0] = s_aSpecials[iTest - cTests].aVal1[0];
                TestData.InVal1.ar64[1] = s_aSpecials[iTest - cTests].aVal1[1];
                TestData.InVal2.ar64[0] = s_aSpecials[iTest - cTests].aVal2[0];
                TestData.InVal2.ar64[1] = s_aSpecials[iTest - cTests].aVal2[1];
            }

            if (   RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
                && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[1]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few all-normal inputs so far: redo this index. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* First with every exception mask bit set... */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryR64GenerateOne(pStrmOut, pfn, &State, &TestData);

                        /* ...then with every exception mask bit cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryR64GenerateOne(pStrmOut, pfn, &State, &TestData);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Mask bits cleared, the flags seen so far OR'ed into MXCSR. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1 = SseBinaryR64GenerateOne(pStrmOut, pfn, &State, &TestData);

                            /* That run reported flags not seen before: add them and set the matching mask bits. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseBinaryR64GenerateOne(pStrmOut, pfn, &State, &TestData);
                            }

                            /* Several flags: leave each one unmasked in turn. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        SseBinaryR64GenerateOne(pStrmOut, pfn, &State, &TestData);
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
4975
4976
4977static void SseBinaryR64Test(void)
4978{
4979 X86FXSTATE State;
4980 RT_ZERO(State);
4981 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
4982 {
4983 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR64[iFn].pszName))
4984 continue;
4985
4986 uint32_t const cTests = *g_aSseBinaryR64[iFn].pcTests;
4987 SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR64[iFn].paTests;
4988 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR64[iFn].pfn;
4989 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64[iFn]);
4990 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4991 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4992 {
4993 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_TEST_T); iTest++)
4994 {
4995 IEMSSERESULT Res; RT_ZERO(Res);
4996
4997 State.MXCSR = paTests[iTest].fMxcsrIn;
4998 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
4999 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5000 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5001 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5002 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s'%s\n"
5003 "%s -> mxcsr=%#08x %s'%s\n"
5004 "%s expected %#08x %s'%s%s%s (%s)\n",
5005 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5006 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
5007 FormatR64(&paTests[iTest].InVal2.ar64[0]), FormatR64(&paTests[iTest].InVal2.ar64[1]),
5008 iVar ? " " : "", Res.MXCSR,
5009 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
5010 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5011 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
5012 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5013 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5014 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5015 ? " - val" : "",
5016 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5017 }
5018 pfn = g_aSseBinaryR64[iFn].pfnNative;
5019 }
5020 }
5021}
5022
5023
5024/*
5025 * Binary SSE operations on scalar single precision floating point values (xxxss xmm1, r/m32).
5026 */
/* Sub-test entry type for the table below: test records are SSE_BINARY_U128_R32_TEST_T
   and workers are PFNIEMAIMPLFPSSEF2U128R32.  NOTE(review): the exact member layout
   comes from TYPEDEF_SUBTEST_TYPE, which is defined elsewhere in this file. */
5027 TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R32_T, SSE_BINARY_U128_R32_TEST_T, PFNIEMAIMPLFPSSEF2U128R32);
5028
/* Sub-tests walked by SseBinaryU128R32Generate() and SseBinaryU128R32Test().  Both
   index this table in order, so the entry order determines the sub-test order. */
5029 static const SSE_BINARY_U128_R32_T g_aSseBinaryU128R32[] =
5030 {
5031 ENTRY_BIN(addss_u128_r32),
5032 ENTRY_BIN(mulss_u128_r32),
5033 ENTRY_BIN(subss_u128_r32),
5034 ENTRY_BIN(minss_u128_r32),
5035 ENTRY_BIN(divss_u128_r32),
5036 ENTRY_BIN(maxss_u128_r32),
5037 ENTRY_BIN(cvtss2sd_u128_r32),
5038 ENTRY_BIN(sqrtss_u128_r32),
5039 };
5040
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Calls @a pfn once with the MXCSR currently held in @a pState, stores the
 * input/output MXCSR and the result in @a pTestData and appends the record to
 * @a pStrmOut.
 *
 * @returns The MXCSR value the worker reported.
 * @param   pStrmOut    The data file stream to append the record to.
 * @param   pfn         The worker to call.
 * @param   pState      The FPU state; MXCSR must be set up by the caller.
 * @param   pTestData   The record; InVal1 and r32Val2 must be set by the caller.
 */
static uint32_t SseBinaryU128R32GenerateOne(PRTSTREAM pStrmOut, PFNIEMAIMPLFPSSEF2U128R32 pfn, X86FXSTATE *pState,
                                            SSE_BINARY_U128_R32_TEST_T *pTestData)
{
    IEMSSERESULT Res;
    RT_ZERO(Res);
    pfn(pState, &Res, &pTestData->InVal1, &pTestData->r32Val2);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = Res.MXCSR;
    pTestData->OutVal    = Res.uResult;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return Res.MXCSR;
}

/**
 * Generates the test data files for all g_aSseBinaryU128R32 sub-tests.
 *
 * For every input set the worker is run for each rounding mode / DAZ / FZ
 * combination with different exception flag and mask settings, and each run
 * is written to the data file as one SSE_BINARY_U128_R32_TEST_T record.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; it is passed
 *                          to RTStrmOpenF together with the sub-test name.
 * @param   cTests          Number of generated input sets (raised to at
 *                          least 192); the special inputs are added on top.
 */
static RTEXITCODE SseBinaryU128R32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U aVal1[4], Val2; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), }, RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
    {
        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLFPSSEF2U128R32 const pfn = g_aSseBinaryU128R32[iFn].pfnNative ? g_aSseBinaryU128R32[iFn].pfnNative : g_aSseBinaryU128R32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryU128R32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryU128R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_U128_R32_TEST_T TestData; RT_ZERO(TestData);

            if (iTest < cTests)
            {
                for (unsigned iLane = 0; iLane < 4; iLane++)
                    TestData.InVal1.ar32[iLane] = RandR32Src(iTest);
                TestData.r32Val2 = RandR32Src2(iTest);
            }
            else
            {
                for (unsigned iLane = 0; iLane < 4; iLane++)
                    TestData.InVal1.ar32[iLane] = s_aSpecials[iTest - cTests].aVal1[iLane];
                TestData.r32Val2 = s_aSpecials[iTest - cTests].Val2;
            }

            if (   RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3])
                && RTFLOAT32U_IS_NORMAL(&TestData.r32Val2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few all-normal inputs so far: redo this index. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* First with every exception mask bit set... */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryU128R32GenerateOne(pStrmOut, pfn, &State, &TestData);

                        /* ...then with every exception mask bit cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryU128R32GenerateOne(pStrmOut, pfn, &State, &TestData);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Mask bits cleared, the flags seen so far OR'ed into MXCSR. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1 = SseBinaryU128R32GenerateOne(pStrmOut, pfn, &State, &TestData);

                            /* That run reported flags not seen before: add them and set the matching mask bits. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseBinaryU128R32GenerateOne(pStrmOut, pfn, &State, &TestData);
                            }

                            /* Several flags: leave each one unmasked in turn. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        SseBinaryU128R32GenerateOne(pStrmOut, pfn, &State, &TestData);
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryU128R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
5164
5165static void SseBinaryU128R32Test(void)
5166{
5167 X86FXSTATE State;
5168 RT_ZERO(State);
5169 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
5170 {
5171 if (!SubTestAndCheckIfEnabled(g_aSseBinaryU128R32[iFn].pszName))
5172 continue;
5173
5174 uint32_t const cTests = *g_aSseBinaryU128R32[iFn].pcTests;
5175 SSE_BINARY_U128_R32_TEST_T const * const paTests = g_aSseBinaryU128R32[iFn].paTests;
5176 PFNIEMAIMPLFPSSEF2U128R32 pfn = g_aSseBinaryU128R32[iFn].pfn;
5177 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R32[iFn]);
5178 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5179 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5180 {
5181 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_TEST_T); iTest++)
5182 {
5183 IEMSSERESULT Res; RT_ZERO(Res);
5184
5185 State.MXCSR = paTests[iTest].fMxcsrIn;
5186 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].r32Val2);
5187 bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
5188 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
5189 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
5190 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]);
5191 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5192 || !fValsIdentical)
5193 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s\n"
5194 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
5195 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
5196 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5197 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
5198 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
5199 FormatR32(&paTests[iTest].r32Val2),
5200 iVar ? " " : "", Res.MXCSR,
5201 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
5202 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
5203 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5204 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
5205 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
5206 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5207 !fValsIdentical ? " - val" : "",
5208 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5209 }
5210 }
5211 }
5212}
5213
5214
5215/*
5216 * Binary SSE operations on scalar double precision floating point values (xxxsd xmm1, r/m64).
5217 */
/* Sub-test entry type for the table below: test records are SSE_BINARY_U128_R64_TEST_T
   and workers are PFNIEMAIMPLFPSSEF2U128R64.  NOTE(review): the exact member layout
   comes from TYPEDEF_SUBTEST_TYPE, which is defined elsewhere in this file. */
5218 TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R64_T, SSE_BINARY_U128_R64_TEST_T, PFNIEMAIMPLFPSSEF2U128R64);
5219
/* Sub-tests walked by SseBinaryU128R64Generate() and SseBinaryU128R64Test().  Both
   index this table in order, so the entry order determines the sub-test order. */
5220 static const SSE_BINARY_U128_R64_T g_aSseBinaryU128R64[] =
5221 {
5222 ENTRY_BIN(addsd_u128_r64),
5223 ENTRY_BIN(mulsd_u128_r64),
5224 ENTRY_BIN(subsd_u128_r64),
5225 ENTRY_BIN(minsd_u128_r64),
5226 ENTRY_BIN(divsd_u128_r64),
5227 ENTRY_BIN(maxsd_u128_r64),
5228 ENTRY_BIN(cvtsd2ss_u128_r64),
5229 ENTRY_BIN(sqrtsd_u128_r64),
5230 };
5231
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Calls @a pfn once with the MXCSR currently held in @a pState, stores the
 * input/output MXCSR and the result in @a pTestData and appends the record to
 * @a pStrmOut.
 *
 * @returns The MXCSR value the worker reported.
 * @param   pStrmOut    The data file stream to append the record to.
 * @param   pfn         The worker to call.
 * @param   pState      The FPU state; MXCSR must be set up by the caller.
 * @param   pTestData   The record; InVal1 and r64Val2 must be set by the caller.
 */
static uint32_t SseBinaryU128R64GenerateOne(PRTSTREAM pStrmOut, PFNIEMAIMPLFPSSEF2U128R64 pfn, X86FXSTATE *pState,
                                            SSE_BINARY_U128_R64_TEST_T *pTestData)
{
    IEMSSERESULT Res;
    RT_ZERO(Res);
    pfn(pState, &Res, &pTestData->InVal1, &pTestData->r64Val2);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = Res.MXCSR;
    pTestData->OutVal    = Res.uResult;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return Res.MXCSR;
}

/**
 * Generates the test data files for all g_aSseBinaryU128R64 sub-tests.
 *
 * For every input set the worker is run for each rounding mode / DAZ / FZ
 * combination with different exception flag and mask settings, and each run
 * is written to the data file as one SSE_BINARY_U128_R64_TEST_T record.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; it is passed
 *                          to RTStrmOpenF together with the sub-test name.
 * @param   cTests          Number of generated input sets (raised to at
 *                          least 192); the special inputs are added on top.
 */
static RTEXITCODE SseBinaryU128R64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U aVal1[2], Val2; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) }, RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
    {
        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLFPSSEF2U128R64 const pfn = g_aSseBinaryU128R64[iFn].pfnNative ? g_aSseBinaryU128R64[iFn].pfnNative : g_aSseBinaryU128R64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryU128R64[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryU128R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_U128_R64_TEST_T TestData; RT_ZERO(TestData);

            if (iTest < cTests)
            {
                TestData.InVal1.ar64[0] = RandR64Src(iTest);
                TestData.InVal1.ar64[1] = RandR64Src(iTest);
                TestData.r64Val2        = RandR64Src2(iTest);
            }
            else
            {
                TestData.InVal1.ar64[0] = s_aSpecials[iTest - cTests].aVal1[0];
                TestData.InVal1.ar64[1] = s_aSpecials[iTest - cTests].aVal1[1];
                TestData.r64Val2        = s_aSpecials[iTest - cTests].Val2;
            }

            if (   RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
                && RTFLOAT64U_IS_NORMAL(&TestData.r64Val2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few all-normal inputs so far: redo this index. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* First with every exception mask bit set... */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryU128R64GenerateOne(pStrmOut, pfn, &State, &TestData);

                        /* ...then with every exception mask bit cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryU128R64GenerateOne(pStrmOut, pfn, &State, &TestData);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Mask bits cleared, the flags seen so far OR'ed into MXCSR. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1 = SseBinaryU128R64GenerateOne(pStrmOut, pfn, &State, &TestData);

                            /* That run reported flags not seen before: add them and set the matching mask bits. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseBinaryU128R64GenerateOne(pStrmOut, pfn, &State, &TestData);
                            }

                            /* Several flags: leave each one unmasked in turn. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        SseBinaryU128R64GenerateOne(pStrmOut, pfn, &State, &TestData);
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryU128R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
5349
5350
5351static void SseBinaryU128R64Test(void)
5352{
5353 X86FXSTATE State;
5354 RT_ZERO(State);
5355 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
5356 {
5357 if (!SubTestAndCheckIfEnabled(g_aSseBinaryU128R64[iFn].pszName))
5358 continue;
5359
5360 uint32_t const cTests = *g_aSseBinaryU128R64[iFn].pcTests;
5361 SSE_BINARY_U128_R64_TEST_T const * const paTests = g_aSseBinaryU128R64[iFn].paTests;
5362 PFNIEMAIMPLFPSSEF2U128R64 pfn = g_aSseBinaryU128R64[iFn].pfn;
5363 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R64[iFn]);
5364 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5365 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5366 {
5367 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_U128_R64_TEST_T); iTest++)
5368 {
5369 IEMSSERESULT Res; RT_ZERO(Res);
5370
5371 State.MXCSR = paTests[iTest].fMxcsrIn;
5372 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].r64Val2);
5373 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5374 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5375 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5376 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s\n"
5377 "%s -> mxcsr=%#08x %s'%s\n"
5378 "%s expected %#08x %s'%s%s%s (%s)\n",
5379 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5380 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
5381 FormatR64(&paTests[iTest].r64Val2),
5382 iVar ? " " : "", Res.MXCSR,
5383 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
5384 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5385 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
5386 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5387 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5388 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5389 ? " - val" : "",
5390 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5391 }
5392 }
5393 }
5394}
5395
5396
5397/*
5398 * SSE operations converting single double-precision floating point values to signed double-word integers (cvttsd2si and friends).
5399 */
/* Sub-test entry type for the table below: test records are SSE_BINARY_I32_R64_TEST_T
   and workers are PFNIEMAIMPLSSEF2I32U64.  NOTE(review): the exact member layout
   comes from TYPEDEF_SUBTEST_TYPE, which is defined elsewhere in this file. */
5400 TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I32_R64_T, SSE_BINARY_I32_R64_TEST_T, PFNIEMAIMPLSSEF2I32U64);
5401
/* Sub-tests walked by SseBinaryI32R64Generate() and SseBinaryI32R64Test().  Both
   index this table in order, so the entry order determines the sub-test order. */
5402 static const SSE_BINARY_I32_R64_T g_aSseBinaryI32R64[] =
5403 {
5404 ENTRY_BIN(cvttsd2si_i32_r64),
5405 ENTRY_BIN(cvtsd2si_i32_r64),
5406 };
5407
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Calls @a pfn once with the MXCSR currently held in @a pState, stores the
 * input/output MXCSR and the 32-bit result in @a pTestData and appends the
 * record to @a pStrmOut.
 *
 * @returns The MXCSR value the worker reported.
 * @param   pStrmOut    The data file stream to append the record to.
 * @param   pfn         The worker to call.
 * @param   pState      The FPU state; MXCSR must be set up by the caller.
 * @param   pTestData   The record; r64ValIn must be set by the caller.
 */
static uint32_t SseBinaryI32R64GenerateOne(PRTSTREAM pStrmOut, PFNIEMAIMPLSSEF2I32U64 pfn, X86FXSTATE *pState,
                                           SSE_BINARY_I32_R64_TEST_T *pTestData)
{
    uint32_t fMxcsrOut;
    int32_t  i32Out;
    pfn(pState, &fMxcsrOut, &i32Out, &pTestData->r64ValIn.u);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->i32ValOut = i32Out;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}

/**
 * Generates the test data files for all g_aSseBinaryI32R64 sub-tests.
 *
 * For every input value the worker is run for each rounding mode / DAZ / FZ
 * combination with different exception flag and mask settings, and each run
 * is written to the data file as one SSE_BINARY_I32_R64_TEST_T record.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; it is passed
 *                          to RTStrmOpenF together with the sub-test name.
 * @param   cTests          Number of generated input values (raised to at
 *                          least 192); the special inputs are added on top.
 */
static RTEXITCODE SseBinaryI32R64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U Val; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R64); iFn++)
    {
        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLSSEF2I32U64 const pfn = g_aSseBinaryI32R64[iFn].pfnNative ? g_aSseBinaryI32R64[iFn].pfnNative : g_aSseBinaryI32R64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryI32R64[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryI32R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_I32_R64_TEST_T TestData; RT_ZERO(TestData);

            if (iTest < cTests)
                TestData.r64ValIn = RandR64Src(iTest);
            else
                TestData.r64ValIn = s_aSpecials[iTest - cTests].Val;

            if (RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few normal inputs so far: redo this index. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* First with every exception mask bit set... */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryI32R64GenerateOne(pStrmOut, pfn, &State, &TestData);

                        /* ...then with every exception mask bit cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryI32R64GenerateOne(pStrmOut, pfn, &State, &TestData);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Mask bits cleared, the flags seen so far OR'ed into MXCSR. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1 = SseBinaryI32R64GenerateOne(pStrmOut, pfn, &State, &TestData);

                            /* That run reported flags not seen before: add them and set the matching mask bits. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseBinaryI32R64GenerateOne(pStrmOut, pfn, &State, &TestData);
                            }

                            /* Several flags: leave each one unmasked in turn. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        SseBinaryI32R64GenerateOne(pStrmOut, pfn, &State, &TestData);
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryI32R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
5522
5523
5524static void SseBinaryI32R64Test(void)
5525{
5526 X86FXSTATE State;
5527 RT_ZERO(State);
5528 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R64); iFn++)
5529 {
5530 if (!SubTestAndCheckIfEnabled(g_aSseBinaryI32R64[iFn].pszName))
5531 continue;
5532
5533 uint32_t const cTests = *g_aSseBinaryI32R64[iFn].pcTests;
5534 SSE_BINARY_I32_R64_TEST_T const * const paTests = g_aSseBinaryI32R64[iFn].paTests;
5535 PFNIEMAIMPLSSEF2I32U64 pfn = g_aSseBinaryI32R64[iFn].pfn;
5536 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R64[iFn]);
5537 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5538 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5539 {
5540 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_I32_R64_TEST_T); iTest++)
5541 {
5542 uint32_t fMxcsr = 0;
5543 int32_t i32Dst = 0;
5544
5545 State.MXCSR = paTests[iTest].fMxcsrIn;
5546 pfn(&State, &fMxcsr, &i32Dst, &paTests[iTest].r64ValIn.u);
5547 if ( fMxcsr != paTests[iTest].fMxcsrOut
5548 || i32Dst != paTests[iTest].i32ValOut)
5549 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
5550 "%s -> mxcsr=%#08x %RI32\n"
5551 "%s expected %#08x %RI32%s%s (%s)\n",
5552 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5553 FormatR64(&paTests[iTest].r64ValIn),
5554 iVar ? " " : "", fMxcsr, i32Dst,
5555 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i32ValOut,
5556 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
5557 i32Dst != paTests[iTest].i32ValOut
5558 ? " - val" : "",
5559 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5560 }
5561 }
5562 }
5563}
5564
5565
5566/*
5567 * SSE operations converting single double-precision floating point values to signed quad-word integers (cvttsd2si and friends).
5568 */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I64_R64_T, SSE_BINARY_I64_R64_TEST_T, PFNIEMAIMPLSSEF2I64U64);

/** Subtest table walked by SseBinaryI64R64Generate() and SseBinaryI64R64Test(). */
static const SSE_BINARY_I64_R64_T g_aSseBinaryI64R64[] =
{
    ENTRY_BIN(cvttsd2si_i64_r64),
    ENTRY_BIN(cvtsd2si_i64_r64),
};
5576
5577#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the binary test data files for the g_aSseBinaryI64R64 subtests.
 *
 * For each table entry the worker (pfnNative when set, otherwise pfn) is run
 * on inputs from RandR64Src() plus the s_aSpecials values, for every
 * combination of rounding mode, DAZ and FZ.  One SSE_BINARY_I64_R64_TEST_T
 * record is appended to the data file per worker call.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the subtest
 *                          name is supplied as its argument.
 * @param   cTests          Number of RandR64Src() inputs per subtest; raised
 *                          to at least 192.
 */
static RTEXITCODE SseBinaryI64R64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U Val; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    /* Lower bound on the number of normal inputs, enforced in the input loop below. */
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R64); iFn++)
    {
        /* Prefer the native worker when the table provides one. */
        PFNIEMAIMPLSSEF2I64U64 const pfn = g_aSseBinaryI64R64[iFn].pfnNative ? g_aSseBinaryI64R64[iFn].pfnNative : g_aSseBinaryI64R64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryI64R64[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryI64R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_I64_R64_TEST_T TestData; RT_ZERO(TestData);

            /* Indexes below cTests come from RandR64Src(), the remainder from s_aSpecials. */
            TestData.r64ValIn = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val;

            /* Count normal inputs.  Near the end of the RandR64Src() range, redo the
               same index (the decrement cancels the loop increment) while fewer than
               cMinNormalPairs normal inputs have been seen. */
            if (RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            /* Only the exception flag bits are taken from RandMxcsr(); rounding mode,
               DAZ, FZ and the mask bits are set per combination below. */
            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st record: all exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz ? X86_MXCSR_FZ : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM; int64_t i64OutM;
                        pfn(&State, &fMxcsrM, &i64OutM, &TestData.r64ValIn.u);
                        TestData.fMxcsrIn = State.MXCSR;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.i64ValOut = i64OutM;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* 2nd record: same settings with all exception mask bits cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU; int64_t i64OutU;
                        pfn(&State, &fMxcsrU, &i64OutU, &TestData.r64ValIn.u);
                        TestData.fMxcsrIn = State.MXCSR;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.i64ValOut = i64OutU;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* Exception flags reported by either of the two runs above. */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* 3rd record: mask bits still cleared, those flags ORed into the input MXCSR. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1; int64_t i64Out1;
                            pfn(&State, &fMxcsr1, &i64Out1, &TestData.r64ValIn.u);
                            TestData.fMxcsrIn = State.MXCSR;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.i64ValOut = i64Out1;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                            /* That run reported flags not yet in fXcpt: add them and rerun
                               with fXcpt shifted into the exception mask field. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2; int64_t i64Out2;
                                pfn(&State, &fMxcsr2, &i64Out2, &TestData.r64ValIn.u);
                                TestData.fMxcsrIn = State.MXCSR;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.i64ValOut = i64Out2;
                                RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                            }
                            /* More than one flag in fXcpt: rerun once per flag, with that
                               flag's mask bit left clear and the other flags' mask bits set. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3; int64_t i64Out3;
                                        pfn(&State, &fMxcsr3, &i64Out3, &TestData.r64ValIn.u);
                                        TestData.fMxcsrIn = State.MXCSR;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.i64ValOut = i64Out3;
                                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryI64R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
5690#endif
5691
5692
5693static void SseBinaryI64R64Test(void)
5694{
5695 X86FXSTATE State;
5696 RT_ZERO(State);
5697 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R64); iFn++)
5698 {
5699 if (!SubTestAndCheckIfEnabled(g_aSseBinaryI64R64[iFn].pszName))
5700 continue;
5701
5702 uint32_t const cTests = *g_aSseBinaryI64R64[iFn].pcTests;
5703 SSE_BINARY_I64_R64_TEST_T const * const paTests = g_aSseBinaryI64R64[iFn].paTests;
5704 PFNIEMAIMPLSSEF2I64U64 pfn = g_aSseBinaryI64R64[iFn].pfn;
5705 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R64[iFn]);
5706 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5707 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5708 {
5709 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_I64_R64_TEST_T); iTest++)
5710 {
5711 uint32_t fMxcsr = 0;
5712 int64_t i64Dst = 0;
5713
5714 State.MXCSR = paTests[iTest].fMxcsrIn;
5715 pfn(&State, &fMxcsr, &i64Dst, &paTests[iTest].r64ValIn.u);
5716 if ( fMxcsr != paTests[iTest].fMxcsrOut
5717 || i64Dst != paTests[iTest].i64ValOut)
5718 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
5719 "%s -> mxcsr=%#08x %RI64\n"
5720 "%s expected %#08x %RI64%s%s (%s)\n",
5721 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5722 FormatR64(&paTests[iTest].r64ValIn),
5723 iVar ? " " : "", fMxcsr, i64Dst,
5724 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i64ValOut,
5725 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
5726 i64Dst != paTests[iTest].i64ValOut
5727 ? " - val" : "",
5728 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5729 }
5730 }
5731 }
5732}
5733
5734
5735/*
5736 * SSE operations converting single single-precision floating point values to signed double-word integers (cvttss2si and friends).
5737 */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I32_R32_T, SSE_BINARY_I32_R32_TEST_T, PFNIEMAIMPLSSEF2I32U32);

/** Subtest table walked by SseBinaryI32R32Generate() and SseBinaryI32R32Test(). */
static const SSE_BINARY_I32_R32_T g_aSseBinaryI32R32[] =
{
    ENTRY_BIN(cvttss2si_i32_r32),
    ENTRY_BIN(cvtss2si_i32_r32),
};
5745
5746#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the binary test data files for the g_aSseBinaryI32R32 subtests.
 *
 * For each table entry the worker (pfnNative when set, otherwise pfn) is run
 * on inputs from RandR32Src() plus the s_aSpecials values, for every
 * combination of rounding mode, DAZ and FZ.  One SSE_BINARY_I32_R32_TEST_T
 * record is appended to the data file per worker call.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the subtest
 *                          name is supplied as its argument.
 * @param   cTests          Number of RandR32Src() inputs per subtest; raised
 *                          to at least 192.
 */
static RTEXITCODE SseBinaryI32R32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U Val; } const s_aSpecials[] =
    {
        { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    /* Lower bound on the number of normal inputs, enforced in the input loop below. */
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R32); iFn++)
    {
        /* Prefer the native worker when the table provides one. */
        PFNIEMAIMPLSSEF2I32U32 const pfn = g_aSseBinaryI32R32[iFn].pfnNative ? g_aSseBinaryI32R32[iFn].pfnNative : g_aSseBinaryI32R32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryI32R32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryI32R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_I32_R32_TEST_T TestData; RT_ZERO(TestData);

            /* Indexes below cTests come from RandR32Src(), the remainder from s_aSpecials. */
            TestData.r32ValIn = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val;

            /* Count normal inputs.  Near the end of the RandR32Src() range, redo the
               same index (the decrement cancels the loop increment) while fewer than
               cMinNormalPairs normal inputs have been seen. */
            if (RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            /* Only the exception flag bits are taken from RandMxcsr(); rounding mode,
               DAZ, FZ and the mask bits are set per combination below. */
            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st record: all exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz ? X86_MXCSR_FZ : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM; int32_t i32OutM;
                        pfn(&State, &fMxcsrM, &i32OutM, &TestData.r32ValIn.u);
                        TestData.fMxcsrIn = State.MXCSR;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.i32ValOut = i32OutM;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* 2nd record: same settings with all exception mask bits cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU; int32_t i32OutU;
                        pfn(&State, &fMxcsrU, &i32OutU, &TestData.r32ValIn.u);
                        TestData.fMxcsrIn = State.MXCSR;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.i32ValOut = i32OutU;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* Exception flags reported by either of the two runs above. */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* 3rd record: mask bits still cleared, those flags ORed into the input MXCSR. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1; int32_t i32Out1;
                            pfn(&State, &fMxcsr1, &i32Out1, &TestData.r32ValIn.u);
                            TestData.fMxcsrIn = State.MXCSR;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.i32ValOut = i32Out1;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                            /* That run reported flags not yet in fXcpt: add them and rerun
                               with fXcpt shifted into the exception mask field. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2; int32_t i32Out2;
                                pfn(&State, &fMxcsr2, &i32Out2, &TestData.r32ValIn.u);
                                TestData.fMxcsrIn = State.MXCSR;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.i32ValOut = i32Out2;
                                RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                            }
                            /* More than one flag in fXcpt: rerun once per flag, with that
                               flag's mask bit left clear and the other flags' mask bits set. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3; int32_t i32Out3;
                                        pfn(&State, &fMxcsr3, &i32Out3, &TestData.r32ValIn.u);
                                        TestData.fMxcsrIn = State.MXCSR;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.i32ValOut = i32Out3;
                                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryI32R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
5859#endif
5860
5861
5862static void SseBinaryI32R32Test(void)
5863{
5864 X86FXSTATE State;
5865 RT_ZERO(State);
5866 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R32); iFn++)
5867 {
5868 if (!SubTestAndCheckIfEnabled(g_aSseBinaryI32R32[iFn].pszName))
5869 continue;
5870
5871 uint32_t const cTests = *g_aSseBinaryI32R32[iFn].pcTests;
5872 SSE_BINARY_I32_R32_TEST_T const * const paTests = g_aSseBinaryI32R32[iFn].paTests;
5873 PFNIEMAIMPLSSEF2I32U32 pfn = g_aSseBinaryI32R32[iFn].pfn;
5874 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R32[iFn]);
5875 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5876 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5877 {
5878 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_I32_R32_TEST_T); iTest++)
5879 {
5880 uint32_t fMxcsr = 0;
5881 int32_t i32Dst = 0;
5882
5883 State.MXCSR = paTests[iTest].fMxcsrIn;
5884 pfn(&State, &fMxcsr, &i32Dst, &paTests[iTest].r32ValIn.u);
5885 if ( fMxcsr != paTests[iTest].fMxcsrOut
5886 || i32Dst != paTests[iTest].i32ValOut)
5887 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
5888 "%s -> mxcsr=%#08x %RI32\n"
5889 "%s expected %#08x %RI32%s%s (%s)\n",
5890 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5891 FormatR32(&paTests[iTest].r32ValIn),
5892 iVar ? " " : "", fMxcsr, i32Dst,
5893 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i32ValOut,
5894 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
5895 i32Dst != paTests[iTest].i32ValOut
5896 ? " - val" : "",
5897 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5898 }
5899 }
5900 }
5901}
5902
5903
5904/*
5905 * SSE operations converting single single-precision floating point values to signed quad-word integers (cvttss2si and friends).
5906 */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I64_R32_T, SSE_BINARY_I64_R32_TEST_T, PFNIEMAIMPLSSEF2I64U32);

/** Subtest table walked by SseBinaryI64R32Generate() and SseBinaryI64R32Test(). */
static const SSE_BINARY_I64_R32_T g_aSseBinaryI64R32[] =
{
    ENTRY_BIN(cvttss2si_i64_r32),
    ENTRY_BIN(cvtss2si_i64_r32),
};
5914
5915#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the binary test data files for the g_aSseBinaryI64R32 subtests.
 *
 * For each table entry the worker (pfnNative when set, otherwise pfn) is run
 * on inputs from RandR32Src() plus the s_aSpecials values, for every
 * combination of rounding mode, DAZ and FZ.  One SSE_BINARY_I64_R32_TEST_T
 * record is appended to the data file per worker call.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the subtest
 *                          name is supplied as its argument.
 * @param   cTests          Number of RandR32Src() inputs per subtest; raised
 *                          to at least 192.
 */
static RTEXITCODE SseBinaryI64R32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U Val; } const s_aSpecials[] =
    {
        { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    /* Lower bound on the number of normal inputs, enforced in the input loop below. */
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R32); iFn++)
    {
        /* Prefer the native worker when the table provides one. */
        PFNIEMAIMPLSSEF2I64U32 const pfn = g_aSseBinaryI64R32[iFn].pfnNative ? g_aSseBinaryI64R32[iFn].pfnNative : g_aSseBinaryI64R32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryI64R32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryI64R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_I64_R32_TEST_T TestData; RT_ZERO(TestData);

            /* Indexes below cTests come from RandR32Src(), the remainder from s_aSpecials. */
            TestData.r32ValIn = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val;

            /* Count normal inputs.  Near the end of the RandR32Src() range, redo the
               same index (the decrement cancels the loop increment) while fewer than
               cMinNormalPairs normal inputs have been seen. */
            if (RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            /* Only the exception flag bits are taken from RandMxcsr(); rounding mode,
               DAZ, FZ and the mask bits are set per combination below. */
            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st record: all exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz ? X86_MXCSR_FZ : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM; int64_t i64OutM;
                        pfn(&State, &fMxcsrM, &i64OutM, &TestData.r32ValIn.u);
                        TestData.fMxcsrIn = State.MXCSR;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.i64ValOut = i64OutM;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* 2nd record: same settings with all exception mask bits cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU; int64_t i64OutU;
                        pfn(&State, &fMxcsrU, &i64OutU, &TestData.r32ValIn.u);
                        TestData.fMxcsrIn = State.MXCSR;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.i64ValOut = i64OutU;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* Exception flags reported by either of the two runs above. */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* 3rd record: mask bits still cleared, those flags ORed into the input MXCSR. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1; int64_t i64Out1;
                            pfn(&State, &fMxcsr1, &i64Out1, &TestData.r32ValIn.u);
                            TestData.fMxcsrIn = State.MXCSR;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.i64ValOut = i64Out1;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                            /* That run reported flags not yet in fXcpt: add them and rerun
                               with fXcpt shifted into the exception mask field. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2; int64_t i64Out2;
                                pfn(&State, &fMxcsr2, &i64Out2, &TestData.r32ValIn.u);
                                TestData.fMxcsrIn = State.MXCSR;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.i64ValOut = i64Out2;
                                RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                            }
                            /* More than one flag in fXcpt: rerun once per flag, with that
                               flag's mask bit left clear and the other flags' mask bits set. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3; int64_t i64Out3;
                                        pfn(&State, &fMxcsr3, &i64Out3, &TestData.r32ValIn.u);
                                        TestData.fMxcsrIn = State.MXCSR;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.i64ValOut = i64Out3;
                                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryI64R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
6028#endif
6029
6030
6031static void SseBinaryI64R32Test(void)
6032{
6033 X86FXSTATE State;
6034 RT_ZERO(State);
6035 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R32); iFn++)
6036 {
6037 if (!SubTestAndCheckIfEnabled(g_aSseBinaryI64R32[iFn].pszName))
6038 continue;
6039
6040 uint32_t const cTests = *g_aSseBinaryI64R32[iFn].pcTests;
6041 SSE_BINARY_I64_R32_TEST_T const * const paTests = g_aSseBinaryI64R32[iFn].paTests;
6042 PFNIEMAIMPLSSEF2I64U32 pfn = g_aSseBinaryI64R32[iFn].pfn;
6043 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI64R32[iFn]);
6044 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6045 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6046 {
6047 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_I64_R32_TEST_T); iTest++)
6048 {
6049 uint32_t fMxcsr = 0;
6050 int64_t i64Dst = 0;
6051
6052 State.MXCSR = paTests[iTest].fMxcsrIn;
6053 pfn(&State, &fMxcsr, &i64Dst, &paTests[iTest].r32ValIn.u);
6054 if ( fMxcsr != paTests[iTest].fMxcsrOut
6055 || i64Dst != paTests[iTest].i64ValOut)
6056 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
6057 "%s -> mxcsr=%#08x %RI64\n"
6058 "%s expected %#08x %RI64%s%s (%s)\n",
6059 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6060 FormatR32(&paTests[iTest].r32ValIn),
6061 iVar ? " " : "", fMxcsr, i64Dst,
6062 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i64ValOut,
6063 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6064 i64Dst != paTests[iTest].i64ValOut
6065 ? " - val" : "",
6066 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6067 }
6068 }
6069 }
6070}
6071
6072
6073/*
6074 * SSE operations converting single signed double-word integers to double-precision floating point values (probably only cvtsi2sd).
6075 */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_I32_T, SSE_BINARY_R64_I32_TEST_T, PFNIEMAIMPLSSEF2R64I32);

/** Subtest table walked by SseBinaryR64I32Generate() and SseBinaryR64I32Test(). */
static const SSE_BINARY_R64_I32_T g_aSseBinaryR64I32[] =
{
    ENTRY_BIN(cvtsi2sd_r64_i32)
};
6082
6083#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the binary test data files for the g_aSseBinaryR64I32 subtests.
 *
 * For each table entry the worker (pfnNative when set, otherwise pfn) is run
 * on inputs from RandI32Src2() plus the s_aSpecials values, for every
 * combination of rounding mode, DAZ and FZ.  One SSE_BINARY_R64_I32_TEST_T
 * record is appended to the data file per worker call.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the subtest
 *                          name is supplied as its argument.
 * @param   cTests          Number of RandI32Src2() inputs per subtest; raised
 *                          to at least 192.
 */
static RTEXITCODE SseBinaryR64I32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int32_t const s_aSpecials[] =
    {
        INT32_MIN,
        INT32_MAX,
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I32); iFn++)
    {
        /* Prefer the native worker when the table provides one. */
        PFNIEMAIMPLSSEF2R64I32 const pfn = g_aSseBinaryR64I32[iFn].pfnNative ? g_aSseBinaryR64I32[iFn].pfnNative : g_aSseBinaryR64I32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR64I32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR64I32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_R64_I32_TEST_T TestData; RT_ZERO(TestData);

            /* Indexes below cTests come from RandI32Src2(), the remainder from s_aSpecials. */
            TestData.i32ValIn = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];

            /* Only the exception flag bits are taken from RandMxcsr(); rounding mode,
               DAZ, FZ and the mask bits are set per combination below. */
            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st record: all exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz ? X86_MXCSR_FZ : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM; RTFLOAT64U r64OutM;
                        pfn(&State, &fMxcsrM, &r64OutM, &TestData.i32ValIn);
                        TestData.fMxcsrIn = State.MXCSR;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.r64ValOut = r64OutM;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* 2nd record: same settings with all exception mask bits cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU; RTFLOAT64U r64OutU;
                        pfn(&State, &fMxcsrU, &r64OutU, &TestData.i32ValIn);
                        TestData.fMxcsrIn = State.MXCSR;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.r64ValOut = r64OutU;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* Exception flags reported by either of the two runs above. */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* 3rd record: mask bits still cleared, those flags ORed into the input MXCSR. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1; RTFLOAT64U r64Out1;
                            pfn(&State, &fMxcsr1, &r64Out1, &TestData.i32ValIn);
                            TestData.fMxcsrIn = State.MXCSR;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.r64ValOut = r64Out1;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                            /* That run reported flags not yet in fXcpt: add them and rerun
                               with fXcpt shifted into the exception mask field. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2; RTFLOAT64U r64Out2;
                                pfn(&State, &fMxcsr2, &r64Out2, &TestData.i32ValIn);
                                TestData.fMxcsrIn = State.MXCSR;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.r64ValOut = r64Out2;
                                RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                            }
                            /* More than one flag in fXcpt: rerun once per flag, with that
                               flag's mask bit left clear and the other flags' mask bits set. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3; RTFLOAT64U r64Out3;
                                        pfn(&State, &fMxcsr3, &r64Out3, &TestData.i32ValIn);
                                        TestData.fMxcsrIn = State.MXCSR;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.r64ValOut = r64Out3;
                                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR64I32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
6187#endif
6188
6189
6190static void SseBinaryR64I32Test(void)
6191{
6192 X86FXSTATE State;
6193 RT_ZERO(State);
6194 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I32); iFn++)
6195 {
6196 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR64I32[iFn].pszName))
6197 continue;
6198
6199 uint32_t const cTests = *g_aSseBinaryR64I32[iFn].pcTests;
6200 SSE_BINARY_R64_I32_TEST_T const * const paTests = g_aSseBinaryR64I32[iFn].paTests;
6201 PFNIEMAIMPLSSEF2R64I32 pfn = g_aSseBinaryR64I32[iFn].pfn;
6202 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64I32[iFn]);
6203 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6204 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6205 {
6206 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_R64_I32_TEST_T); iTest++)
6207 {
6208 uint32_t fMxcsr = 0;
6209 RTFLOAT64U r64Dst; RT_ZERO(r64Dst);
6210
6211 State.MXCSR = paTests[iTest].fMxcsrIn;
6212 pfn(&State, &fMxcsr, &r64Dst, &paTests[iTest].i32ValIn);
6213 if ( fMxcsr != paTests[iTest].fMxcsrOut
6214 || !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut))
6215 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32\n"
6216 "%s -> mxcsr=%#08x %s\n"
6217 "%s expected %#08x %s%s%s (%s)\n",
6218 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6219 &paTests[iTest].i32ValIn,
6220 iVar ? " " : "", fMxcsr, FormatR64(&r64Dst),
6221 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR64(&paTests[iTest].r64ValOut),
6222 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6223 !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut)
6224 ? " - val" : "",
6225 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6226 }
6227 }
6228 }
6229}
6230
6231
6232/*
6233 * SSE operations converting single signed quad-word integers to double-precision floating point values (probably only cvtsi2sd).
6234 */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_I64_T, SSE_BINARY_R64_I64_TEST_T, PFNIEMAIMPLSSEF2R64I64);

/** Subtest table walked by SseBinaryR64I64Generate() and SseBinaryR64I64Test(). */
static const SSE_BINARY_R64_I64_T g_aSseBinaryR64I64[] =
{
    ENTRY_BIN(cvtsi2sd_r64_i64),
};
6241
6242#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the binary test data files for the g_aSseBinaryR64I64 subtests.
 *
 * For each table entry the worker (pfnNative when set, otherwise pfn) is run
 * on inputs from RandI64Src() plus the s_aSpecials values, for every
 * combination of rounding mode, DAZ and FZ.  One SSE_BINARY_R64_I64_TEST_T
 * record is appended to the data file per worker call.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the subtest
 *                          name is supplied as its argument.
 * @param   cTests          Number of RandI64Src() inputs per subtest; raised
 *                          to at least 192.
 */
static RTEXITCODE SseBinaryR64I64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int64_t const s_aSpecials[] =
    {
        INT64_MIN,
        INT64_MAX
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I64); iFn++)
    {
        /* Prefer the native worker when the table provides one. */
        PFNIEMAIMPLSSEF2R64I64 const pfn = g_aSseBinaryR64I64[iFn].pfnNative ? g_aSseBinaryR64I64[iFn].pfnNative : g_aSseBinaryR64I64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR64I64[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR64I64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_R64_I64_TEST_T TestData; RT_ZERO(TestData);

            /* Indexes below cTests come from RandI64Src(), the remainder from s_aSpecials. */
            TestData.i64ValIn = iTest < cTests ? RandI64Src(iTest) : s_aSpecials[iTest - cTests];

            /* Only the exception flag bits are taken from RandMxcsr(); rounding mode,
               DAZ, FZ and the mask bits are set per combination below. */
            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1st record: all exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz ? X86_MXCSR_FZ : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrM; RTFLOAT64U r64OutM;
                        pfn(&State, &fMxcsrM, &r64OutM, &TestData.i64ValIn);
                        TestData.fMxcsrIn = State.MXCSR;
                        TestData.fMxcsrOut = fMxcsrM;
                        TestData.r64ValOut = r64OutM;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* 2nd record: same settings with all exception mask bits cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrU; RTFLOAT64U r64OutU;
                        pfn(&State, &fMxcsrU, &r64OutU, &TestData.i64ValIn);
                        TestData.fMxcsrIn = State.MXCSR;
                        TestData.fMxcsrOut = fMxcsrU;
                        TestData.r64ValOut = r64OutU;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* Exception flags reported by either of the two runs above. */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* 3rd record: mask bits still cleared, those flags ORed into the input MXCSR. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t fMxcsr1; RTFLOAT64U r64Out1;
                            pfn(&State, &fMxcsr1, &r64Out1, &TestData.i64ValIn);
                            TestData.fMxcsrIn = State.MXCSR;
                            TestData.fMxcsrOut = fMxcsr1;
                            TestData.r64ValOut = r64Out1;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                            /* That run reported flags not yet in fXcpt: add them and rerun
                               with fXcpt shifted into the exception mask field. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t fMxcsr2; RTFLOAT64U r64Out2;
                                pfn(&State, &fMxcsr2, &r64Out2, &TestData.i64ValIn);
                                TestData.fMxcsrIn = State.MXCSR;
                                TestData.fMxcsrOut = fMxcsr2;
                                TestData.r64ValOut = r64Out2;
                                RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                            }
                            /* More than one flag in fXcpt: rerun once per flag, with that
                               flag's mask bit left clear and the other flags' mask bits set. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        uint32_t fMxcsr3; RTFLOAT64U r64Out3;
                                        pfn(&State, &fMxcsr3, &r64Out3, &TestData.i64ValIn);
                                        TestData.fMxcsrIn = State.MXCSR;
                                        TestData.fMxcsrOut = fMxcsr3;
                                        TestData.r64ValOut = r64Out3;
                                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR64I64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
6346#endif
6347
6348
6349static void SseBinaryR64I64Test(void)
6350{
6351 X86FXSTATE State;
6352 RT_ZERO(State);
6353 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I64); iFn++)
6354 {
6355 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR64I64[iFn].pszName))
6356 continue;
6357
6358 uint32_t const cTests = *g_aSseBinaryR64I64[iFn].pcTests;
6359 SSE_BINARY_R64_I64_TEST_T const * const paTests = g_aSseBinaryR64I64[iFn].paTests;
6360 PFNIEMAIMPLSSEF2R64I64 pfn = g_aSseBinaryR64I64[iFn].pfn;
6361 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64I64[iFn]);
6362 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6363 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6364 {
6365 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_R64_I64_TEST_T); iTest++)
6366 {
6367 uint32_t fMxcsr = 0;
6368 RTFLOAT64U r64Dst; RT_ZERO(r64Dst);
6369
6370 State.MXCSR = paTests[iTest].fMxcsrIn;
6371 pfn(&State, &fMxcsr, &r64Dst, &paTests[iTest].i64ValIn);
6372 if ( fMxcsr != paTests[iTest].fMxcsrOut
6373 || !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut))
6374 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI64\n"
6375 "%s -> mxcsr=%#08x %s\n"
6376 "%s expected %#08x %s%s%s (%s)\n",
6377 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6378 &paTests[iTest].i64ValIn,
6379 iVar ? " " : "", fMxcsr, FormatR64(&r64Dst),
6380 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR64(&paTests[iTest].r64ValOut),
6381 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6382 !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut)
6383 ? " - val" : "",
6384 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6385 }
6386 }
6387 }
6388}
6389
6390
6391/*
6392 * SSE operations converting single signed double-word integers to single-precision floating point values (probably only cvtsi2ss).
6393 */
/* Presumably declares the SSE_BINARY_R32_I32_T sub-test descriptor from the test
   record type and the worker function pointer type (macro defined elsewhere -
   TODO confirm).  Code in this file reads the descriptor's pszName, pfn,
   pfnNative, pcTests and paTests members. */
6394TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_I32_T, SSE_BINARY_R32_I32_TEST_T, PFNIEMAIMPLSSEF2R32I32);
6395
/* Sub-tests iterated by SseBinaryR32I32Generate and SseBinaryR32I32Test. */
6396static const SSE_BINARY_R32_I32_T g_aSseBinaryR32I32[] =
6397{
6398 ENTRY_BIN(cvtsi2ss_r32_i32),
6399};
6400
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the worker once on the record's input and appends the completed record
 * to the output stream.
 *
 * @returns The MXCSR value the worker stored.
 * @param   pfnWorker   The conversion worker to invoke.
 * @param   pFxState    FPU state; its MXCSR member is the input and is recorded as fMxcsrIn.
 * @param   pRecord     Record with i32ValIn already set; the MXCSR and result members are filled in.
 * @param   pStrmOut    Stream receiving the record (the write status is not checked).
 */
static uint32_t SseBinaryR32I32GenerateOne(PFNIEMAIMPLSSEF2R32I32 pfnWorker, X86FXSTATE *pFxState,
                                           SSE_BINARY_R32_I32_TEST_T *pRecord, PRTSTREAM pStrmOut)
{
    uint32_t   fMxcsrOut;
    RTFLOAT32U r32Out;
    pfnWorker(pFxState, &fMxcsrOut, &r32Out, &pRecord->i32ValIn);
    pRecord->fMxcsrIn  = pFxState->MXCSR;
    pRecord->fMxcsrOut = fMxcsrOut;
    pRecord->r32ValOut = r32Out;
    RTStrmWrite(pStrmOut, pRecord, sizeof(*pRecord));
    return fMxcsrOut;
}

/**
 * Generates the binary test data files for the g_aSseBinaryR32I32 sub-tests.
 *
 * Each input value (random ones first, then the specials) is run through all
 * rounding mode / DAZ / FZ combinations, first with the exception mask bits
 * set and then cleared, followed by extra runs derived from the exception
 * flags those two runs raised.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file cannot be
 *          opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; receives the sub-test name.
 * @param   cTests          Number of random inputs to use (raised to at least 192).
 */
static RTEXITCODE SseBinaryR32I32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int32_t const s_aSpecials[] =
    {
        INT32_MIN,
        INT32_MAX,
        /** @todo More specials. */
    };

    X86FXSTATE FxState;
    RT_ZERO(FxState);
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(g_aSseBinaryR32I32); idxFn++)
    {
        SSE_BINARY_R32_I32_T const * const pEntry    = &g_aSseBinaryR32I32[idxFn];
        PFNIEMAIMPLSSEF2R32I32 const       pfnWorker = pEntry->pfnNative ? pEntry->pfnNative : pEntry->pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, pEntry->pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", pEntry->pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_R32_I32_TEST_T Record;
            RT_ZERO(Record);

            if (iTest < cTests)
                Record.i32ValIn = RandI32Src2(iTest);
            else
                Record.i32ValIn = s_aSpecials[iTest - cTests];

            uint32_t const fMxcsrBase = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                {
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exception mask bits set. */
                        FxState.MXCSR = (fMxcsrBase & ~X86_MXCSR_RC_MASK)
                                      | (iRounding << X86_MXCSR_RC_SHIFT)
                                      | (iDaz ? X86_MXCSR_DAZ : 0)
                                      | (iFz  ? X86_MXCSR_FZ  : 0)
                                      | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrMasked = SseBinaryR32I32GenerateOne(pfnWorker, &FxState, &Record, pStrmOut);

                        /* Pass 2: all exception mask bits cleared. */
                        FxState.MXCSR = FxState.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrUnmasked = SseBinaryR32I32GenerateOne(pfnWorker, &FxState, &Record, pStrmOut);

                        uint16_t fXcpt = (fMxcsrMasked | fMxcsrUnmasked) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Pass 3: OR the raised flag bits (unshifted) into the MXCSR input. */
                        FxState.MXCSR = (FxState.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1  = SseBinaryR32I32GenerateOne(pfnWorker, &FxState, &Record, pStrmOut);
                        uint32_t const fRaised1 = fMxcsr1 & X86_MXCSR_XCPT_FLAGS;

                        /* Pass 4: only if pass 3 raised flags not yet in fXcpt. */
                        if (fRaised1 & ~fXcpt)
                        {
                            fXcpt |= fRaised1;
                            FxState.MXCSR = (FxState.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseBinaryR32I32GenerateOne(pfnWorker, &FxState, &Record, pStrmOut);
                        }

                        /* With several flags involved, leave each one out of the mask in turn. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                        {
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                            {
                                if (fUnmasked & fXcpt)
                                {
                                    FxState.MXCSR = (FxState.MXCSR & ~X86_MXCSR_XCPT_MASK)
                                                  | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                    SseBinaryR32I32GenerateOne(pfnWorker, &FxState, &Record, pStrmOut);
                                }
                            }
                        }
                    }
                }
            }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", pEntry->pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
6506
6507
6508static void SseBinaryR32I32Test(void)
6509{
6510 X86FXSTATE State;
6511 RT_ZERO(State);
6512 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I32); iFn++)
6513 {
6514 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR32I32[iFn].pszName))
6515 continue;
6516
6517 uint32_t const cTests = *g_aSseBinaryR32I32[iFn].pcTests;
6518 SSE_BINARY_R32_I32_TEST_T const * const paTests = g_aSseBinaryR32I32[iFn].paTests;
6519 PFNIEMAIMPLSSEF2R32I32 pfn = g_aSseBinaryR32I32[iFn].pfn;
6520 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32I32[iFn]);
6521 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6522 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6523 {
6524 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_R32_I32_TEST_T); iTest++)
6525 {
6526 uint32_t fMxcsr = 0;
6527 RTFLOAT32U r32Dst; RT_ZERO(r32Dst);
6528
6529 State.MXCSR = paTests[iTest].fMxcsrIn;
6530 pfn(&State, &fMxcsr, &r32Dst, &paTests[iTest].i32ValIn);
6531 if ( fMxcsr != paTests[iTest].fMxcsrOut
6532 || !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut))
6533 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32\n"
6534 "%s -> mxcsr=%#08x %RI32\n"
6535 "%s expected %#08x %RI32%s%s (%s)\n",
6536 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6537 &paTests[iTest].i32ValIn,
6538 iVar ? " " : "", fMxcsr, FormatR32(&r32Dst),
6539 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR32(&paTests[iTest].r32ValOut),
6540 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6541 !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut)
6542 ? " - val" : "",
6543 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6544 }
6545 }
6546 }
6547}
6548
6549
6550/*
6551 * SSE operations converting single signed quad-word integers to single-precision floating point values (probably only cvtsi2ss).
6552 */
/* Presumably declares the SSE_BINARY_R32_I64_T sub-test descriptor from the test
   record type and the worker function pointer type (macro defined elsewhere -
   TODO confirm).  Code in this file reads the descriptor's pszName, pfn,
   pfnNative, pcTests and paTests members. */
6553TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_I64_T, SSE_BINARY_R32_I64_TEST_T, PFNIEMAIMPLSSEF2R32I64);
6554
/* Sub-tests iterated by SseBinaryR32I64Generate and SseBinaryR32I64Test. */
6555static const SSE_BINARY_R32_I64_T g_aSseBinaryR32I64[] =
6556{
6557 ENTRY_BIN(cvtsi2ss_r32_i64),
6558};
6559
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the worker once on the record's input and appends the completed record
 * to the output stream.
 *
 * @returns The MXCSR value the worker stored.
 * @param   pfnWorker   The conversion worker to invoke.
 * @param   pFxState    FPU state; its MXCSR member is the input and is recorded as fMxcsrIn.
 * @param   pRecord     Record with i64ValIn already set; the MXCSR and result members are filled in.
 * @param   pStrmOut    Stream receiving the record (the write status is not checked).
 */
static uint32_t SseBinaryR32I64GenerateOne(PFNIEMAIMPLSSEF2R32I64 pfnWorker, X86FXSTATE *pFxState,
                                           SSE_BINARY_R32_I64_TEST_T *pRecord, PRTSTREAM pStrmOut)
{
    uint32_t   fMxcsrOut;
    RTFLOAT32U r32Out;
    pfnWorker(pFxState, &fMxcsrOut, &r32Out, &pRecord->i64ValIn);
    pRecord->fMxcsrIn  = pFxState->MXCSR;
    pRecord->fMxcsrOut = fMxcsrOut;
    pRecord->r32ValOut = r32Out;
    RTStrmWrite(pStrmOut, pRecord, sizeof(*pRecord));
    return fMxcsrOut;
}

/**
 * Generates the binary test data files for the g_aSseBinaryR32I64 sub-tests.
 *
 * Each input value (random ones first, then the specials) is run through all
 * rounding mode / DAZ / FZ combinations, first with the exception mask bits
 * set and then cleared, followed by extra runs derived from the exception
 * flags those two runs raised.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file cannot be
 *          opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; receives the sub-test name.
 * @param   cTests          Number of random inputs to use (raised to at least 192).
 */
static RTEXITCODE SseBinaryR32I64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int64_t const s_aSpecials[] =
    {
        INT64_MIN,
        INT64_MAX
        /** @todo More specials. */
    };

    X86FXSTATE FxState;
    RT_ZERO(FxState);
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(g_aSseBinaryR32I64); idxFn++)
    {
        SSE_BINARY_R32_I64_T const * const pEntry    = &g_aSseBinaryR32I64[idxFn];
        PFNIEMAIMPLSSEF2R32I64 const       pfnWorker = pEntry->pfnNative ? pEntry->pfnNative : pEntry->pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, pEntry->pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", pEntry->pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_R32_I64_TEST_T Record;
            RT_ZERO(Record);

            if (iTest < cTests)
                Record.i64ValIn = RandI64Src(iTest);
            else
                Record.i64ValIn = s_aSpecials[iTest - cTests];

            uint32_t const fMxcsrBase = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                {
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exception mask bits set. */
                        FxState.MXCSR = (fMxcsrBase & ~X86_MXCSR_RC_MASK)
                                      | (iRounding << X86_MXCSR_RC_SHIFT)
                                      | (iDaz ? X86_MXCSR_DAZ : 0)
                                      | (iFz  ? X86_MXCSR_FZ  : 0)
                                      | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrMasked = SseBinaryR32I64GenerateOne(pfnWorker, &FxState, &Record, pStrmOut);

                        /* Pass 2: all exception mask bits cleared. */
                        FxState.MXCSR = FxState.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrUnmasked = SseBinaryR32I64GenerateOne(pfnWorker, &FxState, &Record, pStrmOut);

                        uint16_t fXcpt = (fMxcsrMasked | fMxcsrUnmasked) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Pass 3: OR the raised flag bits (unshifted) into the MXCSR input. */
                        FxState.MXCSR = (FxState.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1  = SseBinaryR32I64GenerateOne(pfnWorker, &FxState, &Record, pStrmOut);
                        uint32_t const fRaised1 = fMxcsr1 & X86_MXCSR_XCPT_FLAGS;

                        /* Pass 4: only if pass 3 raised flags not yet in fXcpt. */
                        if (fRaised1 & ~fXcpt)
                        {
                            fXcpt |= fRaised1;
                            FxState.MXCSR = (FxState.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseBinaryR32I64GenerateOne(pfnWorker, &FxState, &Record, pStrmOut);
                        }

                        /* With several flags involved, leave each one out of the mask in turn. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                        {
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                            {
                                if (fUnmasked & fXcpt)
                                {
                                    FxState.MXCSR = (FxState.MXCSR & ~X86_MXCSR_XCPT_MASK)
                                                  | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                    SseBinaryR32I64GenerateOne(pfnWorker, &FxState, &Record, pStrmOut);
                                }
                            }
                        }
                    }
                }
            }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", pEntry->pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
6665
6666
6667static void SseBinaryR32I64Test(void)
6668{
6669 X86FXSTATE State;
6670 RT_ZERO(State);
6671 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I64); iFn++)
6672 {
6673 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR32I64[iFn].pszName))
6674 continue;
6675
6676 uint32_t const cTests = *g_aSseBinaryR32I64[iFn].pcTests;
6677 SSE_BINARY_R32_I64_TEST_T const * const paTests = g_aSseBinaryR32I64[iFn].paTests;
6678 PFNIEMAIMPLSSEF2R32I64 pfn = g_aSseBinaryR32I64[iFn].pfn;
6679 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32I64[iFn]);
6680 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6681 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6682 {
6683 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_R32_I64_TEST_T); iTest++)
6684 {
6685 uint32_t fMxcsr = 0;
6686 RTFLOAT32U r32Dst; RT_ZERO(r32Dst);
6687
6688 State.MXCSR = paTests[iTest].fMxcsrIn;
6689 pfn(&State, &fMxcsr, &r32Dst, &paTests[iTest].i64ValIn);
6690 if ( fMxcsr != paTests[iTest].fMxcsrOut
6691 || !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut))
6692 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI64\n"
6693 "%s -> mxcsr=%#08x %RI32\n"
6694 "%s expected %#08x %RI32%s%s (%s)\n",
6695 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6696 &paTests[iTest].i64ValIn,
6697 iVar ? " " : "", fMxcsr, FormatR32(&r32Dst),
6698 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR32(&paTests[iTest].r32ValOut),
6699 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6700 !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut)
6701 ? " - val" : "",
6702 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6703 }
6704 }
6705 }
6706}
6707
6708
6709/*
6710 * Compare SSE operations on single single-precision floating point values - outputting only EFLAGS.
6711 */
/* Presumably declares the SSE_COMPARE_EFL_R32_R32_T sub-test descriptor from the
   test record type and the worker function pointer type (macro defined
   elsewhere - TODO confirm).  Code in this file reads the descriptor's pszName,
   pfn, pfnNative, pcTests and paTests members. */
6712TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_EFL_R32_R32_T, SSE_COMPARE_EFL_R32_R32_TEST_T, PFNIEMAIMPLF2EFLMXCSR128);
6713
/* Sub-tests iterated by SseCompareEflR32R32Generate and SseCompareEflR32R32Test. */
6714static const SSE_COMPARE_EFL_R32_R32_T g_aSseCompareEflR32R32[] =
6715{
6716 ENTRY_BIN(ucomiss_u128),
6717 ENTRY_BIN(comiss_u128),
6718 ENTRY_BIN_AVX(vucomiss_u128),
6719 ENTRY_BIN_AVX(vcomiss_u128),
6720};
6721
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the compare worker once and appends the completed record to the output
 * stream.
 *
 * @returns The MXCSR value after the worker call.
 * @param   pfnWorker   The compare worker to invoke.
 * @param   pStrmOut    Stream receiving the record (the write status is not checked).
 * @param   pRecord     Record with the input values already set; the MXCSR and EFLAGS members are filled in.
 * @param   fMxcsrIn    MXCSR value handed to the worker.
 * @param   fEflIn      EFLAGS value handed to the worker.
 * @param   pValIn1     First source operand.
 * @param   pValIn2     Second source operand.
 */
static uint32_t SseCompareEflR32R32GenerateOne(PFNIEMAIMPLF2EFLMXCSR128 pfnWorker, PRTSTREAM pStrmOut,
                                               SSE_COMPARE_EFL_R32_R32_TEST_T *pRecord,
                                               uint32_t fMxcsrIn, uint32_t fEflIn,
                                               X86XMMREG *pValIn1, X86XMMREG *pValIn2)
{
    uint32_t fMxcsrOut = fMxcsrIn;
    uint32_t fEflOut   = fEflIn;
    pfnWorker(&fMxcsrOut, &fEflOut, pValIn1, pValIn2);
    pRecord->fMxcsrIn  = fMxcsrIn;
    pRecord->fMxcsrOut = fMxcsrOut;
    pRecord->fEflIn    = fEflIn;
    pRecord->fEflOut   = fEflOut;
    RTStrmWrite(pStrmOut, pRecord, sizeof(*pRecord));
    return fMxcsrOut;
}

/**
 * Generates the binary test data files for the g_aSseCompareEflR32R32 sub-tests.
 *
 * Each input pair (random ones first, then the specials) is run through all
 * rounding mode / DAZ / FZ combinations, first with the exception mask bits
 * set and then cleared, followed by extra runs derived from the exception
 * flags those two runs raised.  Towards the end of the random inputs, pairs
 * that are not both normal are redrawn until a minimum number of normal pairs
 * has been reached.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file cannot be
 *          opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; receives the sub-test name.
 * @param   cTests          Number of random input pairs to use (raised to at least 192).
 */
static RTEXITCODE SseCompareEflR32R32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) },
        { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) },
        { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(0) },
        { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) },
        { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0) },
        { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(1) },
        { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(0) },
        { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1) },
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(g_aSseCompareEflR32R32); idxFn++)
    {
        SSE_COMPARE_EFL_R32_R32_T const * const pEntry    = &g_aSseCompareEflR32R32[idxFn];
        PFNIEMAIMPLF2EFLMXCSR128 const          pfnWorker = pEntry->pfnNative ? pEntry->pfnNative : pEntry->pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, pEntry->pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", pEntry->pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_COMPARE_EFL_R32_R32_TEST_T Record;
            RT_ZERO(Record);
            X86XMMREG ValIn1;
            RT_ZERO(ValIn1);
            X86XMMREG ValIn2;
            RT_ZERO(ValIn2);

            /* The two random draws must stay in this order (Val1 first, then Val2). */
            Record.r32ValIn1 = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
            Record.r32ValIn2 = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;

            ValIn1.ar32[0] = Record.r32ValIn1;
            ValIn2.ar32[0] = Record.r32ValIn2;

            bool const fBothNormal = RTFLOAT32U_IS_NORMAL(&Record.r32ValIn1)
                                  && RTFLOAT32U_IS_NORMAL(&Record.r32ValIn2);
            if (fBothNormal)
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Redo this index with freshly drawn inputs. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsrBase = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            uint32_t const fEFlags    = RandEFlags();
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                {
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exception mask bits set. */
                        uint32_t fMxcsrIn = (fMxcsrBase & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz  ? X86_MXCSR_FZ  : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrMasked = SseCompareEflR32R32GenerateOne(pfnWorker, pStrmOut, &Record, fMxcsrIn,
                                                                                     fEFlags, &ValIn1, &ValIn2);

                        /* Pass 2: all exception mask bits cleared. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrUnmasked = SseCompareEflR32R32GenerateOne(pfnWorker, pStrmOut, &Record, fMxcsrIn,
                                                                                       fEFlags, &ValIn1, &ValIn2);

                        uint16_t fXcpt = (fMxcsrMasked | fMxcsrUnmasked) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Pass 3: OR the raised flag bits (unshifted) into the MXCSR input. */
                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1  = SseCompareEflR32R32GenerateOne(pfnWorker, pStrmOut, &Record, fMxcsrIn,
                                                                                 fEFlags, &ValIn1, &ValIn2);
                        uint32_t const fRaised1 = fMxcsr1 & X86_MXCSR_XCPT_FLAGS;

                        /* Pass 4: only if pass 3 raised flags not yet in fXcpt. */
                        if (fRaised1 & ~fXcpt)
                        {
                            fXcpt |= fRaised1;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseCompareEflR32R32GenerateOne(pfnWorker, pStrmOut, &Record, fMxcsrIn, fEFlags, &ValIn1, &ValIn2);
                        }

                        /* With several flags involved, leave each one out of the mask in turn. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                        {
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                            {
                                if (fUnmasked & fXcpt)
                                {
                                    fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK)
                                             | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                    SseCompareEflR32R32GenerateOne(pfnWorker, pStrmOut, &Record, fMxcsrIn,
                                                                   fEFlags, &ValIn1, &ValIn2);
                                }
                            }
                        }
                    }
                }
            }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", pEntry->pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
6859
6860static void SseCompareEflR32R32Test(void)
6861{
6862 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR32R32); iFn++)
6863 {
6864 if (!SubTestAndCheckIfEnabled(g_aSseCompareEflR32R32[iFn].pszName))
6865 continue;
6866
6867 uint32_t const cTests = *g_aSseCompareEflR32R32[iFn].pcTests;
6868 SSE_COMPARE_EFL_R32_R32_TEST_T const * const paTests = g_aSseCompareEflR32R32[iFn].paTests;
6869 PFNIEMAIMPLF2EFLMXCSR128 pfn = g_aSseCompareEflR32R32[iFn].pfn;
6870 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareEflR32R32[iFn]);
6871 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6872 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6873 {
6874 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_COMPARE_EFL_R32_R32_TEST_T); iTest++)
6875 {
6876 X86XMMREG ValIn1; RT_ZERO(ValIn1);
6877 X86XMMREG ValIn2; RT_ZERO(ValIn2);
6878
6879 ValIn1.ar32[0] = paTests[iTest].r32ValIn1;
6880 ValIn2.ar32[0] = paTests[iTest].r32ValIn2;
6881 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
6882 uint32_t fEFlags = paTests[iTest].fEflIn;
6883 pfn(&fMxcsr, &fEFlags, &ValIn1, &ValIn2);
6884 if ( fMxcsr != paTests[iTest].fMxcsrOut
6885 || fEFlags != paTests[iTest].fEflOut)
6886 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x efl=%#08x in1=%s in2=%s\n"
6887 "%s -> mxcsr=%#08x %#08x\n"
6888 "%s expected %#08x %#08x%s (%s) (EFL: %s)\n",
6889 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn, paTests[iTest].fEflIn,
6890 FormatR32(&paTests[iTest].r32ValIn1), FormatR32(&paTests[iTest].r32ValIn2),
6891 iVar ? " " : "", fMxcsr, fEFlags,
6892 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].fEflOut,
6893 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6894 FormatMxcsr(paTests[iTest].fMxcsrIn),
6895 EFlagsDiff(fEFlags, paTests[iTest].fEflOut));
6896 }
6897 }
6898 }
6899}
6900
6901
6902/*
6903 * Compare SSE operations on single double-precision floating point values - outputting only EFLAGS.
6904 */
/* Presumably declares the SSE_COMPARE_EFL_R64_R64_T sub-test descriptor from the
   test record type and the worker function pointer type (macro defined
   elsewhere - TODO confirm).  Code in this file reads the descriptor's pszName,
   pfn, pfnNative, pcTests and paTests members. */
6905TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_EFL_R64_R64_T, SSE_COMPARE_EFL_R64_R64_TEST_T, PFNIEMAIMPLF2EFLMXCSR128);
6906
/* Sub-tests iterated by SseCompareEflR64R64Generate and SseCompareEflR64R64Test. */
6907static const SSE_COMPARE_EFL_R64_R64_T g_aSseCompareEflR64R64[] =
6908{
6909 ENTRY_BIN(ucomisd_u128),
6910 ENTRY_BIN(comisd_u128),
6911 ENTRY_BIN_AVX(vucomisd_u128),
6912 ENTRY_BIN_AVX(vcomisd_u128)
6913};
6914
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the compare worker once and appends the completed record to the output
 * stream.
 *
 * @returns The MXCSR value after the worker call.
 * @param   pfnWorker   The compare worker to invoke.
 * @param   pStrmOut    Stream receiving the record (the write status is not checked).
 * @param   pRecord     Record with the input values already set; the MXCSR and EFLAGS members are filled in.
 * @param   fMxcsrIn    MXCSR value handed to the worker.
 * @param   fEflIn      EFLAGS value handed to the worker.
 * @param   pValIn1     First source operand.
 * @param   pValIn2     Second source operand.
 */
static uint32_t SseCompareEflR64R64GenerateOne(PFNIEMAIMPLF2EFLMXCSR128 pfnWorker, PRTSTREAM pStrmOut,
                                               SSE_COMPARE_EFL_R64_R64_TEST_T *pRecord,
                                               uint32_t fMxcsrIn, uint32_t fEflIn,
                                               X86XMMREG *pValIn1, X86XMMREG *pValIn2)
{
    uint32_t fMxcsrOut = fMxcsrIn;
    uint32_t fEflOut   = fEflIn;
    pfnWorker(&fMxcsrOut, &fEflOut, pValIn1, pValIn2);
    pRecord->fMxcsrIn  = fMxcsrIn;
    pRecord->fMxcsrOut = fMxcsrOut;
    pRecord->fEflIn    = fEflIn;
    pRecord->fEflOut   = fEflOut;
    RTStrmWrite(pStrmOut, pRecord, sizeof(*pRecord));
    return fMxcsrOut;
}

/**
 * Generates the binary test data files for the g_aSseCompareEflR64R64 sub-tests.
 *
 * Each input pair (random ones first, then the specials) is run through all
 * rounding mode / DAZ / FZ combinations, first with the exception mask bits
 * set and then cleared, followed by extra runs derived from the exception
 * flags those two runs raised.  Towards the end of the random inputs, pairs
 * that are not both normal are redrawn until a minimum number of normal pairs
 * has been reached.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file cannot be
 *          opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; receives the sub-test name.
 * @param   cTests          Number of random input pairs to use (raised to at least 192).
 */
static RTEXITCODE SseCompareEflR64R64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(0) },
        { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(1) },
        { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(0) },
        { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(1) },
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(g_aSseCompareEflR64R64); idxFn++)
    {
        SSE_COMPARE_EFL_R64_R64_T const * const pEntry    = &g_aSseCompareEflR64R64[idxFn];
        PFNIEMAIMPLF2EFLMXCSR128 const          pfnWorker = pEntry->pfnNative ? pEntry->pfnNative : pEntry->pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, pEntry->pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", pEntry->pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_COMPARE_EFL_R64_R64_TEST_T Record;
            RT_ZERO(Record);
            X86XMMREG ValIn1;
            RT_ZERO(ValIn1);
            X86XMMREG ValIn2;
            RT_ZERO(ValIn2);

            /* The two random draws must stay in this order (Val1 first, then Val2). */
            Record.r64ValIn1 = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val1;
            Record.r64ValIn2 = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val2;

            ValIn1.ar64[0] = Record.r64ValIn1;
            ValIn2.ar64[0] = Record.r64ValIn2;

            bool const fBothNormal = RTFLOAT64U_IS_NORMAL(&Record.r64ValIn1)
                                  && RTFLOAT64U_IS_NORMAL(&Record.r64ValIn2);
            if (fBothNormal)
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Redo this index with freshly drawn inputs. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsrBase = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            uint32_t const fEFlags    = RandEFlags();
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                {
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Pass 1: all exception mask bits set. */
                        uint32_t fMxcsrIn = (fMxcsrBase & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz  ? X86_MXCSR_FZ  : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrMasked = SseCompareEflR64R64GenerateOne(pfnWorker, pStrmOut, &Record, fMxcsrIn,
                                                                                     fEFlags, &ValIn1, &ValIn2);

                        /* Pass 2: all exception mask bits cleared. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrUnmasked = SseCompareEflR64R64GenerateOne(pfnWorker, pStrmOut, &Record, fMxcsrIn,
                                                                                       fEFlags, &ValIn1, &ValIn2);

                        uint16_t fXcpt = (fMxcsrMasked | fMxcsrUnmasked) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Pass 3: OR the raised flag bits (unshifted) into the MXCSR input. */
                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1  = SseCompareEflR64R64GenerateOne(pfnWorker, pStrmOut, &Record, fMxcsrIn,
                                                                                 fEFlags, &ValIn1, &ValIn2);
                        uint32_t const fRaised1 = fMxcsr1 & X86_MXCSR_XCPT_FLAGS;

                        /* Pass 4: only if pass 3 raised flags not yet in fXcpt. */
                        if (fRaised1 & ~fXcpt)
                        {
                            fXcpt |= fRaised1;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseCompareEflR64R64GenerateOne(pfnWorker, pStrmOut, &Record, fMxcsrIn, fEFlags, &ValIn1, &ValIn2);
                        }

                        /* With several flags involved, leave each one out of the mask in turn. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                        {
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                            {
                                if (fUnmasked & fXcpt)
                                {
                                    fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK)
                                             | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                    SseCompareEflR64R64GenerateOne(pfnWorker, pStrmOut, &Record, fMxcsrIn,
                                                                   fEFlags, &ValIn1, &ValIn2);
                                }
                            }
                        }
                    }
                }
            }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", pEntry->pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7052
7053static void SseCompareEflR64R64Test(void)
7054{
7055 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR64R64); iFn++)
7056 {
7057 if (!SubTestAndCheckIfEnabled(g_aSseCompareEflR64R64[iFn].pszName))
7058 continue;
7059
7060 uint32_t const cTests = *g_aSseCompareEflR64R64[iFn].pcTests;
7061 SSE_COMPARE_EFL_R64_R64_TEST_T const * const paTests = g_aSseCompareEflR64R64[iFn].paTests;
7062 PFNIEMAIMPLF2EFLMXCSR128 pfn = g_aSseCompareEflR64R64[iFn].pfn;
7063 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareEflR64R64[iFn]);
7064 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7065 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7066 {
7067 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_COMPARE_EFL_R64_R64_TEST_T); iTest++)
7068 {
7069 X86XMMREG ValIn1; RT_ZERO(ValIn1);
7070 X86XMMREG ValIn2; RT_ZERO(ValIn2);
7071
7072 ValIn1.ar64[0] = paTests[iTest].r64ValIn1;
7073 ValIn2.ar64[0] = paTests[iTest].r64ValIn2;
7074 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
7075 uint32_t fEFlags = paTests[iTest].fEflIn;
7076 pfn(&fMxcsr, &fEFlags, &ValIn1, &ValIn2);
7077 if ( fMxcsr != paTests[iTest].fMxcsrOut
7078 || fEFlags != paTests[iTest].fEflOut)
7079 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x efl=%#08x in1=%s in2=%s\n"
7080 "%s -> mxcsr=%#08x %#08x\n"
7081 "%s expected %#08x %#08x%s (%s) (EFL: %s)\n",
7082 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn, paTests[iTest].fEflIn,
7083 FormatR64(&paTests[iTest].r64ValIn1), FormatR64(&paTests[iTest].r64ValIn2),
7084 iVar ? " " : "", fMxcsr, fEFlags,
7085 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].fEflOut,
7086 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7087 FormatMxcsr(paTests[iTest].fMxcsrIn),
7088 EFlagsDiff(fEFlags, paTests[iTest].fEflOut));
7089 }
7090 }
7091 }
7092}
7093
7094
7095/*
7096 * Compare SSE operations on packed and single single-precision floating point values - outputting a mask.
7097 */
7098/** Maximum immediate value to try, capped to keep the test data size under control (at least a little bit).
 * The generator loops bImm from 0 up to and including this value. */
7099#define SSE_COMPARE_F2_XMM_IMM8_MAX 0x1f
7100
/* Presumably declares the SSE_COMPARE_F2_XMM_IMM8_T sub-test descriptor from the
   test record type and the worker function pointer type (macro defined
   elsewhere - TODO confirm).  The generator below reads the descriptor's
   pszName, pfn and pfnNative members. */
7101TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_F2_XMM_IMM8_T, SSE_COMPARE_F2_XMM_IMM8_TEST_T, PFNIEMAIMPLMXCSRF2XMMIMM8);
7102
/* Sub-tests iterated by SseCompareF2XmmR32Imm8Generate and SseCompareF2XmmR32Imm8Test. */
7103static const SSE_COMPARE_F2_XMM_IMM8_T g_aSseCompareF2XmmR32Imm8[] =
7104{
7105 ENTRY_BIN(cmpps_u128),
7106 ENTRY_BIN(cmpss_u128)
7107};
7108
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the compare worker once and appends the completed record to the output
 * stream.
 *
 * @returns The MXCSR value after the worker call.
 * @param   pfnWorker   The compare worker to invoke.
 * @param   pStrmOut    Stream receiving the record (the write status is not checked).
 * @param   pRecord     Record with the input values already set; MXCSR, immediate and result members are filled in.
 * @param   fMxcsrIn    MXCSR value handed to the worker.
 * @param   pSrc        The two source operands.
 * @param   bImm        The immediate byte handed to the worker.
 */
static uint32_t SseCompareF2XmmR32Imm8GenerateOne(PFNIEMAIMPLMXCSRF2XMMIMM8 pfnWorker, PRTSTREAM pStrmOut,
                                                  SSE_COMPARE_F2_XMM_IMM8_TEST_T *pRecord,
                                                  uint32_t fMxcsrIn, IEMMEDIAF2XMMSRC *pSrc, uint8_t bImm)
{
    uint32_t  fMxcsrOut = fMxcsrIn;
    X86XMMREG Result;
    pfnWorker(&fMxcsrOut, &Result, pSrc, bImm);
    pRecord->fMxcsrIn  = fMxcsrIn;
    pRecord->fMxcsrOut = fMxcsrOut;
    pRecord->bImm      = bImm;
    pRecord->OutVal    = Result;
    RTStrmWrite(pStrmOut, pRecord, sizeof(*pRecord));
    return fMxcsrOut;
}

/**
 * Generates the binary test data files for the g_aSseCompareF2XmmR32Imm8 sub-tests.
 *
 * Each input (four single-precision values per operand; random ones first,
 * then the specials replicated into all four elements) is run for every
 * immediate from 0 to SSE_COMPARE_F2_XMM_IMM8_MAX and every rounding mode /
 * DAZ / FZ combination, first with the exception mask bits set and then
 * cleared, followed by extra runs derived from the exception flags those two
 * runs raised.  Towards the end of the random inputs, inputs that are not
 * entirely normal are redrawn until a minimum number of all-normal inputs has
 * been reached.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file cannot be
 *          opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; receives the sub-test name.
 * @param   cTests          Number of random inputs to use (raised to at least 192).
 */
static RTEXITCODE SseCompareF2XmmR32Imm8Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) },
        { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) },
        { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(0) },
        { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) },
        { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0) },
        { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(1) },
        { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(0) },
        { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1) },
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t idxFn = 0; idxFn < RT_ELEMENTS(g_aSseCompareF2XmmR32Imm8); idxFn++)
    {
        SSE_COMPARE_F2_XMM_IMM8_T const * const pEntry    = &g_aSseCompareF2XmmR32Imm8[idxFn];
        PFNIEMAIMPLMXCSRF2XMMIMM8 const         pfnWorker = pEntry->pfnNative ? pEntry->pfnNative : pEntry->pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, pEntry->pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", pEntry->pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_COMPARE_F2_XMM_IMM8_TEST_T Record;
            RT_ZERO(Record);

            /* Fill operand 1 elements 0..3 first, then operand 2 elements 0..3; the
               order of the random draws must not change. */
            for (unsigned iElem = 0; iElem < 4; iElem++)
                Record.InVal1.ar32[iElem] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val1;
            for (unsigned iElem = 0; iElem < 4; iElem++)
                Record.InVal2.ar32[iElem] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val2;

            bool fAllNormal = true;
            for (unsigned iElem = 0; iElem < 4 && fAllNormal; iElem++)
                fAllNormal = RTFLOAT32U_IS_NORMAL(&Record.InVal1.ar32[iElem]);
            for (unsigned iElem = 0; iElem < 4 && fAllNormal; iElem++)
                fAllNormal = RTFLOAT32U_IS_NORMAL(&Record.InVal2.ar32[iElem]);

            if (fAllNormal)
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Redo this index with freshly drawn inputs. */
                iTest -= 1;
                continue;
            }

            IEMMEDIAF2XMMSRC Src;
            Src.uSrc1 = Record.InVal1;
            Src.uSrc2 = Record.InVal2;
            uint32_t const fMxcsrBase = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint8_t bImm = 0; bImm <= SSE_COMPARE_F2_XMM_IMM8_MAX; bImm++)
            {
                for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                {
                    for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    {
                        for (uint8_t iFz = 0; iFz < 2; iFz++)
                        {
                            /* Pass 1: all exception mask bits set. */
                            uint32_t fMxcsrIn = (fMxcsrBase & ~X86_MXCSR_RC_MASK)
                                              | (iRounding << X86_MXCSR_RC_SHIFT)
                                              | (iDaz ? X86_MXCSR_DAZ : 0)
                                              | (iFz  ? X86_MXCSR_FZ  : 0)
                                              | X86_MXCSR_XCPT_MASK;
                            uint32_t const fMxcsrMasked = SseCompareF2XmmR32Imm8GenerateOne(pfnWorker, pStrmOut, &Record,
                                                                                            fMxcsrIn, &Src, bImm);

                            /* Pass 2: all exception mask bits cleared. */
                            fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                            uint32_t const fMxcsrUnmasked = SseCompareF2XmmR32Imm8GenerateOne(pfnWorker, pStrmOut, &Record,
                                                                                              fMxcsrIn, &Src, bImm);

                            uint16_t fXcpt = (fMxcsrMasked | fMxcsrUnmasked) & X86_MXCSR_XCPT_FLAGS;
                            if (!fXcpt)
                                continue;

                            /* Pass 3: OR the raised flag bits (unshifted) into the MXCSR input. */
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1  = SseCompareF2XmmR32Imm8GenerateOne(pfnWorker, pStrmOut, &Record,
                                                                                        fMxcsrIn, &Src, bImm);
                            uint32_t const fRaised1 = fMxcsr1 & X86_MXCSR_XCPT_FLAGS;

                            /* Pass 4: only if pass 3 raised flags not yet in fXcpt. */
                            if (fRaised1 & ~fXcpt)
                            {
                                fXcpt |= fRaised1;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseCompareF2XmmR32Imm8GenerateOne(pfnWorker, pStrmOut, &Record, fMxcsrIn, &Src, bImm);
                            }

                            /* With several flags involved, leave each one out of the mask in turn. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                            {
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                {
                                    if (fUnmasked & fXcpt)
                                    {
                                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK)
                                                 | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        SseCompareF2XmmR32Imm8GenerateOne(pfnWorker, pStrmOut, &Record, fMxcsrIn, &Src, bImm);
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", pEntry->pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7257
7258static void SseCompareF2XmmR32Imm8Test(void)
7259{
7260 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR32Imm8); iFn++)
7261 {
7262 if (!SubTestAndCheckIfEnabled(g_aSseCompareF2XmmR32Imm8[iFn].pszName))
7263 continue;
7264
7265 uint32_t const cTests = *g_aSseCompareF2XmmR32Imm8[iFn].pcTests;
7266 SSE_COMPARE_F2_XMM_IMM8_TEST_T const * const paTests = g_aSseCompareF2XmmR32Imm8[iFn].paTests;
7267 PFNIEMAIMPLMXCSRF2XMMIMM8 pfn = g_aSseCompareF2XmmR32Imm8[iFn].pfn;
7268 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareF2XmmR32Imm8[iFn]);
7269 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7270 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7271 {
7272 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_COMPARE_F2_XMM_IMM8_TEST_T); iTest++)
7273 {
7274 IEMMEDIAF2XMMSRC Src;
7275 X86XMMREG ValOut;
7276
7277 Src.uSrc1 = paTests[iTest].InVal1;
7278 Src.uSrc2 = paTests[iTest].InVal2;
7279 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
7280 pfn(&fMxcsr, &ValOut, &Src, paTests[iTest].bImm);
7281 if ( fMxcsr != paTests[iTest].fMxcsrOut
7282 || ValOut.au32[0] != paTests[iTest].OutVal.au32[0]
7283 || ValOut.au32[1] != paTests[iTest].OutVal.au32[1]
7284 || ValOut.au32[2] != paTests[iTest].OutVal.au32[2]
7285 || ValOut.au32[3] != paTests[iTest].OutVal.au32[3])
7286 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s'%s'%s'%s imm8=%x\n"
7287 "%s -> mxcsr=%#08x %RX32'%RX32'%RX32'%RX32\n"
7288 "%s expected %#08x %RX32'%RX32'%RX32'%RX32%s%s (%s)\n",
7289 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7290 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
7291 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
7292 FormatR32(&paTests[iTest].InVal2.ar32[0]), FormatR32(&paTests[iTest].InVal2.ar32[1]),
7293 FormatR32(&paTests[iTest].InVal2.ar32[2]), FormatR32(&paTests[iTest].InVal2.ar32[3]),
7294 paTests[iTest].bImm,
7295 iVar ? " " : "", fMxcsr, ValOut.au32[0], ValOut.au32[1], ValOut.au32[2], ValOut.au32[3],
7296 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7297 paTests[iTest].OutVal.au32[0], paTests[iTest].OutVal.au32[1],
7298 paTests[iTest].OutVal.au32[2], paTests[iTest].OutVal.au32[3],
7299 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7300 ( ValOut.au32[0] != paTests[iTest].OutVal.au32[0]
7301 || ValOut.au32[1] != paTests[iTest].OutVal.au32[1]
7302 || ValOut.au32[2] != paTests[iTest].OutVal.au32[2]
7303 || ValOut.au32[3] != paTests[iTest].OutVal.au32[3])
7304 ? " - val" : "",
7305 FormatMxcsr(paTests[iTest].fMxcsrIn));
7306 }
7307 }
7308 }
7309}
7310
7311
7312/*
7313 * Compare SSE operations on packed and single double-precision floating point values - outputting a mask.
7314 */
7315static const SSE_COMPARE_F2_XMM_IMM8_T g_aSseCompareF2XmmR64Imm8[] =
7316{
7317 ENTRY_BIN(cmppd_u128),
7318 ENTRY_BIN(cmpsd_u128)
7319};
7320
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Calls the compare worker once and appends the resulting record to the data file.
 *
 * @returns The MXCSR value the worker left behind.
 * @param   pfn         The worker to call.
 * @param   pStrmOut    The data file stream.
 * @param   pTestData   The record to emit; the input values are already set,
 *                      the MXCSR, immediate and output members are filled in here.
 * @param   pSrc        The two source operands handed to the worker.
 * @param   fMxcsrIn    The MXCSR value to run the worker with.
 * @param   bImm        The immediate byte handed to the worker.
 * @param   prcWrite    Sticky write status; only the first failure is recorded.
 */
static uint32_t SseCompareF2XmmR64Imm8GenerateOne(PFNIEMAIMPLMXCSRF2XMMIMM8 pfn, PRTSTREAM pStrmOut,
                                                  SSE_COMPARE_F2_XMM_IMM8_TEST_T *pTestData, IEMMEDIAF2XMMSRC *pSrc,
                                                  uint32_t fMxcsrIn, uint8_t bImm, int *prcWrite)
{
    uint32_t  fMxcsrOut = fMxcsrIn;
    X86XMMREG Res;
    pfn(&fMxcsrOut, &Res, pSrc, bImm);

    pTestData->fMxcsrIn  = fMxcsrIn;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->bImm      = bImm;
    pTestData->OutVal    = Res;

    int const rc = RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    if (RT_FAILURE(rc) && RT_SUCCESS(*prcWrite))
        *prcWrite = rc;
    return fMxcsrOut;
}

/**
 * Generates one data file per entry in g_aSseCompareF2XmmR64Imm8.
 *
 * For every input pair (random ones followed by the s_aSpecials table) each
 * immediate value, rounding mode and DAZ/FZ combination is recorded with
 * several exception mask set-ups.  Write failures are reported and make the
 * function fail instead of silently producing a truncated file.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a file could not be
 *          opened, written or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the entry
 *                          name is the single format argument.
 * @param   cTests          Number of random inputs, raised to at least 192.
 */
static RTEXITCODE SseCompareF2XmmR64Imm8Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(0)  },
        { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(1)  },
        { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(0)  },
        { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(1)  },
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR64Imm8); iFn++)
    {
        PFNIEMAIMPLMXCSRF2XMMIMM8 const pfn = g_aSseCompareF2XmmR64Imm8[iFn].pfnNative
                                            ? g_aSseCompareF2XmmR64Imm8[iFn].pfnNative
                                            : g_aSseCompareF2XmmR64Imm8[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseCompareF2XmmR64Imm8[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseCompareF2XmmR64Imm8[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        int      rcWrite           = VINF_SUCCESS;
        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials) && RT_SUCCESS(rcWrite); iTest += 1)
        {
            SSE_COMPARE_F2_XMM_IMM8_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val1;
            TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val1;

            TestData.InVal2.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val2;
            TestData.InVal2.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val2;

            if (   RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0])
                && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
                && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[0])
                && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[1]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few all-normal inputs so far: redo this slot with fresh random values. */
                iTest -= 1;
                continue;
            }

            IEMMEDIAF2XMMSRC Src;
            Src.uSrc1 = TestData.InVal1;
            Src.uSrc2 = TestData.InVal2;
            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint8_t bImm = 0; bImm <= SSE_COMPARE_F2_XMM_IMM8_MAX; bImm++)
                for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                    for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                        for (uint8_t iFz = 0; iFz < 2; iFz++)
                        {
                            /* First with all of X86_MXCSR_XCPT_MASK set ... */
                            uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                              | (iRounding << X86_MXCSR_RC_SHIFT)
                                              | (iDaz ? X86_MXCSR_DAZ : 0)
                                              | (iFz  ? X86_MXCSR_FZ  : 0)
                                              | X86_MXCSR_XCPT_MASK;
                            uint32_t const fMxcsrM = SseCompareF2XmmR64Imm8GenerateOne(pfn, pStrmOut, &TestData, &Src,
                                                                                       fMxcsrIn, bImm, &rcWrite);

                            /* ... then with all of it cleared. */
                            fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                            uint32_t const fMxcsrU = SseCompareF2XmmR64Imm8GenerateOne(pfn, pStrmOut, &TestData, &Src,
                                                                                       fMxcsrIn, bImm, &rcWrite);

                            uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                            if (fXcpt)
                            {
                                /* Rerun with the flags seen so far OR'ed into the input. */
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                                uint32_t const fMxcsr1 = SseCompareF2XmmR64Imm8GenerateOne(pfn, pStrmOut, &TestData, &Src,
                                                                                           fMxcsrIn, bImm, &rcWrite);

                                /* That run reported flags not yet in fXcpt: add them and rerun with fXcpt shifted into the mask field. */
                                if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                                {
                                    fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                    fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                    SseCompareF2XmmR64Imm8GenerateOne(pfn, pStrmOut, &TestData, &Src, fMxcsrIn, bImm, &rcWrite);
                                }

                                /* More than one flag: one extra run per flag, leaving just that flag out of the mask field. */
                                if (!RT_IS_POWER_OF_TWO(fXcpt))
                                    for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                        if (fUnmasked & fXcpt)
                                        {
                                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK)
                                                     | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                            SseCompareF2XmmR64Imm8GenerateOne(pfn, pStrmOut, &TestData, &Src,
                                                                              fMxcsrIn, bImm, &rcWrite);
                                        }
                            }
                        }
        }

        if (RT_FAILURE(rcWrite))
        {
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSseCompareF2XmmR64Imm8[iFn].pszName, rcWrite);
            RTStrmClose(pStrmOut);
            return RTEXITCODE_FAILURE;
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseCompareF2XmmR64Imm8[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7461
7462static void SseCompareF2XmmR64Imm8Test(void)
7463{
7464 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR64Imm8); iFn++)
7465 {
7466 if (!SubTestAndCheckIfEnabled(g_aSseCompareF2XmmR64Imm8[iFn].pszName))
7467 continue;
7468
7469 uint32_t const cTests = *g_aSseCompareF2XmmR64Imm8[iFn].pcTests;
7470 SSE_COMPARE_F2_XMM_IMM8_TEST_T const * const paTests = g_aSseCompareF2XmmR64Imm8[iFn].paTests;
7471 PFNIEMAIMPLMXCSRF2XMMIMM8 pfn = g_aSseCompareF2XmmR64Imm8[iFn].pfn;
7472 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareF2XmmR64Imm8[iFn]);
7473 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7474 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7475 {
7476 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_COMPARE_F2_XMM_IMM8_TEST_T); iTest++)
7477 {
7478 IEMMEDIAF2XMMSRC Src;
7479 X86XMMREG ValOut;
7480
7481 Src.uSrc1 = paTests[iTest].InVal1;
7482 Src.uSrc2 = paTests[iTest].InVal2;
7483 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
7484 pfn(&fMxcsr, &ValOut, &Src, paTests[iTest].bImm);
7485 if ( fMxcsr != paTests[iTest].fMxcsrOut
7486 || ValOut.au64[0] != paTests[iTest].OutVal.au64[0]
7487 || ValOut.au64[1] != paTests[iTest].OutVal.au64[1])
7488 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s'%s imm8=%x\n"
7489 "%s -> mxcsr=%#08x %RX64'%RX64\n"
7490 "%s expected %#08x %RX64'%RX64%s%s (%s)\n",
7491 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7492 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
7493 FormatR64(&paTests[iTest].InVal2.ar64[0]), FormatR64(&paTests[iTest].InVal2.ar64[1]),
7494 paTests[iTest].bImm,
7495 iVar ? " " : "", fMxcsr, ValOut.au64[0], ValOut.au64[1],
7496 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7497 paTests[iTest].OutVal.au64[0], paTests[iTest].OutVal.au64[1],
7498 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7499 ( ValOut.au64[0] != paTests[iTest].OutVal.au64[0]
7500 || ValOut.au64[1] != paTests[iTest].OutVal.au64[1])
7501 ? " - val" : "",
7502 FormatMxcsr(paTests[iTest].fMxcsrIn));
7503 }
7504 }
7505 }
7506}
7507
7508
7509/*
7510 * Convert SSE operations converting signed double-words to single-precision floating point values.
7511 */
7512TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_T, SSE_CONVERT_XMM_TEST_T, PFNIEMAIMPLFPSSEF2U128);
7513
7514static const SSE_CONVERT_XMM_T g_aSseConvertXmmI32R32[] =
7515{
7516 ENTRY_BIN(cvtdq2ps_u128)
7517};
7518
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Calls the convert worker once with the MXCSR currently in @a pState and
 * appends the resulting record to the data file.
 *
 * @returns The MXCSR value reported in the worker's result.
 * @param   pfn         The worker to call.
 * @param   pStrmOut    The data file stream.
 * @param   pState      FPU state handed to the worker; the caller sets MXCSR.
 * @param   pTestData   The record to emit; InVal is already set, the MXCSR and
 *                      output members are filled in here.
 * @param   prcWrite    Sticky write status; only the first failure is recorded.
 */
static uint32_t SseConvertXmmI32R32GenerateOne(PFNIEMAIMPLFPSSEF2U128 pfn, PRTSTREAM pStrmOut, X86FXSTATE *pState,
                                               SSE_CONVERT_XMM_TEST_T *pTestData, int *prcWrite)
{
    IEMSSERESULT Res; RT_ZERO(Res);
    pfn(pState, &Res, &Res.uResult, &pTestData->InVal);

    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = Res.MXCSR;
    pTestData->OutVal    = Res.uResult;

    int const rc = RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    if (RT_FAILURE(rc) && RT_SUCCESS(*prcWrite))
        *prcWrite = rc;
    return Res.MXCSR;
}

/**
 * Generates one data file per entry in g_aSseConvertXmmI32R32.
 *
 * For every input (random ones followed by the s_aSpecials table) each
 * rounding mode and DAZ/FZ combination is recorded with several exception
 * mask set-ups.  Write failures are reported and make the function fail
 * instead of silently producing a truncated file.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a file could not be
 *          opened, written or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the entry
 *                          name is the single format argument.
 * @param   cTests          Number of random inputs, raised to at least 192.
 */
static RTEXITCODE SseConvertXmmI32R32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int32_t const s_aSpecials[] =
    {
        INT32_MIN,
        INT32_MIN / 2,
        0,
        INT32_MAX / 2,
        INT32_MAX,
        (int32_t)0x80000000
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R32); iFn++)
    {
        PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmI32R32[iFn].pfnNative
                                         ? g_aSseConvertXmmI32R32[iFn].pfnNative
                                         : g_aSseConvertXmmI32R32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertXmmI32R32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertXmmI32R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        int        rcWrite = VINF_SUCCESS;
        X86FXSTATE State;
        RT_ZERO(State);
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials) && RT_SUCCESS(rcWrite); iTest += 1)
        {
            SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
            TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
            TestData.InVal.ai32[2] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
            TestData.InVal.ai32[3] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* First with all of X86_MXCSR_XCPT_MASK set ... */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseConvertXmmI32R32GenerateOne(pfn, pStrmOut, &State, &TestData, &rcWrite);

                        /* ... then with all of it cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseConvertXmmI32R32GenerateOne(pfn, pStrmOut, &State, &TestData, &rcWrite);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Rerun with the flags seen so far OR'ed into the input. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1 = SseConvertXmmI32R32GenerateOne(pfn, pStrmOut, &State, &TestData, &rcWrite);

                            /* That run reported flags not yet in fXcpt: add them and rerun with fXcpt shifted into the mask field. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseConvertXmmI32R32GenerateOne(pfn, pStrmOut, &State, &TestData, &rcWrite);
                            }

                            /* More than one flag: one extra run per flag, leaving just that flag out of the mask field. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK)
                                                    | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        SseConvertXmmI32R32GenerateOne(pfn, pStrmOut, &State, &TestData, &rcWrite);
                                    }
                        }
                    }
        }

        if (RT_FAILURE(rcWrite))
        {
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSseConvertXmmI32R32[iFn].pszName, rcWrite);
            RTStrmClose(pStrmOut);
            return RTEXITCODE_FAILURE;
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertXmmI32R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7631
7632static void SseConvertXmmI32R32Test(void)
7633{
7634 X86FXSTATE State;
7635 RT_ZERO(State);
7636
7637 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R32); iFn++)
7638 {
7639 if (!SubTestAndCheckIfEnabled(g_aSseConvertXmmI32R32[iFn].pszName))
7640 continue;
7641
7642 uint32_t const cTests = *g_aSseConvertXmmI32R32[iFn].pcTests;
7643 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmI32R32[iFn].paTests;
7644 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmI32R32[iFn].pfn;
7645 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmI32R32[iFn]);
7646 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7647 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7648 {
7649 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
7650 {
7651 IEMSSERESULT Res; RT_ZERO(Res);
7652
7653 State.MXCSR = paTests[iTest].fMxcsrIn;
7654 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
7655 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
7656 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
7657 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
7658 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
7659 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]))
7660 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32'%RI32'%RI32 \n"
7661 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
7662 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
7663 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7664 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
7665 paTests[iTest].InVal.ai32[2], paTests[iTest].InVal.ai32[3],
7666 iVar ? " " : "", Res.MXCSR,
7667 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
7668 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
7669 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7670 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
7671 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
7672 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
7673 ( !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
7674 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
7675 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
7676 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]))
7677 ? " - val" : "",
7678 FormatMxcsr(paTests[iTest].fMxcsrIn));
7679 }
7680 }
7681 }
7682}
7683
7684
7685/*
 * SSE operations converting single-precision floating point values to signed double-words.
7687 */
7688static const SSE_CONVERT_XMM_T g_aSseConvertXmmR32I32[] =
7689{
7690 ENTRY_BIN(cvtps2dq_u128),
7691 ENTRY_BIN(cvttps2dq_u128)
7692};
7693
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Calls the convert worker once with the MXCSR currently in @a pState and
 * appends the resulting record to the data file.
 *
 * @returns The MXCSR value reported in the worker's result.
 * @param   pfn         The worker to call.
 * @param   pStrmOut    The data file stream.
 * @param   pState      FPU state handed to the worker; the caller sets MXCSR.
 * @param   pTestData   The record to emit; InVal is already set, the MXCSR and
 *                      output members are filled in here.
 * @param   prcWrite    Sticky write status; only the first failure is recorded.
 */
static uint32_t SseConvertXmmR32I32GenerateOne(PFNIEMAIMPLFPSSEF2U128 pfn, PRTSTREAM pStrmOut, X86FXSTATE *pState,
                                               SSE_CONVERT_XMM_TEST_T *pTestData, int *prcWrite)
{
    IEMSSERESULT Res; RT_ZERO(Res);
    pfn(pState, &Res, &Res.uResult, &pTestData->InVal);

    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = Res.MXCSR;
    pTestData->OutVal    = Res.uResult;

    int const rc = RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    if (RT_FAILURE(rc) && RT_SUCCESS(*prcWrite))
        *prcWrite = rc;
    return Res.MXCSR;
}

/**
 * Generates one data file per entry in g_aSseConvertXmmR32I32.
 *
 * For every input (random ones followed by the s_aSpecials table) each
 * rounding mode and DAZ/FZ combination is recorded with several exception
 * mask set-ups.  Write failures are reported and make the function fail
 * instead of silently producing a truncated file.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a file could not be
 *          opened, written or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the entry
 *                          name is the single format argument.
 * @param   cTests          Number of random inputs, raised to at least 192.
 */
static RTEXITCODE SseConvertXmmR32I32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U aVal1[4]; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) } },
        { { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) } },
        { { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0)  } },
        { { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1)  } }
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32I32); iFn++)
    {
        PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmR32I32[iFn].pfnNative
                                         ? g_aSseConvertXmmR32I32[iFn].pfnNative
                                         : g_aSseConvertXmmR32I32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertXmmR32I32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertXmmR32I32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        int      rcWrite           = VINF_SUCCESS;
        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials) && RT_SUCCESS(rcWrite); iTest += 1)
        {
            SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.InVal.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
            TestData.InVal.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
            TestData.InVal.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];

            if (   RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[0])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[1])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[2])
                && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[3]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few all-normal inputs so far: redo this slot with fresh random values. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* First with all of X86_MXCSR_XCPT_MASK set ... */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseConvertXmmR32I32GenerateOne(pfn, pStrmOut, &State, &TestData, &rcWrite);

                        /* ... then with all of it cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseConvertXmmR32I32GenerateOne(pfn, pStrmOut, &State, &TestData, &rcWrite);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Rerun with the flags seen so far OR'ed into the input. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1 = SseConvertXmmR32I32GenerateOne(pfn, pStrmOut, &State, &TestData, &rcWrite);

                            /* That run reported flags not yet in fXcpt: add them and rerun with fXcpt shifted into the mask field. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseConvertXmmR32I32GenerateOne(pfn, pStrmOut, &State, &TestData, &rcWrite);
                            }

                            /* More than one flag: one extra run per flag, leaving just that flag out of the mask field. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK)
                                                    | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        SseConvertXmmR32I32GenerateOne(pfn, pStrmOut, &State, &TestData, &rcWrite);
                                    }
                        }
                    }
        }

        if (RT_FAILURE(rcWrite))
        {
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSseConvertXmmR32I32[iFn].pszName, rcWrite);
            RTStrmClose(pStrmOut);
            return RTEXITCODE_FAILURE;
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertXmmR32I32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7817
7818static void SseConvertXmmR32I32Test(void)
7819{
7820 X86FXSTATE State;
7821 RT_ZERO(State);
7822
7823 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32I32); iFn++)
7824 {
7825 if (!SubTestAndCheckIfEnabled(g_aSseConvertXmmR32I32[iFn].pszName))
7826 continue;
7827
7828 uint32_t const cTests = *g_aSseConvertXmmR32I32[iFn].pcTests;
7829 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmR32I32[iFn].paTests;
7830 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmR32I32[iFn].pfn;
7831 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR32I32[iFn]);
7832 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7833 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7834 {
7835 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
7836 {
7837 IEMSSERESULT Res; RT_ZERO(Res);
7838
7839 State.MXCSR = paTests[iTest].fMxcsrIn;
7840 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
7841 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
7842 || Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
7843 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
7844 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
7845 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
7846 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s \n"
7847 "%s -> mxcsr=%#08x %RI32'%RI32'%RI32'%RI32\n"
7848 "%s expected %#08x %RI32'%RI32'%RI32'%RI32%s%s (%s)\n",
7849 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7850 FormatR32(&paTests[iTest].InVal.ar32[0]), FormatR32(&paTests[iTest].InVal.ar32[1]),
7851 FormatR32(&paTests[iTest].InVal.ar32[2]), FormatR32(&paTests[iTest].InVal.ar32[3]),
7852 iVar ? " " : "", Res.MXCSR,
7853 Res.uResult.ai32[0], Res.uResult.ai32[1],
7854 Res.uResult.ai32[2], Res.uResult.ai32[3],
7855 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7856 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
7857 paTests[iTest].OutVal.ai32[2], paTests[iTest].OutVal.ai32[3],
7858 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
7859 ( Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
7860 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
7861 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
7862 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
7863 ? " - val" : "",
7864 FormatMxcsr(paTests[iTest].fMxcsrIn));
7865 }
7866 }
7867 }
7868}
7869
7870
7871/*
7872 * Convert SSE operations converting signed double-words to double-precision floating point values.
7873 */
7874static const SSE_CONVERT_XMM_T g_aSseConvertXmmI32R64[] =
7875{
7876 ENTRY_BIN(cvtdq2pd_u128)
7877};
7878
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Calls the convert worker once with the MXCSR currently in @a pState and
 * appends the resulting record to the data file.
 *
 * @returns The MXCSR value reported in the worker's result.
 * @param   pfn         The worker to call.
 * @param   pStrmOut    The data file stream.
 * @param   pState      FPU state handed to the worker; the caller sets MXCSR.
 * @param   pTestData   The record to emit; InVal is already set, the MXCSR and
 *                      output members are filled in here.
 * @param   prcWrite    Sticky write status; only the first failure is recorded.
 */
static uint32_t SseConvertXmmI32R64GenerateOne(PFNIEMAIMPLFPSSEF2U128 pfn, PRTSTREAM pStrmOut, X86FXSTATE *pState,
                                               SSE_CONVERT_XMM_TEST_T *pTestData, int *prcWrite)
{
    IEMSSERESULT Res; RT_ZERO(Res);
    pfn(pState, &Res, &Res.uResult, &pTestData->InVal);

    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = Res.MXCSR;
    pTestData->OutVal    = Res.uResult;

    int const rc = RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    if (RT_FAILURE(rc) && RT_SUCCESS(*prcWrite))
        *prcWrite = rc;
    return Res.MXCSR;
}

/**
 * Generates one data file per entry in g_aSseConvertXmmI32R64.
 *
 * For every input (random ones followed by the s_aSpecials table) each
 * rounding mode and DAZ/FZ combination is recorded with several exception
 * mask set-ups.  Write failures are reported and make the function fail
 * instead of silently producing a truncated file.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a file could not be
 *          opened, written or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the entry
 *                          name is the single format argument.
 * @param   cTests          Number of random inputs, raised to at least 192.
 */
static RTEXITCODE SseConvertXmmI32R64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int32_t const s_aSpecials[] =
    {
        INT32_MIN,
        INT32_MIN / 2,
        0,
        INT32_MAX / 2,
        INT32_MAX,
        (int32_t)0x80000000
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R64); iFn++)
    {
        PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmI32R64[iFn].pfnNative
                                         ? g_aSseConvertXmmI32R64[iFn].pfnNative
                                         : g_aSseConvertXmmI32R64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertXmmI32R64[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertXmmI32R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        int        rcWrite = VINF_SUCCESS;
        X86FXSTATE State;
        RT_ZERO(State);
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials) && RT_SUCCESS(rcWrite); iTest += 1)
        {
            SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);

            TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
            TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
            TestData.InVal.ai32[2] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];
            TestData.InVal.ai32[3] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* First with all of X86_MXCSR_XCPT_MASK set ... */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseConvertXmmI32R64GenerateOne(pfn, pStrmOut, &State, &TestData, &rcWrite);

                        /* ... then with all of it cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseConvertXmmI32R64GenerateOne(pfn, pStrmOut, &State, &TestData, &rcWrite);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Rerun with the flags seen so far OR'ed into the input. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1 = SseConvertXmmI32R64GenerateOne(pfn, pStrmOut, &State, &TestData, &rcWrite);

                            /* That run reported flags not yet in fXcpt: add them and rerun with fXcpt shifted into the mask field. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseConvertXmmI32R64GenerateOne(pfn, pStrmOut, &State, &TestData, &rcWrite);
                            }

                            /* More than one flag: one extra run per flag, leaving just that flag out of the mask field. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK)
                                                    | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        SseConvertXmmI32R64GenerateOne(pfn, pStrmOut, &State, &TestData, &rcWrite);
                                    }
                        }
                    }
        }

        if (RT_FAILURE(rcWrite))
        {
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSseConvertXmmI32R64[iFn].pszName, rcWrite);
            RTStrmClose(pStrmOut);
            return RTEXITCODE_FAILURE;
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertXmmI32R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7991
7992static void SseConvertXmmI32R64Test(void)
7993{
7994 X86FXSTATE State;
7995 RT_ZERO(State);
7996
7997 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R64); iFn++)
7998 {
7999 if (!SubTestAndCheckIfEnabled(g_aSseConvertXmmI32R64[iFn].pszName))
8000 continue;
8001
8002 uint32_t const cTests = *g_aSseConvertXmmI32R64[iFn].pcTests;
8003 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmI32R64[iFn].paTests;
8004 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmI32R64[iFn].pfn;
8005 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmI32R64[iFn]);
8006 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8007 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8008 {
8009 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8010 {
8011 IEMSSERESULT Res; RT_ZERO(Res);
8012
8013 State.MXCSR = paTests[iTest].fMxcsrIn;
8014 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
8015 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
8016 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
8017 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
8018 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32'%RI32'%RI32 \n"
8019 "%s -> mxcsr=%#08x %s'%s\n"
8020 "%s expected %#08x %s'%s%s%s (%s)\n",
8021 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8022 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
8023 paTests[iTest].InVal.ai32[2], paTests[iTest].InVal.ai32[3],
8024 iVar ? " " : "", Res.MXCSR,
8025 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
8026 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8027 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
8028 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
8029 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
8030 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
8031 ? " - val" : "",
8032 FormatMxcsr(paTests[iTest].fMxcsrIn));
8033 }
8034 }
8035 }
8036}
8037
8038
8039/*
 * Convert SSE operations converting double-precision floating point values to signed double-word values.
8041 */
8042static const SSE_CONVERT_XMM_T g_aSseConvertXmmR64I32[] =
8043{
8044 ENTRY_BIN(cvtpd2dq_u128),
8045 ENTRY_BIN(cvttpd2dq_u128)
8046};
8047
8048#ifdef TSTIEMAIMPL_WITH_GENERATOR
8049static RTEXITCODE SseConvertXmmR64I32Generate(const char *pszDataFileFmt, uint32_t cTests)
8050{
8051 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8052
8053 static struct { RTFLOAT64U aVal1[2]; } const s_aSpecials[] =
8054 {
8055 { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
8056 { { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) } },
8057 { { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(0) } },
8058 { { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(1) } }
8059 /** @todo More specials. */
8060 };
8061
8062 X86FXSTATE State;
8063 RT_ZERO(State);
8064 uint32_t cMinNormalPairs = (cTests - 144) / 4;
8065 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64I32); iFn++)
8066 {
8067 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseConvertXmmR64I32[iFn].pfnNative ? g_aSseConvertXmmR64I32[iFn].pfnNative : g_aSseConvertXmmR64I32[iFn].pfn;
8068
8069 PRTSTREAM pStrmOut = NULL;
8070 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertXmmR64I32[iFn].pszName);
8071 if (RT_FAILURE(rc))
8072 {
8073 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertXmmR64I32[iFn].pszName, rc);
8074 return RTEXITCODE_FAILURE;
8075 }
8076
8077 uint32_t cNormalInputPairs = 0;
8078 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8079 {
8080 SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);
8081
8082 TestData.InVal.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
8083 TestData.InVal.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
8084
8085 if ( RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[0])
8086 && RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[1]))
8087 cNormalInputPairs++;
8088 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
8089 {
8090 iTest -= 1;
8091 continue;
8092 }
8093
8094 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
8095 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
8096 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
8097 for (uint8_t iFz = 0; iFz < 2; iFz++)
8098 {
8099 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
8100 | (iRounding << X86_MXCSR_RC_SHIFT)
8101 | (iDaz ? X86_MXCSR_DAZ : 0)
8102 | (iFz ? X86_MXCSR_FZ : 0)
8103 | X86_MXCSR_XCPT_MASK;
8104 IEMSSERESULT ResM; RT_ZERO(ResM);
8105 pfn(&State, &ResM, &ResM.uResult, &TestData.InVal);
8106 TestData.fMxcsrIn = State.MXCSR;
8107 TestData.fMxcsrOut = ResM.MXCSR;
8108 TestData.OutVal = ResM.uResult;
8109 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8110
8111 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
8112 IEMSSERESULT ResU; RT_ZERO(ResU);
8113 pfn(&State, &ResU, &ResU.uResult, &TestData.InVal);
8114 TestData.fMxcsrIn = State.MXCSR;
8115 TestData.fMxcsrOut = ResU.MXCSR;
8116 TestData.OutVal = ResU.uResult;
8117 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8118
8119 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
8120 if (fXcpt)
8121 {
8122 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8123 IEMSSERESULT Res1; RT_ZERO(Res1);
8124 pfn(&State, &Res1, &Res1.uResult, &TestData.InVal);
8125 TestData.fMxcsrIn = State.MXCSR;
8126 TestData.fMxcsrOut = Res1.MXCSR;
8127 TestData.OutVal = Res1.uResult;
8128 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8129
8130 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
8131 {
8132 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
8133 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8134 IEMSSERESULT Res2; RT_ZERO(Res2);
8135 pfn(&State, &Res2, &Res2.uResult, &TestData.InVal);
8136 TestData.fMxcsrIn = State.MXCSR;
8137 TestData.fMxcsrOut = Res2.MXCSR;
8138 TestData.OutVal = Res2.uResult;
8139 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8140 }
8141 if (!RT_IS_POWER_OF_TWO(fXcpt))
8142 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
8143 if (fUnmasked & fXcpt)
8144 {
8145 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
8146 IEMSSERESULT Res3; RT_ZERO(Res3);
8147 pfn(&State, &Res3, &Res3.uResult, &TestData.InVal);
8148 TestData.fMxcsrIn = State.MXCSR;
8149 TestData.fMxcsrOut = Res3.MXCSR;
8150 TestData.OutVal = Res3.uResult;
8151 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8152 }
8153 }
8154 }
8155 }
8156 rc = RTStrmClose(pStrmOut);
8157 if (RT_FAILURE(rc))
8158 {
8159 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertXmmR64I32[iFn].pszName, rc);
8160 return RTEXITCODE_FAILURE;
8161 }
8162 }
8163
8164 return RTEXITCODE_SUCCESS;
8165}
8166#endif
8167
8168static void SseConvertXmmR64I32Test(void)
8169{
8170 X86FXSTATE State;
8171 RT_ZERO(State);
8172
8173 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64I32); iFn++)
8174 {
8175 if (!SubTestAndCheckIfEnabled(g_aSseConvertXmmR64I32[iFn].pszName))
8176 continue;
8177
8178 uint32_t const cTests = *g_aSseConvertXmmR64I32[iFn].pcTests;
8179 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmR64I32[iFn].paTests;
8180 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmR64I32[iFn].pfn;
8181 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR64I32[iFn]);
8182 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8183 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8184 {
8185 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8186 {
8187 IEMSSERESULT Res; RT_ZERO(Res);
8188
8189 State.MXCSR = paTests[iTest].fMxcsrIn;
8190 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
8191 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
8192 || Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
8193 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
8194 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
8195 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
8196 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s \n"
8197 "%s -> mxcsr=%#08x %RI32'%RI32'%RI32'%RI32\n"
8198 "%s expected %#08x %RI32'%RI32'%RI32'%RI32%s%s (%s)\n",
8199 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8200 FormatR64(&paTests[iTest].InVal.ar64[0]), FormatR64(&paTests[iTest].InVal.ar64[1]),
8201 iVar ? " " : "", Res.MXCSR,
8202 Res.uResult.ai32[0], Res.uResult.ai32[1],
8203 Res.uResult.ai32[2], Res.uResult.ai32[3],
8204 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8205 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8206 paTests[iTest].OutVal.ai32[2], paTests[iTest].OutVal.ai32[3],
8207 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
8208 ( Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
8209 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
8210 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
8211 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
8212 ? " - val" : "",
8213 FormatMxcsr(paTests[iTest].fMxcsrIn));
8214 }
8215 }
8216 }
8217}
8218
8219
8220/*
8221 * Convert SSE operations converting double-precision floating point values to signed double-word values.
8222 */
8223TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_MM_XMM_T, SSE_CONVERT_MM_XMM_TEST_T, PFNIEMAIMPLMXCSRU64U128);
8224
8225static const SSE_CONVERT_MM_XMM_T g_aSseConvertMmXmm[] =
8226{
8227 ENTRY_BIN(cvtpd2pi_u128),
8228 ENTRY_BIN(cvttpd2pi_u128)
8229};
8230
8231#ifdef TSTIEMAIMPL_WITH_GENERATOR
8232static RTEXITCODE SseConvertMmXmmGenerate(const char *pszDataFileFmt, uint32_t cTests)
8233{
8234 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8235
8236 static struct { RTFLOAT64U aVal1[2]; } const s_aSpecials[] =
8237 {
8238 { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
8239 { { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) } },
8240 { { RTFLOAT64U_INIT_INF(0), RTFLOAT64U_INIT_INF(0) } },
8241 { { RTFLOAT64U_INIT_INF(1), RTFLOAT64U_INIT_INF(1) } }
8242 /** @todo More specials. */
8243 };
8244
8245 uint32_t cMinNormalPairs = (cTests - 144) / 4;
8246 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmXmm); iFn++)
8247 {
8248 PFNIEMAIMPLMXCSRU64U128 const pfn = g_aSseConvertMmXmm[iFn].pfnNative ? g_aSseConvertMmXmm[iFn].pfnNative : g_aSseConvertMmXmm[iFn].pfn;
8249
8250 PRTSTREAM pStrmOut = NULL;
8251 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertMmXmm[iFn].pszName);
8252 if (RT_FAILURE(rc))
8253 {
8254 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertMmXmm[iFn].pszName, rc);
8255 return RTEXITCODE_FAILURE;
8256 }
8257
8258 uint32_t cNormalInputPairs = 0;
8259 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8260 {
8261 SSE_CONVERT_MM_XMM_TEST_T TestData; RT_ZERO(TestData);
8262
8263 TestData.InVal.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
8264 TestData.InVal.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
8265
8266 if ( RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[0])
8267 && RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[1]))
8268 cNormalInputPairs++;
8269 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
8270 {
8271 iTest -= 1;
8272 continue;
8273 }
8274
8275 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
8276 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
8277 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
8278 for (uint8_t iFz = 0; iFz < 2; iFz++)
8279 {
8280 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
8281 | (iRounding << X86_MXCSR_RC_SHIFT)
8282 | (iDaz ? X86_MXCSR_DAZ : 0)
8283 | (iFz ? X86_MXCSR_FZ : 0)
8284 | X86_MXCSR_XCPT_MASK;
8285 uint32_t fMxcsrM = fMxcsrIn;
8286 uint64_t u64ResM;
8287 pfn(&fMxcsrM, &u64ResM, &TestData.InVal);
8288 TestData.fMxcsrIn = fMxcsrIn;
8289 TestData.fMxcsrOut = fMxcsrM;
8290 TestData.OutVal.u = u64ResM;
8291 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8292
8293 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
8294 uint32_t fMxcsrU = fMxcsrIn;
8295 uint64_t u64ResU;
8296 pfn(&fMxcsrU, &u64ResU, &TestData.InVal);
8297 TestData.fMxcsrIn = fMxcsrIn;
8298 TestData.fMxcsrOut = fMxcsrU;
8299 TestData.OutVal.u = u64ResU;
8300 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8301
8302 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
8303 if (fXcpt)
8304 {
8305 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8306 uint32_t fMxcsr1 = fMxcsrIn;
8307 uint64_t u64Res1;
8308 pfn(&fMxcsr1, &u64Res1, &TestData.InVal);
8309 TestData.fMxcsrIn = fMxcsrIn;
8310 TestData.fMxcsrOut = fMxcsr1;
8311 TestData.OutVal.u = u64Res1;
8312 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8313
8314 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
8315 {
8316 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
8317 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8318 uint32_t fMxcsr2 = fMxcsrIn;
8319 uint64_t u64Res2;
8320 pfn(&fMxcsr2, &u64Res2, &TestData.InVal);
8321 TestData.fMxcsrIn = fMxcsrIn;
8322 TestData.fMxcsrOut = fMxcsr2;
8323 TestData.OutVal.u = u64Res2;
8324 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8325 }
8326 if (!RT_IS_POWER_OF_TWO(fXcpt))
8327 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
8328 if (fUnmasked & fXcpt)
8329 {
8330 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
8331 uint32_t fMxcsr3 = fMxcsrIn;
8332 uint64_t u64Res3;
8333 pfn(&fMxcsr3, &u64Res3, &TestData.InVal);
8334 TestData.fMxcsrIn = fMxcsrIn;
8335 TestData.fMxcsrOut = fMxcsr3;
8336 TestData.OutVal.u = u64Res3;
8337 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8338 }
8339 }
8340 }
8341 }
8342 rc = RTStrmClose(pStrmOut);
8343 if (RT_FAILURE(rc))
8344 {
8345 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertMmXmm[iFn].pszName, rc);
8346 return RTEXITCODE_FAILURE;
8347 }
8348 }
8349
8350 return RTEXITCODE_SUCCESS;
8351}
8352#endif
8353
8354static void SseConvertMmXmmTest(void)
8355{
8356 X86FXSTATE State;
8357 RT_ZERO(State);
8358
8359 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmXmm); iFn++)
8360 {
8361 if (!SubTestAndCheckIfEnabled(g_aSseConvertMmXmm[iFn].pszName))
8362 continue;
8363
8364 uint32_t const cTests = *g_aSseConvertMmXmm[iFn].pcTests;
8365 SSE_CONVERT_MM_XMM_TEST_T const * const paTests = g_aSseConvertMmXmm[iFn].paTests;
8366 PFNIEMAIMPLMXCSRU64U128 pfn = g_aSseConvertMmXmm[iFn].pfn;
8367 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertMmXmm[iFn]);
8368 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8369 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8370 {
8371 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8372 {
8373 RTUINT64U ValOut;
8374 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
8375 pfn(&fMxcsr, &ValOut.u, &paTests[iTest].InVal);
8376 if ( fMxcsr != paTests[iTest].fMxcsrOut
8377 || ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8378 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8379 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s\n"
8380 "%s -> mxcsr=%#08x %RI32'%RI32\n"
8381 "%s expected %#08x %RI32'%RI32%s%s (%s)\n",
8382 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8383 FormatR64(&paTests[iTest].InVal.ar64[0]), FormatR64(&paTests[iTest].InVal.ar64[1]),
8384 iVar ? " " : "", fMxcsr, ValOut.ai32[0], ValOut.ai32[1],
8385 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8386 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8387 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8388 ( ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8389 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8390 ? " - val" : "",
8391 FormatMxcsr(paTests[iTest].fMxcsrIn));
8392 }
8393 }
8394 }
8395}
8396
8397
8398/*
8399 * Convert SSE operations converting signed double-word values to double precision floating-point values (probably only cvtpi2pd).
8400 */
8401TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_R64_MM_T, SSE_CONVERT_XMM_MM_TEST_T, PFNIEMAIMPLMXCSRU128U64);
8402
8403static const SSE_CONVERT_XMM_R64_MM_T g_aSseConvertXmmR64Mm[] =
8404{
8405 ENTRY_BIN(cvtpi2pd_u128)
8406};
8407
8408#ifdef TSTIEMAIMPL_WITH_GENERATOR
8409static RTEXITCODE SseConvertXmmR64MmGenerate(const char *pszDataFileFmt, uint32_t cTests)
8410{
8411 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8412
8413 static struct { int32_t aVal[2]; } const s_aSpecials[] =
8414 {
8415 { { INT32_MIN, INT32_MIN } },
8416 { { INT32_MAX, INT32_MAX } }
8417 /** @todo More specials. */
8418 };
8419
8420 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64Mm); iFn++)
8421 {
8422 PFNIEMAIMPLMXCSRU128U64 const pfn = g_aSseConvertXmmR64Mm[iFn].pfnNative ? g_aSseConvertXmmR64Mm[iFn].pfnNative : g_aSseConvertXmmR64Mm[iFn].pfn;
8423
8424 PRTSTREAM pStrmOut = NULL;
8425 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertXmmR64Mm[iFn].pszName);
8426 if (RT_FAILURE(rc))
8427 {
8428 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertXmmR64Mm[iFn].pszName, rc);
8429 return RTEXITCODE_FAILURE;
8430 }
8431
8432 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8433 {
8434 SSE_CONVERT_XMM_MM_TEST_T TestData; RT_ZERO(TestData);
8435
8436 TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[0];
8437 TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[1];
8438
8439 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
8440 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
8441 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
8442 for (uint8_t iFz = 0; iFz < 2; iFz++)
8443 {
8444 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
8445 | (iRounding << X86_MXCSR_RC_SHIFT)
8446 | (iDaz ? X86_MXCSR_DAZ : 0)
8447 | (iFz ? X86_MXCSR_FZ : 0)
8448 | X86_MXCSR_XCPT_MASK;
8449 uint32_t fMxcsrM = fMxcsrIn;
8450 pfn(&fMxcsrM, &TestData.OutVal, TestData.InVal.u);
8451 TestData.fMxcsrIn = fMxcsrIn;
8452 TestData.fMxcsrOut = fMxcsrM;
8453 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8454
8455 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
8456 uint32_t fMxcsrU = fMxcsrIn;
8457 pfn(&fMxcsrU, &TestData.OutVal, TestData.InVal.u);
8458 TestData.fMxcsrIn = fMxcsrIn;
8459 TestData.fMxcsrOut = fMxcsrU;
8460 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8461
8462 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
8463 if (fXcpt)
8464 {
8465 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8466 uint32_t fMxcsr1 = fMxcsrIn;
8467 pfn(&fMxcsr1, &TestData.OutVal, TestData.InVal.u);
8468 TestData.fMxcsrIn = fMxcsrIn;
8469 TestData.fMxcsrOut = fMxcsr1;
8470 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8471
8472 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
8473 {
8474 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
8475 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8476 uint32_t fMxcsr2 = fMxcsrIn;
8477 pfn(&fMxcsr2, &TestData.OutVal, TestData.InVal.u);
8478 TestData.fMxcsrIn = fMxcsrIn;
8479 TestData.fMxcsrOut = fMxcsr2;
8480 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8481 }
8482 if (!RT_IS_POWER_OF_TWO(fXcpt))
8483 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
8484 if (fUnmasked & fXcpt)
8485 {
8486 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
8487 uint32_t fMxcsr3 = fMxcsrIn;
8488 pfn(&fMxcsr3, &TestData.OutVal, TestData.InVal.u);
8489 TestData.fMxcsrIn = fMxcsrIn;
8490 TestData.fMxcsrOut = fMxcsr3;
8491 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8492 }
8493 }
8494 }
8495 }
8496 rc = RTStrmClose(pStrmOut);
8497 if (RT_FAILURE(rc))
8498 {
8499 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertXmmR64Mm[iFn].pszName, rc);
8500 return RTEXITCODE_FAILURE;
8501 }
8502 }
8503
8504 return RTEXITCODE_SUCCESS;
8505}
8506#endif
8507
8508static void SseConvertXmmR64MmTest(void)
8509{
8510 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64Mm); iFn++)
8511 {
8512 if (!SubTestAndCheckIfEnabled(g_aSseConvertXmmR64Mm[iFn].pszName))
8513 continue;
8514
8515 uint32_t const cTests = *g_aSseConvertXmmR64Mm[iFn].pcTests;
8516 SSE_CONVERT_XMM_MM_TEST_T const * const paTests = g_aSseConvertXmmR64Mm[iFn].paTests;
8517 PFNIEMAIMPLMXCSRU128U64 pfn = g_aSseConvertXmmR64Mm[iFn].pfn;
8518 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR64Mm[iFn]);
8519 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8520 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8521 {
8522 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8523 {
8524 X86XMMREG ValOut;
8525 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
8526 pfn(&fMxcsr, &ValOut, paTests[iTest].InVal.u);
8527 if ( fMxcsr != paTests[iTest].fMxcsrOut
8528 || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[0], &paTests[iTest].OutVal.ar64[0])
8529 || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[1], &paTests[iTest].OutVal.ar64[1]))
8530 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32\n"
8531 "%s -> mxcsr=%#08x %s'%s\n"
8532 "%s expected %#08x %s'%s%s%s (%s)\n",
8533 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8534 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
8535 iVar ? " " : "", fMxcsr,
8536 FormatR64(&ValOut.ar64[0]), FormatR64(&ValOut.ar64[1]),
8537 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8538 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
8539 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8540 ( !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[0], &paTests[iTest].OutVal.ar64[0])
8541 || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[1], &paTests[iTest].OutVal.ar64[1]))
8542 ? " - val" : "",
8543 FormatMxcsr(paTests[iTest].fMxcsrIn));
8544 }
8545 }
8546 }
8547}
8548
8549
8550/*
 * Convert SSE operations converting signed double-word values to single-precision floating-point values (probably only cvtpi2ps).
8552 */
8553TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_R32_MM_T, SSE_CONVERT_XMM_MM_TEST_T, PFNIEMAIMPLMXCSRU128U64);
8554
8555static const SSE_CONVERT_XMM_R32_MM_T g_aSseConvertXmmR32Mm[] =
8556{
8557 ENTRY_BIN(cvtpi2ps_u128)
8558};
8559
8560#ifdef TSTIEMAIMPL_WITH_GENERATOR
8561static RTEXITCODE SseConvertXmmR32MmGenerate(const char *pszDataFileFmt, uint32_t cTests)
8562{
8563 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8564
8565 static struct { int32_t aVal[2]; } const s_aSpecials[] =
8566 {
8567 { { INT32_MIN, INT32_MIN } },
8568 { { INT32_MAX, INT32_MAX } }
8569 /** @todo More specials. */
8570 };
8571
8572 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32Mm); iFn++)
8573 {
8574 PFNIEMAIMPLMXCSRU128U64 const pfn = g_aSseConvertXmmR32Mm[iFn].pfnNative ? g_aSseConvertXmmR32Mm[iFn].pfnNative : g_aSseConvertXmmR32Mm[iFn].pfn;
8575
8576 PRTSTREAM pStrmOut = NULL;
8577 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertXmmR32Mm[iFn].pszName);
8578 if (RT_FAILURE(rc))
8579 {
8580 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertXmmR32Mm[iFn].pszName, rc);
8581 return RTEXITCODE_FAILURE;
8582 }
8583
8584 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8585 {
8586 SSE_CONVERT_XMM_MM_TEST_T TestData; RT_ZERO(TestData);
8587
8588 TestData.InVal.ai32[0] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[0];
8589 TestData.InVal.ai32[1] = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests].aVal[1];
8590
8591 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
8592 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
8593 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
8594 for (uint8_t iFz = 0; iFz < 2; iFz++)
8595 {
8596 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
8597 | (iRounding << X86_MXCSR_RC_SHIFT)
8598 | (iDaz ? X86_MXCSR_DAZ : 0)
8599 | (iFz ? X86_MXCSR_FZ : 0)
8600 | X86_MXCSR_XCPT_MASK;
8601 uint32_t fMxcsrM = fMxcsrIn;
8602 pfn(&fMxcsrM, &TestData.OutVal, TestData.InVal.u);
8603 TestData.fMxcsrIn = fMxcsrIn;
8604 TestData.fMxcsrOut = fMxcsrM;
8605 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8606
8607 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
8608 uint32_t fMxcsrU = fMxcsrIn;
8609 pfn(&fMxcsrU, &TestData.OutVal, TestData.InVal.u);
8610 TestData.fMxcsrIn = fMxcsrIn;
8611 TestData.fMxcsrOut = fMxcsrU;
8612 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8613
8614 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
8615 if (fXcpt)
8616 {
8617 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8618 uint32_t fMxcsr1 = fMxcsrIn;
8619 pfn(&fMxcsr1, &TestData.OutVal, TestData.InVal.u);
8620 TestData.fMxcsrIn = fMxcsrIn;
8621 TestData.fMxcsrOut = fMxcsr1;
8622 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8623
8624 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
8625 {
8626 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
8627 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8628 uint32_t fMxcsr2 = fMxcsrIn;
8629 pfn(&fMxcsr2, &TestData.OutVal, TestData.InVal.u);
8630 TestData.fMxcsrIn = fMxcsrIn;
8631 TestData.fMxcsrOut = fMxcsr2;
8632 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8633 }
8634 if (!RT_IS_POWER_OF_TWO(fXcpt))
8635 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
8636 if (fUnmasked & fXcpt)
8637 {
8638 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
8639 uint32_t fMxcsr3 = fMxcsrIn;
8640 pfn(&fMxcsr3, &TestData.OutVal, TestData.InVal.u);
8641 TestData.fMxcsrIn = fMxcsrIn;
8642 TestData.fMxcsrOut = fMxcsr3;
8643 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8644 }
8645 }
8646 }
8647 }
8648 rc = RTStrmClose(pStrmOut);
8649 if (RT_FAILURE(rc))
8650 {
8651 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertXmmR32Mm[iFn].pszName, rc);
8652 return RTEXITCODE_FAILURE;
8653 }
8654 }
8655
8656 return RTEXITCODE_SUCCESS;
8657}
8658#endif
8659
8660static void SseConvertXmmR32MmTest(void)
8661{
8662 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32Mm); iFn++)
8663 {
8664 if (!SubTestAndCheckIfEnabled(g_aSseConvertXmmR32Mm[iFn].pszName))
8665 continue;
8666
8667 uint32_t const cTests = *g_aSseConvertXmmR32Mm[iFn].pcTests;
8668 SSE_CONVERT_XMM_MM_TEST_T const * const paTests = g_aSseConvertXmmR32Mm[iFn].paTests;
8669 PFNIEMAIMPLMXCSRU128U64 pfn = g_aSseConvertXmmR32Mm[iFn].pfn;
8670 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR32Mm[iFn]);
8671 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8672 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8673 {
8674 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8675 {
8676 X86XMMREG ValOut;
8677 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
8678 pfn(&fMxcsr, &ValOut, paTests[iTest].InVal.u);
8679 if ( fMxcsr != paTests[iTest].fMxcsrOut
8680 || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[0], &paTests[iTest].OutVal.ar32[0])
8681 || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[1], &paTests[iTest].OutVal.ar32[1]))
8682 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32\n"
8683 "%s -> mxcsr=%#08x %s'%s\n"
8684 "%s expected %#08x %s'%s%s%s (%s)\n",
8685 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8686 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
8687 iVar ? " " : "", fMxcsr,
8688 FormatR32(&ValOut.ar32[0]), FormatR32(&ValOut.ar32[1]),
8689 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8690 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
8691 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8692 ( !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[0], &paTests[iTest].OutVal.ar32[0])
8693 || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[1], &paTests[iTest].OutVal.ar32[1]))
8694 ? " - val" : "",
8695 FormatMxcsr(paTests[iTest].fMxcsrIn));
8696 }
8697 }
8698 }
8699}
8700
8701
8702/*
8703 * Convert SSE operations converting single-precision floating point values to signed double-word values.
8704 */
8705TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_MM_I32_XMM_R32_T, SSE_CONVERT_MM_R32_TEST_T, PFNIEMAIMPLMXCSRU64U64);
8706
8707static const SSE_CONVERT_MM_I32_XMM_R32_T g_aSseConvertMmI32XmmR32[] =
8708{
8709 ENTRY_BIN(cvtps2pi_u128),
8710 ENTRY_BIN(cvttps2pi_u128)
8711};
8712
8713#ifdef TSTIEMAIMPL_WITH_GENERATOR
8714static RTEXITCODE SseConvertMmI32XmmR32Generate(const char *pszDataFileFmt, uint32_t cTests)
8715{
8716 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8717
8718 static struct { RTFLOAT32U aVal1[2]; } const s_aSpecials[] =
8719 {
8720 { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) } },
8721 { { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) } },
8722 { { RTFLOAT32U_INIT_INF(0), RTFLOAT32U_INIT_INF(0) } },
8723 { { RTFLOAT32U_INIT_INF(1), RTFLOAT32U_INIT_INF(1) } }
8724 /** @todo More specials. */
8725 };
8726
8727 uint32_t cMinNormalPairs = (cTests - 144) / 4;
8728 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmI32XmmR32); iFn++)
8729 {
8730 PFNIEMAIMPLMXCSRU64U64 const pfn = g_aSseConvertMmI32XmmR32[iFn].pfnNative ? g_aSseConvertMmI32XmmR32[iFn].pfnNative : g_aSseConvertMmI32XmmR32[iFn].pfn;
8731
8732 PRTSTREAM pStrmOut = NULL;
8733 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertMmI32XmmR32[iFn].pszName);
8734 if (RT_FAILURE(rc))
8735 {
8736 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertMmI32XmmR32[iFn].pszName, rc);
8737 return RTEXITCODE_FAILURE;
8738 }
8739
8740 uint32_t cNormalInputPairs = 0;
8741 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8742 {
8743 SSE_CONVERT_MM_R32_TEST_T TestData; RT_ZERO(TestData);
8744
8745 TestData.ar32InVal[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
8746 TestData.ar32InVal[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
8747
8748 if ( RTFLOAT32U_IS_NORMAL(&TestData.ar32InVal[0])
8749 && RTFLOAT32U_IS_NORMAL(&TestData.ar32InVal[1]))
8750 cNormalInputPairs++;
8751 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
8752 {
8753 iTest -= 1;
8754 continue;
8755 }
8756
8757 RTFLOAT64U TestVal;
8758 TestVal.au32[0] = TestData.ar32InVal[0].u;
8759 TestVal.au32[1] = TestData.ar32InVal[1].u;
8760
8761 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
8762 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
8763 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
8764 for (uint8_t iFz = 0; iFz < 2; iFz++)
8765 {
8766 uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
8767 | (iRounding << X86_MXCSR_RC_SHIFT)
8768 | (iDaz ? X86_MXCSR_DAZ : 0)
8769 | (iFz ? X86_MXCSR_FZ : 0)
8770 | X86_MXCSR_XCPT_MASK;
8771 uint32_t fMxcsrM = fMxcsrIn;
8772 uint64_t u64ResM;
8773 pfn(&fMxcsrM, &u64ResM, TestVal.u);
8774 TestData.fMxcsrIn = fMxcsrIn;
8775 TestData.fMxcsrOut = fMxcsrM;
8776 TestData.OutVal.u = u64ResM;
8777 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8778
8779 fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
8780 uint32_t fMxcsrU = fMxcsrIn;
8781 uint64_t u64ResU;
8782 pfn(&fMxcsrU, &u64ResU, TestVal.u);
8783 TestData.fMxcsrIn = fMxcsrIn;
8784 TestData.fMxcsrOut = fMxcsrU;
8785 TestData.OutVal.u = u64ResU;
8786 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8787
8788 uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
8789 if (fXcpt)
8790 {
8791 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
8792 uint32_t fMxcsr1 = fMxcsrIn;
8793 uint64_t u64Res1;
8794 pfn(&fMxcsr1, &u64Res1, TestVal.u);
8795 TestData.fMxcsrIn = fMxcsrIn;
8796 TestData.fMxcsrOut = fMxcsr1;
8797 TestData.OutVal.u = u64Res1;
8798 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8799
8800 if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
8801 {
8802 fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
8803 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
8804 uint32_t fMxcsr2 = fMxcsrIn;
8805 uint64_t u64Res2;
8806 pfn(&fMxcsr2, &u64Res2, TestVal.u);
8807 TestData.fMxcsrIn = fMxcsrIn;
8808 TestData.fMxcsrOut = fMxcsr2;
8809 TestData.OutVal.u = u64Res2;
8810 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8811 }
8812 if (!RT_IS_POWER_OF_TWO(fXcpt))
8813 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
8814 if (fUnmasked & fXcpt)
8815 {
8816 fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
8817 uint32_t fMxcsr3 = fMxcsrIn;
8818 uint64_t u64Res3;
8819 pfn(&fMxcsr3, &u64Res3, TestVal.u);
8820 TestData.fMxcsrIn = fMxcsrIn;
8821 TestData.fMxcsrOut = fMxcsr3;
8822 TestData.OutVal.u = u64Res3;
8823 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8824 }
8825 }
8826 }
8827 }
8828 rc = RTStrmClose(pStrmOut);
8829 if (RT_FAILURE(rc))
8830 {
8831 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertMmI32XmmR32[iFn].pszName, rc);
8832 return RTEXITCODE_FAILURE;
8833 }
8834 }
8835
8836 return RTEXITCODE_SUCCESS;
8837}
8838#endif
8839
8840static void SseConvertMmI32XmmR32Test(void)
8841{
8842 X86FXSTATE State;
8843 RT_ZERO(State);
8844
8845 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmI32XmmR32); iFn++)
8846 {
8847 if (!SubTestAndCheckIfEnabled(g_aSseConvertMmI32XmmR32[iFn].pszName))
8848 continue;
8849
8850 uint32_t const cTests = *g_aSseConvertMmI32XmmR32[iFn].pcTests;
8851 SSE_CONVERT_MM_R32_TEST_T const * const paTests = g_aSseConvertMmI32XmmR32[iFn].paTests;
8852 PFNIEMAIMPLMXCSRU64U64 pfn = g_aSseConvertMmI32XmmR32[iFn].pfn;
8853 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertMmI32XmmR32[iFn]);
8854 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8855 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8856 {
8857 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8858 {
8859 RTUINT64U ValOut;
8860 RTUINT64U ValIn;
8861
8862 ValIn.au32[0] = paTests[iTest].ar32InVal[0].u;
8863 ValIn.au32[1] = paTests[iTest].ar32InVal[1].u;
8864
8865 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
8866 pfn(&fMxcsr, &ValOut.u, ValIn.u);
8867 if ( fMxcsr != paTests[iTest].fMxcsrOut
8868 || ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8869 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8870 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s \n"
8871 "%s -> mxcsr=%#08x %RI32'%RI32\n"
8872 "%s expected %#08x %RI32'%RI32%s%s (%s)\n",
8873 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8874 FormatR32(&paTests[iTest].ar32InVal[0]), FormatR32(&paTests[iTest].ar32InVal[1]),
8875 iVar ? " " : "", fMxcsr,
8876 ValOut.ai32[0], ValOut.ai32[1],
8877 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8878 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8879 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8880 ( ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8881 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8882 ? " - val" : "",
8883 FormatMxcsr(paTests[iTest].fMxcsrIn));
8884 }
8885 }
8886 }
8887}
8888
8889
8890/*
8891 * SSE 4.2 pcmpxstrx instructions.
8892 */
/* Sub-test descriptor type for pcmpistri: test entry type SSE_PCMPISTRI_TEST_T, worker type PFNIEMAIMPLPCMPISTRIU128IMM8. */
TYPEDEF_SUBTEST_TYPE(SSE_PCMPISTRI_T, SSE_PCMPISTRI_TEST_T, PFNIEMAIMPLPCMPISTRIU128IMM8);

/* Sub-tests iterated by SseComparePcmpistriGenerate and SseComparePcmpistriTest
   (they use the pszName, pfn, pfnNative, paTests and pcTests members). */
static const SSE_PCMPISTRI_T g_aSsePcmpistri[] =
{
    ENTRY_BIN_SSE_OPT(pcmpistri_u128),
};
8899
8900#ifdef TSTIEMAIMPL_WITH_GENERATOR
8901static RTEXITCODE SseComparePcmpistriGenerate(const char *pszDataFileFmt, uint32_t cTests)
8902{
8903 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
8904
8905 static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
8906 {
8907 { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
8908 /** @todo More specials. */
8909 };
8910
8911 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistri); iFn++)
8912 {
8913 PFNIEMAIMPLPCMPISTRIU128IMM8 const pfn = g_aSsePcmpistri[iFn].pfnNative ? g_aSsePcmpistri[iFn].pfnNative : g_aSsePcmpistri[iFn].pfn;
8914
8915 PRTSTREAM pStrmOut = NULL;
8916 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSsePcmpistri[iFn].pszName);
8917 if (RT_FAILURE(rc))
8918 {
8919 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSsePcmpistri[iFn].pszName, rc);
8920 return RTEXITCODE_FAILURE;
8921 }
8922
8923 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
8924 {
8925 SSE_PCMPISTRI_TEST_T TestData; RT_ZERO(TestData);
8926
8927 TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
8928 TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;
8929
8930 IEMPCMPISTRXSRC TestVal;
8931 TestVal.uSrc1 = TestData.InVal1.uXmm;
8932 TestVal.uSrc2 = TestData.InVal2.uXmm;
8933
8934 uint32_t const fEFlagsIn = RandEFlags();
8935 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
8936 {
8937 uint32_t fEFlagsOut = fEFlagsIn;
8938 pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
8939 TestData.fEFlagsIn = fEFlagsIn;
8940 TestData.fEFlagsOut = fEFlagsOut;
8941 TestData.bImm = (uint8_t)u16Imm;
8942 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8943 }
8944
8945 /* Repeat the test with the input value being the same. */
8946 TestData.InVal2.uXmm = TestData.InVal1.uXmm;
8947 TestVal.uSrc1 = TestData.InVal1.uXmm;
8948 TestVal.uSrc2 = TestData.InVal2.uXmm;
8949
8950 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
8951 {
8952 uint32_t fEFlagsOut = fEFlagsIn;
8953 pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
8954 TestData.fEFlagsIn = fEFlagsIn;
8955 TestData.fEFlagsOut = fEFlagsOut;
8956 TestData.bImm = (uint8_t)u16Imm;
8957 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
8958 }
8959 }
8960 rc = RTStrmClose(pStrmOut);
8961 if (RT_FAILURE(rc))
8962 {
8963 RTMsgError("Failed to close data file for %s: %Rrc", g_aSsePcmpistri[iFn].pszName, rc);
8964 return RTEXITCODE_FAILURE;
8965 }
8966 }
8967
8968 return RTEXITCODE_SUCCESS;
8969}
8970#endif
8971
8972static void SseComparePcmpistriTest(void)
8973{
8974 X86FXSTATE State;
8975 RT_ZERO(State);
8976
8977 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistri); iFn++)
8978 {
8979 if (!SubTestAndCheckIfEnabled(g_aSsePcmpistri[iFn].pszName))
8980 continue;
8981
8982 uint32_t const cTests = *g_aSsePcmpistri[iFn].pcTests;
8983 SSE_PCMPISTRI_TEST_T const * const paTests = g_aSsePcmpistri[iFn].paTests;
8984 PFNIEMAIMPLPCMPISTRIU128IMM8 pfn = g_aSsePcmpistri[iFn].pfn;
8985 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpistri[iFn]);
8986 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8987 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8988 {
8989 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8990 {
8991 IEMPCMPISTRXSRC TestVal;
8992 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
8993 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
8994
8995 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
8996 uint32_t u32EcxOut = 0;
8997 pfn(&u32EcxOut, &fEFlags, &TestVal, paTests[iTest].bImm);
8998 if ( fEFlags != paTests[iTest].fEFlagsOut
8999 || u32EcxOut != paTests[iTest].u32EcxOut)
9000 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s in2=%s bImm=%#x\n"
9001 "%s -> efl=%#08x %RU32\n"
9002 "%s expected %#08x %RU32%s%s\n",
9003 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9004 FormatU128(&paTests[iTest].InVal1.uXmm), FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].bImm,
9005 iVar ? " " : "", fEFlags, u32EcxOut,
9006 iVar ? " " : "", paTests[iTest].fEFlagsOut, paTests[iTest].u32EcxOut,
9007 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9008 (u32EcxOut != paTests[iTest].u32EcxOut) ? " - val" : "");
9009 }
9010 }
9011 }
9012}
9013
9014
/* Sub-test descriptor type for pcmpistrm: test entry type SSE_PCMPISTRM_TEST_T, worker type PFNIEMAIMPLPCMPISTRMU128IMM8. */
TYPEDEF_SUBTEST_TYPE(SSE_PCMPISTRM_T, SSE_PCMPISTRM_TEST_T, PFNIEMAIMPLPCMPISTRMU128IMM8);

/* Sub-tests iterated by SseComparePcmpistrmGenerate and SseComparePcmpistrmTest
   (they use the pszName, pfn, pfnNative, paTests and pcTests members). */
static const SSE_PCMPISTRM_T g_aSsePcmpistrm[] =
{
    ENTRY_BIN_SSE_OPT(pcmpistrm_u128),
};
9021
9022#ifdef TSTIEMAIMPL_WITH_GENERATOR
9023static RTEXITCODE SseComparePcmpistrmGenerate(const char *pszDataFileFmt, uint32_t cTests)
9024{
9025 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
9026
9027 static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
9028 {
9029 { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
9030 /** @todo More specials. */
9031 };
9032
9033 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistrm); iFn++)
9034 {
9035 PFNIEMAIMPLPCMPISTRMU128IMM8 const pfn = g_aSsePcmpistrm[iFn].pfnNative ? g_aSsePcmpistrm[iFn].pfnNative : g_aSsePcmpistrm[iFn].pfn;
9036
9037 PRTSTREAM pStrmOut = NULL;
9038 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSsePcmpistrm[iFn].pszName);
9039 if (RT_FAILURE(rc))
9040 {
9041 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSsePcmpistrm[iFn].pszName, rc);
9042 return RTEXITCODE_FAILURE;
9043 }
9044
9045 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
9046 {
9047 SSE_PCMPISTRM_TEST_T TestData; RT_ZERO(TestData);
9048
9049 TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
9050 TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;
9051
9052 IEMPCMPISTRXSRC TestVal;
9053 TestVal.uSrc1 = TestData.InVal1.uXmm;
9054 TestVal.uSrc2 = TestData.InVal2.uXmm;
9055
9056 uint32_t const fEFlagsIn = RandEFlags();
9057 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9058 {
9059 uint32_t fEFlagsOut = fEFlagsIn;
9060 pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9061 TestData.fEFlagsIn = fEFlagsIn;
9062 TestData.fEFlagsOut = fEFlagsOut;
9063 TestData.bImm = (uint8_t)u16Imm;
9064 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
9065 }
9066
9067 /* Repeat the test with the input value being the same. */
9068 TestData.InVal2.uXmm = TestData.InVal1.uXmm;
9069 TestVal.uSrc1 = TestData.InVal1.uXmm;
9070 TestVal.uSrc2 = TestData.InVal2.uXmm;
9071
9072 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9073 {
9074 uint32_t fEFlagsOut = fEFlagsIn;
9075 pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9076 TestData.fEFlagsIn = fEFlagsIn;
9077 TestData.fEFlagsOut = fEFlagsOut;
9078 TestData.bImm = (uint8_t)u16Imm;
9079 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
9080 }
9081 }
9082 rc = RTStrmClose(pStrmOut);
9083 if (RT_FAILURE(rc))
9084 {
9085 RTMsgError("Failed to close data file for %s: %Rrc", g_aSsePcmpistrm[iFn].pszName, rc);
9086 return RTEXITCODE_FAILURE;
9087 }
9088 }
9089
9090 return RTEXITCODE_SUCCESS;
9091}
9092#endif
9093
9094static void SseComparePcmpistrmTest(void)
9095{
9096 X86FXSTATE State;
9097 RT_ZERO(State);
9098
9099 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpistrm); iFn++)
9100 {
9101 if (!SubTestAndCheckIfEnabled(g_aSsePcmpistrm[iFn].pszName))
9102 continue;
9103
9104 uint32_t const cTests = *g_aSsePcmpistrm[iFn].pcTests;
9105 SSE_PCMPISTRM_TEST_T const * const paTests = g_aSsePcmpistrm[iFn].paTests;
9106 PFNIEMAIMPLPCMPISTRMU128IMM8 pfn = g_aSsePcmpistrm[iFn].pfn;
9107 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpistrm[iFn]);
9108 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9109 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9110 {
9111 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
9112 {
9113 IEMPCMPISTRXSRC TestVal;
9114 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9115 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9116
9117 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9118 RTUINT128U OutVal;
9119 pfn(&OutVal, &fEFlags, &TestVal, paTests[iTest].bImm);
9120 if ( fEFlags != paTests[iTest].fEFlagsOut
9121 || OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9122 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo)
9123 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s in2=%s bImm=%#x\n"
9124 "%s -> efl=%#08x %s\n"
9125 "%s expected %#08x %s%s%s\n",
9126 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9127 FormatU128(&paTests[iTest].InVal1.uXmm), FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].bImm,
9128 iVar ? " " : "", fEFlags, FormatU128(&OutVal),
9129 iVar ? " " : "", paTests[iTest].fEFlagsOut, FormatU128(&paTests[iTest].OutVal.uXmm),
9130 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9131 ( OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9132 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo) ? " - val" : "");
9133 }
9134 }
9135 }
9136}
9137
9138
/* Sub-test descriptor type for pcmpestri: test entry type SSE_PCMPESTRI_TEST_T, worker type PFNIEMAIMPLPCMPESTRIU128IMM8. */
TYPEDEF_SUBTEST_TYPE(SSE_PCMPESTRI_T, SSE_PCMPESTRI_TEST_T, PFNIEMAIMPLPCMPESTRIU128IMM8);

/* Sub-tests iterated by SseComparePcmpestriGenerate and SseComparePcmpestriTest
   (they use the pszName, pfn, pfnNative, paTests and pcTests members). */
static const SSE_PCMPESTRI_T g_aSsePcmpestri[] =
{
    ENTRY_BIN_SSE_OPT(pcmpestri_u128),
};
9145
9146#ifdef TSTIEMAIMPL_WITH_GENERATOR
9147static RTEXITCODE SseComparePcmpestriGenerate(const char *pszDataFileFmt, uint32_t cTests)
9148{
9149 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
9150
9151 static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
9152 {
9153 { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
9154 /** @todo More specials. */
9155 };
9156
9157 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestri); iFn++)
9158 {
9159 PFNIEMAIMPLPCMPESTRIU128IMM8 const pfn = g_aSsePcmpestri[iFn].pfnNative ? g_aSsePcmpestri[iFn].pfnNative : g_aSsePcmpestri[iFn].pfn;
9160
9161 PRTSTREAM pStrmOut = NULL;
9162 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSsePcmpestri[iFn].pszName);
9163 if (RT_FAILURE(rc))
9164 {
9165 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSsePcmpestri[iFn].pszName, rc);
9166 return RTEXITCODE_FAILURE;
9167 }
9168
9169 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
9170 {
9171 SSE_PCMPESTRI_TEST_T TestData; RT_ZERO(TestData);
9172
9173 TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
9174 TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;
9175
9176 for (int64_t i64Rax = -20; i64Rax < 20; i64Rax += 20)
9177 for (int64_t i64Rdx = -20; i64Rdx < 20; i64Rdx += 20)
9178 {
9179 TestData.u64Rax = (uint64_t)i64Rax;
9180 TestData.u64Rdx = (uint64_t)i64Rdx;
9181
9182 IEMPCMPESTRXSRC TestVal;
9183 TestVal.uSrc1 = TestData.InVal1.uXmm;
9184 TestVal.uSrc2 = TestData.InVal2.uXmm;
9185 TestVal.u64Rax = TestData.u64Rax;
9186 TestVal.u64Rdx = TestData.u64Rdx;
9187
9188 uint32_t const fEFlagsIn = RandEFlags();
9189 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9190 {
9191 uint32_t fEFlagsOut = fEFlagsIn;
9192 pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9193 TestData.fEFlagsIn = fEFlagsIn;
9194 TestData.fEFlagsOut = fEFlagsOut;
9195 TestData.bImm = (uint8_t)u16Imm;
9196 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
9197 }
9198
9199 /* Repeat the test with the input value being the same. */
9200 TestData.InVal2.uXmm = TestData.InVal1.uXmm;
9201 TestVal.uSrc1 = TestData.InVal1.uXmm;
9202 TestVal.uSrc2 = TestData.InVal2.uXmm;
9203
9204 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9205 {
9206 uint32_t fEFlagsOut = fEFlagsIn;
9207 pfn(&TestData.u32EcxOut, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9208 TestData.fEFlagsIn = fEFlagsIn;
9209 TestData.fEFlagsOut = fEFlagsOut;
9210 TestData.bImm = (uint8_t)u16Imm;
9211 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
9212 }
9213 }
9214 }
9215 rc = RTStrmClose(pStrmOut);
9216 if (RT_FAILURE(rc))
9217 {
9218 RTMsgError("Failed to close data file for %s: %Rrc", g_aSsePcmpestri[iFn].pszName, rc);
9219 return RTEXITCODE_FAILURE;
9220 }
9221 }
9222
9223 return RTEXITCODE_SUCCESS;
9224}
9225#endif
9226
9227static void SseComparePcmpestriTest(void)
9228{
9229 X86FXSTATE State;
9230 RT_ZERO(State);
9231
9232 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestri); iFn++)
9233 {
9234 if (!SubTestAndCheckIfEnabled(g_aSsePcmpestri[iFn].pszName))
9235 continue;
9236
9237 uint32_t const cTests = *g_aSsePcmpestri[iFn].pcTests;
9238 SSE_PCMPESTRI_TEST_T const * const paTests = g_aSsePcmpestri[iFn].paTests;
9239 PFNIEMAIMPLPCMPESTRIU128IMM8 pfn = g_aSsePcmpestri[iFn].pfn;
9240 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpestri[iFn]);
9241 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9242 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9243 {
9244 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
9245 {
9246 IEMPCMPESTRXSRC TestVal;
9247 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9248 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9249 TestVal.u64Rax = paTests[iTest].u64Rax;
9250 TestVal.u64Rdx = paTests[iTest].u64Rdx;
9251
9252 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9253 uint32_t u32EcxOut = 0;
9254 pfn(&u32EcxOut, &fEFlags, &TestVal, paTests[iTest].bImm);
9255 if ( fEFlags != paTests[iTest].fEFlagsOut
9256 || u32EcxOut != paTests[iTest].u32EcxOut)
9257 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s rax1=%RI64 in2=%s rdx2=%RI64 bImm=%#x\n"
9258 "%s -> efl=%#08x %RU32\n"
9259 "%s expected %#08x %RU32%s%s\n",
9260 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9261 FormatU128(&paTests[iTest].InVal1.uXmm), paTests[iTest].u64Rax,
9262 FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].u64Rdx,
9263 paTests[iTest].bImm,
9264 iVar ? " " : "", fEFlags, u32EcxOut,
9265 iVar ? " " : "", paTests[iTest].fEFlagsOut, paTests[iTest].u32EcxOut,
9266 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9267 (u32EcxOut != paTests[iTest].u32EcxOut) ? " - val" : "");
9268 }
9269 }
9270 }
9271}
9272
9273
/* Sub-test descriptor type for pcmpestrm: test entry type SSE_PCMPESTRM_TEST_T, worker type PFNIEMAIMPLPCMPESTRMU128IMM8. */
TYPEDEF_SUBTEST_TYPE(SSE_PCMPESTRM_T, SSE_PCMPESTRM_TEST_T, PFNIEMAIMPLPCMPESTRMU128IMM8);

/* Sub-tests iterated by SseComparePcmpestrmGenerate and SseComparePcmpestrmTest
   (they use the pszName, pfn, pfnNative, paTests and pcTests members). */
static const SSE_PCMPESTRM_T g_aSsePcmpestrm[] =
{
    ENTRY_BIN_SSE_OPT(pcmpestrm_u128),
};
9280
9281#ifdef TSTIEMAIMPL_WITH_GENERATOR
9282static RTEXITCODE SseComparePcmpestrmGenerate(const char *pszDataFileFmt, uint32_t cTests)
9283{
9284 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
9285
9286 static struct { RTUINT128U uSrc1; RTUINT128U uSrc2; } const s_aSpecials[] =
9287 {
9288 { RTUINT128_INIT_C(0, 0), RTUINT128_INIT_C(0, 0) },
9289 /** @todo More specials. */
9290 };
9291
9292 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestrm); iFn++)
9293 {
9294 PFNIEMAIMPLPCMPESTRMU128IMM8 const pfn = g_aSsePcmpestrm[iFn].pfnNative ? g_aSsePcmpestrm[iFn].pfnNative : g_aSsePcmpestrm[iFn].pfn;
9295
9296 PRTSTREAM pStrmOut = NULL;
9297 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSsePcmpestrm[iFn].pszName);
9298 if (RT_FAILURE(rc))
9299 {
9300 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSsePcmpestrm[iFn].pszName, rc);
9301 return RTEXITCODE_FAILURE;
9302 }
9303
9304 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
9305 {
9306 SSE_PCMPESTRM_TEST_T TestData; RT_ZERO(TestData);
9307
9308 TestData.InVal1.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc1;
9309 TestData.InVal2.uXmm = iTest < cTests ? RandU128() : s_aSpecials[iTest - cTests].uSrc2;
9310
9311 for (int64_t i64Rax = -20; i64Rax < 20; i64Rax += 20)
9312 for (int64_t i64Rdx = -20; i64Rdx < 20; i64Rdx += 20)
9313 {
9314 TestData.u64Rax = (uint64_t)i64Rax;
9315 TestData.u64Rdx = (uint64_t)i64Rdx;
9316
9317 IEMPCMPESTRXSRC TestVal;
9318 TestVal.uSrc1 = TestData.InVal1.uXmm;
9319 TestVal.uSrc2 = TestData.InVal2.uXmm;
9320 TestVal.u64Rax = TestData.u64Rax;
9321 TestVal.u64Rdx = TestData.u64Rdx;
9322
9323 uint32_t const fEFlagsIn = RandEFlags();
9324 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9325 {
9326 uint32_t fEFlagsOut = fEFlagsIn;
9327 pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9328 TestData.fEFlagsIn = fEFlagsIn;
9329 TestData.fEFlagsOut = fEFlagsOut;
9330 TestData.bImm = (uint8_t)u16Imm;
9331 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
9332 }
9333
9334 /* Repeat the test with the input value being the same. */
9335 TestData.InVal2.uXmm = TestData.InVal1.uXmm;
9336 TestVal.uSrc1 = TestData.InVal1.uXmm;
9337 TestVal.uSrc2 = TestData.InVal2.uXmm;
9338
9339 for (uint16_t u16Imm = 0; u16Imm < 256; u16Imm++)
9340 {
9341 uint32_t fEFlagsOut = fEFlagsIn;
9342 pfn(&TestData.OutVal.uXmm, &fEFlagsOut, &TestVal, (uint8_t)u16Imm);
9343 TestData.fEFlagsIn = fEFlagsIn;
9344 TestData.fEFlagsOut = fEFlagsOut;
9345 TestData.bImm = (uint8_t)u16Imm;
9346 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
9347 }
9348 }
9349 }
9350 rc = RTStrmClose(pStrmOut);
9351 if (RT_FAILURE(rc))
9352 {
9353 RTMsgError("Failed to close data file for %s: %Rrc", g_aSsePcmpestrm[iFn].pszName, rc);
9354 return RTEXITCODE_FAILURE;
9355 }
9356 }
9357
9358 return RTEXITCODE_SUCCESS;
9359}
9360#endif
9361
9362static void SseComparePcmpestrmTest(void)
9363{
9364 X86FXSTATE State;
9365 RT_ZERO(State);
9366
9367 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSsePcmpestrm); iFn++)
9368 {
9369 if (!SubTestAndCheckIfEnabled(g_aSsePcmpestrm[iFn].pszName))
9370 continue;
9371
9372 uint32_t const cTests = *g_aSsePcmpestrm[iFn].pcTests;
9373 SSE_PCMPESTRM_TEST_T const * const paTests = g_aSsePcmpestrm[iFn].paTests;
9374 PFNIEMAIMPLPCMPESTRMU128IMM8 pfn = g_aSsePcmpestrm[iFn].pfn;
9375 uint32_t const cVars = COUNT_VARIATIONS(g_aSsePcmpestrm[iFn]);
9376 if (!cTests) RTTestSkipped(g_hTest, "no tests");
9377 for (uint32_t iVar = 0; iVar < cVars; iVar++)
9378 {
9379 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
9380 {
9381 IEMPCMPESTRXSRC TestVal;
9382 TestVal.uSrc1 = paTests[iTest].InVal1.uXmm;
9383 TestVal.uSrc2 = paTests[iTest].InVal2.uXmm;
9384 TestVal.u64Rax = paTests[iTest].u64Rax;
9385 TestVal.u64Rdx = paTests[iTest].u64Rdx;
9386
9387 uint32_t fEFlags = paTests[iTest].fEFlagsIn;
9388 RTUINT128U OutVal;
9389 pfn(&OutVal, &fEFlags, &TestVal, paTests[iTest].bImm);
9390 if ( fEFlags != paTests[iTest].fEFlagsOut
9391 || OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9392 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo)
9393 RTTestFailed(g_hTest, "#%04u%s: efl=%#08x in1=%s rax1=%RI64 in2=%s rdx2=%RI64 bImm=%#x\n"
9394 "%s -> efl=%#08x %s\n"
9395 "%s expected %#08x %s%s%s\n",
9396 iTest, iVar ? "/n" : "", paTests[iTest].fEFlagsIn,
9397 FormatU128(&paTests[iTest].InVal1.uXmm), paTests[iTest].u64Rax,
9398 FormatU128(&paTests[iTest].InVal2.uXmm), paTests[iTest].u64Rdx,
9399 paTests[iTest].bImm,
9400 iVar ? " " : "", fEFlags, FormatU128(&OutVal),
9401 iVar ? " " : "", paTests[iTest].fEFlagsOut, FormatU128(&paTests[iTest].OutVal.uXmm),
9402 EFlagsDiff(fEFlags, paTests[iTest].fEFlagsOut),
9403 ( OutVal.s.Hi != paTests[iTest].OutVal.uXmm.s.Hi
9404 || OutVal.s.Lo != paTests[iTest].OutVal.uXmm.s.Lo) ? " - val" : "");
9405 }
9406 }
9407 }
9408}
9409
9410
9411
9412int main(int argc, char **argv)
9413{
9414 int rc = RTR3InitExe(argc, &argv, 0);
9415 if (RT_FAILURE(rc))
9416 return RTMsgInitFailure(rc);
9417
9418 /*
9419 * Determin the host CPU.
9420 * If not using the IEMAllAImpl.asm code, this will be set to Intel.
9421 */
9422#if (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && !defined(IEM_WITHOUT_ASSEMBLY)
9423 g_idxCpuEflFlavour = ASMIsAmdCpu() || ASMIsHygonCpu()
9424 ? IEMTARGETCPU_EFL_BEHAVIOR_AMD
9425 : IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
9426#else
9427 g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
9428#endif
9429
9430 /*
9431 * Parse arguments.
9432 */
9433 enum { kModeNotSet, kModeTest, kModeGenerate }
9434 enmMode = kModeNotSet;
9435 bool fInt = true;
9436 bool fFpuLdSt = true;
9437 bool fFpuBinary1 = true;
9438 bool fFpuBinary2 = true;
9439 bool fFpuOther = true;
9440 bool fCpuData = true;
9441 bool fCommonData = true;
9442 bool fSseFpBinary = true;
9443 bool fSseFpOther = true;
9444 bool fSsePcmpxstrx = true;
9445 uint32_t const cDefaultTests = 96;
9446 uint32_t cTests = cDefaultTests;
9447 RTGETOPTDEF const s_aOptions[] =
9448 {
9449 // mode:
9450 { "--generate", 'g', RTGETOPT_REQ_NOTHING },
9451 { "--test", 't', RTGETOPT_REQ_NOTHING },
9452 { "--benchmark", 'b', RTGETOPT_REQ_NOTHING },
9453 // test selection (both)
9454 { "--all", 'a', RTGETOPT_REQ_NOTHING },
9455 { "--none", 'z', RTGETOPT_REQ_NOTHING },
9456 { "--zap", 'z', RTGETOPT_REQ_NOTHING },
9457 { "--fpu-ld-st", 'F', RTGETOPT_REQ_NOTHING }, /* FPU stuff is upper case */
9458 { "--fpu-load-store", 'F', RTGETOPT_REQ_NOTHING },
9459 { "--fpu-binary-1", 'B', RTGETOPT_REQ_NOTHING },
9460 { "--fpu-binary-2", 'P', RTGETOPT_REQ_NOTHING },
9461 { "--fpu-other", 'O', RTGETOPT_REQ_NOTHING },
9462 { "--sse-fp-binary", 'S', RTGETOPT_REQ_NOTHING },
9463 { "--sse-fp-other", 'T', RTGETOPT_REQ_NOTHING },
9464 { "--sse-pcmpxstrx", 'C', RTGETOPT_REQ_NOTHING },
9465 { "--int", 'i', RTGETOPT_REQ_NOTHING },
9466 { "--include", 'I', RTGETOPT_REQ_STRING },
9467 { "--exclude", 'X', RTGETOPT_REQ_STRING },
9468 // generation parameters
9469 { "--common", 'm', RTGETOPT_REQ_NOTHING },
9470 { "--cpu", 'c', RTGETOPT_REQ_NOTHING },
9471 { "--number-of-tests", 'n', RTGETOPT_REQ_UINT32 },
9472 { "--verbose", 'v', RTGETOPT_REQ_NOTHING },
9473 { "--quiet", 'q', RTGETOPT_REQ_NOTHING },
9474 };
9475
9476 RTGETOPTSTATE State;
9477 rc = RTGetOptInit(&State, argc, argv, s_aOptions, RT_ELEMENTS(s_aOptions), 1, 0);
9478 AssertRCReturn(rc, RTEXITCODE_FAILURE);
9479
9480 RTGETOPTUNION ValueUnion;
9481 while ((rc = RTGetOpt(&State, &ValueUnion)))
9482 {
9483 switch (rc)
9484 {
9485 case 'g':
9486 enmMode = kModeGenerate;
9487 g_cPicoSecBenchmark = 0;
9488 break;
9489 case 't':
9490 enmMode = kModeTest;
9491 g_cPicoSecBenchmark = 0;
9492 break;
9493 case 'b':
9494 enmMode = kModeTest;
9495 g_cPicoSecBenchmark += RT_NS_1SEC / 2 * UINT64_C(1000); /* half a second in pico seconds */
9496 break;
9497
9498 case 'a':
9499 fCpuData = true;
9500 fCommonData = true;
9501 fInt = true;
9502 fFpuLdSt = true;
9503 fFpuBinary1 = true;
9504 fFpuBinary2 = true;
9505 fFpuOther = true;
9506 fSseFpBinary = true;
9507 fSseFpOther = true;
9508 fSsePcmpxstrx = true;
9509 break;
9510 case 'z':
9511 fCpuData = false;
9512 fCommonData = false;
9513 fInt = false;
9514 fFpuLdSt = false;
9515 fFpuBinary1 = false;
9516 fFpuBinary2 = false;
9517 fFpuOther = false;
9518 fSseFpBinary = false;
9519 fSseFpOther = false;
9520 fSsePcmpxstrx = false;
9521 break;
9522
9523 case 'F':
9524 fFpuLdSt = true;
9525 break;
9526 case 'O':
9527 fFpuOther = true;
9528 break;
9529 case 'B':
9530 fFpuBinary1 = true;
9531 break;
9532 case 'P':
9533 fFpuBinary2 = true;
9534 break;
9535 case 'S':
9536 fSseFpBinary = true;
9537 break;
9538 case 'T':
9539 fSseFpOther = true;
9540 break;
9541 case 'C':
9542 fSsePcmpxstrx = true;
9543 break;
9544 case 'i':
9545 fInt = true;
9546 break;
9547
9548 case 'I':
9549 if (g_cIncludeTestPatterns >= RT_ELEMENTS(g_apszIncludeTestPatterns))
9550 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many include patterns (max %zu)",
9551 RT_ELEMENTS(g_apszIncludeTestPatterns));
9552 g_apszIncludeTestPatterns[g_cIncludeTestPatterns++] = ValueUnion.psz;
9553 break;
9554 case 'X':
9555 if (g_cExcludeTestPatterns >= RT_ELEMENTS(g_apszExcludeTestPatterns))
9556 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many exclude patterns (max %zu)",
9557 RT_ELEMENTS(g_apszExcludeTestPatterns));
9558 g_apszExcludeTestPatterns[g_cExcludeTestPatterns++] = ValueUnion.psz;
9559 break;
9560
9561 case 'm':
9562 fCommonData = true;
9563 break;
9564 case 'c':
9565 fCpuData = true;
9566 break;
9567 case 'n':
9568 cTests = ValueUnion.u32;
9569 break;
9570
9571 case 'q':
9572 g_cVerbosity = 0;
9573 break;
9574 case 'v':
9575 g_cVerbosity++;
9576 break;
9577
9578 case 'h':
9579 RTPrintf("usage: %s <-g|-t> [options]\n"
9580 "\n"
9581 "Mode:\n"
9582 " -g, --generate\n"
9583 " Generate test data.\n"
9584 " -t, --test\n"
9585 " Execute tests.\n"
9586 " -b, --benchmark\n"
9587 " Execute tests and do 1/2 seconds of benchmarking.\n"
9588 " Repeating the option increases the benchmark duration by 0.5 seconds.\n"
9589 "\n"
9590 "Test selection (both modes):\n"
9591 " -a, --all\n"
9592 " Enable all tests and generated test data. (default)\n"
9593 " -z, --zap, --none\n"
9594 " Disable all tests and test data types.\n"
9595 " -i, --int\n"
9596 " Enable non-FPU tests.\n"
9597 " -F, --fpu-ld-st\n"
9598 " Enable FPU load and store tests.\n"
9599 " -B, --fpu-binary-1\n"
9600 " Enable FPU binary 80-bit FP tests.\n"
9601 " -P, --fpu-binary-2\n"
9602 " Enable FPU binary 64- and 32-bit FP tests.\n"
9603 " -O, --fpu-other\n"
9604 " Enable FPU binary 64- and 32-bit FP tests.\n"
9605 " -S, --sse-fp-binary\n"
9606 " Enable SSE binary 64- and 32-bit FP tests.\n"
9607 " -T, --sse-fp-other\n"
9608 " Enable misc SSE 64- and 32-bit FP tests.\n"
9609 " -C, --sse-pcmpxstrx\n"
9610 " Enable SSE pcmpxstrx tests.\n"
9611 " -I,--include=<test-patter>\n"
9612 " Enable tests matching the given pattern.\n"
9613 " -X,--exclude=<test-patter>\n"
9614 " Skip tests matching the given pattern (overrides --include).\n"
9615 "\n"
9616 "Generation:\n"
9617 " -m, --common\n"
9618 " Enable generating common test data.\n"
9619 " -c, --only-cpu\n"
9620 " Enable generating CPU specific test data.\n"
9621 " -n, --number-of-test <count>\n"
9622 " Number of tests to generate. Default: %u\n"
9623 "\n"
9624 "Other:\n"
9625 " -v, --verbose\n"
9626 " -q, --quiet\n"
9627 " Noise level. Default: --quiet\n"
9628 , argv[0], cDefaultTests);
9629 return RTEXITCODE_SUCCESS;
9630 default:
9631 return RTGetOptPrintError(rc, &ValueUnion);
9632 }
9633 }
9634
9635 /*
9636 * Generate data?
9637 */
9638 if (enmMode == kModeGenerate)
9639 {
9640#ifdef TSTIEMAIMPL_WITH_GENERATOR
9641 char szCpuDesc[256] = {0};
9642 RTMpGetDescription(NIL_RTCPUID, szCpuDesc, sizeof(szCpuDesc));
9643 const char * const pszCpuType = g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD ? "Amd" : "Intel";
9644# if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
9645 const char * const pszBitBucket = "NUL";
9646# else
9647 const char * const pszBitBucket = "/dev/null";
9648# endif
9649
9650 if (cTests == 0)
9651 cTests = cDefaultTests;
9652 g_cZeroDstTests = RT_MIN(cTests / 16, 32);
9653 g_cZeroSrcTests = g_cZeroDstTests * 2;
9654
9655 if (fInt)
9656 {
9657 const char *pszDataFile = fCommonData ? "tstIEMAImplDataInt.cpp" : pszBitBucket;
9658 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
9659 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
9660 ? "tstIEMAImplDataInt-Amd.cpp" : "tstIEMAImplDataInt-Intel.cpp";
9661 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
9662 if (!pStrmData || !pStrmDataCpu)
9663 return RTEXITCODE_FAILURE;
9664
9665 BinU8Generate( pStrmData, pStrmDataCpu, cTests);
9666 BinU16Generate(pStrmData, pStrmDataCpu, cTests);
9667 BinU32Generate(pStrmData, pStrmDataCpu, cTests);
9668 BinU64Generate(pStrmData, pStrmDataCpu, cTests);
9669 ShiftDblGenerate(pStrmDataCpu, RT_MAX(cTests, 128));
9670 UnaryGenerate(pStrmData, cTests);
9671 ShiftGenerate(pStrmDataCpu, cTests);
9672 MulDivGenerate(pStrmDataCpu, cTests);
9673
9674 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
9675 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
9676 if (rcExit != RTEXITCODE_SUCCESS)
9677 return rcExit;
9678 }
9679
9680 if (fFpuLdSt)
9681 {
9682 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuLdSt.cpp" : pszBitBucket;
9683 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
9684 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
9685 ? "tstIEMAImplDataFpuLdSt-Amd.cpp" : "tstIEMAImplDataFpuLdSt-Intel.cpp";
9686 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
9687 if (!pStrmData || !pStrmDataCpu)
9688 return RTEXITCODE_FAILURE;
9689
9690 FpuLdConstGenerate(pStrmData, cTests);
9691 FpuLdIntGenerate(pStrmData, cTests);
9692 FpuLdD80Generate(pStrmData, cTests);
9693 FpuStIntGenerate(pStrmData, pStrmDataCpu, cTests);
9694 FpuStD80Generate(pStrmData, cTests);
9695 uint32_t const cTests2 = RT_MAX(cTests, 384); /* need better coverage for the next ones. */
9696 FpuLdMemGenerate(pStrmData, cTests2);
9697 FpuStMemGenerate(pStrmData, cTests2);
9698
9699 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
9700 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
9701 if (rcExit != RTEXITCODE_SUCCESS)
9702 return rcExit;
9703 }
9704
9705 if (fFpuBinary1)
9706 {
9707 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary1.cpp" : pszBitBucket;
9708 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
9709 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
9710 ? "tstIEMAImplDataFpuBinary1-Amd.cpp" : "tstIEMAImplDataFpuBinary1-Intel.cpp";
9711 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
9712 if (!pStrmData || !pStrmDataCpu)
9713 return RTEXITCODE_FAILURE;
9714
9715 FpuBinaryR80Generate(pStrmData, pStrmDataCpu, cTests);
9716 FpuBinaryFswR80Generate(pStrmData, cTests);
9717 FpuBinaryEflR80Generate(pStrmData, cTests);
9718
9719 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
9720 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
9721 if (rcExit != RTEXITCODE_SUCCESS)
9722 return rcExit;
9723 }
9724
9725 if (fFpuBinary2)
9726 {
9727 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary2.cpp" : pszBitBucket;
9728 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
9729 const char *pszDataCpuFile = pszBitBucket; /*!fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
9730 ? "tstIEMAImplDataFpuBinary2-Amd.cpp" : "tstIEMAImplDataFpuBinary2-Intel.cpp"; */
9731 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
9732 if (!pStrmData || !pStrmDataCpu)
9733 return RTEXITCODE_FAILURE;
9734
9735 FpuBinaryR64Generate(pStrmData, cTests);
9736 FpuBinaryR32Generate(pStrmData, cTests);
9737 FpuBinaryI32Generate(pStrmData, cTests);
9738 FpuBinaryI16Generate(pStrmData, cTests);
9739 FpuBinaryFswR64Generate(pStrmData, cTests);
9740 FpuBinaryFswR32Generate(pStrmData, cTests);
9741 FpuBinaryFswI32Generate(pStrmData, cTests);
9742 FpuBinaryFswI16Generate(pStrmData, cTests);
9743
9744 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
9745 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
9746 if (rcExit != RTEXITCODE_SUCCESS)
9747 return rcExit;
9748 }
9749
9750 if (fFpuOther)
9751 {
9752 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuOther.cpp" : pszBitBucket;
9753 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
9754 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
9755 ? "tstIEMAImplDataFpuOther-Amd.cpp" : "tstIEMAImplDataFpuOther-Intel.cpp";
9756 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
9757 if (!pStrmData || !pStrmDataCpu)
9758 return RTEXITCODE_FAILURE;
9759
9760 FpuUnaryR80Generate(pStrmData, pStrmDataCpu, cTests);
9761 FpuUnaryFswR80Generate(pStrmData, pStrmDataCpu, cTests);
9762 FpuUnaryTwoR80Generate(pStrmData, pStrmDataCpu, cTests);
9763
9764 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
9765 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
9766 if (rcExit != RTEXITCODE_SUCCESS)
9767 return rcExit;
9768 }
9769
9770 if (fSseFpBinary)
9771 {
9772 const char *pszDataFileFmt = fCommonData ? "tstIEMAImplDataSseBinary-%s.bin" : pszBitBucket;
9773
9774 RTEXITCODE rcExit = SseBinaryR32Generate(pszDataFileFmt, cTests);
9775 if (rcExit == RTEXITCODE_SUCCESS)
9776 rcExit = SseBinaryR64Generate(pszDataFileFmt, cTests);
9777 if (rcExit == RTEXITCODE_SUCCESS)
9778 rcExit = SseBinaryU128R32Generate(pszDataFileFmt, cTests);
9779 if (rcExit == RTEXITCODE_SUCCESS)
9780 rcExit = SseBinaryU128R64Generate(pszDataFileFmt, cTests);
9781
9782 if (rcExit == RTEXITCODE_SUCCESS)
9783 rcExit = SseBinaryI32R64Generate(pszDataFileFmt, cTests);
9784 if (rcExit == RTEXITCODE_SUCCESS)
9785 rcExit = SseBinaryI64R64Generate(pszDataFileFmt, cTests);
9786 if (rcExit == RTEXITCODE_SUCCESS)
9787 rcExit = SseBinaryI32R32Generate(pszDataFileFmt, cTests);
9788 if (rcExit == RTEXITCODE_SUCCESS)
9789 rcExit = SseBinaryI64R32Generate(pszDataFileFmt, cTests);
9790
9791 if (rcExit == RTEXITCODE_SUCCESS)
9792 rcExit = SseBinaryR64I32Generate(pszDataFileFmt, cTests);
9793 if (rcExit == RTEXITCODE_SUCCESS)
9794 rcExit = SseBinaryR64I64Generate(pszDataFileFmt, cTests);
9795 if (rcExit == RTEXITCODE_SUCCESS)
9796 rcExit = SseBinaryR32I32Generate(pszDataFileFmt, cTests);
9797 if (rcExit == RTEXITCODE_SUCCESS)
9798 rcExit = SseBinaryR32I64Generate(pszDataFileFmt, cTests);
9799 if (rcExit != RTEXITCODE_SUCCESS)
9800 return rcExit;
9801 }
9802
9803 if (fSseFpOther)
9804 {
9805 const char *pszDataFileFmtCmp = fCommonData ? "tstIEMAImplDataSseCompare-%s.bin" : pszBitBucket;
9806 const char *pszDataFileFmtConv = fCommonData ? "tstIEMAImplDataSseConvert-%s.bin" : pszBitBucket;
9807
9808 RTEXITCODE rcExit = SseCompareEflR32R32Generate(pszDataFileFmtCmp, cTests);
9809 if (rcExit == RTEXITCODE_SUCCESS)
9810 rcExit = SseCompareEflR64R64Generate(pszDataFileFmtCmp, cTests);
9811 if (rcExit == RTEXITCODE_SUCCESS)
9812 rcExit = SseCompareF2XmmR32Imm8Generate(pszDataFileFmtCmp, cTests);
9813 if (rcExit == RTEXITCODE_SUCCESS)
9814 rcExit = SseCompareF2XmmR64Imm8Generate(pszDataFileFmtCmp, cTests);
9815 if (rcExit == RTEXITCODE_SUCCESS)
9816 rcExit = SseConvertXmmI32R32Generate(pszDataFileFmtConv, cTests);
9817 if (rcExit == RTEXITCODE_SUCCESS)
9818 rcExit = SseConvertXmmR32I32Generate(pszDataFileFmtConv, cTests);
9819 if (rcExit == RTEXITCODE_SUCCESS)
9820 rcExit = SseConvertXmmI32R64Generate(pszDataFileFmtConv, cTests);
9821 if (rcExit == RTEXITCODE_SUCCESS)
9822 rcExit = SseConvertXmmR64I32Generate(pszDataFileFmtConv, cTests);
9823 if (rcExit == RTEXITCODE_SUCCESS)
9824 rcExit = SseConvertMmXmmGenerate(pszDataFileFmtConv, cTests);
9825 if (rcExit == RTEXITCODE_SUCCESS)
9826 rcExit = SseConvertXmmR32MmGenerate(pszDataFileFmtConv, cTests);
9827 if (rcExit == RTEXITCODE_SUCCESS)
9828 rcExit = SseConvertXmmR64MmGenerate(pszDataFileFmtConv, cTests);
9829 if (rcExit == RTEXITCODE_SUCCESS)
9830 rcExit = SseConvertMmI32XmmR32Generate(pszDataFileFmtConv, cTests);
9831 if (rcExit != RTEXITCODE_SUCCESS)
9832 return rcExit;
9833 }
9834
9835 if (fSsePcmpxstrx)
9836 {
9837 const char *pszDataFileFmtCmp = fCommonData ? "tstIEMAImplDataSsePcmpxstrx-%s.bin" : pszBitBucket;
9838
9839 RTEXITCODE rcExit = SseComparePcmpistriGenerate(pszDataFileFmtCmp, cTests);
9840 if (rcExit == RTEXITCODE_SUCCESS)
9841 rcExit = SseComparePcmpistrmGenerate(pszDataFileFmtCmp, cTests);
9842 if (rcExit == RTEXITCODE_SUCCESS)
9843 rcExit = SseComparePcmpestriGenerate(pszDataFileFmtCmp, cTests);
9844 if (rcExit == RTEXITCODE_SUCCESS)
9845 rcExit = SseComparePcmpestrmGenerate(pszDataFileFmtCmp, cTests);
9846 if (rcExit != RTEXITCODE_SUCCESS)
9847 return rcExit;
9848 }
9849
9850 return RTEXITCODE_SUCCESS;
9851#else
9852 return RTMsgErrorExitFailure("Test data generator not compiled in!");
9853#endif
9854 }
9855
9856 /*
9857 * Do testing. Currently disabled by default as data needs to be checked
9858 * on both Intel and AMD systems first.
9859 */
9860 rc = RTTestCreate("tstIEMAimpl", &g_hTest);
9861 AssertRCReturn(rc, RTEXITCODE_FAILURE);
9862 if (enmMode == kModeTest)
9863 {
9864 RTTestBanner(g_hTest);
9865
9866 /* Allocate guarded memory for use in the tests. */
9867#define ALLOC_GUARDED_VAR(a_puVar) do { \
9868 rc = RTTestGuardedAlloc(g_hTest, sizeof(*a_puVar), sizeof(*a_puVar), false /*fHead*/, (void **)&a_puVar); \
9869 if (RT_FAILURE(rc)) RTTestFailed(g_hTest, "Failed to allocate guarded mem: " #a_puVar); \
9870 } while (0)
9871 ALLOC_GUARDED_VAR(g_pu8);
9872 ALLOC_GUARDED_VAR(g_pu16);
9873 ALLOC_GUARDED_VAR(g_pu32);
9874 ALLOC_GUARDED_VAR(g_pu64);
9875 ALLOC_GUARDED_VAR(g_pu128);
9876 ALLOC_GUARDED_VAR(g_pu8Two);
9877 ALLOC_GUARDED_VAR(g_pu16Two);
9878 ALLOC_GUARDED_VAR(g_pu32Two);
9879 ALLOC_GUARDED_VAR(g_pu64Two);
9880 ALLOC_GUARDED_VAR(g_pu128Two);
9881 ALLOC_GUARDED_VAR(g_pfEfl);
9882 if (RTTestErrorCount(g_hTest) == 0)
9883 {
9884 if (fInt)
9885 {
9886 BinU8Test();
9887 BinU16Test();
9888 BinU32Test();
9889 BinU64Test();
9890 XchgTest();
9891 XaddTest();
9892 CmpXchgTest();
9893 CmpXchg8bTest();
9894 CmpXchg16bTest();
9895 ShiftDblTest();
9896 UnaryTest();
9897 ShiftTest();
9898 MulDivTest();
9899 BswapTest();
9900 }
9901
9902 if (fFpuLdSt)
9903 {
9904 FpuLoadConstTest();
9905 FpuLdMemTest();
9906 FpuLdIntTest();
9907 FpuLdD80Test();
9908 FpuStMemTest();
9909 FpuStIntTest();
9910 FpuStD80Test();
9911 }
9912
9913 if (fFpuBinary1)
9914 {
9915 FpuBinaryR80Test();
9916 FpuBinaryFswR80Test();
9917 FpuBinaryEflR80Test();
9918 }
9919
9920 if (fFpuBinary2)
9921 {
9922 FpuBinaryR64Test();
9923 FpuBinaryR32Test();
9924 FpuBinaryI32Test();
9925 FpuBinaryI16Test();
9926 FpuBinaryFswR64Test();
9927 FpuBinaryFswR32Test();
9928 FpuBinaryFswI32Test();
9929 FpuBinaryFswI16Test();
9930 }
9931
9932 if (fFpuOther)
9933 {
9934 FpuUnaryR80Test();
9935 FpuUnaryFswR80Test();
9936 FpuUnaryTwoR80Test();
9937 }
9938
9939 if (fSseFpBinary)
9940 {
9941 SseBinaryR32Test();
9942 SseBinaryR64Test();
9943 SseBinaryU128R32Test();
9944 SseBinaryU128R64Test();
9945
9946 SseBinaryI32R64Test();
9947 SseBinaryI64R64Test();
9948 SseBinaryI32R32Test();
9949 SseBinaryI64R32Test();
9950
9951 SseBinaryR64I32Test();
9952 SseBinaryR64I64Test();
9953 SseBinaryR32I32Test();
9954 SseBinaryR32I64Test();
9955 }
9956
9957 if (fSseFpOther)
9958 {
9959 SseCompareEflR32R32Test();
9960 SseCompareEflR64R64Test();
9961 SseCompareEflR64R64Test();
9962 SseCompareF2XmmR32Imm8Test();
9963 SseCompareF2XmmR64Imm8Test();
9964 SseConvertXmmI32R32Test();
9965 SseConvertXmmR32I32Test();
9966 SseConvertXmmI32R64Test();
9967 SseConvertXmmR64I32Test();
9968 SseConvertMmXmmTest();
9969 SseConvertXmmR32MmTest();
9970 SseConvertXmmR64MmTest();
9971 SseConvertMmI32XmmR32Test();
9972 }
9973
9974 if (fSsePcmpxstrx)
9975 {
9976 SseComparePcmpistriTest();
9977 SseComparePcmpistrmTest();
9978 SseComparePcmpestriTest();
9979 SseComparePcmpestrmTest();
9980 }
9981 }
9982 return RTTestSummaryAndDestroy(g_hTest);
9983 }
9984 return RTTestSkipAndDestroy(g_hTest, "unfinished testcase");
9985}
9986
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette