VirtualBox

source: vbox/trunk/src/VBox/VMM/testcase/tstIEMAImpl.cpp@ 98106

Last change on this file since 98106 was 98106, checked in by vboxsync, 2 years ago

Manual (C) year updates.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 435.3 KB
Line 
1/* $Id: tstIEMAImpl.cpp 98106 2023-01-17 22:43:07Z vboxsync $ */
2/** @file
3 * IEM Assembly Instruction Helper Testcase.
4 */
5
6/*
7 * Copyright (C) 2022-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#include "../include/IEMInternal.h"
33
34#include <iprt/errcore.h>
35#include <VBox/log.h>
36#include <iprt/assert.h>
37#include <iprt/ctype.h>
38#include <iprt/getopt.h>
39#include <iprt/initterm.h>
40#include <iprt/message.h>
41#include <iprt/mp.h>
42#include <iprt/rand.h>
43#include <iprt/stream.h>
44#include <iprt/string.h>
45#include <iprt/test.h>
46#include <VBox/version.h>
47
48#include "tstIEMAImpl.h"
49
50
51/*********************************************************************************************************************************
52* Defined Constants And Macros *
53*********************************************************************************************************************************/
/** Sub-test table initializer carrying an extra value.  The native EFLAGS
 *  behaviour tag is used because these entries are the same for all flavours. */
#define ENTRY_EX(a_Name, a_uExtra) \
    { \
        RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
        g_aTests_ ## a_Name, &g_cTests_ ## a_Name, \
        a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ \
    }
/** Shorthand for ENTRY_EX with the extra value set to zero. */
#define ENTRY(a_Name)               ENTRY_EX(a_Name, 0)
59
/** Like ENTRY_EX, but the size variable referenced is g_cbTests_<name>
 *  rather than g_cTests_<name>. */
#define ENTRY_EX_BIN(a_Name, a_uExtra) \
    { \
        RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
        g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
        a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ \
    }
/** Shorthand for ENTRY_EX_BIN with the extra value set to zero. */
#define ENTRY_BIN(a_Name)           ENTRY_EX_BIN(a_Name, 0)
65
/** AVX flavour of ENTRY_EX_BIN.  When IEM_WITHOUT_ASSEMBLY is defined the
 *  worker is the iemAImpl_<name>_fallback symbol, otherwise iemAImpl_<name>. */
#ifdef IEM_WITHOUT_ASSEMBLY
# define ENTRY_BIN_AVX_EX(a_Name, a_uExtra) \
    { \
        RT_XSTR(a_Name), iemAImpl_ ## a_Name ## _fallback, NULL, \
        g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
        a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ \
    }
#else
# define ENTRY_BIN_AVX_EX(a_Name, a_uExtra) \
    { \
        RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
        g_aTests_ ## a_Name, &g_cbTests_ ## a_Name, \
        a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ \
    }
#endif
/** Shorthand for ENTRY_BIN_AVX_EX with the extra value set to zero. */
#define ENTRY_BIN_AVX(a_Name)       ENTRY_BIN_AVX_EX(a_Name, 0)
78
79
/** Table initializer for the Intel EFLAGS flavour of a worker: pfn is the
 *  _intel variant and pfnNative the unsuffixed one.  The a_fEflUndef argument
 *  is accepted but not used by the expansion. */
#define ENTRY_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
    { \
        RT_XSTR(a_Name) "_intel", iemAImpl_ ## a_Name ## _intel, iemAImpl_ ## a_Name, \
        g_aTests_ ## a_Name ## _intel, &g_cTests_ ## a_Name ## _intel, \
        a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_INTEL \
    }
/** Shorthand for ENTRY_INTEL_EX with the extra value set to zero. */
#define ENTRY_INTEL(a_Name, a_fEflUndef)    ENTRY_INTEL_EX(a_Name, a_fEflUndef, 0)
85
/** Table initializer for the AMD EFLAGS flavour of a worker: pfn is the _amd
 *  variant and pfnNative the unsuffixed one.  The a_fEflUndef argument is
 *  accepted but not used by the expansion. */
#define ENTRY_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
    { \
        RT_XSTR(a_Name) "_amd", iemAImpl_ ## a_Name ## _amd, iemAImpl_ ## a_Name, \
        g_aTests_ ## a_Name ## _amd, &g_cTests_ ## a_Name ## _amd, \
        a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_AMD \
    }
/** Shorthand for ENTRY_AMD_EX with the extra value set to zero. */
#define ENTRY_AMD(a_Name, a_fEflUndef)      ENTRY_AMD_EX(a_Name, a_fEflUndef, 0)
91
/**
 * Declares a sub-test descriptor structure type.  The member order matches
 * the positional initializers produced by the ENTRY* macros.
 */
#define TYPEDEF_SUBTEST_TYPE(a_TypeName, a_TestType, a_FunctionPtrType) \
    typedef struct a_TypeName \
    { \
        const char         *pszName;            /* sub-test name */ \
        a_FunctionPtrType   pfn;                /* worker under test */ \
        a_FunctionPtrType   pfnNative;          /* optional second worker, may be NULL */ \
        a_TestType const   *paTests;            /* test data */ \
        uint32_t const     *pcTests;            /* points to the g_cTests_ / g_cbTests_ variable */ \
        uint32_t            uExtra;             /* extra value from the table entry */ \
        uint8_t             idxCpuEflFlavour;   /* IEMTARGETCPU_EFL_BEHAVIOR_XXX */ \
    } a_TypeName
103
/** Number of worker variations for a sub-test: 2 when the entry's EFLAGS
 *  flavour equals g_idxCpuEflFlavour and it has a pfnNative, otherwise 1. */
#define COUNT_VARIATIONS(a_SubTest) \
    ( ((a_SubTest).idxCpuEflFlavour == g_idxCpuEflFlavour && (a_SubTest).pfnNative) ? 2 : 1 )
106
107
108/*********************************************************************************************************************************
109* Global Variables *
110*********************************************************************************************************************************/
111static RTTEST g_hTest;
112static uint8_t g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
113#ifdef TSTIEMAIMPL_WITH_GENERATOR
114static uint32_t g_cZeroDstTests = 2;
115static uint32_t g_cZeroSrcTests = 4;
116#endif
117static uint8_t *g_pu8, *g_pu8Two;
118static uint16_t *g_pu16, *g_pu16Two;
119static uint32_t *g_pu32, *g_pu32Two, *g_pfEfl;
120static uint64_t *g_pu64, *g_pu64Two;
121static RTUINT128U *g_pu128, *g_pu128Two;
122
123static char g_aszBuf[32][256];
124static unsigned g_idxBuf = 0;
125
126static uint32_t g_cIncludeTestPatterns;
127static uint32_t g_cExcludeTestPatterns;
128static const char *g_apszIncludeTestPatterns[64];
129static const char *g_apszExcludeTestPatterns[64];
130
131static unsigned g_cVerbosity = 0;
132
133
134/*********************************************************************************************************************************
135* Internal Functions *
136*********************************************************************************************************************************/
137static const char *FormatR80(PCRTFLOAT80U pr80);
138static const char *FormatR64(PCRTFLOAT64U pr64);
139static const char *FormatR32(PCRTFLOAT32U pr32);
140
141
142/*
143 * Random helpers.
144 */
145
146static uint32_t RandEFlags(void)
147{
148 uint32_t fEfl = RTRandU32();
149 return (fEfl & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK;
150}
151
152#ifdef TSTIEMAIMPL_WITH_GENERATOR
153
154static uint8_t RandU8(void)
155{
156 return RTRandU32Ex(0, 0xff);
157}
158
159
160static uint16_t RandU16(void)
161{
162 return RTRandU32Ex(0, 0xffff);
163}
164
165
166static uint32_t RandU32(void)
167{
168 return RTRandU32();
169}
170
171#endif
172
173static uint64_t RandU64(void)
174{
175 return RTRandU64();
176}
177
178
179static RTUINT128U RandU128(void)
180{
181 RTUINT128U Ret;
182 Ret.s.Hi = RTRandU64();
183 Ret.s.Lo = RTRandU64();
184 return Ret;
185}
186
187#ifdef TSTIEMAIMPL_WITH_GENERATOR
188
189static uint8_t RandU8Dst(uint32_t iTest)
190{
191 if (iTest < g_cZeroDstTests)
192 return 0;
193 return RandU8();
194}
195
196
197static uint8_t RandU8Src(uint32_t iTest)
198{
199 if (iTest < g_cZeroSrcTests)
200 return 0;
201 return RandU8();
202}
203
204
205static uint16_t RandU16Dst(uint32_t iTest)
206{
207 if (iTest < g_cZeroDstTests)
208 return 0;
209 return RandU16();
210}
211
212
213static uint16_t RandU16Src(uint32_t iTest)
214{
215 if (iTest < g_cZeroSrcTests)
216 return 0;
217 return RandU16();
218}
219
220
221static uint32_t RandU32Dst(uint32_t iTest)
222{
223 if (iTest < g_cZeroDstTests)
224 return 0;
225 return RandU32();
226}
227
228
229static uint32_t RandU32Src(uint32_t iTest)
230{
231 if (iTest < g_cZeroSrcTests)
232 return 0;
233 return RandU32();
234}
235
236
237static uint64_t RandU64Dst(uint32_t iTest)
238{
239 if (iTest < g_cZeroDstTests)
240 return 0;
241 return RandU64();
242}
243
244
245static uint64_t RandU64Src(uint32_t iTest)
246{
247 if (iTest < g_cZeroSrcTests)
248 return 0;
249 return RandU64();
250}
251
252
253/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
254static int16_t RandI16Src2(uint32_t iTest)
255{
256 if (iTest < 18 * 4)
257 switch (iTest % 4)
258 {
259 case 0: return 0;
260 case 1: return INT16_MAX;
261 case 2: return INT16_MIN;
262 case 3: break;
263 }
264 return (int16_t)RandU16();
265}
266
267
268/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
269static int32_t RandI32Src2(uint32_t iTest)
270{
271 if (iTest < 18 * 4)
272 switch (iTest % 4)
273 {
274 case 0: return 0;
275 case 1: return INT32_MAX;
276 case 2: return INT32_MIN;
277 case 3: break;
278 }
279 return (int32_t)RandU32();
280}
281
282
283static int64_t RandI64Src(uint32_t iTest)
284{
285 RT_NOREF(iTest);
286 return (int64_t)RandU64();
287}
288
289
290static uint16_t RandFcw(void)
291{
292 return RandU16() & ~X86_FCW_ZERO_MASK;
293}
294
295
296static uint16_t RandFsw(void)
297{
298 AssertCompile((X86_FSW_C_MASK | X86_FSW_XCPT_ES_MASK | X86_FSW_TOP_MASK | X86_FSW_B) == 0xffff);
299 return RandU16();
300}
301
302
303static uint32_t RandMxcsr(void)
304{
305 return RandU32() & ~X86_MXCSR_ZERO_MASK;
306}
307
308
309static void SafeR80FractionShift(PRTFLOAT80U pr80, uint8_t cShift)
310{
311 if (pr80->sj64.uFraction >= RT_BIT_64(cShift))
312 pr80->sj64.uFraction >>= cShift;
313 else
314 pr80->sj64.uFraction = (cShift % 19) + 1;
315}
316
317
318
319static RTFLOAT80U RandR80Ex(uint8_t bType, unsigned cTarget = 80, bool fIntTarget = false)
320{
321 Assert(cTarget == (!fIntTarget ? 80U : 16U) || cTarget == 64U || cTarget == 32U || (cTarget == 59U && fIntTarget));
322
323 RTFLOAT80U r80;
324 r80.au64[0] = RandU64();
325 r80.au16[4] = RandU16();
326
327 /*
328 * Adjust the random stuff according to bType.
329 */
330 bType &= 0x1f;
331 if (bType == 0 || bType == 1 || bType == 2 || bType == 3)
332 {
333 /* Zero (0), Pseudo-Infinity (1), Infinity (2), Indefinite (3). We only keep fSign here. */
334 r80.sj64.uExponent = bType == 0 ? 0 : 0x7fff;
335 r80.sj64.uFraction = bType <= 2 ? 0 : RT_BIT_64(62);
336 r80.sj64.fInteger = bType >= 2 ? 1 : 0;
337 AssertMsg(bType != 0 || RTFLOAT80U_IS_ZERO(&r80), ("%s\n", FormatR80(&r80)));
338 AssertMsg(bType != 1 || RTFLOAT80U_IS_PSEUDO_INF(&r80), ("%s\n", FormatR80(&r80)));
339 Assert( bType != 1 || RTFLOAT80U_IS_387_INVALID(&r80));
340 AssertMsg(bType != 2 || RTFLOAT80U_IS_INF(&r80), ("%s\n", FormatR80(&r80)));
341 AssertMsg(bType != 3 || RTFLOAT80U_IS_INDEFINITE(&r80), ("%s\n", FormatR80(&r80)));
342 }
343 else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
344 {
345 /* Denormals (4,5) and Pseudo denormals (6,7) */
346 if (bType & 1)
347 SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
348 else if (r80.sj64.uFraction == 0 && bType < 6)
349 r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
350 r80.sj64.uExponent = 0;
351 r80.sj64.fInteger = bType >= 6;
352 AssertMsg(bType >= 6 || RTFLOAT80U_IS_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
353 AssertMsg(bType < 6 || RTFLOAT80U_IS_PSEUDO_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
354 }
355 else if (bType == 8 || bType == 9)
356 {
357 /* Pseudo NaN. */
358 if (bType & 1)
359 SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
360 else if (r80.sj64.uFraction == 0 && !r80.sj64.fInteger)
361 r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
362 r80.sj64.uExponent = 0x7fff;
363 if (r80.sj64.fInteger)
364 r80.sj64.uFraction |= RT_BIT_64(62);
365 else
366 r80.sj64.uFraction &= ~RT_BIT_64(62);
367 r80.sj64.fInteger = 0;
368 AssertMsg(RTFLOAT80U_IS_PSEUDO_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
369 AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
370 Assert(RTFLOAT80U_IS_387_INVALID(&r80));
371 }
372 else if (bType == 10 || bType == 11 || bType == 12 || bType == 13)
373 {
374 /* Quiet and signalling NaNs. */
375 if (bType & 1)
376 SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
377 else if (r80.sj64.uFraction == 0)
378 r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
379 r80.sj64.uExponent = 0x7fff;
380 if (bType < 12)
381 r80.sj64.uFraction |= RT_BIT_64(62); /* quiet */
382 else
383 r80.sj64.uFraction &= ~RT_BIT_64(62); /* signaling */
384 r80.sj64.fInteger = 1;
385 AssertMsg(bType >= 12 || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
386 AssertMsg(bType < 12 || RTFLOAT80U_IS_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
387 AssertMsg(RTFLOAT80U_IS_SIGNALLING_NAN(&r80) || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
388 AssertMsg(RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
389 AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s\n", FormatR80(&r80)));
390 }
391 else if (bType == 14 || bType == 15)
392 {
393 /* Unnormals */
394 if (bType & 1)
395 SafeR80FractionShift(&r80, RandU8() % 62);
396 r80.sj64.fInteger = 0;
397 if (r80.sj64.uExponent == RTFLOAT80U_EXP_MAX || r80.sj64.uExponent == 0)
398 r80.sj64.uExponent = (uint16_t)RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 1);
399 AssertMsg(RTFLOAT80U_IS_UNNORMAL(&r80), ("%s\n", FormatR80(&r80)));
400 Assert(RTFLOAT80U_IS_387_INVALID(&r80));
401 }
402 else if (bType < 26)
403 {
404 /* Make sure we have lots of normalized values. */
405 if (!fIntTarget)
406 {
407 const unsigned uMinExp = cTarget == 64 ? RTFLOAT80U_EXP_BIAS - RTFLOAT64U_EXP_BIAS
408 : cTarget == 32 ? RTFLOAT80U_EXP_BIAS - RTFLOAT32U_EXP_BIAS : 0;
409 const unsigned uMaxExp = cTarget == 64 ? uMinExp + RTFLOAT64U_EXP_MAX
410 : cTarget == 32 ? uMinExp + RTFLOAT32U_EXP_MAX : RTFLOAT80U_EXP_MAX;
411 r80.sj64.fInteger = 1;
412 if (r80.sj64.uExponent <= uMinExp)
413 r80.sj64.uExponent = uMinExp + 1;
414 else if (r80.sj64.uExponent >= uMaxExp)
415 r80.sj64.uExponent = uMaxExp - 1;
416
417 if (bType == 16)
418 { /* All 1s is useful to testing rounding. Also try trigger special
419 behaviour by sometimes rounding out of range, while we're at it. */
420 r80.sj64.uFraction = RT_BIT_64(63) - 1;
421 uint8_t bExp = RandU8();
422 if ((bExp & 3) == 0)
423 r80.sj64.uExponent = uMaxExp - 1;
424 else if ((bExp & 3) == 1)
425 r80.sj64.uExponent = uMinExp + 1;
426 else if ((bExp & 3) == 2)
427 r80.sj64.uExponent = uMinExp - (bExp & 15); /* (small numbers are mapped to subnormal values) */
428 }
429 }
430 else
431 {
432 /* integer target: */
433 const unsigned uMinExp = RTFLOAT80U_EXP_BIAS;
434 const unsigned uMaxExp = RTFLOAT80U_EXP_BIAS + cTarget - 2;
435 r80.sj64.fInteger = 1;
436 if (r80.sj64.uExponent < uMinExp)
437 r80.sj64.uExponent = uMinExp;
438 else if (r80.sj64.uExponent > uMaxExp)
439 r80.sj64.uExponent = uMaxExp;
440
441 if (bType == 16)
442 { /* All 1s is useful to testing rounding. Also try trigger special
443 behaviour by sometimes rounding out of range, while we're at it. */
444 r80.sj64.uFraction = RT_BIT_64(63) - 1;
445 uint8_t bExp = RandU8();
446 if ((bExp & 3) == 0)
447 r80.sj64.uExponent = uMaxExp;
448 else if ((bExp & 3) == 1)
449 r80.sj64.uFraction &= ~(RT_BIT_64(cTarget - 1 - r80.sj64.uExponent) - 1); /* no rounding */
450 }
451 }
452
453 AssertMsg(RTFLOAT80U_IS_NORMAL(&r80), ("%s\n", FormatR80(&r80)));
454 }
455 return r80;
456}
457
458
459static RTFLOAT80U RandR80(unsigned cTarget = 80, bool fIntTarget = false)
460{
461 /*
462 * Make it more likely that we get a good selection of special values.
463 */
464 return RandR80Ex(RandU8(), cTarget, fIntTarget);
465
466}
467
468
469static RTFLOAT80U RandR80Src(uint32_t iTest, unsigned cTarget = 80, bool fIntTarget = false)
470{
471 /* Make sure we cover all the basic types first before going for random selection: */
472 if (iTest <= 18)
473 return RandR80Ex(18 - iTest, cTarget, fIntTarget); /* Starting with 3 normals. */
474 return RandR80(cTarget, fIntTarget);
475}
476
477
478/**
479 * Helper for RandR80Src1 and RandR80Src2 that converts bType from a 0..11 range
480 * to a 0..17, covering all basic value types.
481 */
482static uint8_t RandR80Src12RemapType(uint8_t bType)
483{
484 switch (bType)
485 {
486 case 0: return 18; /* normal */
487 case 1: return 16; /* normal extreme rounding */
488 case 2: return 14; /* unnormal */
489 case 3: return 12; /* Signalling NaN */
490 case 4: return 10; /* Quiet NaN */
491 case 5: return 8; /* PseudoNaN */
492 case 6: return 6; /* Pseudo Denormal */
493 case 7: return 4; /* Denormal */
494 case 8: return 3; /* Indefinite */
495 case 9: return 2; /* Infinity */
496 case 10: return 1; /* Pseudo-Infinity */
497 case 11: return 0; /* Zero */
498 default: AssertFailedReturn(18);
499 }
500}
501
502
503/**
504 * This works in tandem with RandR80Src2 to make sure we cover all operand
505 * type mixes first before we venture into regular random testing.
506 *
507 * There are 11 basic variations, when we leave out the five odd ones using
508 * SafeR80FractionShift. Because of the special normalized value targetting at
509 * rounding, we make it an even 12. So 144 combinations for two operands.
510 */
511static RTFLOAT80U RandR80Src1(uint32_t iTest, unsigned cPartnerBits = 80, bool fPartnerInt = false)
512{
513 if (cPartnerBits == 80)
514 {
515 Assert(!fPartnerInt);
516 if (iTest < 12 * 12)
517 return RandR80Ex(RandR80Src12RemapType(iTest / 12));
518 }
519 else if ((cPartnerBits == 64 || cPartnerBits == 32) && !fPartnerInt)
520 {
521 if (iTest < 12 * 10)
522 return RandR80Ex(RandR80Src12RemapType(iTest / 10));
523 }
524 else if (iTest < 18 * 4 && fPartnerInt)
525 return RandR80Ex(iTest / 4);
526 return RandR80();
527}
528
529
530/** Partner to RandR80Src1. */
531static RTFLOAT80U RandR80Src2(uint32_t iTest)
532{
533 if (iTest < 12 * 12)
534 return RandR80Ex(RandR80Src12RemapType(iTest % 12));
535 return RandR80();
536}
537
538
539static void SafeR64FractionShift(PRTFLOAT64U pr64, uint8_t cShift)
540{
541 if (pr64->s64.uFraction >= RT_BIT_64(cShift))
542 pr64->s64.uFraction >>= cShift;
543 else
544 pr64->s64.uFraction = (cShift % 19) + 1;
545}
546
547
548static RTFLOAT64U RandR64Ex(uint8_t bType)
549{
550 RTFLOAT64U r64;
551 r64.u = RandU64();
552
553 /*
554 * Make it more likely that we get a good selection of special values.
555 * On average 6 out of 16 calls should return a special value.
556 */
557 bType &= 0xf;
558 if (bType == 0 || bType == 1)
559 {
560 /* 0 or Infinity. We only keep fSign here. */
561 r64.s.uExponent = bType == 0 ? 0 : 0x7ff;
562 r64.s.uFractionHigh = 0;
563 r64.s.uFractionLow = 0;
564 AssertMsg(bType != 0 || RTFLOAT64U_IS_ZERO(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
565 AssertMsg(bType != 1 || RTFLOAT64U_IS_INF(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
566 }
567 else if (bType == 2 || bType == 3)
568 {
569 /* Subnormals */
570 if (bType == 3)
571 SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
572 else if (r64.s64.uFraction == 0)
573 r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
574 r64.s64.uExponent = 0;
575 AssertMsg(RTFLOAT64U_IS_SUBNORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
576 }
577 else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
578 {
579 /* NaNs */
580 if (bType & 1)
581 SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
582 else if (r64.s64.uFraction == 0)
583 r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
584 r64.s64.uExponent = 0x7ff;
585 if (bType < 6)
586 r64.s64.uFraction |= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* quiet */
587 else
588 r64.s64.uFraction &= ~RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* signalling */
589 AssertMsg(bType >= 6 || RTFLOAT64U_IS_QUIET_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
590 AssertMsg(bType < 6 || RTFLOAT64U_IS_SIGNALLING_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
591 AssertMsg(RTFLOAT64U_IS_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
592 }
593 else if (bType < 12)
594 {
595 /* Make sure we have lots of normalized values. */
596 if (r64.s.uExponent == 0)
597 r64.s.uExponent = 1;
598 else if (r64.s.uExponent == 0x7ff)
599 r64.s.uExponent = 0x7fe;
600 AssertMsg(RTFLOAT64U_IS_NORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
601 }
602 return r64;
603}
604
605
606static RTFLOAT64U RandR64Src(uint32_t iTest)
607{
608 if (iTest < 16)
609 return RandR64Ex(iTest);
610 return RandR64Ex(RandU8());
611}
612
613
614/** Pairing with a 80-bit floating point arg. */
615static RTFLOAT64U RandR64Src2(uint32_t iTest)
616{
617 if (iTest < 12 * 10)
618 return RandR64Ex(9 - iTest % 10); /* start with normal values */
619 return RandR64Ex(RandU8());
620}
621
622
623static void SafeR32FractionShift(PRTFLOAT32U pr32, uint8_t cShift)
624{
625 if (pr32->s.uFraction >= RT_BIT_32(cShift))
626 pr32->s.uFraction >>= cShift;
627 else
628 pr32->s.uFraction = (cShift % 19) + 1;
629}
630
631
632static RTFLOAT32U RandR32Ex(uint8_t bType)
633{
634 RTFLOAT32U r32;
635 r32.u = RandU32();
636
637 /*
638 * Make it more likely that we get a good selection of special values.
639 * On average 6 out of 16 calls should return a special value.
640 */
641 bType &= 0xf;
642 if (bType == 0 || bType == 1)
643 {
644 /* 0 or Infinity. We only keep fSign here. */
645 r32.s.uExponent = bType == 0 ? 0 : 0xff;
646 r32.s.uFraction = 0;
647 AssertMsg(bType != 0 || RTFLOAT32U_IS_ZERO(&r32), ("%s\n", FormatR32(&r32)));
648 AssertMsg(bType != 1 || RTFLOAT32U_IS_INF(&r32), ("%s\n", FormatR32(&r32)));
649 }
650 else if (bType == 2 || bType == 3)
651 {
652 /* Subnormals */
653 if (bType == 3)
654 SafeR32FractionShift(&r32, r32.s.uExponent % 22);
655 else if (r32.s.uFraction == 0)
656 r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
657 r32.s.uExponent = 0;
658 AssertMsg(RTFLOAT32U_IS_SUBNORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
659 }
660 else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
661 {
662 /* NaNs */
663 if (bType & 1)
664 SafeR32FractionShift(&r32, r32.s.uExponent % 22);
665 else if (r32.s.uFraction == 0)
666 r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
667 r32.s.uExponent = 0xff;
668 if (bType < 6)
669 r32.s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* quiet */
670 else
671 r32.s.uFraction &= ~RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* signalling */
672 AssertMsg(bType >= 6 || RTFLOAT32U_IS_QUIET_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
673 AssertMsg(bType < 6 || RTFLOAT32U_IS_SIGNALLING_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
674 AssertMsg(RTFLOAT32U_IS_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
675 }
676 else if (bType < 12)
677 {
678 /* Make sure we have lots of normalized values. */
679 if (r32.s.uExponent == 0)
680 r32.s.uExponent = 1;
681 else if (r32.s.uExponent == 0xff)
682 r32.s.uExponent = 0xfe;
683 AssertMsg(RTFLOAT32U_IS_NORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
684 }
685 return r32;
686}
687
688
689static RTFLOAT32U RandR32Src(uint32_t iTest)
690{
691 if (iTest < 16)
692 return RandR32Ex(iTest);
693 return RandR32Ex(RandU8());
694}
695
696
697/** Pairing with a 80-bit floating point arg. */
698static RTFLOAT32U RandR32Src2(uint32_t iTest)
699{
700 if (iTest < 12 * 10)
701 return RandR32Ex(9 - iTest % 10); /* start with normal values */
702 return RandR32Ex(RandU8());
703}
704
705
706static RTPBCD80U RandD80Src(uint32_t iTest)
707{
708 if (iTest < 3)
709 {
710 RTPBCD80U d80Zero = RTPBCD80U_INIT_ZERO(!(iTest & 1));
711 return d80Zero;
712 }
713 if (iTest < 5)
714 {
715 RTPBCD80U d80Ind = RTPBCD80U_INIT_INDEFINITE();
716 return d80Ind;
717 }
718
719 RTPBCD80U d80;
720 uint8_t b = RandU8();
721 d80.s.fSign = b & 1;
722
723 if ((iTest & 7) >= 6)
724 {
725 /* Illegal */
726 d80.s.uPad = (iTest & 7) == 7 ? b >> 1 : 0;
727 for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
728 d80.s.abPairs[iPair] = RandU8();
729 }
730 else
731 {
732 /* Normal */
733 d80.s.uPad = 0;
734 for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
735 {
736 uint8_t const uLo = (uint8_t)RTRandU32Ex(0, 9);
737 uint8_t const uHi = (uint8_t)RTRandU32Ex(0, 9);
738 d80.s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(uHi, uLo);
739 }
740 }
741 return d80;
742}
743
744
745const char *GenFormatR80(PCRTFLOAT80U plrd)
746{
747 if (RTFLOAT80U_IS_ZERO(plrd))
748 return plrd->s.fSign ? "RTFLOAT80U_INIT_ZERO(1)" : "RTFLOAT80U_INIT_ZERO(0)";
749 if (RTFLOAT80U_IS_INF(plrd))
750 return plrd->s.fSign ? "RTFLOAT80U_INIT_INF(1)" : "RTFLOAT80U_INIT_INF(0)";
751 if (RTFLOAT80U_IS_INDEFINITE(plrd))
752 return plrd->s.fSign ? "RTFLOAT80U_INIT_IND(1)" : "RTFLOAT80U_INIT_IND(0)";
753 if (RTFLOAT80U_IS_QUIET_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
754 return plrd->s.fSign ? "RTFLOAT80U_INIT_QNAN(1)" : "RTFLOAT80U_INIT_QNAN(0)";
755 if (RTFLOAT80U_IS_SIGNALLING_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
756 return plrd->s.fSign ? "RTFLOAT80U_INIT_SNAN(1)" : "RTFLOAT80U_INIT_SNAN(0)";
757
758 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
759 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT80U_INIT_C(%d,%#RX64,%u)",
760 plrd->s.fSign, plrd->s.uMantissa, plrd->s.uExponent);
761 return pszBuf;
762}
763
764const char *GenFormatR64(PCRTFLOAT64U prd)
765{
766 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
767 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT64U_INIT_C(%d,%#RX64,%u)",
768 prd->s.fSign, RT_MAKE_U64(prd->s.uFractionLow, prd->s.uFractionHigh), prd->s.uExponent);
769 return pszBuf;
770}
771
772
773const char *GenFormatR32(PCRTFLOAT32U pr)
774{
775 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
776 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT32U_INIT_C(%d,%#RX32,%u)", pr->s.fSign, pr->s.uFraction, pr->s.uExponent);
777 return pszBuf;
778}
779
780
781const char *GenFormatD80(PCRTPBCD80U pd80)
782{
783 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
784 size_t off;
785 if (pd80->s.uPad == 0)
786 off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_C(%d", pd80->s.fSign);
787 else
788 off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_EX_C(%#x,%d", pd80->s.uPad, pd80->s.fSign);
789 size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
790 while (iPair-- > 0)
791 off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, ",%d,%d",
792 RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair]),
793 RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair]));
794 pszBuf[off++] = ')';
795 pszBuf[off++] = '\0';
796 return pszBuf;
797}
798
799
800const char *GenFormatI64(int64_t i64)
801{
802 if (i64 == INT64_MIN) /* This one is problematic */
803 return "INT64_MIN";
804 if (i64 == INT64_MAX)
805 return "INT64_MAX";
806 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
807 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT64_C(%RI64)", i64);
808 return pszBuf;
809}
810
811
812const char *GenFormatI64(int64_t const *pi64)
813{
814 return GenFormatI64(*pi64);
815}
816
817
818const char *GenFormatI32(int32_t i32)
819{
820 if (i32 == INT32_MIN) /* This one is problematic */
821 return "INT32_MIN";
822 if (i32 == INT32_MAX)
823 return "INT32_MAX";
824 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
825 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT32_C(%RI32)", i32);
826 return pszBuf;
827}
828
829
830const char *GenFormatI32(int32_t const *pi32)
831{
832 return GenFormatI32(*pi32);
833}
834
835
836const char *GenFormatI16(int16_t i16)
837{
838 if (i16 == INT16_MIN) /* This one is problematic */
839 return "INT16_MIN";
840 if (i16 == INT16_MAX)
841 return "INT16_MAX";
842 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
843 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT16_C(%RI16)", i16);
844 return pszBuf;
845}
846
847
848const char *GenFormatI16(int16_t const *pi16)
849{
850 return GenFormatI16(*pi16);
851}
852
853
854static void GenerateHeader(PRTSTREAM pOut, const char *pszCpuDesc, const char *pszCpuType)
855{
856 /* We want to tag the generated source code with the revision that produced it. */
857 static char s_szRev[] = "$Revision: 98106 $";
858 const char *pszRev = RTStrStripL(strchr(s_szRev, ':') + 1);
859 size_t cchRev = 0;
860 while (RT_C_IS_DIGIT(pszRev[cchRev]))
861 cchRev++;
862
863 RTStrmPrintf(pOut,
864 "/* $Id: tstIEMAImpl.cpp 98106 2023-01-17 22:43:07Z vboxsync $ */\n"
865 "/** @file\n"
866 " * IEM Assembly Instruction Helper Testcase Data%s%s - r%.*s on %s.\n"
867 " */\n"
868 "\n"
869 "/*\n"
870 " * Copyright (C) 2022-" VBOX_C_YEAR " Oracle and/or its affiliates.\n"
871 " *\n"
872 " * This file is part of VirtualBox base platform packages, as\n"
873 " * available from https://www.virtualbox.org.\n"
874 " *\n"
875 " * This program is free software; you can redistribute it and/or\n"
876 " * modify it under the terms of the GNU General Public License\n"
877 " * as published by the Free Software Foundation, in version 3 of the\n"
878 " * License.\n"
879 " *\n"
880 " * This program is distributed in the hope that it will be useful, but\n"
881 " * WITHOUT ANY WARRANTY; without even the implied warranty of\n"
882 " * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU\n"
883 " * General Public License for more details.\n"
884 " *\n"
885 " * You should have received a copy of the GNU General Public License\n"
886 " * along with this program; if not, see <https://www.gnu.org/licenses>.\n"
887 " *\n"
888 " * SPDX-License-Identifier: GPL-3.0-only\n"
889 " */\n"
890 "\n"
891 "#include \"tstIEMAImpl.h\"\n"
892 "\n"
893 ,
894 pszCpuType ? " " : "", pszCpuType ? pszCpuType : "", cchRev, pszRev, pszCpuDesc);
895}
896
897
898static PRTSTREAM GenerateOpenWithHdr(const char *pszFilename, const char *pszCpuDesc, const char *pszCpuType)
899{
900 PRTSTREAM pOut = NULL;
901 int rc = RTStrmOpen(pszFilename, "w", &pOut);
902 if (RT_SUCCESS(rc))
903 {
904 GenerateHeader(pOut, pszCpuDesc, pszCpuType);
905 return pOut;
906 }
907 RTMsgError("Failed to open %s for writing: %Rrc", pszFilename, rc);
908 return NULL;
909}
910
911
912static RTEXITCODE GenerateFooterAndClose(PRTSTREAM pOut, const char *pszFilename, RTEXITCODE rcExit)
913{
914 RTStrmPrintf(pOut,
915 "\n"
916 "/* end of file */\n");
917 int rc = RTStrmClose(pOut);
918 if (RT_SUCCESS(rc))
919 return rcExit;
920 return RTMsgErrorExitFailure("RTStrmClose failed on %s: %Rrc", pszFilename, rc);
921}
922
923
924static void GenerateArrayStart(PRTSTREAM pOut, const char *pszName, const char *pszType)
925{
926 RTStrmPrintf(pOut, "%s const g_aTests_%s[] =\n{\n", pszType, pszName);
927}
928
929
930static void GenerateArrayEnd(PRTSTREAM pOut, const char *pszName)
931{
932 RTStrmPrintf(pOut,
933 "};\n"
934 "uint32_t const g_cTests_%s = RT_ELEMENTS(g_aTests_%s);\n"
935 "\n",
936 pszName, pszName);
937}
938
939#endif /* TSTIEMAIMPL_WITH_GENERATOR */
940
941
942/*
943 * Test helpers.
944 */
945static bool IsTestEnabled(const char *pszName)
946{
947 /* Process excludes first: */
948 uint32_t i = g_cExcludeTestPatterns;
949 while (i-- > 0)
950 if (RTStrSimplePatternMultiMatch(g_apszExcludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
951 return false;
952
953 /* If no include patterns, everything is included: */
954 i = g_cIncludeTestPatterns;
955 if (!i)
956 return true;
957
958 /* Otherwise only tests in the include patters gets tested: */
959 while (i-- > 0)
960 if (RTStrSimplePatternMultiMatch(g_apszIncludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
961 return true;
962
963 return false;
964}
965
966
967static bool SubTestAndCheckIfEnabled(const char *pszName)
968{
969 RTTestSub(g_hTest, pszName);
970 if (IsTestEnabled(pszName))
971 return true;
972 RTTestSkipped(g_hTest, g_cVerbosity > 0 ? "excluded" : NULL);
973 return false;
974}
975
976
977static const char *EFlagsDiff(uint32_t fActual, uint32_t fExpected)
978{
979 if (fActual == fExpected)
980 return "";
981
982 uint32_t const fXor = fActual ^ fExpected;
983 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
984 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
985
986 static struct
987 {
988 const char *pszName;
989 uint32_t fFlag;
990 } const s_aFlags[] =
991 {
992#define EFL_ENTRY(a_Flags) { #a_Flags, X86_EFL_ ## a_Flags }
993 EFL_ENTRY(CF),
994 EFL_ENTRY(PF),
995 EFL_ENTRY(AF),
996 EFL_ENTRY(ZF),
997 EFL_ENTRY(SF),
998 EFL_ENTRY(TF),
999 EFL_ENTRY(IF),
1000 EFL_ENTRY(DF),
1001 EFL_ENTRY(OF),
1002 EFL_ENTRY(IOPL),
1003 EFL_ENTRY(NT),
1004 EFL_ENTRY(RF),
1005 EFL_ENTRY(VM),
1006 EFL_ENTRY(AC),
1007 EFL_ENTRY(VIF),
1008 EFL_ENTRY(VIP),
1009 EFL_ENTRY(ID),
1010 };
1011 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1012 if (s_aFlags[i].fFlag & fXor)
1013 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1014 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1015 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1016 return pszBuf;
1017}
1018
1019
1020static const char *FswDiff(uint16_t fActual, uint16_t fExpected)
1021{
1022 if (fActual == fExpected)
1023 return "";
1024
1025 uint16_t const fXor = fActual ^ fExpected;
1026 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1027 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1028
1029 static struct
1030 {
1031 const char *pszName;
1032 uint32_t fFlag;
1033 } const s_aFlags[] =
1034 {
1035#define FSW_ENTRY(a_Flags) { #a_Flags, X86_FSW_ ## a_Flags }
1036 FSW_ENTRY(IE),
1037 FSW_ENTRY(DE),
1038 FSW_ENTRY(ZE),
1039 FSW_ENTRY(OE),
1040 FSW_ENTRY(UE),
1041 FSW_ENTRY(PE),
1042 FSW_ENTRY(SF),
1043 FSW_ENTRY(ES),
1044 FSW_ENTRY(C0),
1045 FSW_ENTRY(C1),
1046 FSW_ENTRY(C2),
1047 FSW_ENTRY(C3),
1048 FSW_ENTRY(B),
1049 };
1050 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1051 if (s_aFlags[i].fFlag & fXor)
1052 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1053 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1054 if (fXor & X86_FSW_TOP_MASK)
1055 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "/TOP%u!%u",
1056 X86_FSW_TOP_GET(fActual), X86_FSW_TOP_GET(fExpected));
1057#if 0 /* For debugging fprem & fprem1 */
1058 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " - Q=%d (vs %d)",
1059 X86_FSW_CX_TO_QUOTIENT(fActual), X86_FSW_CX_TO_QUOTIENT(fExpected));
1060#endif
1061 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1062 return pszBuf;
1063}
1064
1065
1066static const char *MxcsrDiff(uint32_t fActual, uint32_t fExpected)
1067{
1068 if (fActual == fExpected)
1069 return "";
1070
1071 uint16_t const fXor = fActual ^ fExpected;
1072 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1073 size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);
1074
1075 static struct
1076 {
1077 const char *pszName;
1078 uint32_t fFlag;
1079 } const s_aFlags[] =
1080 {
1081#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1082 MXCSR_ENTRY(IE),
1083 MXCSR_ENTRY(DE),
1084 MXCSR_ENTRY(ZE),
1085 MXCSR_ENTRY(OE),
1086 MXCSR_ENTRY(UE),
1087 MXCSR_ENTRY(PE),
1088
1089 MXCSR_ENTRY(IM),
1090 MXCSR_ENTRY(DM),
1091 MXCSR_ENTRY(ZM),
1092 MXCSR_ENTRY(OM),
1093 MXCSR_ENTRY(UM),
1094 MXCSR_ENTRY(PM),
1095
1096 MXCSR_ENTRY(DAZ),
1097 MXCSR_ENTRY(FZ),
1098#undef MXCSR_ENTRY
1099 };
1100 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1101 if (s_aFlags[i].fFlag & fXor)
1102 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
1103 s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
1104 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1105 return pszBuf;
1106}
1107
1108
1109static const char *FormatFcw(uint16_t fFcw)
1110{
1111 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1112
1113 const char *pszPC = NULL; /* (msc+gcc are too stupid) */
1114 switch (fFcw & X86_FCW_PC_MASK)
1115 {
1116 case X86_FCW_PC_24: pszPC = "PC24"; break;
1117 case X86_FCW_PC_RSVD: pszPC = "PCRSVD!"; break;
1118 case X86_FCW_PC_53: pszPC = "PC53"; break;
1119 case X86_FCW_PC_64: pszPC = "PC64"; break;
1120 }
1121
1122 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1123 switch (fFcw & X86_FCW_RC_MASK)
1124 {
1125 case X86_FCW_RC_NEAREST: pszRC = "NEAR"; break;
1126 case X86_FCW_RC_DOWN: pszRC = "DOWN"; break;
1127 case X86_FCW_RC_UP: pszRC = "UP"; break;
1128 case X86_FCW_RC_ZERO: pszRC = "ZERO"; break;
1129 }
1130 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s %s", pszPC, pszRC);
1131
1132 static struct
1133 {
1134 const char *pszName;
1135 uint32_t fFlag;
1136 } const s_aFlags[] =
1137 {
1138#define FCW_ENTRY(a_Flags) { #a_Flags, X86_FCW_ ## a_Flags }
1139 FCW_ENTRY(IM),
1140 FCW_ENTRY(DM),
1141 FCW_ENTRY(ZM),
1142 FCW_ENTRY(OM),
1143 FCW_ENTRY(UM),
1144 FCW_ENTRY(PM),
1145 { "6M", 64 },
1146 };
1147 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1148 if (fFcw & s_aFlags[i].fFlag)
1149 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1150
1151 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1152 return pszBuf;
1153}
1154
1155
1156static const char *FormatMxcsr(uint32_t fMxcsr)
1157{
1158 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1159
1160 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1161 switch (fMxcsr & X86_MXCSR_RC_MASK)
1162 {
1163 case X86_MXCSR_RC_NEAREST: pszRC = "NEAR"; break;
1164 case X86_MXCSR_RC_DOWN: pszRC = "DOWN"; break;
1165 case X86_MXCSR_RC_UP: pszRC = "UP"; break;
1166 case X86_MXCSR_RC_ZERO: pszRC = "ZERO"; break;
1167 }
1168
1169 const char *pszDAZ = fMxcsr & X86_MXCSR_DAZ ? " DAZ" : "";
1170 const char *pszFZ = fMxcsr & X86_MXCSR_FZ ? " FZ" : "";
1171 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s%s%s", pszRC, pszDAZ, pszFZ);
1172
1173 static struct
1174 {
1175 const char *pszName;
1176 uint32_t fFlag;
1177 } const s_aFlags[] =
1178 {
1179#define MXCSR_ENTRY(a_Flags) { #a_Flags, X86_MXCSR_ ## a_Flags }
1180 MXCSR_ENTRY(IE),
1181 MXCSR_ENTRY(DE),
1182 MXCSR_ENTRY(ZE),
1183 MXCSR_ENTRY(OE),
1184 MXCSR_ENTRY(UE),
1185 MXCSR_ENTRY(PE),
1186
1187 MXCSR_ENTRY(IM),
1188 MXCSR_ENTRY(DM),
1189 MXCSR_ENTRY(ZM),
1190 MXCSR_ENTRY(OM),
1191 MXCSR_ENTRY(UM),
1192 MXCSR_ENTRY(PM),
1193 { "6M", 64 },
1194 };
1195 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1196 if (fMxcsr & s_aFlags[i].fFlag)
1197 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1198
1199 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1200 return pszBuf;
1201}
1202
1203
1204static const char *FormatR80(PCRTFLOAT80U pr80)
1205{
1206 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1207 RTStrFormatR80(pszBuf, sizeof(g_aszBuf[0]), pr80, 0, 0, RTSTR_F_SPECIAL);
1208 return pszBuf;
1209}
1210
1211
1212static const char *FormatR64(PCRTFLOAT64U pr64)
1213{
1214 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1215 RTStrFormatR64(pszBuf, sizeof(g_aszBuf[0]), pr64, 0, 0, RTSTR_F_SPECIAL);
1216 return pszBuf;
1217}
1218
1219
1220static const char *FormatR32(PCRTFLOAT32U pr32)
1221{
1222 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1223 RTStrFormatR32(pszBuf, sizeof(g_aszBuf[0]), pr32, 0, 0, RTSTR_F_SPECIAL);
1224 return pszBuf;
1225}
1226
1227
1228static const char *FormatD80(PCRTPBCD80U pd80)
1229{
1230 /* There is only one indefinite endcoding (same as for 80-bit
1231 floating point), so get it out of the way first: */
1232 if (RTPBCD80U_IS_INDEFINITE(pd80))
1233 return "Ind";
1234
1235 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1236 size_t off = 0;
1237 pszBuf[off++] = pd80->s.fSign ? '-' : '+';
1238 unsigned cBadDigits = 0;
1239 size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
1240 while (iPair-- > 0)
1241 {
1242 static const char s_szDigits[] = "0123456789abcdef";
1243 static const uint8_t s_bBadDigits[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 };
1244 pszBuf[off++] = s_szDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])];
1245 pszBuf[off++] = s_szDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1246 cBadDigits += s_bBadDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])]
1247 + s_bBadDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
1248 }
1249 if (cBadDigits || pd80->s.uPad != 0)
1250 off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, "[%u,%#x]", cBadDigits, pd80->s.uPad);
1251 pszBuf[off] = '\0';
1252 return pszBuf;
1253}
1254
1255
#if 0
/**
 * Formats a signed 64-bit integer as hex via RTStrFormatU64.
 *
 * Currently compiled out by the surrounding '#if 0'.
 *
 * @returns Pointer into the rotating g_aszBuf scratch buffers.
 * @param   piVal       Pointer to the value to format.
 */
static const char *FormatI64(int64_t const *piVal)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrFormatU64(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
    return pszBuf;
}
#endif
1264
1265
1266static const char *FormatI32(int32_t const *piVal)
1267{
1268 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1269 RTStrFormatU32(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1270 return pszBuf;
1271}
1272
1273
1274static const char *FormatI16(int16_t const *piVal)
1275{
1276 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1277 RTStrFormatU16(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1278 return pszBuf;
1279}
1280
1281
1282/*
1283 * Binary operations.
1284 */
/* Sub-test descriptor types for the 8/16/32/64-bit binary operations.  Each
   one pairs a descriptor type name with its test-record type and the worker
   function pointer type.  As used by the code below, a descriptor provides
   pszName, pfn, pfnNative, paTests, pcTests, idxCpuEflFlavour and uExtra.
   (TYPEDEF_SUBTEST_TYPE itself is defined elsewhere.) */
TYPEDEF_SUBTEST_TYPE(BINU8_T,  BINU8_TEST_T,  PFNIEMAIMPLBINU8);
TYPEDEF_SUBTEST_TYPE(BINU16_T, BINU16_TEST_T, PFNIEMAIMPLBINU16);
TYPEDEF_SUBTEST_TYPE(BINU32_T, BINU32_TEST_T, PFNIEMAIMPLBINU32);
TYPEDEF_SUBTEST_TYPE(BINU64_T, BINU64_TEST_T, PFNIEMAIMPLBINU64);
1289
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/*
 * Defines BinU<a_cBits>Generate(), which writes one test-data array per
 * g_aBinU<a_cBits> entry.
 *
 * Each generated row holds random EFLAGS, destination and source inputs
 * together with the outputs produced by calling the worker (pfnNative when
 * set, otherwise pfn).  Entries whose EFLAGS flavour is not the native one
 * are only generated when it equals g_idxCpuEflFlavour, and then go to
 * pOutCpu rather than pOut.  When the entry's uExtra is non-zero the source
 * operand is masked to a valid bit index for the operand width.
 *
 * Without TSTIEMAIMPL_WITH_GENERATOR the macro expands to nothing.
 */
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
static void BinU ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aBinU ## a_cBits); iFn++) \
    { \
        PFNIEMAIMPLBINU ## a_cBits const pfn = g_aBinU ## a_cBits[iFn].pfnNative \
                                             ? g_aBinU ## a_cBits[iFn].pfnNative : g_aBinU ## a_cBits[iFn].pfn; \
        PRTSTREAM pOutFn = pOut; \
        if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
        { \
            if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
                continue; \
            pOutFn = pOutCpu; \
        } \
        \
        GenerateArrayStart(pOutFn, g_aBinU ## a_cBits[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn = RandEFlags(); \
            Test.fEflOut = Test.fEflIn; \
            Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut = Test.uDstIn; \
            Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
            if (g_aBinU ## a_cBits[iFn].uExtra) \
                Test.uSrcIn &= a_cBits - 1; /* Restrict bit index according to operand width */ \
            Test.uMisc = 0; \
            pfn(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut); \
            RTStrmPrintf(pOutFn, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %#x }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOutFn, g_aBinU ## a_cBits[iFn].pszName); \
    } \
}
#else
# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType)
#endif
1328
/*
 * Instantiates the generator (via GEN_BINARY_TESTS) and defines
 * BinU<a_cBits>Test() for the sub-test table a_aSubTests.
 *
 * For every enabled sub-test, each recorded test is run through the worker
 * with local dst/eflags variables and the results are compared with the
 * recorded outputs.  On a match the test is repeated through the global
 * g_pu<a_cBits> and g_pfEfl pointers.  COUNT_VARIATIONS() gives the number of
 * passes; after each pass the worker is switched to pfnNative, and failures
 * from passes after the first are tagged with "/n".
 */
#define TEST_BINARY_OPS(a_cBits, a_uType, a_Fmt, a_TestType, a_aSubTests) \
GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
\
static void BinU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        PFNIEMAIMPLBINU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_uType uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uSrcIn, &fEfl); \
                if ( uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut) \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s - %s\n", \
                                 iTest, !iVar ? "" : "/n", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), \
                                 uDst == paTests[iTest].uDstOut ? "eflags" : fEfl == paTests[iTest].fEflOut ? "dst" : "both"); \
                else \
                { \
                    /* Repeat using the global operand and EFLAGS pointers. */ \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            /* Any further pass uses the native worker. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
1369
1370
1371/*
1372 * 8-bit binary operations.
1373 */
/* Sub-test table for the 8-bit binary operations, followed by the
   instantiation of its generator and BinU8Test().  (The ENTRY macro is
   defined elsewhere.) */
static const BINU8_T g_aBinU8[] =
{
    ENTRY(add_u8),
    ENTRY(add_u8_locked),
    ENTRY(adc_u8),
    ENTRY(adc_u8_locked),
    ENTRY(sub_u8),
    ENTRY(sub_u8_locked),
    ENTRY(sbb_u8),
    ENTRY(sbb_u8_locked),
    ENTRY(or_u8),
    ENTRY(or_u8_locked),
    ENTRY(xor_u8),
    ENTRY(xor_u8_locked),
    ENTRY(and_u8),
    ENTRY(and_u8_locked),
    ENTRY(cmp_u8),
    ENTRY(test_u8),
};
TEST_BINARY_OPS(8, uint8_t, "%#04x", BINU8_TEST_T, g_aBinU8)
1394
1395
1396/*
1397 * 16-bit binary operations.
1398 */
/* Sub-test table for the 16-bit binary operations, followed by the
   instantiation of its generator and BinU16Test().
   NOTE(review): the second ENTRY_EX argument is presumably the uExtra value
   that makes the generator mask the source to a bit index, and the second
   ENTRY_AMD/ENTRY_INTEL argument presumably an EFLAGS mask for the vendor
   specific variants - confirm against the ENTRY* macro definitions. */
static const BINU16_T g_aBinU16[] =
{
    ENTRY(add_u16),
    ENTRY(add_u16_locked),
    ENTRY(adc_u16),
    ENTRY(adc_u16_locked),
    ENTRY(sub_u16),
    ENTRY(sub_u16_locked),
    ENTRY(sbb_u16),
    ENTRY(sbb_u16_locked),
    ENTRY(or_u16),
    ENTRY(or_u16_locked),
    ENTRY(xor_u16),
    ENTRY(xor_u16_locked),
    ENTRY(and_u16),
    ENTRY(and_u16_locked),
    ENTRY(cmp_u16),
    ENTRY(test_u16),
    ENTRY_EX(bt_u16, 1),
    ENTRY_EX(btc_u16, 1),
    ENTRY_EX(btc_u16_locked, 1),
    ENTRY_EX(btr_u16, 1),
    ENTRY_EX(btr_u16_locked, 1),
    ENTRY_EX(bts_u16, 1),
    ENTRY_EX(bts_u16_locked, 1),
    ENTRY_AMD( bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD( bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD( imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_INTEL(imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY(arpl),
};
TEST_BINARY_OPS(16, uint16_t, "%#06x", BINU16_TEST_T, g_aBinU16)
1433
1434
1435/*
1436 * 32-bit binary operations.
1437 */
/* Sub-test table for the 32-bit binary operations, followed by the
   instantiation of its generator and BinU32Test().
   NOTE(review): the second ENTRY_EX argument is presumably the uExtra value
   that makes the generator mask the source to a bit index, and the second
   ENTRY_AMD/ENTRY_INTEL argument presumably an EFLAGS mask for the vendor
   specific variants - confirm against the ENTRY* macro definitions. */
static const BINU32_T g_aBinU32[] =
{
    ENTRY(add_u32),
    ENTRY(add_u32_locked),
    ENTRY(adc_u32),
    ENTRY(adc_u32_locked),
    ENTRY(sub_u32),
    ENTRY(sub_u32_locked),
    ENTRY(sbb_u32),
    ENTRY(sbb_u32_locked),
    ENTRY(or_u32),
    ENTRY(or_u32_locked),
    ENTRY(xor_u32),
    ENTRY(xor_u32_locked),
    ENTRY(and_u32),
    ENTRY(and_u32_locked),
    ENTRY(cmp_u32),
    ENTRY(test_u32),
    ENTRY_EX(bt_u32, 1),
    ENTRY_EX(btc_u32, 1),
    ENTRY_EX(btc_u32_locked, 1),
    ENTRY_EX(btr_u32, 1),
    ENTRY_EX(btr_u32_locked, 1),
    ENTRY_EX(bts_u32, 1),
    ENTRY_EX(bts_u32_locked, 1),
    ENTRY_AMD( bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD( bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD( imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_INTEL(imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
};
TEST_BINARY_OPS(32, uint32_t, "%#010RX32", BINU32_TEST_T, g_aBinU32)
1471
1472
1473/*
1474 * 64-bit binary operations.
1475 */
/* Sub-test table for the 64-bit binary operations, followed by the
   instantiation of its generator and BinU64Test().
   NOTE(review): the second ENTRY_EX argument is presumably the uExtra value
   that makes the generator mask the source to a bit index, and the second
   ENTRY_AMD/ENTRY_INTEL argument presumably an EFLAGS mask for the vendor
   specific variants - confirm against the ENTRY* macro definitions. */
static const BINU64_T g_aBinU64[] =
{
    ENTRY(add_u64),
    ENTRY(add_u64_locked),
    ENTRY(adc_u64),
    ENTRY(adc_u64_locked),
    ENTRY(sub_u64),
    ENTRY(sub_u64_locked),
    ENTRY(sbb_u64),
    ENTRY(sbb_u64_locked),
    ENTRY(or_u64),
    ENTRY(or_u64_locked),
    ENTRY(xor_u64),
    ENTRY(xor_u64_locked),
    ENTRY(and_u64),
    ENTRY(and_u64_locked),
    ENTRY(cmp_u64),
    ENTRY(test_u64),
    ENTRY_EX(bt_u64, 1),
    ENTRY_EX(btc_u64, 1),
    ENTRY_EX(btc_u64_locked, 1),
    ENTRY_EX(btr_u64, 1),
    ENTRY_EX(btr_u64_locked, 1),
    ENTRY_EX(bts_u64, 1),
    ENTRY_EX(bts_u64_locked, 1),
    ENTRY_AMD( bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD( bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_INTEL(bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
    ENTRY_AMD( imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
    ENTRY_INTEL(imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
};
TEST_BINARY_OPS(64, uint64_t, "%#018RX64", BINU64_TEST_T, g_aBinU64)
1509
1510
1511/*
1512 * XCHG
1513 */
1514static void XchgTest(void)
1515{
1516 if (!SubTestAndCheckIfEnabled("xchg"))
1517 return;
1518 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU8, (uint8_t *pu8Mem, uint8_t *pu8Reg));
1519 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU16,(uint16_t *pu16Mem, uint16_t *pu16Reg));
1520 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU32,(uint32_t *pu32Mem, uint32_t *pu32Reg));
1521 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU64,(uint64_t *pu64Mem, uint64_t *pu64Reg));
1522
1523 static struct
1524 {
1525 uint8_t cb; uint64_t fMask;
1526 union
1527 {
1528 uintptr_t pfn;
1529 FNIEMAIMPLXCHGU8 *pfnU8;
1530 FNIEMAIMPLXCHGU16 *pfnU16;
1531 FNIEMAIMPLXCHGU32 *pfnU32;
1532 FNIEMAIMPLXCHGU64 *pfnU64;
1533 } u;
1534 }
1535 s_aXchgWorkers[] =
1536 {
1537 { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_locked } },
1538 { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_locked } },
1539 { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_locked } },
1540 { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_locked } },
1541 { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_unlocked } },
1542 { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_unlocked } },
1543 { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_unlocked } },
1544 { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_unlocked } },
1545 };
1546 for (size_t i = 0; i < RT_ELEMENTS(s_aXchgWorkers); i++)
1547 {
1548 RTUINT64U uIn1, uIn2, uMem, uDst;
1549 uMem.u = uIn1.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
1550 uDst.u = uIn2.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
1551 if (uIn1.u == uIn2.u)
1552 uDst.u = uIn2.u = ~uIn2.u;
1553
1554 switch (s_aXchgWorkers[i].cb)
1555 {
1556 case 1:
1557 s_aXchgWorkers[i].u.pfnU8(g_pu8, g_pu8Two);
1558 s_aXchgWorkers[i].u.pfnU8(&uMem.au8[0], &uDst.au8[0]);
1559 break;
1560 case 2:
1561 s_aXchgWorkers[i].u.pfnU16(g_pu16, g_pu16Two);
1562 s_aXchgWorkers[i].u.pfnU16(&uMem.Words.w0, &uDst.Words.w0);
1563 break;
1564 case 4:
1565 s_aXchgWorkers[i].u.pfnU32(g_pu32, g_pu32Two);
1566 s_aXchgWorkers[i].u.pfnU32(&uMem.DWords.dw0, &uDst.DWords.dw0);
1567 break;
1568 case 8:
1569 s_aXchgWorkers[i].u.pfnU64(g_pu64, g_pu64Two);
1570 s_aXchgWorkers[i].u.pfnU64(&uMem.u, &uDst.u);
1571 break;
1572 default: RTTestFailed(g_hTest, "%d\n", s_aXchgWorkers[i].cb); break;
1573 }
1574
1575 if (uMem.u != uIn2.u || uDst.u != uIn1.u)
1576 RTTestFailed(g_hTest, "i=%u: %#RX64, %#RX64 -> %#RX64, %#RX64\n", i, uIn1.u, uIn2.u, uMem.u, uDst.u);
1577 }
1578}
1579
1580
1581/*
1582 * XADD
1583 */
1584static void XaddTest(void)
1585{
1586#define TEST_XADD(a_cBits, a_Type, a_Fmt) do { \
1587 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXADDU ## a_cBits, (a_Type *, a_Type *, uint32_t *)); \
1588 static struct \
1589 { \
1590 const char *pszName; \
1591 FNIEMAIMPLXADDU ## a_cBits *pfn; \
1592 BINU ## a_cBits ## _TEST_T const *paTests; \
1593 uint32_t const *pcTests; \
1594 } const s_aFuncs[] = \
1595 { \
1596 { "xadd_u" # a_cBits, iemAImpl_xadd_u ## a_cBits, \
1597 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1598 { "xadd_u" # a_cBits "8_locked", iemAImpl_xadd_u ## a_cBits ## _locked, \
1599 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1600 }; \
1601 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1602 { \
1603 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
1604 uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1605 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1606 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1607 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1608 { \
1609 uint32_t fEfl = paTests[iTest].fEflIn; \
1610 a_Type uSrc = paTests[iTest].uSrcIn; \
1611 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1612 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uSrc, &fEfl); \
1613 if ( fEfl != paTests[iTest].fEflOut \
1614 || *g_pu ## a_cBits != paTests[iTest].uDstOut \
1615 || uSrc != paTests[iTest].uDstIn) \
1616 RTTestFailed(g_hTest, "%s/#%u: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt " src=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1617 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
1618 fEfl, *g_pu ## a_cBits, uSrc, paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].uDstIn, \
1619 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1620 } \
1621 } \
1622 } while(0)
1623 TEST_XADD(8, uint8_t, "%#04x");
1624 TEST_XADD(16, uint16_t, "%#06x");
1625 TEST_XADD(32, uint32_t, "%#010RX32");
1626 TEST_XADD(64, uint64_t, "%#010RX64");
1627}
1628
1629
1630/*
1631 * CMPXCHG
1632 */
1633
1634static void CmpXchgTest(void)
1635{
1636#define TEST_CMPXCHG(a_cBits, a_Type, a_Fmt) do {\
1637 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHGU ## a_cBits, (a_Type *, a_Type *, a_Type, uint32_t *)); \
1638 static struct \
1639 { \
1640 const char *pszName; \
1641 FNIEMAIMPLCMPXCHGU ## a_cBits *pfn; \
1642 PFNIEMAIMPLBINU ## a_cBits pfnSub; \
1643 BINU ## a_cBits ## _TEST_T const *paTests; \
1644 uint32_t const *pcTests; \
1645 } const s_aFuncs[] = \
1646 { \
1647 { "cmpxchg_u" # a_cBits, iemAImpl_cmpxchg_u ## a_cBits, iemAImpl_sub_u ## a_cBits, \
1648 g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
1649 { "cmpxchg_u" # a_cBits "_locked", iemAImpl_cmpxchg_u ## a_cBits ## _locked, iemAImpl_sub_u ## a_cBits, \
1650 g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
1651 }; \
1652 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1653 { \
1654 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
1655 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1656 uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1657 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1658 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1659 { \
1660 /* as is (99% likely to be negative). */ \
1661 uint32_t fEfl = paTests[iTest].fEflIn; \
1662 a_Type const uNew = paTests[iTest].uSrcIn + 0x42; \
1663 a_Type uA = paTests[iTest].uDstIn; \
1664 *g_pu ## a_cBits = paTests[iTest].uSrcIn; \
1665 a_Type const uExpect = uA != paTests[iTest].uSrcIn ? paTests[iTest].uSrcIn : uNew; \
1666 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
1667 if ( fEfl != paTests[iTest].fEflOut \
1668 || *g_pu ## a_cBits != uExpect \
1669 || uA != paTests[iTest].uSrcIn) \
1670 RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1671 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uSrcIn, paTests[iTest].uDstIn, \
1672 uNew, fEfl, *g_pu ## a_cBits, uA, paTests[iTest].fEflOut, uExpect, paTests[iTest].uSrcIn, \
1673 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1674 /* positive */ \
1675 uint32_t fEflExpect = paTests[iTest].fEflIn; \
1676 uA = paTests[iTest].uDstIn; \
1677 s_aFuncs[iFn].pfnSub(&uA, uA, &fEflExpect); \
1678 fEfl = paTests[iTest].fEflIn; \
1679 uA = paTests[iTest].uDstIn; \
1680 *g_pu ## a_cBits = uA; \
1681 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
1682 if ( fEfl != fEflExpect \
1683 || *g_pu ## a_cBits != uNew \
1684 || uA != paTests[iTest].uDstIn) \
1685 RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1686 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uDstIn, \
1687 uNew, fEfl, *g_pu ## a_cBits, uA, fEflExpect, uNew, paTests[iTest].uDstIn, \
1688 EFlagsDiff(fEfl, fEflExpect)); \
1689 } \
1690 } \
1691 } while(0)
1692 TEST_CMPXCHG(8, uint8_t, "%#04RX8");
1693 TEST_CMPXCHG(16, uint16_t, "%#06x");
1694 TEST_CMPXCHG(32, uint32_t, "%#010RX32");
1695#if ARCH_BITS != 32 /* calling convension issue, skipping as it's an unsupported host */
1696 TEST_CMPXCHG(64, uint64_t, "%#010RX64");
1697#endif
1698}
1699
1700static void CmpXchg8bTest(void)
1701{
1702 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG8B,(uint64_t *, PRTUINT64U, PRTUINT64U, uint32_t *));
1703 static struct
1704 {
1705 const char *pszName;
1706 FNIEMAIMPLCMPXCHG8B *pfn;
1707 } const s_aFuncs[] =
1708 {
1709 { "cmpxchg8b", iemAImpl_cmpxchg8b },
1710 { "cmpxchg8b_locked", iemAImpl_cmpxchg8b_locked },
1711 };
1712 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
1713 {
1714 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
1715 continue;
1716 for (uint32_t iTest = 0; iTest < 4; iTest += 2)
1717 {
1718 uint64_t const uOldValue = RandU64();
1719 uint64_t const uNewValue = RandU64();
1720
1721 /* positive test. */
1722 RTUINT64U uA, uB;
1723 uB.u = uNewValue;
1724 uA.u = uOldValue;
1725 *g_pu64 = uOldValue;
1726 uint32_t fEflIn = RandEFlags();
1727 uint32_t fEfl = fEflIn;
1728 s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
1729 if ( fEfl != (fEflIn | X86_EFL_ZF)
1730 || *g_pu64 != uNewValue
1731 || uA.u != uOldValue)
1732 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
1733 iTest, fEflIn, uOldValue, uOldValue, uNewValue,
1734 fEfl, *g_pu64, uA.u,
1735 (fEflIn | X86_EFL_ZF), uNewValue, uOldValue, EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
1736 RTTEST_CHECK(g_hTest, uB.u == uNewValue);
1737
1738 /* negative */
1739 uint64_t const uExpect = ~uOldValue;
1740 *g_pu64 = uExpect;
1741 uA.u = uOldValue;
1742 uB.u = uNewValue;
1743 fEfl = fEflIn = RandEFlags();
1744 s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
1745 if ( fEfl != (fEflIn & ~X86_EFL_ZF)
1746 || *g_pu64 != uExpect
1747 || uA.u != uExpect)
1748 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
1749 iTest + 1, fEflIn, uExpect, uOldValue, uNewValue,
1750 fEfl, *g_pu64, uA.u,
1751 (fEflIn & ~X86_EFL_ZF), uExpect, uExpect, EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
1752 RTTEST_CHECK(g_hTest, uB.u == uNewValue);
1753 }
1754 }
1755}
1756
1757static void CmpXchg16bTest(void)
1758{
1759 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG16B,(PRTUINT128U, PRTUINT128U, PRTUINT128U, uint32_t *));
1760 static struct
1761 {
1762 const char *pszName;
1763 FNIEMAIMPLCMPXCHG16B *pfn;
1764 } const s_aFuncs[] =
1765 {
1766 { "cmpxchg16b", iemAImpl_cmpxchg16b },
1767 { "cmpxchg16b_locked", iemAImpl_cmpxchg16b_locked },
1768#if !defined(RT_ARCH_ARM64)
1769 { "cmpxchg16b_fallback", iemAImpl_cmpxchg16b_fallback },
1770#endif
1771 };
1772 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
1773 {
1774 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
1775 continue;
1776#if !defined(IEM_WITHOUT_ASSEMBLY) && defined(RT_ARCH_AMD64)
1777 if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_CX16))
1778 {
1779 RTTestSkipped(g_hTest, "no hardware cmpxchg16b");
1780 continue;
1781 }
1782#endif
1783 for (uint32_t iTest = 0; iTest < 4; iTest += 2)
1784 {
1785 RTUINT128U const uOldValue = RandU128();
1786 RTUINT128U const uNewValue = RandU128();
1787
1788 /* positive test. */
1789 RTUINT128U uA, uB;
1790 uB = uNewValue;
1791 uA = uOldValue;
1792 *g_pu128 = uOldValue;
1793 uint32_t fEflIn = RandEFlags();
1794 uint32_t fEfl = fEflIn;
1795 s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
1796 if ( fEfl != (fEflIn | X86_EFL_ZF)
1797 || g_pu128->s.Lo != uNewValue.s.Lo
1798 || g_pu128->s.Hi != uNewValue.s.Hi
1799 || uA.s.Lo != uOldValue.s.Lo
1800 || uA.s.Hi != uOldValue.s.Hi)
1801 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
1802 " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
1803 " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
1804 iTest, fEflIn, uOldValue.s.Hi, uOldValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
1805 fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
1806 (fEflIn | X86_EFL_ZF), uNewValue.s.Hi, uNewValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo,
1807 EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
1808 RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
1809
1810 /* negative */
1811 RTUINT128U const uExpect = RTUINT128_INIT(~uOldValue.s.Hi, ~uOldValue.s.Lo);
1812 *g_pu128 = uExpect;
1813 uA = uOldValue;
1814 uB = uNewValue;
1815 fEfl = fEflIn = RandEFlags();
1816 s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
1817 if ( fEfl != (fEflIn & ~X86_EFL_ZF)
1818 || g_pu128->s.Lo != uExpect.s.Lo
1819 || g_pu128->s.Hi != uExpect.s.Hi
1820 || uA.s.Lo != uExpect.s.Lo
1821 || uA.s.Hi != uExpect.s.Hi)
1822 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
1823 " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
1824 " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
1825 iTest + 1, fEflIn, uExpect.s.Hi, uExpect.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
1826 fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
1827 (fEflIn & ~X86_EFL_ZF), uExpect.s.Hi, uExpect.s.Lo, uExpect.s.Hi, uExpect.s.Lo,
1828 EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
1829 RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
1830 }
1831 }
1832}
1833
1834
1835/*
1836 * Double shifts.
1837 *
1838 * Note! We use BINUxx_TEST_T with the shift value in the uMisc field.
1839 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/*
 * Defines ShiftDblU<a_cBits>Generate(), which writes one test-data array per
 * a_aSubTests entry whose EFLAGS flavour is either native or equal to
 * g_idxCpuEflFlavour.
 *
 * Each generated row holds random EFLAGS, destination and source inputs, a
 * random shift count in uMisc (masked to a_cBits * 4 - 1, i.e. deliberately
 * allowed to exceed the operand width), and the outputs produced by the
 * pfnNative worker.
 *
 * NOTE(review): unlike BinU<N>Generate this function is not declared static -
 * confirm whether that is intentional.
 *
 * Without TSTIEMAIMPL_WITH_GENERATOR the macro expands to nothing.
 */
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
void ShiftDblU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn = RandEFlags(); \
            Test.fEflOut = Test.fEflIn; \
            Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut = Test.uDstIn; \
            Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
            Test.uMisc = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %2u }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
1869
1870#define TEST_SHIFT_DBL(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
1871TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTDBLU ## a_cBits); \
1872\
1873static a_SubTestType const a_aSubTests[] = \
1874{ \
1875 ENTRY_AMD(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
1876 ENTRY_INTEL(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
1877 ENTRY_AMD(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
1878 ENTRY_INTEL(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
1879}; \
1880\
1881GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
1882\
1883static void ShiftDblU ## a_cBits ## Test(void) \
1884{ \
1885 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
1886 { \
1887 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
1888 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
1889 PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
1890 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
1891 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
1892 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1893 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
1894 { \
1895 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1896 { \
1897 uint32_t fEfl = paTests[iTest].fEflIn; \
1898 a_Type uDst = paTests[iTest].uDstIn; \
1899 pfn(&uDst, paTests[iTest].uSrcIn, paTests[iTest].uMisc, &fEfl); \
1900 if ( uDst != paTests[iTest].uDstOut \
1901 || fEfl != paTests[iTest].fEflOut) \
1902 RTTestFailed(g_hTest, "#%03u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " shift=%-2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s%s\n", \
1903 iTest, iVar == 0 ? "" : "/n", paTests[iTest].fEflIn, \
1904 paTests[iTest].uDstIn, paTests[iTest].uSrcIn, (unsigned)paTests[iTest].uMisc, \
1905 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
1906 EFlagsDiff(fEfl, paTests[iTest].fEflOut), uDst == paTests[iTest].uDstOut ? "" : " dst!"); \
1907 else \
1908 { \
1909 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1910 *g_pfEfl = paTests[iTest].fEflIn; \
1911 pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, paTests[iTest].uMisc, g_pfEfl); \
1912 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
1913 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
1914 } \
1915 } \
1916 pfn = a_aSubTests[iFn].pfnNative; \
1917 } \
1918 } \
1919}
1920TEST_SHIFT_DBL(16, uint16_t, "%#06RX16", BINU16_TEST_T, SHIFT_DBL_U16_T, g_aShiftDblU16)
1921TEST_SHIFT_DBL(32, uint32_t, "%#010RX32", BINU32_TEST_T, SHIFT_DBL_U32_T, g_aShiftDblU32)
1922TEST_SHIFT_DBL(64, uint64_t, "%#018RX64", BINU64_TEST_T, SHIFT_DBL_U64_T, g_aShiftDblU64)
1923
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes the double shift test data for all operand widths (16, 32, 64).
 *
 * @param   pOut    The output stream handed on to the per-width generators.
 * @param   cTests  Number of tests handed on to the per-width generators.
 */
static void ShiftDblGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    typedef void FNGENERATE(PRTSTREAM, uint32_t);
    static FNGENERATE * const s_apfnGenerators[] =
    {
        ShiftDblU16Generate,
        ShiftDblU32Generate,
        ShiftDblU64Generate,
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_apfnGenerators); i++)
        s_apfnGenerators[i](pOut, cTests);
}
#endif
1932
1933static void ShiftDblTest(void)
1934{
1935 ShiftDblU16Test();
1936 ShiftDblU32Test();
1937 ShiftDblU64Test();
1938}
1939
1940
1941/*
1942 * Unary operators.
1943 *
1944 * Note! We use BINUxx_TEST_T, ignoring uSrcIn and uMisc.
1945 */
/**
 * Generator for the unary operator test data.
 *
 * With TSTIEMAIMPL_WITH_GENERATOR defined, GEN_UNARY expands to
 * UnaryU<a_cBits>Generate(), which writes one array of a_TestType
 * initializers per g_aUnaryU<a_cBits> entry.  The expected outputs come from
 * the entry's pfn worker; the uSrcIn and uMisc fields are always zero.
 * Without the generator the macro expands to nothing.
 *
 * @param   a_cBits         Operand width in bits.
 * @param   a_Type          Unused by the generator.
 * @param   a_Fmt           Format string fragment for operand values.
 * @param   a_TestType      Test record type (also stringified).
 * @param   a_SubTestType   Unused by the generator.
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
void UnaryU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t idxSub = 0; idxSub < RT_ELEMENTS(g_aUnaryU ## a_cBits); idxSub++) \
    { \
        GenerateArrayStart(pOut, g_aUnaryU ## a_cBits[idxSub].pszName, #a_TestType); \
        for (uint32_t iEntry = 0; iEntry < cTests; iEntry++) \
        { \
            a_TestType Entry; \
            Entry.fEflIn  = RandEFlags(); \
            Entry.fEflOut = Entry.fEflIn; \
            Entry.uDstIn  = RandU ## a_cBits(); \
            Entry.uDstOut = Entry.uDstIn; \
            Entry.uSrcIn  = 0; \
            Entry.uMisc   = 0; \
            g_aUnaryU ## a_cBits[idxSub].pfn(&Entry.uDstOut, &Entry.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, 0 }, /* #%u */\n", \
                         Entry.fEflIn, Entry.fEflOut, Entry.uDstIn, Entry.uDstOut, iEntry); \
        } \
        GenerateArrayEnd(pOut, g_aUnaryU ## a_cBits[idxSub].pszName); \
    } \
}
#else
# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType)
#endif
1972
1973#define TEST_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
1974TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLUNARYU ## a_cBits); \
1975static a_SubTestType const g_aUnaryU ## a_cBits [] = \
1976{ \
1977 ENTRY(inc_u ## a_cBits), \
1978 ENTRY(inc_u ## a_cBits ## _locked), \
1979 ENTRY(dec_u ## a_cBits), \
1980 ENTRY(dec_u ## a_cBits ## _locked), \
1981 ENTRY(not_u ## a_cBits), \
1982 ENTRY(not_u ## a_cBits ## _locked), \
1983 ENTRY(neg_u ## a_cBits), \
1984 ENTRY(neg_u ## a_cBits ## _locked), \
1985}; \
1986\
1987GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
1988\
1989static void UnaryU ## a_cBits ## Test(void) \
1990{ \
1991 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
1992 { \
1993 if (!SubTestAndCheckIfEnabled(g_aUnaryU ## a_cBits[iFn].pszName)) continue; \
1994 a_TestType const * const paTests = g_aUnaryU ## a_cBits[iFn].paTests; \
1995 uint32_t const cTests = *g_aUnaryU ## a_cBits[iFn].pcTests; \
1996 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1997 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1998 { \
1999 uint32_t fEfl = paTests[iTest].fEflIn; \
2000 a_Type uDst = paTests[iTest].uDstIn; \
2001 g_aUnaryU ## a_cBits[iFn].pfn(&uDst, &fEfl); \
2002 if ( uDst != paTests[iTest].uDstOut \
2003 || fEfl != paTests[iTest].fEflOut) \
2004 RTTestFailed(g_hTest, "#%u: efl=%#08x dst=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
2005 iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, \
2006 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2007 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2008 else \
2009 { \
2010 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2011 *g_pfEfl = paTests[iTest].fEflIn; \
2012 g_aUnaryU ## a_cBits[iFn].pfn(g_pu ## a_cBits, g_pfEfl); \
2013 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2014 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2015 } \
2016 } \
2017 } \
2018}
2019TEST_UNARY(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_UNARY_U8_T)
2020TEST_UNARY(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_UNARY_U16_T)
2021TEST_UNARY(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_UNARY_U32_T)
2022TEST_UNARY(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_UNARY_U64_T)
2023
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes the unary operator test data for all operand widths (8 to 64 bits).
 *
 * @param   pOut    The output stream handed on to the per-width generators.
 * @param   cTests  Number of tests handed on to the per-width generators.
 */
static void UnaryGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    typedef void FNGENERATE(PRTSTREAM, uint32_t);
    static FNGENERATE * const s_apfnGenerators[] =
    {
        UnaryU8Generate,
        UnaryU16Generate,
        UnaryU32Generate,
        UnaryU64Generate,
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_apfnGenerators); i++)
        s_apfnGenerators[i](pOut, cTests);
}
#endif
2033
2034static void UnaryTest(void)
2035{
2036 UnaryU8Test();
2037 UnaryU16Test();
2038 UnaryU32Test();
2039 UnaryU64Test();
2040}
2041
2042
2043/*
2044 * Shifts.
2045 *
2046 * Note! We use BINUxx_TEST_T with the shift count in uMisc and uSrcIn unused.
2047 */
/**
 * Generator for the shift and rotate test data.
 *
 * With TSTIEMAIMPL_WITH_GENERATOR defined, GEN_SHIFT expands to
 * ShiftU<a_cBits>Generate(), which writes one array of a_TestType
 * initializers to pOut per a_aSubTests entry.  Entries whose EFLAGS flavour
 * is neither IEMTARGETCPU_EFL_BEHAVIOR_NATIVE nor g_idxCpuEflFlavour are
 * skipped.  Every iteration emits two records with the same destination
 * input and shift count: one with random EFLAGS input and a second ("b")
 * with the live EFLAGS bits inverted.  Expected outputs are produced by the
 * entry's pfnNative worker.  Without the generator the macro expands to
 * nothing.
 *
 * @param   a_cBits     Operand width in bits; selects RandU<bits>Dst.
 * @param   a_Fmt       Format string fragment for operand values.
 * @param   a_TestType  Test record type (also stringified as the array type).
 * @param   a_aSubTests The sub-test table to walk.
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
void ShiftU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (   a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            a_TestType Test; \
            Test.fEflIn  = RandEFlags(); \
            Test.fEflOut = Test.fEflIn; \
            Test.uDstIn  = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut = Test.uDstIn; \
            Test.uSrcIn  = 0; \
            Test.uMisc   = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
            /* The shift count is printed with %-2u; cast it so the vararg matches \
               the format whatever width the uMisc field has. */ \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, (unsigned)Test.uMisc, iTest); \
            \
            /* Second record: same operands, live EFLAGS bits inverted. */ \
            Test.fEflIn  = (~Test.fEflIn & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK; \
            Test.fEflOut = Test.fEflIn; \
            Test.uDstOut = Test.uDstIn; \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u b */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, (unsigned)Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
2084
2085#define TEST_SHIFT(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2086TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTU ## a_cBits); \
2087static a_SubTestType const a_aSubTests[] = \
2088{ \
2089 ENTRY_AMD( rol_u ## a_cBits, X86_EFL_OF), \
2090 ENTRY_INTEL(rol_u ## a_cBits, X86_EFL_OF), \
2091 ENTRY_AMD( ror_u ## a_cBits, X86_EFL_OF), \
2092 ENTRY_INTEL(ror_u ## a_cBits, X86_EFL_OF), \
2093 ENTRY_AMD( rcl_u ## a_cBits, X86_EFL_OF), \
2094 ENTRY_INTEL(rcl_u ## a_cBits, X86_EFL_OF), \
2095 ENTRY_AMD( rcr_u ## a_cBits, X86_EFL_OF), \
2096 ENTRY_INTEL(rcr_u ## a_cBits, X86_EFL_OF), \
2097 ENTRY_AMD( shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2098 ENTRY_INTEL(shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2099 ENTRY_AMD( shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2100 ENTRY_INTEL(shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2101 ENTRY_AMD( sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2102 ENTRY_INTEL(sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
2103}; \
2104\
2105GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2106\
2107static void ShiftU ## a_cBits ## Test(void) \
2108{ \
2109 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2110 { \
2111 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2112 PFNIEMAIMPLSHIFTU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2113 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2114 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2115 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2116 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2117 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2118 { \
2119 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2120 { \
2121 uint32_t fEfl = paTests[iTest].fEflIn; \
2122 a_Type uDst = paTests[iTest].uDstIn; \
2123 pfn(&uDst, paTests[iTest].uMisc, &fEfl); \
2124 if ( uDst != paTests[iTest].uDstOut \
2125 || fEfl != paTests[iTest].fEflOut ) \
2126 RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " shift=%2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
2127 iTest, iVar == 0 ? "" : "/n", \
2128 paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uMisc, \
2129 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
2130 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
2131 else \
2132 { \
2133 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
2134 *g_pfEfl = paTests[iTest].fEflIn; \
2135 pfn(g_pu ## a_cBits, paTests[iTest].uMisc, g_pfEfl); \
2136 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
2137 RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
2138 } \
2139 } \
2140 pfn = a_aSubTests[iFn].pfnNative; \
2141 } \
2142 } \
2143}
2144TEST_SHIFT(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_BINARY_U8_T, g_aShiftU8)
2145TEST_SHIFT(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_BINARY_U16_T, g_aShiftU16)
2146TEST_SHIFT(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_BINARY_U32_T, g_aShiftU32)
2147TEST_SHIFT(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_BINARY_U64_T, g_aShiftU64)
2148
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes the shift/rotate test data for all operand widths (8 to 64 bits).
 *
 * @param   pOut    The output stream handed on to the per-width generators.
 * @param   cTests  Number of tests handed on to the per-width generators.
 */
static void ShiftGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    typedef void FNGENERATE(PRTSTREAM, uint32_t);
    static FNGENERATE * const s_apfnGenerators[] =
    {
        ShiftU8Generate,
        ShiftU16Generate,
        ShiftU32Generate,
        ShiftU64Generate,
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_apfnGenerators); i++)
        s_apfnGenerators[i](pOut, cTests);
}
#endif
2158
2159static void ShiftTest(void)
2160{
2161 ShiftU8Test();
2162 ShiftU16Test();
2163 ShiftU32Test();
2164 ShiftU64Test();
2165}
2166
2167
2168/*
2169 * Multiplication and division.
2170 *
2171 * Note! The 8-bit functions have a different format, so we need to duplicate things.
2172 * Note! Currently ignoring undefined bits.
2173 */
2174
2175/* U8 */
2176TYPEDEF_SUBTEST_TYPE(INT_MULDIV_U8_T, MULDIVU8_TEST_T, PFNIEMAIMPLMULDIVU8);
2177static INT_MULDIV_U8_T const g_aMulDivU8[] =
2178{
2179 ENTRY_AMD_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
2180 X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
2181 ENTRY_INTEL_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
2182 ENTRY_AMD_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
2183 X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
2184 ENTRY_INTEL_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
2185 ENTRY_AMD_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2186 ENTRY_INTEL_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2187 ENTRY_AMD_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2188 ENTRY_INTEL_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
2189};
2190
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes the 8-bit multiplication and division test data.
 *
 * One MULDIVU8_TEST_T array is written per g_aMulDivU8 entry whose EFLAGS
 * flavour is either IEMTARGETCPU_EFL_BEHAVIOR_NATIVE or g_idxCpuEflFlavour.
 * Expected outputs, including the status code, come from the entry's
 * pfnNative worker.
 *
 * @param   pOut    The output stream.
 * @param   cTests  Number of test records to write per sub-test.
 */
static void MulDivU8Generate(PRTSTREAM pOut, uint32_t cTests)
{
    for (size_t idxSub = 0; idxSub < RT_ELEMENTS(g_aMulDivU8); idxSub++)
    {
        bool const fWanted = g_aMulDivU8[idxSub].idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
                          || g_aMulDivU8[idxSub].idxCpuEflFlavour == g_idxCpuEflFlavour;
        if (fWanted)
        {
            GenerateArrayStart(pOut, g_aMulDivU8[idxSub].pszName, "MULDIVU8_TEST_T");
            for (uint32_t iEntry = 0; iEntry < cTests; iEntry++)
            {
                MULDIVU8_TEST_T Entry;
                Entry.fEflIn  = RandEFlags();
                Entry.fEflOut = Entry.fEflIn;
                Entry.uDstIn  = RandU16Dst(iEntry);
                Entry.uDstOut = Entry.uDstIn;
                Entry.uSrcIn  = RandU8Src(iEntry);
                Entry.rc      = g_aMulDivU8[idxSub].pfnNative(&Entry.uDstOut, Entry.uSrcIn, &Entry.fEflOut);
                RTStrmPrintf(pOut, " { %#08x, %#08x, %#06RX16, %#06RX16, %#04RX8, %d }, /* #%u */\n",
                             Entry.fEflIn, Entry.fEflOut, Entry.uDstIn, Entry.uDstOut, Entry.uSrcIn, Entry.rc, iEntry);
            }
            GenerateArrayEnd(pOut, g_aMulDivU8[idxSub].pszName);
        }
    }
}
#endif
2216
2217static void MulDivU8Test(void)
2218{
2219 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2220 {
2221 if (!SubTestAndCheckIfEnabled(g_aMulDivU8[iFn].pszName)) continue; \
2222 MULDIVU8_TEST_T const * const paTests = g_aMulDivU8[iFn].paTests;
2223 uint32_t const cTests = *g_aMulDivU8[iFn].pcTests;
2224 uint32_t const fEflIgn = g_aMulDivU8[iFn].uExtra;
2225 PFNIEMAIMPLMULDIVU8 pfn = g_aMulDivU8[iFn].pfn;
2226 uint32_t const cVars = COUNT_VARIATIONS(g_aMulDivU8[iFn]); \
2227 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2228 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2229 {
2230 for (uint32_t iTest = 0; iTest < cTests; iTest++ )
2231 {
2232 uint32_t fEfl = paTests[iTest].fEflIn;
2233 uint16_t uDst = paTests[iTest].uDstIn;
2234 int rc = g_aMulDivU8[iFn].pfn(&uDst, paTests[iTest].uSrcIn, &fEfl);
2235 if ( uDst != paTests[iTest].uDstOut
2236 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)
2237 || rc != paTests[iTest].rc)
2238 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst=%#06RX16 src=%#04RX8\n"
2239 " %s-> efl=%#08x dst=%#06RX16 rc=%d\n"
2240 "%sexpected %#08x %#06RX16 %d%s\n",
2241 iTest, iVar ? "/n" : "", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn,
2242 iVar ? " " : "", fEfl, uDst, rc,
2243 iVar ? " " : "", paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].rc,
2244 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn));
2245 else
2246 {
2247 *g_pu16 = paTests[iTest].uDstIn;
2248 *g_pfEfl = paTests[iTest].fEflIn;
2249 rc = g_aMulDivU8[iFn].pfn(g_pu16, paTests[iTest].uSrcIn, g_pfEfl);
2250 RTTEST_CHECK(g_hTest, *g_pu16 == paTests[iTest].uDstOut);
2251 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn));
2252 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc);
2253 }
2254 }
2255 pfn = g_aMulDivU8[iFn].pfnNative;
2256 }
2257 }
2258}
2259
/**
 * Generator for the 16/32/64-bit multiplication and division test data.
 *
 * With TSTIEMAIMPL_WITH_GENERATOR defined, GEN_MULDIV expands to
 * MulDivU<a_cBits>Generate(), which writes one array of a_TestType
 * initializers to pOut per a_aSubTests entry whose EFLAGS flavour is either
 * IEMTARGETCPU_EFL_BEHAVIOR_NATIVE or g_idxCpuEflFlavour.  Expected outputs,
 * including the status code, come from the entry's pfnNative worker.
 * Without the generator the macro expands to nothing.
 *
 * @param   a_cBits     Operand width in bits; selects RandU<bits>Dst/Src.
 * @param   a_Fmt       Format string fragment for operand values.
 * @param   a_TestType  Test record type (also stringified as the array type).
 * @param   a_aSubTests The sub-test table to walk.
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
void MulDivU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t idxSub = 0; idxSub < RT_ELEMENTS(a_aSubTests); idxSub++) \
    { \
        bool const fWanted = a_aSubTests[idxSub].idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
                          || a_aSubTests[idxSub].idxCpuEflFlavour == g_idxCpuEflFlavour; \
        if (!fWanted) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[idxSub].pszName, #a_TestType); \
        for (uint32_t iEntry = 0; iEntry < cTests; iEntry++) \
        { \
            a_TestType Entry; \
            Entry.fEflIn   = RandEFlags(); \
            Entry.fEflOut  = Entry.fEflIn; \
            Entry.uDst1In  = RandU ## a_cBits ## Dst(iEntry); \
            Entry.uDst1Out = Entry.uDst1In; \
            Entry.uDst2In  = RandU ## a_cBits ## Dst(iEntry); \
            Entry.uDst2Out = Entry.uDst2In; \
            Entry.uSrcIn   = RandU ## a_cBits ## Src(iEntry); \
            Entry.rc       = a_aSubTests[idxSub].pfnNative(&Entry.uDst1Out, &Entry.uDst2Out, Entry.uSrcIn, &Entry.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", %d }, /* #%u */\n", \
                         Entry.fEflIn, Entry.fEflOut, Entry.uDst1In, Entry.uDst1Out, Entry.uDst2In, Entry.uDst2Out, \
                         Entry.uSrcIn, Entry.rc, iEntry); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[idxSub].pszName); \
    } \
}
#else
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
2291
2292#define TEST_MULDIV(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
2293TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLMULDIVU ## a_cBits); \
2294static a_SubTestType const a_aSubTests [] = \
2295{ \
2296 ENTRY_AMD_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2297 ENTRY_INTEL_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2298 ENTRY_AMD_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2299 ENTRY_INTEL_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
2300 ENTRY_AMD_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2301 ENTRY_INTEL_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2302 ENTRY_AMD_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2303 ENTRY_INTEL_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
2304}; \
2305\
2306GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
2307\
2308static void MulDivU ## a_cBits ## Test(void) \
2309{ \
2310 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2311 { \
2312 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2313 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2314 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2315 uint32_t const fEflIgn = a_aSubTests[iFn].uExtra; \
2316 PFNIEMAIMPLMULDIVU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2317 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2318 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2319 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2320 { \
2321 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
2322 { \
2323 uint32_t fEfl = paTests[iTest].fEflIn; \
2324 a_Type uDst1 = paTests[iTest].uDst1In; \
2325 a_Type uDst2 = paTests[iTest].uDst2In; \
2326 int rc = pfn(&uDst1, &uDst2, paTests[iTest].uSrcIn, &fEfl); \
2327 if ( uDst1 != paTests[iTest].uDst1Out \
2328 || uDst2 != paTests[iTest].uDst2Out \
2329 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)\
2330 || rc != paTests[iTest].rc) \
2331 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " src=" a_Fmt "\n" \
2332 " -> efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " rc=%d\n" \
2333 "expected %#08x " a_Fmt " " a_Fmt " %d%s -%s%s%s\n", \
2334 iTest, iVar == 0 ? "" : "/n", \
2335 paTests[iTest].fEflIn, paTests[iTest].uDst1In, paTests[iTest].uDst2In, paTests[iTest].uSrcIn, \
2336 fEfl, uDst1, uDst2, rc, \
2337 paTests[iTest].fEflOut, paTests[iTest].uDst1Out, paTests[iTest].uDst2Out, paTests[iTest].rc, \
2338 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn), \
2339 uDst1 != paTests[iTest].uDst1Out ? " dst1" : "", uDst2 != paTests[iTest].uDst2Out ? " dst2" : "", \
2340 (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn) ? " eflags" : ""); \
2341 else \
2342 { \
2343 *g_pu ## a_cBits = paTests[iTest].uDst1In; \
2344 *g_pu ## a_cBits ## Two = paTests[iTest].uDst2In; \
2345 *g_pfEfl = paTests[iTest].fEflIn; \
2346 rc = pfn(g_pu ## a_cBits, g_pu ## a_cBits ## Two, paTests[iTest].uSrcIn, g_pfEfl); \
2347 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDst1Out); \
2348 RTTEST_CHECK(g_hTest, *g_pu ## a_cBits ## Two == paTests[iTest].uDst2Out); \
2349 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn)); \
2350 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc); \
2351 } \
2352 } \
2353 pfn = a_aSubTests[iFn].pfnNative; \
2354 } \
2355 } \
2356}
2357TEST_MULDIV(16, uint16_t, "%#06RX16", MULDIVU16_TEST_T, INT_MULDIV_U16_T, g_aMulDivU16)
2358TEST_MULDIV(32, uint32_t, "%#010RX32", MULDIVU32_TEST_T, INT_MULDIV_U32_T, g_aMulDivU32)
2359TEST_MULDIV(64, uint64_t, "%#018RX64", MULDIVU64_TEST_T, INT_MULDIV_U64_T, g_aMulDivU64)
2360
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes the multiplication/division test data for all operand widths
 * (8 to 64 bits).
 *
 * @param   pOut    The output stream handed on to the per-width generators.
 * @param   cTests  Number of tests handed on to the per-width generators.
 */
static void MulDivGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    typedef void FNGENERATE(PRTSTREAM, uint32_t);
    static FNGENERATE * const s_apfnGenerators[] =
    {
        MulDivU8Generate,
        MulDivU16Generate,
        MulDivU32Generate,
        MulDivU64Generate,
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_apfnGenerators); i++)
        s_apfnGenerators[i](pOut, cTests);
}
#endif
2370
2371static void MulDivTest(void)
2372{
2373 MulDivU8Test();
2374 MulDivU16Test();
2375 MulDivU32Test();
2376 MulDivU64Test();
2377}
2378
2379
2380/*
2381 * BSWAP
2382 */
2383static void BswapTest(void)
2384{
2385 if (SubTestAndCheckIfEnabled("bswap_u16"))
2386 {
2387 *g_pu32 = UINT32_C(0x12345678);
2388 iemAImpl_bswap_u16(g_pu32);
2389#if 0
2390 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12347856), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2391#else
2392 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12340000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2393#endif
2394 *g_pu32 = UINT32_C(0xffff1122);
2395 iemAImpl_bswap_u16(g_pu32);
2396#if 0
2397 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff2211), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2398#else
2399 RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff0000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
2400#endif
2401 }
2402
2403 if (SubTestAndCheckIfEnabled("bswap_u32"))
2404 {
2405 *g_pu32 = UINT32_C(0x12345678);
2406 iemAImpl_bswap_u32(g_pu32);
2407 RTTEST_CHECK(g_hTest, *g_pu32 == UINT32_C(0x78563412));
2408 }
2409
2410 if (SubTestAndCheckIfEnabled("bswap_u64"))
2411 {
2412 *g_pu64 = UINT64_C(0x0123456789abcdef);
2413 iemAImpl_bswap_u64(g_pu64);
2414 RTTEST_CHECK(g_hTest, *g_pu64 == UINT64_C(0xefcdab8967452301));
2415 }
2416}
2417
2418
2419
2420/*********************************************************************************************************************************
2421* Floating point (x87 style) *
2422*********************************************************************************************************************************/
2423
2424/*
2425 * FPU constant loading.
2426 */
2427TYPEDEF_SUBTEST_TYPE(FPU_LD_CONST_T, FPU_LD_CONST_TEST_T, PFNIEMAIMPLFPUR80LDCONST);
2428
2429static const FPU_LD_CONST_T g_aFpuLdConst[] =
2430{
2431 ENTRY(fld1),
2432 ENTRY(fldl2t),
2433 ENTRY(fldl2e),
2434 ENTRY(fldpi),
2435 ENTRY(fldlg2),
2436 ENTRY(fldln2),
2437 ENTRY(fldz),
2438};
2439
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes the FPU constant loading test data.
 *
 * For each g_aFpuLdConst entry, random FCW/FSW values are drawn once per
 * group of four records, and the worker is then run with each of the four
 * rounding control settings, emitting one record per setting.
 *
 * @param   pOut    The output stream.
 * @param   cTests  Number of test records; the loop advances four at a time.
 */
static void FpuLdConstGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    X86FXSTATE FxState;
    RT_ZERO(FxState);
    for (size_t idxSub = 0; idxSub < RT_ELEMENTS(g_aFpuLdConst); idxSub++)
    {
        GenerateArrayStart(pOut, g_aFpuLdConst[idxSub].pszName, "FPU_LD_CONST_TEST_T");
        uint32_t iBase = 0;
        while (iBase < cTests)
        {
            FxState.FCW = RandFcw();
            FxState.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                IEMFPURESULT Result = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                FxState.FCW = (FxState.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
                g_aFpuLdConst[idxSub].pfn(&FxState, &Result);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s }, /* #%u */\n",
                             FxState.FCW, FxState.FSW, Result.FSW, GenFormatR80(&Result.r80Result), iBase + iRounding);
            }

            iBase += 4;
        }
        GenerateArrayEnd(pOut, g_aFpuLdConst[idxSub].pszName);
    }
}
#endif
2466
2467static void FpuLoadConstTest(void)
2468{
2469 /*
2470 * Inputs:
2471 * - FSW: C0, C1, C2, C3
2472 * - FCW: Exception masks, Precision control, Rounding control.
2473 *
2474 * C1 set to 1 on stack overflow, zero otherwise. C0, C2, and C3 are "undefined".
2475 */
2476 X86FXSTATE State;
2477 RT_ZERO(State);
2478 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2479 {
2480 if (!SubTestAndCheckIfEnabled(g_aFpuLdConst[iFn].pszName))
2481 continue;
2482
2483 uint32_t const cTests = *g_aFpuLdConst[iFn].pcTests;
2484 FPU_LD_CONST_TEST_T const *paTests = g_aFpuLdConst[iFn].paTests;
2485 PFNIEMAIMPLFPUR80LDCONST pfn = g_aFpuLdConst[iFn].pfn;
2486 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdConst[iFn]); \
2487 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2488 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2489 {
2490 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2491 {
2492 State.FCW = paTests[iTest].fFcw;
2493 State.FSW = paTests[iTest].fFswIn;
2494 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2495 pfn(&State, &Res);
2496 if ( Res.FSW != paTests[iTest].fFswOut
2497 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2498 RTTestFailed(g_hTest, "#%u%s: fcw=%#06x fsw=%#06x -> fsw=%#06x %s, expected %#06x %s%s%s (%s)\n",
2499 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2500 Res.FSW, FormatR80(&Res.r80Result),
2501 paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2502 FswDiff(Res.FSW, paTests[iTest].fFswOut),
2503 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2504 FormatFcw(paTests[iTest].fFcw) );
2505 }
2506 pfn = g_aFpuLdConst[iFn].pfnNative;
2507 }
2508 }
2509}
2510
2511
2512/*
2513 * Load floating point values from memory.
2514 */
/**
 * Generator for the "load floating point value from memory" test data.
 *
 * With TSTIEMAIMPL_WITH_GENERATOR defined, GEN_FPU_LOAD expands to
 * FpuLdR<a_cBits>Generate().  For each a_aSubTests entry and test index it
 * draws random FCW/FSW values and an input value, then runs the entry's pfn
 * worker with each of the four rounding control settings, emitting one
 * record per setting.  Without the generator the macro expands to nothing.
 *
 * @param   a_cBits     Width of the input value in bits.
 * @param   a_rdTypeIn  Type of the input value.
 * @param   a_aSubTests The sub-test table to walk.
 * @param   a_TestType  Test record type (stringified as the array type).
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
static void FpuLdR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE FxState; \
    RT_ZERO(FxState); \
    for (size_t idxSub = 0; idxSub < RT_ELEMENTS(a_aSubTests); idxSub++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[idxSub].pszName, #a_TestType); \
        for (uint32_t iEntry = 0; iEntry < cTests; iEntry++) \
        { \
            FxState.FCW = RandFcw(); \
            FxState.FSW = RandFsw(); \
            a_rdTypeIn Input = RandR ## a_cBits ## Src(iEntry); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                IEMFPURESULT Result = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                FxState.FCW = (FxState.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
                a_aSubTests[idxSub].pfn(&FxState, &Result, &Input); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n", \
                             FxState.FCW, FxState.FSW, Result.FSW, GenFormatR80(&Result.r80Result), \
                             GenFormatR ## a_cBits(&Input), iEntry, iRounding); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[idxSub].pszName); \
    } \
}
#else
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType)
#endif
2546
2547#define TEST_FPU_LOAD(a_cBits, a_rdTypeIn, a_SubTestType, a_aSubTests, a_TestType) \
2548typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROM ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, PC ## a_rdTypeIn)); \
2549typedef FNIEMAIMPLFPULDR80FROM ## a_cBits *PFNIEMAIMPLFPULDR80FROM ## a_cBits; \
2550TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROM ## a_cBits); \
2551\
2552static const a_SubTestType a_aSubTests[] = \
2553{ \
2554 ENTRY(RT_CONCAT(fld_r80_from_r,a_cBits)) \
2555}; \
2556GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
2557\
2558static void FpuLdR ## a_cBits ## Test(void) \
2559{ \
2560 X86FXSTATE State; \
2561 RT_ZERO(State); \
2562 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2563 { \
2564 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2565 \
2566 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2567 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2568 PFNIEMAIMPLFPULDR80FROM ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2569 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2570 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2571 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2572 { \
2573 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2574 { \
2575 a_rdTypeIn const InVal = paTests[iTest].InVal; \
2576 State.FCW = paTests[iTest].fFcw; \
2577 State.FSW = paTests[iTest].fFswIn; \
2578 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
2579 pfn(&State, &Res, &InVal); \
2580 if ( Res.FSW != paTests[iTest].fFswOut \
2581 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
2582 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n" \
2583 "%s -> fsw=%#06x %s\n" \
2584 "%s expected %#06x %s%s%s (%s)\n", \
2585 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
2586 FormatR ## a_cBits(&paTests[iTest].InVal), \
2587 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
2588 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
2589 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
2590 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
2591 FormatFcw(paTests[iTest].fFcw) ); \
2592 } \
2593 pfn = a_aSubTests[iFn].pfnNative; \
2594 } \
2595 } \
2596}
2597
2598TEST_FPU_LOAD(80, RTFLOAT80U, FPU_LD_R80_T, g_aFpuLdR80, FPU_R80_IN_TEST_T)
2599TEST_FPU_LOAD(64, RTFLOAT64U, FPU_LD_R64_T, g_aFpuLdR64, FPU_R64_IN_TEST_T)
2600TEST_FPU_LOAD(32, RTFLOAT32U, FPU_LD_R32_T, g_aFpuLdR32, FPU_R32_IN_TEST_T)
2601
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Writes the floating point load test data for the 80, 64 and 32-bit input
 * formats, in that order.
 *
 * @param   pOut    The output stream handed on to the per-format generators.
 * @param   cTests  Number of tests handed on to the per-format generators.
 */
static void FpuLdMemGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    typedef void FNGENERATE(PRTSTREAM, uint32_t);
    static FNGENERATE * const s_apfnGenerators[] =
    {
        FpuLdR80Generate,
        FpuLdR64Generate,
        FpuLdR32Generate,
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_apfnGenerators); i++)
        s_apfnGenerators[i](pOut, cTests);
}
#endif
2610
2611static void FpuLdMemTest(void)
2612{
2613 FpuLdR80Test();
2614 FpuLdR64Test();
2615 FpuLdR32Test();
2616}
2617
2618
2619/*
2620 * Load integer values from memory.
2621 */
/**
 * Generator for the "load integer value from memory" test data.
 *
 * With TSTIEMAIMPL_WITH_GENERATOR defined, GEN_FPU_LOAD_INT expands to
 * FpuLdI<a_cBits>Generate().  For each a_aSubTests entry and test index it
 * draws random FCW/FSW values and an integer input, then runs the entry's
 * pfn worker with each of the four rounding control settings, emitting one
 * record per setting.  Without the generator the macro expands to nothing.
 *
 * @param   a_cBits     Width of the integer input in bits.
 * @param   a_iTypeIn   Type of the integer input.
 * @param   a_szFmtIn   Format string fragment for the integer input.
 * @param   a_aSubTests The sub-test table to walk.
 * @param   a_TestType  Test record type (stringified as the array type).
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
static void FpuLdI ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE FxState; \
    RT_ZERO(FxState); \
    for (size_t idxSub = 0; idxSub < RT_ELEMENTS(a_aSubTests); idxSub++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[idxSub].pszName, #a_TestType); \
        for (uint32_t iEntry = 0; iEntry < cTests; iEntry++) \
        { \
            FxState.FCW = RandFcw(); \
            FxState.FSW = RandFsw(); \
            a_iTypeIn iInput = (a_iTypeIn)RandU ## a_cBits ## Src(iEntry); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                IEMFPURESULT Result = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                FxState.FCW = (FxState.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
                a_aSubTests[idxSub].pfn(&FxState, &Result, &iInput); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, " a_szFmtIn " }, /* #%u/%u */\n", \
                             FxState.FCW, FxState.FSW, Result.FSW, GenFormatR80(&Result.r80Result), \
                             iInput, iEntry, iRounding); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[idxSub].pszName); \
    } \
}
#else
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType)
#endif
2652
2653#define TEST_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_SubTestType, a_aSubTests, a_TestType) \
2654typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMI ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, a_iTypeIn const *)); \
2655typedef FNIEMAIMPLFPULDR80FROMI ## a_cBits *PFNIEMAIMPLFPULDR80FROMI ## a_cBits; \
2656TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROMI ## a_cBits); \
2657\
2658static const a_SubTestType a_aSubTests[] = \
2659{ \
2660 ENTRY(RT_CONCAT(fild_r80_from_i,a_cBits)) \
2661}; \
2662GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
2663\
2664static void FpuLdI ## a_cBits ## Test(void) \
2665{ \
2666 X86FXSTATE State; \
2667 RT_ZERO(State); \
2668 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2669 { \
2670 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2671 \
2672 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2673 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2674 PFNIEMAIMPLFPULDR80FROMI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2675 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2676 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2677 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2678 { \
2679 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2680 { \
2681 a_iTypeIn const iInVal = paTests[iTest].iInVal; \
2682 State.FCW = paTests[iTest].fFcw; \
2683 State.FSW = paTests[iTest].fFswIn; \
2684 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
2685 pfn(&State, &Res, &iInVal); \
2686 if ( Res.FSW != paTests[iTest].fFswOut \
2687 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
2688 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=" a_szFmtIn "\n" \
2689 "%s -> fsw=%#06x %s\n" \
2690 "%s expected %#06x %s%s%s (%s)\n", \
2691 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, paTests[iTest].iInVal, \
2692 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
2693 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
2694 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
2695 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
2696 FormatFcw(paTests[iTest].fFcw) ); \
2697 } \
2698 pfn = a_aSubTests[iFn].pfnNative; \
2699 } \
2700 } \
2701}
2702
2703TEST_FPU_LOAD_INT(64, int64_t, "%RI64", FPU_LD_I64_T, g_aFpuLdU64, FPU_I64_IN_TEST_T)
2704TEST_FPU_LOAD_INT(32, int32_t, "%RI32", FPU_LD_I32_T, g_aFpuLdU32, FPU_I32_IN_TEST_T)
2705TEST_FPU_LOAD_INT(16, int16_t, "%RI16", FPU_LD_I16_T, g_aFpuLdU16, FPU_I16_IN_TEST_T)
2706
2707#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the 64-bit, 32-bit and 16-bit integer load generators, in that order.
 *
 * Only compiled when TSTIEMAIMPL_WITH_GENERATOR is defined.
 *
 * @param   pOut    Output stream, passed unchanged to each generator.
 * @param   cTests  Test count, passed unchanged to each generator.
 */
2708static void FpuLdIntGenerate(PRTSTREAM pOut, uint32_t cTests)
2709{
2710 FpuLdI64Generate(pOut, cTests);
2711 FpuLdI32Generate(pOut, cTests);
2712 FpuLdI16Generate(pOut, cTests);
2713}
2714#endif
2715
/**
 * Runs the 64-bit, 32-bit and 16-bit integer load tests, in that order.
 */
2716static void FpuLdIntTest(void)
2717{
2718 FpuLdI64Test();
2719 FpuLdI32Test();
2720 FpuLdI16Test();
2721}
2722
2723
2724/*
2725 * Load binary coded decimal values from memory.
2726 */
/** Worker signature: FPU state in, FPU result out, packed BCD source operand in. */
2727typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMD80,(PCX86FXSTATE, PIEMFPURESULT, PCRTPBCD80U));
/** Pointer to a FNIEMAIMPLFPULDR80FROMD80 worker. */
2728typedef FNIEMAIMPLFPULDR80FROMD80 *PFNIEMAIMPLFPULDR80FROMD80;
2729TYPEDEF_SUBTEST_TYPE(FPU_LD_D80_T, FPU_D80_IN_TEST_T, PFNIEMAIMPLFPULDR80FROMD80);
2730
/** Sub-tests iterated by FpuLdD80Generate and FpuLdD80Test; currently a single entry. */
2731static const FPU_LD_D80_T g_aFpuLdD80[] =
2732{
2733 ENTRY(fld_r80_from_d80)
2734};
2735
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the test data arrays for the g_aFpuLdD80 sub-tests.
 *
 * Each of the @a cTests packed BCD inputs is run once for each of the four
 * rounding control values; every run emits one FPU_D80_IN_TEST_T row
 * (FCW, input FSW, output FSW, 80-bit result, BCD input).
 *
 * @param   pOut    Stream the array source text is written to.
 * @param   cTests  Number of inputs to generate per sub-test.
 */
static void FpuLdD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    X86FXSTATE FxState;
    RT_ZERO(FxState);
    for (size_t idxSub = 0; idxSub < RT_ELEMENTS(g_aFpuLdD80); idxSub++)
    {
        GenerateArrayStart(pOut, g_aFpuLdD80[idxSub].pszName, "FPU_D80_IN_TEST_T");
        for (uint32_t idxTest = 0; idxTest < cTests; idxTest++)
        {
            /* One FCW/FSW/input triple per test; only the rounding control varies below. */
            FxState.FCW = RandFcw();
            FxState.FSW = RandFsw();
            RTPBCD80U SrcVal = RandD80Src(idxTest);

            for (uint16_t idxRc = 0; idxRc < 4; idxRc++)
            {
                IEMFPURESULT Result = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                FxState.FCW &= ~X86_FCW_RC_MASK;
                FxState.FCW |= (idxRc << X86_FCW_RC_SHIFT);
                g_aFpuLdD80[idxSub].pfn(&FxState, &Result, &SrcVal);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n",
                             FxState.FCW, FxState.FSW, Result.FSW, GenFormatR80(&Result.r80Result),
                             GenFormatD80(&SrcVal), idxTest, idxRc);
            }
        }
        GenerateArrayEnd(pOut, g_aFpuLdD80[idxSub].pszName);
    }
}
#endif
2764
2765static void FpuLdD80Test(void)
2766{
2767 X86FXSTATE State;
2768 RT_ZERO(State);
2769 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
2770 {
2771 if (!SubTestAndCheckIfEnabled(g_aFpuLdD80[iFn].pszName))
2772 continue;
2773
2774 uint32_t const cTests = *g_aFpuLdD80[iFn].pcTests;
2775 FPU_D80_IN_TEST_T const * const paTests = g_aFpuLdD80[iFn].paTests;
2776 PFNIEMAIMPLFPULDR80FROMD80 pfn = g_aFpuLdD80[iFn].pfn;
2777 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdD80[iFn]);
2778 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2779 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2780 {
2781 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2782 {
2783 RTPBCD80U const InVal = paTests[iTest].InVal;
2784 State.FCW = paTests[iTest].fFcw;
2785 State.FSW = paTests[iTest].fFswIn;
2786 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2787 pfn(&State, &Res, &InVal);
2788 if ( Res.FSW != paTests[iTest].fFswOut
2789 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2790 RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n"
2791 "%s -> fsw=%#06x %s\n"
2792 "%s expected %#06x %s%s%s (%s)\n",
2793 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2794 FormatD80(&paTests[iTest].InVal),
2795 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
2796 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2797 FswDiff(Res.FSW, paTests[iTest].fFswOut),
2798 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2799 FormatFcw(paTests[iTest].fFcw) );
2800 }
2801 pfn = g_aFpuLdD80[iFn].pfnNative;
2802 }
2803 }
2804}
2805
2806
2807/*
2808 * Store floating point values to memory.
2809 */
2810#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Hand-picked 80-bit inputs that FpuStR32Generate runs after its cTests random inputs. */
2811static const RTFLOAT80U g_aFpuStR32Specials[] =
2812{
2813 RTFLOAT80U_INIT_C(0, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2814 RTFLOAT80U_INIT_C(1, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2815 RTFLOAT80U_INIT_C(0, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
2816 RTFLOAT80U_INIT_C(1, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
2817};
/** Hand-picked 80-bit inputs that FpuStR64Generate runs after its cTests random inputs. */
2818static const RTFLOAT80U g_aFpuStR64Specials[] =
2819{
2820 RTFLOAT80U_INIT_C(0, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2821 RTFLOAT80U_INIT_C(1, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
2822 RTFLOAT80U_INIT_C(0, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
2823 RTFLOAT80U_INIT_C(1, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
2824 RTFLOAT80U_INIT_C(0, 0xd0b9e6fdda887400, 687 + RTFLOAT80U_EXP_BIAS), /* random example for this */
2825};
/** Inputs that FpuStR80Generate runs after its cTests random inputs; holds only a placeholder
 *  so the generator macro can reference a Specials array for every width. */
2826static const RTFLOAT80U g_aFpuStR80Specials[] =
2827{
2828 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* placeholder */
2829};
/**
 * Defines FpuStR<a_cBits>Generate(), the test data generator for the floating
 * point store workers listed in @a a_aSubTests.
 *
 * The generator runs cTests inputs from RandR80Src() followed by the entries
 * of g_aFpuStR<a_cBits>Specials.  Every input is combined with each of the
 * four rounding control values and each of the eight OM/UM/PM mask bit
 * combinations, and each run emits one @a a_TestType row
 * (FCW, input FSW, output FSW, 80-bit input, stored value).
 */
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
static void FpuStR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    uint32_t const cAllTests = cTests + RT_ELEMENTS(g_aFpuStR ## a_cBits ## Specials); \
    X86FXSTATE     FxState; \
    RT_ZERO(FxState); \
    for (size_t idxSub = 0; idxSub < RT_ELEMENTS(a_aSubTests); idxSub++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[idxSub].pszName, #a_TestType); \
        for (uint32_t idxTest = 0; idxTest < cAllTests; idxTest++) \
        { \
            uint16_t const   fFcwBase = RandFcw(); \
            FxState.FSW = RandFsw(); \
            RTFLOAT80U const SrcVal   = idxTest < cTests \
                                      ? RandR80Src(idxTest, a_cBits) \
                                      : g_aFpuStR ## a_cBits ## Specials[idxTest - cTests]; \
            \
            for (uint16_t idxRc = 0; idxRc < 4; idxRc++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                /* Only even values are visited; the odd ones belonged to a disabled */ \
                /* "flip all mask bits" variant (State.FCW ^= X86_FCW_MASK_ALL). */ \
                for (uint16_t idxMask = 0; idxMask < 16; idxMask += 2 /*1*/) \
                { \
                    uint16_t fFswResult = 0; \
                    a_rdType DstVal; \
                    RT_ZERO(DstVal); \
                    /* Pre-fill the destination with 0xfe bytes before the worker writes it. */ \
                    memset(&DstVal, 0xfe, sizeof(DstVal)); \
                    FxState.FCW  = (fFcwBase & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                                 | (idxRc << X86_FCW_RC_SHIFT); \
                    FxState.FCW |= (idxMask >> 1) << X86_FCW_OM_BIT; \
                    a_aSubTests[idxSub].pfn(&FxState, &fFswResult, &DstVal, &SrcVal); \
                    RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
                                 FxState.FCW, FxState.FSW, fFswResult, GenFormatR80(&SrcVal), \
                                 GenFormatR ## a_cBits(&DstVal), idxTest, idxRc, idxMask); \
                } \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[idxSub].pszName); \
    } \
}
2870#else
2871# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType)
2872#endif
2873
2874#define TEST_FPU_STORE(a_cBits, a_rdType, a_SubTestType, a_aSubTests, a_TestType) \
2875typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOR ## a_cBits,(PCX86FXSTATE, uint16_t *, \
2876 PRTFLOAT ## a_cBits ## U, PCRTFLOAT80U)); \
2877typedef FNIEMAIMPLFPUSTR80TOR ## a_cBits *PFNIEMAIMPLFPUSTR80TOR ## a_cBits; \
2878TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPUSTR80TOR ## a_cBits); \
2879\
2880static const a_SubTestType a_aSubTests[] = \
2881{ \
2882 ENTRY(RT_CONCAT(fst_r80_to_r,a_cBits)) \
2883}; \
2884GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
2885\
2886static void FpuStR ## a_cBits ## Test(void) \
2887{ \
2888 X86FXSTATE State; \
2889 RT_ZERO(State); \
2890 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
2891 { \
2892 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
2893 \
2894 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
2895 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
2896 PFNIEMAIMPLFPUSTR80TOR ## a_cBits pfn = a_aSubTests[iFn].pfn; \
2897 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
2898 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
2899 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
2900 { \
2901 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
2902 { \
2903 RTFLOAT80U const InVal = paTests[iTest].InVal; \
2904 uint16_t uFswOut = 0; \
2905 a_rdType OutVal; \
2906 RT_ZERO(OutVal); \
2907 memset(&OutVal, 0xfe, sizeof(OutVal)); \
2908 State.FCW = paTests[iTest].fFcw; \
2909 State.FSW = paTests[iTest].fFswIn; \
2910 pfn(&State, &uFswOut, &OutVal, &InVal); \
2911 if ( uFswOut != paTests[iTest].fFswOut \
2912 || !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal)) \
2913 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
2914 "%s -> fsw=%#06x %s\n" \
2915 "%s expected %#06x %s%s%s (%s)\n", \
2916 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
2917 FormatR80(&paTests[iTest].InVal), \
2918 iVar ? " " : "", uFswOut, FormatR ## a_cBits(&OutVal), \
2919 iVar ? " " : "", paTests[iTest].fFswOut, FormatR ## a_cBits(&paTests[iTest].OutVal), \
2920 FswDiff(uFswOut, paTests[iTest].fFswOut), \
2921 !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "", \
2922 FormatFcw(paTests[iTest].fFcw) ); \
2923 } \
2924 pfn = a_aSubTests[iFn].pfnNative; \
2925 } \
2926 } \
2927}
2928
2929TEST_FPU_STORE(80, RTFLOAT80U, FPU_ST_R80_T, g_aFpuStR80, FPU_ST_R80_TEST_T)
2930TEST_FPU_STORE(64, RTFLOAT64U, FPU_ST_R64_T, g_aFpuStR64, FPU_ST_R64_TEST_T)
2931TEST_FPU_STORE(32, RTFLOAT32U, FPU_ST_R32_T, g_aFpuStR32, FPU_ST_R32_TEST_T)
2932
2933#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the 80-bit, 64-bit and 32-bit floating point store generators, in that order.
 *
 * Only compiled when TSTIEMAIMPL_WITH_GENERATOR is defined.
 *
 * @param   pOut    Output stream, passed unchanged to each generator.
 * @param   cTests  Test count, passed unchanged to each generator.
 */
2934static void FpuStMemGenerate(PRTSTREAM pOut, uint32_t cTests)
2935{
2936 FpuStR80Generate(pOut, cTests);
2937 FpuStR64Generate(pOut, cTests);
2938 FpuStR32Generate(pOut, cTests);
2939}
2940#endif
2941
/**
 * Runs the 80-bit, 64-bit and 32-bit floating point store tests, in that order.
 */
2942static void FpuStMemTest(void)
2943{
2944 FpuStR80Test();
2945 FpuStR64Test();
2946 FpuStR32Test();
2947}
2948
2949
2950/*
2951 * Store integer values to memory or register.
2952 */
/* Sub-test descriptor types for the 16, 32 and 64-bit integer store workers. */
2953TYPEDEF_SUBTEST_TYPE(FPU_ST_I16_T, FPU_ST_I16_TEST_T, PFNIEMAIMPLFPUSTR80TOI16);
2954TYPEDEF_SUBTEST_TYPE(FPU_ST_I32_T, FPU_ST_I32_TEST_T, PFNIEMAIMPLFPUSTR80TOI32);
2955TYPEDEF_SUBTEST_TYPE(FPU_ST_I64_T, FPU_ST_I64_TEST_T, PFNIEMAIMPLFPUSTR80TOI64);
2956
/** 16-bit integer store sub-tests.  fistt_r80_to_i16 is listed once per CPU vendor
 *  (ENTRY_AMD / ENTRY_INTEL); this file notes that its results differ on AMD. */
2957static const FPU_ST_I16_T g_aFpuStI16[] =
2958{
2959 ENTRY(fist_r80_to_i16),
2960 ENTRY_AMD( fistt_r80_to_i16, 0),
2961 ENTRY_INTEL(fistt_r80_to_i16, 0),
2962};
/** 32-bit integer store sub-tests. */
2963static const FPU_ST_I32_T g_aFpuStI32[] =
2964{
2965 ENTRY(fist_r80_to_i32),
2966 ENTRY(fistt_r80_to_i32),
2967};
/** 64-bit integer store sub-tests. */
2968static const FPU_ST_I64_T g_aFpuStI64[] =
2969{
2970 ENTRY(fist_r80_to_i64),
2971 ENTRY(fistt_r80_to_i64),
2972};
2973
2974#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Hand-picked 80-bit inputs that FpuStI16Generate runs after its cTests
 * random inputs.  The unbiased exponents range from 13 to 32.
 */
2975static const RTFLOAT80U g_aFpuStI16Specials[] = /* 16-bit variant borrows properties from the 32-bit one, thus all this stuff. */
2976{
2977 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 13 + RTFLOAT80U_EXP_BIAS),
2978 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 13 + RTFLOAT80U_EXP_BIAS),
2979 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
2980 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
2981 RTFLOAT80U_INIT_C(0, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
2982 RTFLOAT80U_INIT_C(1, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
2983 RTFLOAT80U_INIT_C(0, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
2984 RTFLOAT80U_INIT_C(1, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
2985 RTFLOAT80U_INIT_C(0, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
2986 RTFLOAT80U_INIT_C(1, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
2987 RTFLOAT80U_INIT_C(0, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
2988 RTFLOAT80U_INIT_C(1, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
2989 RTFLOAT80U_INIT_C(0, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
2990 RTFLOAT80U_INIT_C(1, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
2991 RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 14 + RTFLOAT80U_EXP_BIAS),
2992 RTFLOAT80U_INIT_C(0, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
2993 RTFLOAT80U_INIT_C(1, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
2994 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
2995 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
2996 RTFLOAT80U_INIT_C(0, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
2997 RTFLOAT80U_INIT_C(0, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
2998 RTFLOAT80U_INIT_C(0, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
2999 RTFLOAT80U_INIT_C(1, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
3000 RTFLOAT80U_INIT_C(1, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* min */
3001 RTFLOAT80U_INIT_C(1, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
3002 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS),
3003 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 15 + RTFLOAT80U_EXP_BIAS),
3004 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS),
3005 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 17 + RTFLOAT80U_EXP_BIAS),
3006 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS),
3007 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS),
3008 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 28 + RTFLOAT80U_EXP_BIAS),
3009 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
3010 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
3011 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
3012 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
3013 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3014 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3015 RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3016 RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3017 RTFLOAT80U_INIT_C(0, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
3018 RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
3019 RTFLOAT80U_INIT_C(0, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3020 RTFLOAT80U_INIT_C(1, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3021 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3022 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3023 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 32 + RTFLOAT80U_EXP_BIAS),
3024};
/**
 * Hand-picked 80-bit inputs that FpuStI32Generate runs after its cTests
 * random inputs.  All entries use an unbiased exponent of 30 or 31.
 */
3025static const RTFLOAT80U g_aFpuStI32Specials[] =
3026{
3027 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
3028 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
3029 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3030 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* min */
3031 RTFLOAT80U_INIT_C(0, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3032 RTFLOAT80U_INIT_C(1, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
3033 RTFLOAT80U_INIT_C(0, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3034 RTFLOAT80U_INIT_C(1, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
3035 RTFLOAT80U_INIT_C(0, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
3036 RTFLOAT80U_INIT_C(1, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
3037 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3038 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
3039 RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3040 RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
3041 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3042 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
3043};
/**
 * Hand-picked 80-bit inputs that FpuStI64Generate runs after its cTests
 * random inputs.  All entries use an unbiased exponent of 61, 62 or 63.
 */
3044static const RTFLOAT80U g_aFpuStI64Specials[] =
3045{
3046 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 61 + RTFLOAT80U_EXP_BIAS),
3047 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 61 + RTFLOAT80U_EXP_BIAS),
3048 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
3049 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
3050 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
3051 RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
3052 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
3053 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* min */
3054 RTFLOAT80U_INIT_C(0, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
3055 RTFLOAT80U_INIT_C(1, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
3056 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
3057 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
3058 RTFLOAT80U_INIT_C(0, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
3059 RTFLOAT80U_INIT_C(1, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
3060 RTFLOAT80U_INIT_C(0, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
3061 RTFLOAT80U_INIT_C(1, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
3062 RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 63 + RTFLOAT80U_EXP_BIAS),
3063};
3064
/**
 * Defines FpuStI<a_cBits>Generate(), the test data generator for the integer
 * store workers listed in @a a_aSubTests.
 *
 * Entries whose idxCpuEflFlavour is IEMTARGETCPU_EFL_BEHAVIOR_NATIVE are
 * written to pOut.  Other entries are written to pOutCpu, but only when their
 * flavour equals g_idxCpuEflFlavour; the rest are skipped.  The worker used is
 * pfnNative when it is set and pfn otherwise.
 *
 * The generator runs cTests inputs from RandR80Src() followed by the entries
 * of g_aFpuStI<a_cBits>Specials, each combined with the four rounding control
 * values and the eight OM/UM/PM mask bit combinations.  @a a_szFmt is not
 * used by the generator.
 */
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
static void FpuStI ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    X86FXSTATE FxState; \
    RT_ZERO(FxState); \
    for (size_t idxSub = 0; idxSub < RT_ELEMENTS(a_aSubTests); idxSub++) \
    { \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits const pfnWorker = a_aSubTests[idxSub].pfnNative \
                                                           ? a_aSubTests[idxSub].pfnNative \
                                                           : a_aSubTests[idxSub].pfn; \
        /* Pick the destination stream, or skip entries for a different CPU flavour. */ \
        PRTSTREAM pOutSub; \
        if (a_aSubTests[idxSub].idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
            pOutSub = pOut; \
        else if (a_aSubTests[idxSub].idxCpuEflFlavour == g_idxCpuEflFlavour) \
            pOutSub = pOutCpu; \
        else \
            continue; \
        \
        GenerateArrayStart(pOutSub, a_aSubTests[idxSub].pszName, #a_TestType); \
        uint32_t const cAllTests = cTests + RT_ELEMENTS(g_aFpuStI ## a_cBits ## Specials); \
        for (uint32_t idxTest = 0; idxTest < cAllTests; idxTest++) \
        { \
            uint16_t const   fFcwBase = RandFcw(); \
            FxState.FSW = RandFsw(); \
            RTFLOAT80U const SrcVal   = idxTest < cTests \
                                      ? RandR80Src(idxTest, a_cBits, true) \
                                      : g_aFpuStI ## a_cBits ## Specials[idxTest - cTests]; \
            \
            for (uint16_t idxRc = 0; idxRc < 4; idxRc++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                /* Only even values are visited; the odd ones belonged to a disabled */ \
                /* "flip all mask bits" variant (State.FCW ^= X86_FCW_MASK_ALL). */ \
                for (uint16_t idxMask = 0; idxMask < 16; idxMask += 2 /*1*/) \
                { \
                    uint16_t fFswResult = 0; \
                    a_iType  iDstVal    = ~(a_iType)2; \
                    FxState.FCW  = (fFcwBase & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                                 | (idxRc << X86_FCW_RC_SHIFT); \
                    FxState.FCW |= (idxMask >> 1) << X86_FCW_OM_BIT; \
                    pfnWorker(&FxState, &fFswResult, &iDstVal, &SrcVal); \
                    RTStrmPrintf(pOutSub, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
                                 FxState.FCW, FxState.FSW, fFswResult, GenFormatR80(&SrcVal), \
                                 GenFormatI ## a_cBits(iDstVal), idxTest, idxRc, idxMask); \
                } \
            } \
        } \
        GenerateArrayEnd(pOutSub, a_aSubTests[idxSub].pszName); \
    } \
}
3113#else
3114# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType)
3115#endif
3116
3117#define TEST_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_SubTestType, a_aSubTests, a_TestType) \
3118GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
3119\
3120static void FpuStI ## a_cBits ## Test(void) \
3121{ \
3122 X86FXSTATE State; \
3123 RT_ZERO(State); \
3124 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
3125 { \
3126 if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
3127 \
3128 uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
3129 a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
3130 PFNIEMAIMPLFPUSTR80TOI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
3131 uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
3132 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
3133 for (uint32_t iVar = 0; iVar < cVars; iVar++) \
3134 { \
3135 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
3136 { \
3137 RTFLOAT80U const InVal = paTests[iTest].InVal; \
3138 uint16_t uFswOut = 0; \
3139 a_iType iOutVal = ~(a_iType)2; \
3140 State.FCW = paTests[iTest].fFcw; \
3141 State.FSW = paTests[iTest].fFswIn; \
3142 pfn(&State, &uFswOut, &iOutVal, &InVal); \
3143 if ( uFswOut != paTests[iTest].fFswOut \
3144 || iOutVal != paTests[iTest].iOutVal) \
3145 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
3146 "%s -> fsw=%#06x " a_szFmt "\n" \
3147 "%s expected %#06x " a_szFmt "%s%s (%s)\n", \
3148 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
3149 FormatR80(&paTests[iTest].InVal), \
3150 iVar ? " " : "", uFswOut, iOutVal, \
3151 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].iOutVal, \
3152 FswDiff(uFswOut, paTests[iTest].fFswOut), \
3153 iOutVal != paTests[iTest].iOutVal ? " - val" : "", FormatFcw(paTests[iTest].fFcw) ); \
3154 } \
3155 pfn = a_aSubTests[iFn].pfnNative; \
3156 } \
3157 } \
3158}
3159
3160//fistt_r80_to_i16 diffs for AMD, of course :-)
3161
3162TEST_FPU_STORE_INT(64, int64_t, "%RI64", FPU_ST_I64_T, g_aFpuStI64, FPU_ST_I64_TEST_T)
3163TEST_FPU_STORE_INT(32, int32_t, "%RI32", FPU_ST_I32_T, g_aFpuStI32, FPU_ST_I32_TEST_T)
3164TEST_FPU_STORE_INT(16, int16_t, "%RI16", FPU_ST_I16_T, g_aFpuStI16, FPU_ST_I16_TEST_T)
3165
3166#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the 64-bit, 32-bit and 16-bit integer store generators, in that order.
 *
 * Only compiled when TSTIEMAIMPL_WITH_GENERATOR is defined.
 *
 * @param   pOut     First output stream, passed unchanged to each generator.
 * @param   pOutCpu  Second output stream, passed unchanged to each generator.
 * @param   cTests   Test count, passed unchanged to each generator.
 */
3167static void FpuStIntGenerate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
3168{
3169 FpuStI64Generate(pOut, pOutCpu, cTests);
3170 FpuStI32Generate(pOut, pOutCpu, cTests);
3171 FpuStI16Generate(pOut, pOutCpu, cTests);
3172}
3173#endif
3174
/**
 * Runs the 64-bit, 32-bit and 16-bit integer store tests, in that order.
 */
3175static void FpuStIntTest(void)
3176{
3177 FpuStI64Test();
3178 FpuStI32Test();
3179 FpuStI16Test();
3180}
3181
3182
3183/*
3184 * Store as packed BCD value (memory).
3185 */
/** Worker signature: FPU state in, FSW out, packed BCD destination out, 80-bit source in. */
3186typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOD80,(PCX86FXSTATE, uint16_t *, PRTPBCD80U, PCRTFLOAT80U));
/** Pointer to a FNIEMAIMPLFPUSTR80TOD80 worker. */
3187typedef FNIEMAIMPLFPUSTR80TOD80 *PFNIEMAIMPLFPUSTR80TOD80;
3188TYPEDEF_SUBTEST_TYPE(FPU_ST_D80_T, FPU_ST_D80_TEST_T, PFNIEMAIMPLFPUSTR80TOD80);
3189
/** Sub-tests iterated by FpuStD80Generate and FpuStD80Test; currently a single entry. */
3190static const FPU_ST_D80_T g_aFpuStD80[] =
3191{
3192 ENTRY(fst_r80_to_d80),
3193};
3194
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the test data arrays for the g_aFpuStD80 sub-tests.
 *
 * The generator runs @a cTests inputs from RandR80Src() followed by a fixed
 * list of boundary inputs.  Each input is combined with the four rounding
 * control values and the eight OM/UM/PM mask bit combinations, and every run
 * emits one FPU_ST_D80_TEST_T row (FCW, input FSW, output FSW, 80-bit input,
 * packed BCD output).
 *
 * @param   pOut    Stream the array source text is written to.
 * @param   cTests  Number of random inputs to generate per sub-test.
 */
static void FpuStD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    /* Boundary inputs; the per-entry remarks are relative to the largest/smallest storable value. */
    static RTFLOAT80U const s_aBoundaryVals[] =
    {
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 below max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 above min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* max & all rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* min & all rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* max & some rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* min & some rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* max & some other rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* min & some other rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 above max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 below min */
    };

    X86FXSTATE FxState;
    RT_ZERO(FxState);
    for (size_t idxSub = 0; idxSub < RT_ELEMENTS(g_aFpuStD80); idxSub++)
    {
        GenerateArrayStart(pOut, g_aFpuStD80[idxSub].pszName, "FPU_ST_D80_TEST_T");
        size_t const cAllTests = cTests + RT_ELEMENTS(s_aBoundaryVals);
        for (uint32_t idxTest = 0; idxTest < cAllTests; idxTest += 1)
        {
            uint16_t const   fFcwBase = RandFcw();
            FxState.FSW = RandFsw();
            RTFLOAT80U const SrcVal   = idxTest < cTests
                                      ? RandR80Src(idxTest, 59, true)
                                      : s_aBoundaryVals[idxTest - cTests];

            for (uint16_t idxRc = 0; idxRc < 4; idxRc++)
            {
                /* PC doesn't influence these, so leave as is. */
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT);
                /* Only even values are visited; the odd ones belonged to a disabled
                   "flip all mask bits" variant (State.FCW ^= X86_FCW_MASK_ALL). */
                for (uint16_t idxMask = 0; idxMask < 16; idxMask += 2 /*1*/)
                {
                    uint16_t  fFswResult = 0;
                    RTPBCD80U DstVal     = RTPBCD80U_INIT_ZERO(0);
                    FxState.FCW  = (fFcwBase & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM))
                                 | (idxRc << X86_FCW_RC_SHIFT);
                    FxState.FCW |= (idxMask >> 1) << X86_FCW_OM_BIT;
                    g_aFpuStD80[idxSub].pfn(&FxState, &fFswResult, &DstVal, &SrcVal);
                    RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n",
                                 FxState.FCW, FxState.FSW, fFswResult, GenFormatR80(&SrcVal),
                                 GenFormatD80(&DstVal), idxTest, idxRc, idxMask);
                }
            }
        }
        GenerateArrayEnd(pOut, g_aFpuStD80[idxSub].pszName);
    }
}
#endif
3248
3249
3250static void FpuStD80Test(void)
3251{
3252 X86FXSTATE State;
3253 RT_ZERO(State);
3254 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
3255 {
3256 if (!SubTestAndCheckIfEnabled(g_aFpuStD80[iFn].pszName))
3257 continue;
3258
3259 uint32_t const cTests = *g_aFpuStD80[iFn].pcTests;
3260 FPU_ST_D80_TEST_T const * const paTests = g_aFpuStD80[iFn].paTests;
3261 PFNIEMAIMPLFPUSTR80TOD80 pfn = g_aFpuStD80[iFn].pfn;
3262 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuStD80[iFn]);
3263 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3264 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3265 {
3266 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3267 {
3268 RTFLOAT80U const InVal = paTests[iTest].InVal;
3269 uint16_t uFswOut = 0;
3270 RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
3271 State.FCW = paTests[iTest].fFcw;
3272 State.FSW = paTests[iTest].fFswIn;
3273 pfn(&State, &uFswOut, &OutVal, &InVal);
3274 if ( uFswOut != paTests[iTest].fFswOut
3275 || !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal))
3276 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
3277 "%s -> fsw=%#06x %s\n"
3278 "%s expected %#06x %s%s%s (%s)\n",
3279 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3280 FormatR80(&paTests[iTest].InVal),
3281 iVar ? " " : "", uFswOut, FormatD80(&OutVal),
3282 iVar ? " " : "", paTests[iTest].fFswOut, FormatD80(&paTests[iTest].OutVal),
3283 FswDiff(uFswOut, paTests[iTest].fFswOut),
3284 RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "",
3285 FormatFcw(paTests[iTest].fFcw) );
3286 }
3287 pfn = g_aFpuStD80[iFn].pfnNative;
3288 }
3289 }
3290}
3291
3292
3293
3294/*********************************************************************************************************************************
3295* x87 FPU Binary Operations *
3296*********************************************************************************************************************************/
3297
3298/*
3299 * Binary FPU operations on two 80-bit floating point values.
3300 */
3301TYPEDEF_SUBTEST_TYPE(FPU_BINARY_R80_T, FPU_BINARY_R80_TEST_T, PFNIEMAIMPLFPUR80);
/* Extra value passed via ENTRY_EX for the fprem and fprem1 entries below.
   NOTE(review): how the hint is consumed is not visible in this part of the file. */
3302enum { kFpuBinaryHint_fprem = 1, };
3303
/** Sub-tests for the binary operations on two 80-bit values.  fpatan, fyl2x and
 *  fyl2xp1 are listed once per CPU vendor (ENTRY_AMD / ENTRY_INTEL). */
3304static const FPU_BINARY_R80_T g_aFpuBinaryR80[] =
3305{
3306 ENTRY(fadd_r80_by_r80),
3307 ENTRY(fsub_r80_by_r80),
3308 ENTRY(fsubr_r80_by_r80),
3309 ENTRY(fmul_r80_by_r80),
3310 ENTRY(fdiv_r80_by_r80),
3311 ENTRY(fdivr_r80_by_r80),
3312 ENTRY_EX(fprem_r80_by_r80, kFpuBinaryHint_fprem),
3313 ENTRY_EX(fprem1_r80_by_r80, kFpuBinaryHint_fprem),
3314 ENTRY(fscale_r80_by_r80),
3315 ENTRY_AMD( fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
3316 ENTRY_INTEL(fpatan_r80_by_r80, 0), // C1 and rounding differs on AMD
3317 ENTRY_AMD( fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
3318 ENTRY_INTEL(fyl2x_r80_by_r80, 0), // C1 and rounding differs on AMD
3319 ENTRY_AMD( fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
3320 ENTRY_INTEL(fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
3321};
3322
3323#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the binary FPU helpers taking two 80-bit floating
 * point inputs, i.e. the entries of g_aFpuBinaryR80.
 *
 * For each applicable entry an array of FPU_BINARY_R80_TEST_T initializers is
 * written.  The inputs are @a cTests (at least 192) pairs produced by
 * RandR80Src1/RandR80Src2, followed by the hand-picked s_aSpecials pairs.
 * Each pair is run for all 4x4 rounding/precision control combinations, first
 * with all exceptions masked and then with all unmasked.  When exceptions were
 * raised, additional runs with selected mask combinations follow.  For entries
 * with the kFpuBinaryHint_fprem hint, in-range inputs that raised no exception
 * are additionally followed through the partial remainder sequence.
 *
 * @param   pOut        Output stream for the entries whose idxCpuEflFlavour is
 *                      IEMTARGETCPU_EFL_BEHAVIOR_NATIVE.
 * @param   pOutCpu     Output stream for the entries matching the current
 *                      g_idxCpuEflFlavour (other flavours are skipped).
 * @param   cTests      Requested number of random input pairs.
 */
static void FpuBinaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT80U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT80U_INIT_C(1, 0xdd762f07f2e80eef, 30142), /* causes weird overflows with DOWN and NEAR rounding. */
          RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
        { RTFLOAT80U_INIT_ZERO(0), /* causes weird overflows with UP and NEAR rounding when precision is lower than 64. */
          RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
        { RTFLOAT80U_INIT_ZERO(0), /* minus variant */
          RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
        { RTFLOAT80U_INIT_C(0, 0xcef238bb9a0afd86, 577 + RTFLOAT80U_EXP_BIAS), /* for fprem and fprem1, max sequence length */
          RTFLOAT80U_INIT_C(0, 0xf11684ec0beaad94, 1 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, -13396 + RTFLOAT80U_EXP_BIAS), /* for fdiv. We missed PE. */
          RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 16383 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
          RTFLOAT80U_INIT_C(0, 0xe000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
        /* fscale: This may seriously increase the exponent, and it turns out overflow and underflow behaviour changes
                   once RTFLOAT80U_EXP_BIAS_ADJUST is exceeded. */
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^64 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 6 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1024 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 10 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^4096 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 12 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16384 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 49150 */
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
          RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
          RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^32768 - result is within range on 10980XE */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 65534 */
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^65536 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1048576 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16777216 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
          RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24575 - within 10980XE range */
        { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: min * 2^-24577 */
          RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24576 - outside 10980XE range, behaviour changes! */
        /* fscale: Negative variants for the essentials of the above. */
        { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
          RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
        { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
          RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
        { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
          RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24575 - within 10980XE range */
        { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: min * 2^-24577 */
          RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24576 - outside 10980XE range, behaviour changes! */
        /* fscale: Some fun with denormals and pseudo-denormals. */
        { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: denormal * 2^-4 */
          RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: denormal * 2^+1 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), RTFLOAT80U_INIT_ZERO(0) }, /* for fscale: denormal * 2^+0 */
        { RTFLOAT80U_INIT_C(0, 0x0000000000000008, 0), /* for fscale: denormal * 2^-4 => underflow */
          RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-denormal number * 2^+0. */
        { RTFLOAT80U_INIT_C(1, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* negative pseudo-denormal number * 2^+0. */
        { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-denormal number * 2^-4 */
          RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-denormal number * 2^+1 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
        { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-denormal number * 2^+2 */
          RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS) },
    };

    X86FXSTATE State;
    RT_ZERO(State);
    /* Lower bounds on how many of the random inputs must be normal pairs resp. fprem target range pairs. */
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    uint32_t cMinTargetRangeInputs = cMinNormalPairs / 2;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
    {
        /* Use the native worker when the entry has one, otherwise the regular one. */
        PFNIEMAIMPLFPUR80 const pfn = g_aFpuBinaryR80[iFn].pfnNative ? g_aFpuBinaryR80[iFn].pfnNative : g_aFpuBinaryR80[iFn].pfn;
        PRTSTREAM pOutFn = pOut;
        if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
        {
            /* Flavour specific entry: only generate it for the current flavour, and into the CPU specific stream. */
            if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
                continue;
            pOutFn = pOutCpu;
        }

        GenerateArrayStart(pOutFn, g_aFpuBinaryR80[iFn].pszName, "FPU_BINARY_R80_TEST_T");
        uint32_t iTestOutput = 0;           /* Running index of the emitted array entries (for the trailing comments). */
        uint32_t cNormalInputPairs = 0;
        uint32_t cTargetRangeInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            /* The first cTests inputs are random, the remainder comes from the specials table. */
            RTFLOAT80U InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aSpecials[iTest - cTests].Val1;
            RTFLOAT80U InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
            bool fTargetRange = false;
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
            {
                cNormalInputPairs++;
                /* The unsigned subtraction makes this a single range check: 64 <= exponent difference <= 576. */
                if (   g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem
                    && (uint32_t)InVal1.s.uExponent - (uint32_t)InVal2.s.uExponent - (uint32_t)64 <= (uint32_t)512)
                    cTargetRangeInputs += fTargetRange = true;
                else if (cTargetRangeInputs < cMinTargetRangeInputs && iTest < cTests)
                    if (g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
                    { /* The aim is two values with an exponent difference of 64 to 512 so we can do the whole sequence. */
                        InVal2.s.uExponent = RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 66);
                        InVal1.s.uExponent = RTRandU32Ex(InVal2.s.uExponent + 64, RT_MIN(InVal2.s.uExponent + 512, RTFLOAT80U_EXP_MAX - 1));
                        cTargetRangeInputs += fTargetRange = true;
                    }
            }
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Running short of normal pairs towards the end of the random inputs: redo this
                   slot with fresh values (the decrement cancels the loop increment). */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcwExtra = 0;   /* OR'ed into the FCW values written out below; currently always zero. */
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                {
                    /* 1st run: all exceptions masked ('m'). */
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                              | (iRounding << X86_FCW_RC_SHIFT)
                              | (iPrecision << X86_FCW_PC_SHIFT)
                              | X86_FCW_MASK_ALL;
                    IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResM, &InVal1, &InVal2);
                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
                                 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
                                 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);

                    /* 2nd run: all exceptions unmasked ('u'). */
                    State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
                    IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResU, &InVal1, &InVal2);
                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
                                 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
                                 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);

                    /* The exceptions raised by either of the two runs above (stack fault flag excluded). */
                    uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
                    if (fXcpt)
                    {
                        /* 3rd run: mask exactly the exceptions raised so far. */
                        State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                        IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                        pfn(&State, &Res1, &InVal1, &InVal2);
                        RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
                                     State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
                                     GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
                        /* If that run raised exceptions not in fXcpt, add them to the mask and go again. */
                        if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
                        {
                            fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
                            State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                            IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                            pfn(&State, &Res2, &InVal1, &InVal2);
                            RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
                                         State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
                                         GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
                        }
                        /* With more than one exception involved, also unmask each of them
                           individually while keeping the other raised ones masked. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
                                    IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                                    pfn(&State, &Res3, &InVal1, &InVal2);
                                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
                                                 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
                                                 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
                                }
                    }

                    /* If the values are in range and caused no exceptions, do the whole series of
                       partial remainders till we get the non-partial one or run into an exception.
                       Each step feeds the previous result and FSW back in; at most 32 steps are done. */
                    if (fTargetRange && fXcpt == 0 && g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
                    {
                        IEMFPURESULT ResPrev = ResM;
                        for (unsigned i = 0; i < 32 && (ResPrev.FSW & (X86_FSW_C2 | X86_FSW_XCPT_MASK)) == X86_FSW_C2; i++)
                        {
                            State.FCW = State.FCW | X86_FCW_MASK_ALL;
                            State.FSW = ResPrev.FSW;
                            IEMFPURESULT ResSeq = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                            pfn(&State, &ResSeq, &ResPrev.r80Result, &InVal2);
                            RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/seq%u = #%u */\n",
                                         State.FCW | fFcwExtra, State.FSW, ResSeq.FSW, GenFormatR80(&ResPrev.r80Result),
                                         GenFormatR80(&InVal2), GenFormatR80(&ResSeq.r80Result),
                                         iTest, iRounding, iPrecision, i + 1, iTestOutput++);
                            ResPrev = ResSeq;
                        }
                    }
                }
        }
        GenerateArrayEnd(pOutFn, g_aFpuBinaryR80[iFn].pszName);
    }
}
3522#endif
3523
3524
3525static void FpuBinaryR80Test(void)
3526{
3527 X86FXSTATE State;
3528 RT_ZERO(State);
3529 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
3530 {
3531 if (!SubTestAndCheckIfEnabled(g_aFpuBinaryR80[iFn].pszName))
3532 continue;
3533
3534 uint32_t const cTests = *g_aFpuBinaryR80[iFn].pcTests;
3535 FPU_BINARY_R80_TEST_T const * const paTests = g_aFpuBinaryR80[iFn].paTests;
3536 PFNIEMAIMPLFPUR80 pfn = g_aFpuBinaryR80[iFn].pfn;
3537 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryR80[iFn]);
3538 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3539 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3540 {
3541 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3542 {
3543 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3544 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3545 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3546 State.FCW = paTests[iTest].fFcw;
3547 State.FSW = paTests[iTest].fFswIn;
3548 pfn(&State, &Res, &InVal1, &InVal2);
3549 if ( Res.FSW != paTests[iTest].fFswOut
3550 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal))
3551 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
3552 "%s -> fsw=%#06x %s\n"
3553 "%s expected %#06x %s%s%s (%s)\n",
3554 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3555 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
3556 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
3557 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
3558 FswDiff(Res.FSW, paTests[iTest].fFswOut),
3559 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
3560 FormatFcw(paTests[iTest].fFcw) );
3561 }
3562 pfn = g_aFpuBinaryR80[iFn].pfnNative;
3563 }
3564 }
3565}
3566
3567
/*
 * Binary FPU operations on one 80-bit floating point value and one 64-bit or
 * 32-bit floating point value, or one 32-bit or 16-bit signed integer value.
 */
/* Integer stand-ins for the RTFLOATxxU_IS_NORMAL checks, so that the generator
   macros can paste '<second operand type>_IS_NORMAL' for integer operands too.
   They always evaluate to 1, i.e. every integer counts as a normal input. */
#define int64_t_IS_NORMAL(a) 1
#define int32_t_IS_NORMAL(a) 1
#define int16_t_IS_NORMAL(a) 1
3574
3575#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Fixed input pairs, one table per second operand type.  The generators created
   by GEN_FPU_BINARY_SMALL append them after the random inputs, picking the table
   by pasting the operand type tag (R64, R32, I32, I16) into the table name. */

/** Special inputs: 80-bit by 64-bit floating point. */
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryR64Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
};
/** Special inputs: 80-bit by 32-bit floating point. */
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryR32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
};
/** Special inputs: 80-bit floating point by 32-bit signed integer. */
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryI32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
};
/** Special inputs: 80-bit floating point by 16-bit signed integer. */
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryI16Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
};
3594
/**
 * Defines FpuBinary<a_UpBits>Generate(), the test data generator for binary FPU
 * operations on an 80-bit value and a smaller second operand.
 *
 * For every entry of @a a_aSubTests the generated function writes an array of
 * @a a_TestType initializers covering at least 160 random input pairs plus the
 * s_aFpuBinary<a_UpBits>Specials pairs.  Each pair is run for all 4x4
 * rounding/precision control combinations, once with no exception masked and
 * once with all of them masked.
 *
 * @param   a_fIntType      Passed on to RandR80Src1 as its third argument.
 * @param   a_cBits         Passed on to RandR80Src1 as its second argument.
 * @param   a_LoBits        Not used by this macro.
 * @param   a_UpBits        Operand type tag pasted into the names of the
 *                          generated function, the specials table and the
 *                          Rand...Src2 / GenFormat... helpers.
 * @param   a_Type2         The C type of the second operand.
 * @param   a_aSubTests     The sub-test table to generate data for.
 * @param   a_TestType      The test data type (only its name is emitted).
 */
# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static void FpuBinary ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
    \
    X86FXSTATE State; \
    RT_ZERO(State); \
    uint32_t cMinNormalPairs = (cTests - 144) / 4; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinary ## a_UpBits ## Specials); iTest += 1) \
        { \
            /* The first cTests inputs are random, the remainder comes from the specials table. */ \
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
                                    : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
                                 : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
            { \
                /* Short of normal pairs: redo this slot (the decrement cancels the loop increment). */ \
                iTest -= 1; \
                continue; \
            } \
            \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++) \
                { \
                    /* Two passes: iMask = 0 and iMask = X86_FCW_MASK_ALL. */ \
                    for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
                    { \
                        State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL)) \
                                  | (iRounding << X86_FCW_RC_SHIFT) \
                                  | (iPrecision << X86_FCW_PC_SHIFT) \
                                  | iMask; \
                        IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                        a_aSubTests[iFn].pfn(&State, &Res, &InVal1, &InVal2); \
                        RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%c */\n", \
                                     State.FCW, State.FSW, Res.FSW, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
                                     GenFormatR80(&Res.r80Result), iTest, iRounding, iPrecision, iMask ? 'c' : 'u'); \
                    } \
                } \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
3646#else
/* Without TSTIEMAIMPL_WITH_GENERATOR no generator function is emitted. */
# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
3648#endif
3649
/**
 * Defines everything needed for testing the binary FPU operations on an 80-bit
 * value and a smaller second operand of one particular type:
 *      - the sub-test type @a a_SubTestType,
 *      - the sub-test table @a a_aSubTests with the add, mul, sub, subr, div
 *        and divr entries,
 *      - the FpuBinary<a_UpBits>Generate() function via GEN_FPU_BINARY_SMALL,
 *      - the FpuBinary<a_UpBits>Test() function, which replays the recorded
 *        tests and reports any difference in the resulting FSW or value.
 *
 * @param   a_fIntType      Handed on to GEN_FPU_BINARY_SMALL.
 * @param   a_cBits         Handed on to GEN_FPU_BINARY_SMALL.
 * @param   a_LoBits        Operand type suffix of the worker names (r64, i16, ...).
 * @param   a_UpBits        Operand type tag pasted into the function, function
 *                          pointer type and Format... helper names.
 * @param   a_I             Pasted between the 'f' and the operation in the
 *                          worker names ('i' for the integer forms, otherwise
 *                          nothing).
 * @param   a_Type2         The C type of the second operand.
 * @param   a_SubTestType   Name of the sub-test type to define.
 * @param   a_aSubTests     Name of the sub-test table to define.
 * @param   a_TestType      The test data type.
 */
#define TEST_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_I, a_Type2, a_SubTestType, a_aSubTests, a_TestType) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT4(f, a_I, add_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, mul_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, sub_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, subr_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, div_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, divr_r80_by_, a_LoBits)), \
}; \
\
GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinary ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPU ## a_UpBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
                a_Type2 const InVal2 = paTests[iTest].InVal2; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &Res, &InVal1, &InVal2); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            /* Any further variation runs the native worker. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
3705
/* Instantiate the sub-test tables, generators and test functions for 64-bit and
   32-bit floating point and for 32-bit and 16-bit integer second operands. */
TEST_FPU_BINARY_SMALL(0, 64, r64, R64, RT_NOTHING, RTFLOAT64U, FPU_BINARY_R64_T, g_aFpuBinaryR64, FPU_BINARY_R64_TEST_T)
TEST_FPU_BINARY_SMALL(0, 32, r32, R32, RT_NOTHING, RTFLOAT32U, FPU_BINARY_R32_T, g_aFpuBinaryR32, FPU_BINARY_R32_TEST_T)
TEST_FPU_BINARY_SMALL(1, 32, i32, I32, i, int32_t, FPU_BINARY_I32_T, g_aFpuBinaryI32, FPU_BINARY_I32_TEST_T)
TEST_FPU_BINARY_SMALL(1, 16, i16, I16, i, int16_t, FPU_BINARY_I16_T, g_aFpuBinaryI16, FPU_BINARY_I16_TEST_T)
3710
3711
/*
 * Binary operations on an 80-bit floating point value and an 80-, 64- or 32-bit
 * floating point value or a 32- or 16-bit integer value, only affecting FSW.
 */
3715#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Fixed input pairs, one table per second operand type.  The generators created
   by GEN_FPU_BINARY_FSW append them after the random inputs, picking the table
   by pasting the operand type tag (R80, R64, R32, I32, I16) into the table name. */

/** Special inputs: 80-bit by 80-bit floating point. */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryFswR80Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};
/** Special inputs: 80-bit by 64-bit floating point. */
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryFswR64Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
};
/** Special inputs: 80-bit by 32-bit floating point. */
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryFswR32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
};
/** Special inputs: 80-bit floating point by 32-bit signed integer. */
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryFswI32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
};
/** Special inputs: 80-bit floating point by 16-bit signed integer. */
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryFswI16Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
};
3739
/**
 * Defines FpuBinaryFsw<a_UpBits>Generate(), the test data generator for binary
 * FPU operations that only produce an FSW value.
 *
 * For every entry of @a a_aSubTests the generated function writes an array of
 * @a a_TestType initializers covering at least 160 random input pairs plus the
 * s_aFpuBinaryFsw<a_UpBits>Specials pairs.  Each pair is run twice: with no
 * exception masked and with all of them masked.  Rounding and precision
 * control are left at their random values.
 *
 * @param   a_fIntType      Passed on to RandR80Src1 as its third argument.
 * @param   a_cBits         Passed on to RandR80Src1 as its second argument.
 * @param   a_UpBits        Operand type tag pasted into the names of the
 *                          generated function, the specials table and the
 *                          Rand...Src2 / GenFormat... helpers.
 * @param   a_Type2         The C type of the second operand.
 * @param   a_aSubTests     The sub-test table to generate data for.
 * @param   a_TestType      The test data type (only its name is emitted).
 */
# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static void FpuBinaryFsw ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
    \
    X86FXSTATE State; \
    RT_ZERO(State); \
    uint32_t cMinNormalPairs = (cTests - 144) / 4; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryFsw ## a_UpBits ## Specials); iTest += 1) \
        { \
            /* The first cTests inputs are random, the remainder comes from the specials table. */ \
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
                                    : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
                                 : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
            { \
                /* Short of normal pairs: redo this slot (the decrement cancels the loop increment). */ \
                iTest -= 1; \
                continue; \
            } \
            \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            \
            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */ \
            for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
            { \
                State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask; \
                uint16_t fFswOut = 0; \
                a_aSubTests[iFn].pfn(&State, &fFswOut, &InVal1, &InVal2); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%c */\n", \
                             State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
                             iTest, iMask ? 'c' : 'u'); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
3783#else
/* Without TSTIEMAIMPL_WITH_GENERATOR no generator function is emitted. */
# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
3785#endif
3786
/**
 * Defines everything needed for testing the binary FPU operations that only
 * produce an FSW value, for one particular second operand type:
 *      - the sub-test type @a a_SubTestType,
 *      - the sub-test table @a a_aSubTests holding the entries given as
 *        variadic arguments,
 *      - the FpuBinaryFsw<a_UpBits>Generate() function via GEN_FPU_BINARY_FSW,
 *      - the FpuBinaryFsw<a_UpBits>Test() function, which replays the recorded
 *        tests and reports any difference in the resulting FSW.
 *
 * @param   a_fIntType      Handed on to GEN_FPU_BINARY_FSW.
 * @param   a_cBits         Handed on to GEN_FPU_BINARY_FSW.
 * @param   a_UpBits        Operand type tag pasted into the function, function
 *                          pointer type and Format... helper names.
 * @param   a_Type2         The C type of the second operand.
 * @param   a_SubTestType   Name of the sub-test type to define.
 * @param   a_aSubTests     Name of the sub-test table to define.
 * @param   a_TestType      The test data type.
 * @param   ...             The ENTRY() initializers of the sub-test table.
 */
#define TEST_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_SubTestType, a_aSubTests, a_TestType, ...) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits ## FSW); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    __VA_ARGS__ \
}; \
\
GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinaryFsw ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPU ## a_UpBits ## FSW pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                uint16_t fFswOut = 0; \
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
                a_Type2 const InVal2 = paTests[iTest].InVal2; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &fFswOut, &InVal1, &InVal2); \
                if (fFswOut != paTests[iTest].fFswOut) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                          "%s -> fsw=%#06x\n" \
                                          "%s expected %#06x %s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? " " : "", fFswOut, \
                                 iVar ? " " : "", paTests[iTest].fFswOut, \
                                 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) ); \
            } \
            /* Any further variation runs the native worker. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
3834
/* Instantiate the sub-test tables, generators and test functions for the
   fcom/fucom (floating point operand) and ficom (integer operand) workers. */
TEST_FPU_BINARY_FSW(0, 80, R80, RTFLOAT80U, FPU_BINARY_FSW_R80_T, g_aFpuBinaryFswR80, FPU_BINARY_R80_TEST_T, ENTRY(fcom_r80_by_r80), ENTRY(fucom_r80_by_r80))
TEST_FPU_BINARY_FSW(0, 64, R64, RTFLOAT64U, FPU_BINARY_FSW_R64_T, g_aFpuBinaryFswR64, FPU_BINARY_R64_TEST_T, ENTRY(fcom_r80_by_r64))
TEST_FPU_BINARY_FSW(0, 32, R32, RTFLOAT32U, FPU_BINARY_FSW_R32_T, g_aFpuBinaryFswR32, FPU_BINARY_R32_TEST_T, ENTRY(fcom_r80_by_r32))
TEST_FPU_BINARY_FSW(1, 32, I32, int32_t, FPU_BINARY_FSW_I32_T, g_aFpuBinaryFswI32, FPU_BINARY_I32_TEST_T, ENTRY(ficom_r80_by_i32))
TEST_FPU_BINARY_FSW(1, 16, I16, int16_t, FPU_BINARY_FSW_I16_T, g_aFpuBinaryFswI16, FPU_BINARY_I16_TEST_T, ENTRY(ficom_r80_by_i16))
3840
3841
/*
 * Binary operations on 80-bit floating point that affect only EFLAGS and possibly FSW.
 */
/** Sub-test type for the workers that return EFLAGS and write an FSW value. */
TYPEDEF_SUBTEST_TYPE(FPU_BINARY_EFL_R80_T, FPU_BINARY_EFL_R80_TEST_T, PFNIEMAIMPLFPUR80EFL);

/** The sub-tests processed by FpuBinaryEflR80Generate and FpuBinaryEflR80Test. */
static const FPU_BINARY_EFL_R80_T g_aFpuBinaryEflR80[] =
{
    ENTRY(fcomi_r80_by_r80),
    ENTRY(fucomi_r80_by_r80),
};
3852
3853#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Fixed input pairs that FpuBinaryEflR80Generate appends after the random ones. */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryEflR80Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};
3859
3860static void FpuBinaryEflR80Generate(PRTSTREAM pOut, uint32_t cTests)
3861{
3862 cTests = RT_MAX(160, cTests); /* there are 144 standard input variations */
3863
3864 X86FXSTATE State;
3865 RT_ZERO(State);
3866 uint32_t cMinNormalPairs = (cTests - 144) / 4;
3867 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
3868 {
3869 GenerateArrayStart(pOut, g_aFpuBinaryEflR80[iFn].pszName, "FPU_BINARY_EFL_R80_TEST_T");
3870 uint32_t cNormalInputPairs = 0;
3871 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryEflR80Specials); iTest += 1)
3872 {
3873 RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val1;
3874 RTFLOAT80U const InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val2;
3875 if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
3876 cNormalInputPairs++;
3877 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
3878 {
3879 iTest -= 1;
3880 continue;
3881 }
3882
3883 uint16_t const fFcw = RandFcw();
3884 State.FSW = RandFsw();
3885
3886 /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */
3887 for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
3888 {
3889 State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask;
3890 uint16_t uFswOut = 0;
3891 uint32_t fEflOut = g_aFpuBinaryEflR80[iFn].pfn(&State, &uFswOut, &InVal1, &InVal2);
3892 RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %#08x }, /* #%u/%c */\n",
3893 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal1), GenFormatR80(&InVal2), fEflOut,
3894 iTest, iMask ? 'c' : 'u');
3895 }
3896 }
3897 GenerateArrayEnd(pOut, g_aFpuBinaryEflR80[iFn].pszName);
3898 }
3899}
3900#endif /*TSTIEMAIMPL_WITH_GENERATOR*/
3901
3902static void FpuBinaryEflR80Test(void)
3903{
3904 X86FXSTATE State;
3905 RT_ZERO(State);
3906 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
3907 {
3908 if (!SubTestAndCheckIfEnabled(g_aFpuBinaryEflR80[iFn].pszName))
3909 continue;
3910
3911 uint32_t const cTests = *g_aFpuBinaryEflR80[iFn].pcTests;
3912 FPU_BINARY_EFL_R80_TEST_T const * const paTests = g_aFpuBinaryEflR80[iFn].paTests;
3913 PFNIEMAIMPLFPUR80EFL pfn = g_aFpuBinaryEflR80[iFn].pfn;
3914 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryEflR80[iFn]);
3915 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3916 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3917 {
3918 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3919 {
3920 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3921 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3922 State.FCW = paTests[iTest].fFcw;
3923 State.FSW = paTests[iTest].fFswIn;
3924 uint16_t uFswOut = 0;
3925 uint32_t fEflOut = pfn(&State, &uFswOut, &InVal1, &InVal2);
3926 if ( uFswOut != paTests[iTest].fFswOut
3927 || fEflOut != paTests[iTest].fEflOut)
3928 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
3929 "%s -> fsw=%#06x efl=%#08x\n"
3930 "%s expected %#06x %#08x %s%s (%s)\n",
3931 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3932 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
3933 iVar ? " " : "", uFswOut, fEflOut,
3934 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].fEflOut,
3935 FswDiff(uFswOut, paTests[iTest].fFswOut), EFlagsDiff(fEflOut, paTests[iTest].fEflOut),
3936 FormatFcw(paTests[iTest].fFcw));
3937 }
3938 pfn = g_aFpuBinaryEflR80[iFn].pfnNative;
3939 }
3940 }
3941}
3942
3943
3944/*********************************************************************************************************************************
3945* x87 FPU Unary Operations *
3946*********************************************************************************************************************************/
3947
3948/*
3949 * Unary FPU operations on one 80-bit floating point value.
3950 *
3951 * Note! The FCW reserved bit 7 is used to indicate whether a test may produce
3952 * a rounding error or not.
3953 */
/** Sub-test type for the unary workers operating on one 80-bit value. */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARY);

/* Per-entry hints, read back via the uExtra member of the table entries.
   FpuUnaryR80MayHaveRoundingError only flags inputs for kUnary_Rounding_F2xm1. */
enum { kUnary_Accurate = 0, kUnary_Accurate_Trigonometry /*probably not accurate, but need impl to know*/, kUnary_Rounding_F2xm1 };
/** The unary 80-bit sub-tests, several with separate AMD and Intel entries. */
static const FPU_UNARY_R80_T g_aFpuUnaryR80[] =
{
    ENTRY_EX( fabs_r80, kUnary_Accurate),
    ENTRY_EX( fchs_r80, kUnary_Accurate),
    ENTRY_AMD_EX( f2xm1_r80, 0, kUnary_Accurate), // C1 differs for -1m0x3fb263cc2c331e15^-2654 (different ln2 constant?)
    ENTRY_INTEL_EX(f2xm1_r80, 0, kUnary_Rounding_F2xm1),
    ENTRY_EX( fsqrt_r80, kUnary_Accurate),
    ENTRY_EX( frndint_r80, kUnary_Accurate),
    ENTRY_AMD_EX( fsin_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences for pseudo denormals and others (e.g. -1m0x2b1e5683cbca5725^-3485)
    ENTRY_INTEL_EX(fsin_r80, 0, kUnary_Accurate_Trigonometry),
    ENTRY_AMD_EX( fcos_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences
    ENTRY_INTEL_EX(fcos_r80, 0, kUnary_Accurate_Trigonometry),
};
3970
3971#ifdef TSTIEMAIMPL_WITH_GENERATOR
3972
3973static bool FpuUnaryR80MayHaveRoundingError(PCRTFLOAT80U pr80Val, int enmKind)
3974{
3975 if ( enmKind == kUnary_Rounding_F2xm1
3976 && RTFLOAT80U_IS_NORMAL(pr80Val)
3977 && pr80Val->s.uExponent < RTFLOAT80U_EXP_BIAS
3978 && pr80Val->s.uExponent >= RTFLOAT80U_EXP_BIAS - 69)
3979 return true;
3980 return false;
3981}
3982
/**
 * Generates the test data tables for the workers listed in g_aFpuUnaryR80.
 *
 * Each input value is run for all 4x4 rounding/precision control combinations,
 * once with all exceptions masked and once with all unmasked.  If any
 * exception was raised, additional runs with selected mask combinations
 * follow.  Every run is written out as one table row.
 *
 * @param   pOut        Output stream for entries whose idxCpuEflFlavour is
 *                      IEMTARGETCPU_EFL_BEHAVIOR_NATIVE.
 * @param   pOutCpu     Output stream for entries whose idxCpuEflFlavour equals
 *                      g_idxCpuEflFlavour (entries with any other flavour are
 *                      skipped).
 * @param   cTests      Number of random input values; the s_aSpecials values
 *                      are appended after them.
 */
static void FpuUnaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    /* Fixed inputs that are always tested after the random ones. */
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* 0.5 (for f2xm1) */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* -0.5 (for f2xm1) */
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* 1.0 (for f2xm1) */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* -1.0 (for f2xm1) */
        RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0), /* +1.0^-16382 */
        RTFLOAT80U_INIT_C(1, 0x8000000000000000, 0), /* -1.0^-16382 */
        RTFLOAT80U_INIT_C(0, 0xc000000000000000, 0), /* +1.1^-16382 */
        RTFLOAT80U_INIT_C(1, 0xc000000000000000, 0), /* -1.1^-16382 */
        RTFLOAT80U_INIT_C(0, 0xc000100000000000, 0), /* +1.1xxx1^-16382 */
        RTFLOAT80U_INIT_C(1, 0xc000100000000000, 0), /* -1.1xxx1^-16382 */
    };
    X86FXSTATE State;
    RT_ZERO(State);
    /* At least a quarter of the random inputs shall be normal values. */
    uint32_t cMinNormals = cTests / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
    {
        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLFPUR80UNARY const pfn = g_aFpuUnaryR80[iFn].pfnNative ? g_aFpuUnaryR80[iFn].pfnNative : g_aFpuUnaryR80[iFn].pfn;
        PRTSTREAM pOutFn = pOut;
        if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
        {
            /* Flavour specific entry: skip unless it matches the current flavour. */
            if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
                continue;
            pOutFn = pOutCpu;
        }

        GenerateArrayStart(pOutFn, g_aFpuUnaryR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
        uint32_t iTestOutput = 0;           /* Running row number, printed in the row comment. */
        uint32_t cNormalInputs = 0;
        uint32_t cTargetRangeInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            /* Random values first, then the specials. */
            RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
            if (RTFLOAT80U_IS_NORMAL(&InVal))
            {
                if (g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1)
                {
                    /* NOTE(review): inside this branch both ternaries below always take
                       their first alternative, i.e. uTargetExp = RTFLOAT80U_EXP_BIAS and
                       cTargetExp = 69. */
                    unsigned uTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1
                                        ? RTFLOAT80U_EXP_BIAS /* 2^0..2^-69 */ : RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
                    unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
                    if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
                        cTargetRangeInputs++;
                    /* Near the end of the random inputs with too few values in the target
                       range: force the exponent of this value into the range. */
                    else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
                    {
                        InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
                        cTargetRangeInputs++;
                    }
                }
                cNormalInputs++;
            }
            /* Near the end of the random inputs with too few normals: redo this
               iteration (the loop increment undoes the decrement) so that
               RandR80Src is asked again for the same index. */
            else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            /* Bit 7 of the emitted FCW marks tests where a rounding error is acceptable. */
            uint16_t const fFcwExtra = FpuUnaryR80MayHaveRoundingError(&InVal, g_aFpuUnaryR80[iFn].uExtra) ? 0x80 : 0;
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                {
                    /* Run 'm': all exceptions masked. */
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                              | (iRounding << X86_FCW_RC_SHIFT)
                              | (iPrecision << X86_FCW_PC_SHIFT)
                              | X86_FCW_MASK_ALL;
                    IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResM, &InVal);
                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
                                 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal),
                                 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);

                    /* Run 'u': all exceptions unmasked. */
                    State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
                    IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                    pfn(&State, &ResU, &InVal);
                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
                                 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal),
                                 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);

                    /* Exceptions flagged by either run (X86_FSW_SF excluded). */
                    uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
                    if (fXcpt)
                    {
                        /* Mask exactly the exceptions that were flagged. */
                        State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                        IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                        pfn(&State, &Res1, &InVal);
                        RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
                                     State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal),
                                     GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
                        /* That run flagged exceptions not yet in fXcpt: add them and go again. */
                        if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
                        {
                            fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
                            State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
                            IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                            pfn(&State, &Res2, &InVal);
                            RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
                                         State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal),
                                         GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
                        }
                        /* More than one exception involved: unmask them one at a time. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
                                    IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                                    pfn(&State, &Res3, &InVal);
                                    RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
                                                 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal),
                                                 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
                                }
                    }
                }
        }
        GenerateArrayEnd(pOutFn, g_aFpuUnaryR80[iFn].pszName);
    }
}
4102#endif
4103
4104static bool FpuIsEqualFcwMaybeIgnoreRoundErr(uint16_t fFcw1, uint16_t fFcw2, bool fRndErrOk, bool *pfRndErr)
4105{
4106 if (fFcw1 == fFcw2)
4107 return true;
4108 if (fRndErrOk && (fFcw1 & ~X86_FSW_C1) == (fFcw2 & ~X86_FSW_C1))
4109 {
4110 *pfRndErr = true;
4111 return true;
4112 }
4113 return false;
4114}
4115
4116static bool FpuIsEqualR80MaybeIgnoreRoundErr(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, bool fRndErrOk, bool *pfRndErr)
4117{
4118 if (RTFLOAT80U_ARE_IDENTICAL(pr80Val1, pr80Val2))
4119 return true;
4120 if ( fRndErrOk
4121 && pr80Val1->s.fSign == pr80Val2->s.fSign)
4122 {
4123 if ( ( pr80Val1->s.uExponent == pr80Val2->s.uExponent
4124 && ( pr80Val1->s.uMantissa > pr80Val2->s.uMantissa
4125 ? pr80Val1->s.uMantissa - pr80Val2->s.uMantissa == 1
4126 : pr80Val2->s.uMantissa - pr80Val1->s.uMantissa == 1))
4127 ||
4128 ( pr80Val1->s.uExponent + 1 == pr80Val2->s.uExponent
4129 && pr80Val1->s.uMantissa == UINT64_MAX
4130 && pr80Val2->s.uMantissa == RT_BIT_64(63))
4131 ||
4132 ( pr80Val1->s.uExponent == pr80Val2->s.uExponent + 1
4133 && pr80Val2->s.uMantissa == UINT64_MAX
4134 && pr80Val1->s.uMantissa == RT_BIT_64(63)) )
4135 {
4136 *pfRndErr = true;
4137 return true;
4138 }
4139 }
4140 return false;
4141}
4142
4143
4144static void FpuUnaryR80Test(void)
4145{
4146 X86FXSTATE State;
4147 RT_ZERO(State);
4148 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
4149 {
4150 if (!SubTestAndCheckIfEnabled(g_aFpuUnaryR80[iFn].pszName))
4151 continue;
4152
4153 uint32_t const cTests = *g_aFpuUnaryR80[iFn].pcTests;
4154 FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryR80[iFn].paTests;
4155 PFNIEMAIMPLFPUR80UNARY pfn = g_aFpuUnaryR80[iFn].pfn;
4156 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryR80[iFn]);
4157 uint32_t cRndErrs = 0;
4158 uint32_t cPossibleRndErrs = 0;
4159 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4160 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4161 {
4162 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4163 {
4164 RTFLOAT80U const InVal = paTests[iTest].InVal;
4165 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
4166 bool const fRndErrOk = RT_BOOL(paTests[iTest].fFcw & 0x80);
4167 State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80;
4168 State.FSW = paTests[iTest].fFswIn;
4169 pfn(&State, &Res, &InVal);
4170 bool fRndErr = false;
4171 if ( !FpuIsEqualFcwMaybeIgnoreRoundErr(Res.FSW, paTests[iTest].fFswOut, fRndErrOk, &fRndErr)
4172 || !FpuIsEqualR80MaybeIgnoreRoundErr(&Res.r80Result, &paTests[iTest].OutVal, fRndErrOk, &fRndErr))
4173 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4174 "%s -> fsw=%#06x %s\n"
4175 "%s expected %#06x %s%s%s%s (%s)\n",
4176 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4177 FormatR80(&paTests[iTest].InVal),
4178 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
4179 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
4180 FswDiff(Res.FSW, paTests[iTest].fFswOut),
4181 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
4182 fRndErrOk ? " - rounding errors ok" : "", FormatFcw(paTests[iTest].fFcw));
4183 cRndErrs += fRndErr;
4184 cPossibleRndErrs += fRndErrOk;
4185 }
4186 pfn = g_aFpuUnaryR80[iFn].pfnNative;
4187 }
4188 if (cPossibleRndErrs > 0)
4189 RTTestPrintf(g_hTest, RTTESTLVL_ALWAYS, "rounding errors: %u out of %u\n", cRndErrs, cPossibleRndErrs);
4190 }
4191}
4192
4193
4194/*
4195 * Unary FPU operations on one 80-bit floating point value, but only affects the FSW.
4196 */
/* Declares the sub-test descriptor type FPU_UNARY_FSW_R80_T; it reuses the
 * FPU_UNARY_R80_TEST_T record layout with PFNIEMAIMPLFPUR80UNARYFSW workers. */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_FSW_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYFSW);

/* The workers that only produce an FSW value.  The extra value 1 on the fxam
 * entry is what the generator checks (uExtra == 1) to pick its FXAM specific
 * code path. */
static const FPU_UNARY_FSW_R80_T g_aFpuUnaryFswR80[] =
{
    ENTRY(ftst_r80),
    ENTRY_EX(fxam_r80, 1),
};
4204
4205#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the test data tables for the workers listed in g_aFpuUnaryFswR80.
 *
 * Entries with uExtra == 1 (fxam) get one row per input with a randomly
 * chosen "empty register" state; all other entries get one row for each
 * rounding / precision / all-or-no-masking combination.
 *
 * @param   pOut        Output stream for entries whose idxCpuEflFlavour is
 *                      IEMTARGETCPU_EFL_BEHAVIOR_NATIVE.
 * @param   pOutCpu     Output stream for entries whose idxCpuEflFlavour equals
 *                      g_idxCpuEflFlavour (entries with any other flavour are
 *                      skipped).
 * @param   cTests      Number of random input values; the s_aSpecials values
 *                      are appended after them.
 */
static void FpuUnaryFswR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    /* Fixed inputs that are always tested after the random ones. */
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    /* At least a quarter of the random inputs shall be normal values. */
    uint32_t cMinNormals = cTests / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
    {
        bool const fIsFxam = g_aFpuUnaryFswR80[iFn].uExtra == 1;
        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLFPUR80UNARYFSW const pfn = g_aFpuUnaryFswR80[iFn].pfnNative ? g_aFpuUnaryFswR80[iFn].pfnNative : g_aFpuUnaryFswR80[iFn].pfn;
        PRTSTREAM pOutFn = pOut;
        if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
        {
            /* Flavour specific entry: skip unless it matches the current flavour. */
            if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
                continue;
            pOutFn = pOutCpu;
        }
        /* Start every function with a cleared tag word (the fxam path below modifies it). */
        State.FTW = 0;

        GenerateArrayStart(pOutFn, g_aFpuUnaryFswR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
        uint32_t cNormalInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            /* Random values first, then the specials. */
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
            if (RTFLOAT80U_IS_NORMAL(&InVal))
                cNormalInputs++;
            /* Near the end of the random inputs with too few normals: redo this
               iteration (the loop increment undoes the decrement). */
            else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();
            if (!fIsFxam)
            {
                for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                {
                    for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                    {
                        /* Two passes: iMask = 0 (nothing masked) and iMask = X86_FCW_MASK_ALL. */
                        for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
                        {
                            State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                                      | (iRounding << X86_FCW_RC_SHIFT)
                                      | (iPrecision << X86_FCW_PC_SHIFT)
                                      | iMask;
                            uint16_t fFswOut = 0;
                            pfn(&State, &fFswOut, &InVal);
                            RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u/%u/%u/%c */\n",
                                         State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal),
                                         iTest, iRounding, iPrecision, iMask ? 'c' : 'u');
                        }
                    }
                }
            }
            else
            {
                uint16_t fFswOut = 0;
                /* One in four inputs (RTRandU32Ex(0, 3) == 3) is tested with an empty tag. */
                uint16_t const fEmpty = RTRandU32Ex(0, 3) == 3 ? 0x80 : 0; /* Using MBZ bit 7 in FCW to indicate empty tag value. */
                /* Non-empty: set the tag bit selected by the TOP field of the FSW. */
                State.FTW = !fEmpty ? 1 << X86_FSW_TOP_GET(State.FSW) : 0;
                State.FCW = fFcw;
                pfn(&State, &fFswOut, &InVal);
                RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s }, /* #%u%s */\n",
                             fFcw | fEmpty, State.FSW, fFswOut, GenFormatR80(&InVal), iTest, fEmpty ? "/empty" : "");
            }
        }
        GenerateArrayEnd(pOutFn, g_aFpuUnaryFswR80[iFn].pszName);
    }
}
4279#endif
4280
4281
4282static void FpuUnaryFswR80Test(void)
4283{
4284 X86FXSTATE State;
4285 RT_ZERO(State);
4286 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
4287 {
4288 if (!SubTestAndCheckIfEnabled(g_aFpuUnaryFswR80[iFn].pszName))
4289 continue;
4290
4291 uint32_t const cTests = *g_aFpuUnaryFswR80[iFn].pcTests;
4292 FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryFswR80[iFn].paTests;
4293 PFNIEMAIMPLFPUR80UNARYFSW pfn = g_aFpuUnaryFswR80[iFn].pfn;
4294 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryFswR80[iFn]);
4295 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4296 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4297 {
4298 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4299 {
4300 RTFLOAT80U const InVal = paTests[iTest].InVal;
4301 uint16_t fFswOut = 0;
4302 State.FSW = paTests[iTest].fFswIn;
4303 State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80; /* see generator code */
4304 State.FTW = paTests[iTest].fFcw & 0x80 ? 0 : 1 << X86_FSW_TOP_GET(paTests[iTest].fFswIn);
4305 pfn(&State, &fFswOut, &InVal);
4306 if (fFswOut != paTests[iTest].fFswOut)
4307 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4308 "%s -> fsw=%#06x\n"
4309 "%s expected %#06x %s (%s%s)\n",
4310 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4311 FormatR80(&paTests[iTest].InVal),
4312 iVar ? " " : "", fFswOut,
4313 iVar ? " " : "", paTests[iTest].fFswOut,
4314 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw),
4315 paTests[iTest].fFcw & 0x80 ? " empty" : "");
4316 }
4317 pfn = g_aFpuUnaryFswR80[iFn].pfnNative;
4318 }
4319 }
4320}
4321
4322/*
4323 * Unary FPU operations on one 80-bit floating point value, but with two outputs.
4324 */
/* Declares the sub-test descriptor type FPU_UNARY_TWO_R80_T, tying
 * FPU_UNARY_TWO_R80_TEST_T records to PFNIEMAIMPLFPUR80UNARYTWO workers. */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_TWO_R80_T, FPU_UNARY_TWO_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYTWO);

/* The workers producing two 80-bit results from one input.  Note that the
 * generator treats index 0 (fxtract) differently from the other entries, so
 * fxtract must stay first.
 * NOTE(review): the AMD/INTEL entries presumably select CPU vendor specific
 * test data - confirm against the ENTRY_AMD / ENTRY_INTEL definitions. */
static const FPU_UNARY_TWO_R80_T g_aFpuUnaryTwoR80[] =
{
    ENTRY(fxtract_r80_r80),
    ENTRY_AMD(  fptan_r80_r80, 0), // rounding differences
    ENTRY_INTEL(fptan_r80_r80, 0),
    ENTRY_AMD(  fsincos_r80_r80, 0), // C1 differences & value differences (e.g. -1m0x235cf2f580244a27^-1696)
    ENTRY_INTEL(fsincos_r80_r80, 0),
};
4335
4336#ifdef TSTIEMAIMPL_WITH_GENERATOR
4337static void FpuUnaryTwoR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
4338{
4339 static RTFLOAT80U const s_aSpecials[] =
4340 {
4341 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
4342 };
4343
4344 X86FXSTATE State;
4345 RT_ZERO(State);
4346 uint32_t cMinNormals = cTests / 4;
4347 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
4348 {
4349 PFNIEMAIMPLFPUR80UNARYTWO const pfn = g_aFpuUnaryTwoR80[iFn].pfnNative ? g_aFpuUnaryTwoR80[iFn].pfnNative : g_aFpuUnaryTwoR80[iFn].pfn;
4350 PRTSTREAM pOutFn = pOut;
4351 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
4352 {
4353 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4354 continue;
4355 pOutFn = pOutCpu;
4356 }
4357
4358 GenerateArrayStart(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName, "FPU_UNARY_TWO_R80_TEST_T");
4359 uint32_t iTestOutput = 0;
4360 uint32_t cNormalInputs = 0;
4361 uint32_t cTargetRangeInputs = 0;
4362 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4363 {
4364 RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4365 if (RTFLOAT80U_IS_NORMAL(&InVal))
4366 {
4367 if (iFn != 0)
4368 {
4369 unsigned uTargetExp = RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
4370 unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
4371 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
4372 cTargetRangeInputs++;
4373 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
4374 {
4375 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
4376 cTargetRangeInputs++;
4377 }
4378 }
4379 cNormalInputs++;
4380 }
4381 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4382 {
4383 iTest -= 1;
4384 continue;
4385 }
4386
4387 uint16_t const fFcwExtra = 0; /* for rounding error indication */
4388 uint16_t const fFcw = RandFcw();
4389 State.FSW = RandFsw();
4390
4391 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4392 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4393 {
4394 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4395 | (iRounding << X86_FCW_RC_SHIFT)
4396 | (iPrecision << X86_FCW_PC_SHIFT)
4397 | X86_FCW_MASK_ALL;
4398 IEMFPURESULTTWO ResM = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4399 pfn(&State, &ResM, &InVal);
4400 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
4401 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal), GenFormatR80(&ResM.r80Result1),
4402 GenFormatR80(&ResM.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4403
4404 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4405 IEMFPURESULTTWO ResU = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4406 pfn(&State, &ResU, &InVal);
4407 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
4408 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal), GenFormatR80(&ResU.r80Result1),
4409 GenFormatR80(&ResU.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4410
4411 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4412 if (fXcpt)
4413 {
4414 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4415 IEMFPURESULTTWO Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4416 pfn(&State, &Res1, &InVal);
4417 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
4418 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal), GenFormatR80(&Res1.r80Result1),
4419 GenFormatR80(&Res1.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4420 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4421 {
4422 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
4423 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4424 IEMFPURESULTTWO Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4425 pfn(&State, &Res2, &InVal);
4426 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
4427 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal), GenFormatR80(&Res2.r80Result1),
4428 GenFormatR80(&Res2.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4429 }
4430 if (!RT_IS_POWER_OF_TWO(fXcpt))
4431 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4432 if (fUnmasked & fXcpt)
4433 {
4434 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
4435 IEMFPURESULTTWO Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4436 pfn(&State, &Res3, &InVal);
4437 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
4438 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal), GenFormatR80(&Res3.r80Result1),
4439 GenFormatR80(&Res3.r80Result2), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
4440 }
4441 }
4442 }
4443 }
4444 GenerateArrayEnd(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName);
4445 }
4446}
4447#endif
4448
4449
4450static void FpuUnaryTwoR80Test(void)
4451{
4452 X86FXSTATE State;
4453 RT_ZERO(State);
4454 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
4455 {
4456 if (!SubTestAndCheckIfEnabled(g_aFpuUnaryTwoR80[iFn].pszName))
4457 continue;
4458
4459 uint32_t const cTests = *g_aFpuUnaryTwoR80[iFn].pcTests;
4460 FPU_UNARY_TWO_R80_TEST_T const * const paTests = g_aFpuUnaryTwoR80[iFn].paTests;
4461 PFNIEMAIMPLFPUR80UNARYTWO pfn = g_aFpuUnaryTwoR80[iFn].pfn;
4462 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryTwoR80[iFn]);
4463 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4464 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4465 {
4466 for (uint32_t iTest = 0; iTest < cTests; iTest++)
4467 {
4468 IEMFPURESULTTWO Res = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4469 RTFLOAT80U const InVal = paTests[iTest].InVal;
4470 State.FCW = paTests[iTest].fFcw;
4471 State.FSW = paTests[iTest].fFswIn;
4472 pfn(&State, &Res, &InVal);
4473 if ( Res.FSW != paTests[iTest].fFswOut
4474 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1)
4475 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) )
4476 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
4477 "%s -> fsw=%#06x %s %s\n"
4478 "%s expected %#06x %s %s %s%s%s (%s)\n",
4479 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
4480 FormatR80(&paTests[iTest].InVal),
4481 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result1), FormatR80(&Res.r80Result2),
4482 iVar ? " " : "", paTests[iTest].fFswOut,
4483 FormatR80(&paTests[iTest].OutVal1), FormatR80(&paTests[iTest].OutVal2),
4484 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1) ? " - val1" : "",
4485 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) ? " - val2" : "",
4486 FswDiff(Res.FSW, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) );
4487 }
4488 pfn = g_aFpuUnaryTwoR80[iFn].pfnNative;
4489 }
4490 }
4491}
4492
4493
4494/*********************************************************************************************************************************
4495* SSE floating point Binary Operations *
4496*********************************************************************************************************************************/
4497
4498/*
4499 * Binary SSE operations on packed single precision floating point values.
4500 */
/* Declares the sub-test descriptor type SSE_BINARY_R32_T, tying
 * SSE_BINARY_TEST_T records to PFNIEMAIMPLFPSSEF2U128 workers. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);

/* The packed single precision workers under test.  The generator writes one
 * binary data file per entry, named after the entry (see the pszName use in
 * the generator). */
static const SSE_BINARY_R32_T g_aSseBinaryR32[] =
{
    ENTRY_BIN(addps_u128),
    ENTRY_BIN(mulps_u128),
    ENTRY_BIN(subps_u128),
    ENTRY_BIN(minps_u128),
    ENTRY_BIN(divps_u128),
    ENTRY_BIN(maxps_u128),
    ENTRY_BIN(haddps_u128),
    ENTRY_BIN(hsubps_u128),
    ENTRY_BIN(sqrtps_u128),
    ENTRY_BIN(addsubps_u128),
    ENTRY_BIN(cvtps2pd_u128),
};
4517
4518#ifdef TSTIEMAIMPL_WITH_GENERATOR
4519static RTEXITCODE SseBinaryR32Generate(const char *pszDataFileFmt, uint32_t cTests)
4520{
4521 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
4522
4523 static struct { RTFLOAT32U aVal1[4], aVal2[4]; } const s_aSpecials[] =
4524 {
4525 { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), },
4526 { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1), RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) } },
4527 /** @todo More specials. */
4528 };
4529
4530 X86FXSTATE State;
4531 RT_ZERO(State);
4532 uint32_t cMinNormalPairs = (cTests - 144) / 4;
4533 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
4534 {
4535 PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR32[iFn].pfnNative ? g_aSseBinaryR32[iFn].pfnNative : g_aSseBinaryR32[iFn].pfn;
4536
4537 PRTSTREAM pStrmOut = NULL;
4538 int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR32[iFn].pszName);
4539 if (RT_FAILURE(rc))
4540 {
4541 RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR32[iFn].pszName, rc);
4542 return RTEXITCODE_FAILURE;
4543 }
4544
4545 uint32_t cNormalInputPairs = 0;
4546 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4547 {
4548 SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);
4549
4550 TestData.InVal1.ar32[0] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
4551 TestData.InVal1.ar32[1] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
4552 TestData.InVal1.ar32[2] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[2];
4553 TestData.InVal1.ar32[3] = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].aVal1[3];
4554
4555 TestData.InVal2.ar32[0] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
4556 TestData.InVal2.ar32[1] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[1];
4557 TestData.InVal2.ar32[2] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[2];
4558 TestData.InVal2.ar32[3] = iTest < cTests ? RandR32Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[3];
4559
4560 if ( RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[0])
4561 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[1])
4562 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[2])
4563 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3]) && RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[3]))
4564 cNormalInputPairs++;
4565 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
4566 {
4567 iTest -= 1;
4568 continue;
4569 }
4570
4571 uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
4572 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4573 for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
4574 for (uint8_t iFz = 0; iFz < 2; iFz++)
4575 {
4576 State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
4577 | (iRounding << X86_MXCSR_RC_SHIFT)
4578 | (iDaz ? X86_MXCSR_DAZ : 0)
4579 | (iFz ? X86_MXCSR_FZ : 0)
4580 | X86_MXCSR_XCPT_MASK;
4581 IEMSSERESULT ResM; RT_ZERO(ResM);
4582 pfn(&State, &ResM, &TestData.InVal1, &TestData.InVal2);
4583 TestData.fMxcsrIn = State.MXCSR;
4584 TestData.fMxcsrOut = ResM.MXCSR;
4585 TestData.OutVal = ResM.uResult;
4586 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4587
4588 State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
4589 IEMSSERESULT ResU; RT_ZERO(ResU);
4590 pfn(&State, &ResU, &TestData.InVal1, &TestData.InVal2);
4591 TestData.fMxcsrIn = State.MXCSR;
4592 TestData.fMxcsrOut = ResU.MXCSR;
4593 TestData.OutVal = ResU.uResult;
4594 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4595
4596 uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
4597 if (fXcpt)
4598 {
4599 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
4600 IEMSSERESULT Res1; RT_ZERO(Res1);
4601 pfn(&State, &Res1, &TestData.InVal1, &TestData.InVal2);
4602 TestData.fMxcsrIn = State.MXCSR;
4603 TestData.fMxcsrOut = Res1.MXCSR;
4604 TestData.OutVal = Res1.uResult;
4605 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4606
4607 if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
4608 {
4609 fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
4610 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
4611 IEMSSERESULT Res2; RT_ZERO(Res2);
4612 pfn(&State, &Res2, &TestData.InVal1, &TestData.InVal2);
4613 TestData.fMxcsrIn = State.MXCSR;
4614 TestData.fMxcsrOut = Res2.MXCSR;
4615 TestData.OutVal = Res2.uResult;
4616 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4617 }
4618 if (!RT_IS_POWER_OF_TWO(fXcpt))
4619 for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
4620 if (fUnmasked & fXcpt)
4621 {
4622 State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
4623 IEMSSERESULT Res3; RT_ZERO(Res3);
4624 pfn(&State, &Res3, &TestData.InVal1, &TestData.InVal2);
4625 TestData.fMxcsrIn = State.MXCSR;
4626 TestData.fMxcsrOut = Res3.MXCSR;
4627 TestData.OutVal = Res3.uResult;
4628 RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
4629 }
4630 }
4631 }
4632 }
4633 rc = RTStrmClose(pStrmOut);
4634 if (RT_FAILURE(rc))
4635 {
4636 RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR32[iFn].pszName, rc);
4637 return RTEXITCODE_FAILURE;
4638 }
4639 }
4640
4641 return RTEXITCODE_SUCCESS;
4642}
4643#endif
4644
4645static void SseBinaryR32Test(void)
4646{
4647 X86FXSTATE State;
4648 RT_ZERO(State);
4649 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32); iFn++)
4650 {
4651 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR32[iFn].pszName))
4652 continue;
4653
4654 uint32_t const cTests = *g_aSseBinaryR32[iFn].pcTests;
4655 SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR32[iFn].paTests;
4656 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR32[iFn].pfn;
4657 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32[iFn]);
4658 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4659 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4660 {
4661 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_TEST_T); iTest++)
4662 {
4663 IEMSSERESULT Res; RT_ZERO(Res);
4664
4665 State.MXCSR = paTests[iTest].fMxcsrIn;
4666 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
4667 bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
4668 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
4669 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
4670 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]);
4671 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
4672 || !fValsIdentical)
4673 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s'%s'%s'%s\n"
4674 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
4675 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
4676 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
4677 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
4678 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
4679 FormatR32(&paTests[iTest].InVal2.ar32[0]), FormatR32(&paTests[iTest].InVal2.ar32[1]),
4680 FormatR32(&paTests[iTest].InVal2.ar32[2]), FormatR32(&paTests[iTest].InVal2.ar32[3]),
4681 iVar ? " " : "", Res.MXCSR,
4682 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
4683 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
4684 iVar ? " " : "", paTests[iTest].fMxcsrOut,
4685 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
4686 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
4687 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
4688 !fValsIdentical ? " - val" : "",
4689 FormatMxcsr(paTests[iTest].fMxcsrIn) );
4690 }
4691 pfn = g_aSseBinaryR32[iFn].pfnNative;
4692 }
4693 }
4694}
4695
4696
/*
 * Binary SSE operations on packed double precision floating point values.
 */
/* Declares the sub-test descriptor type SSE_BINARY_R64_T; it uses the same
 * SSE_BINARY_TEST_T record layout and PFNIEMAIMPLFPSSEF2U128 worker type as
 * the single precision table. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_T, SSE_BINARY_TEST_T, PFNIEMAIMPLFPSSEF2U128);

/* The packed double precision (xxxpd) workers under test.  The generator
 * writes one binary data file per entry, named after the entry. */
static const SSE_BINARY_R64_T g_aSseBinaryR64[] =
{
    ENTRY_BIN(addpd_u128),
    ENTRY_BIN(mulpd_u128),
    ENTRY_BIN(subpd_u128),
    ENTRY_BIN(minpd_u128),
    ENTRY_BIN(divpd_u128),
    ENTRY_BIN(maxpd_u128),
    ENTRY_BIN(haddpd_u128),
    ENTRY_BIN(hsubpd_u128),
    ENTRY_BIN(sqrtpd_u128),
    ENTRY_BIN(addsubpd_u128),
    ENTRY_BIN(cvtpd2ps_u128),
};
4716
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the binary test data files for the g_aSseBinaryR64 workers.
 *
 * One data file is written per worker.  Every input pair (random ones first,
 * then the entries of s_aSpecials) is run through all rounding mode, DAZ and
 * FZ combinations, and for each combination with several settings of the MXCSR
 * exception mask bits.  Each worker invocation produces one SSE_BINARY_TEST_T
 * record in the file.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the worker
 *                          name is passed as its argument.
 * @param   cTests          Number of random input pairs per worker (raised to
 *                          at least 192).
 */
static RTEXITCODE SseBinaryR64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U aVal1[2], aVal2[2]; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
          { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1), RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) } },
          /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
    {
        /* Use the native worker when the table entry has one. */
        PFNIEMAIMPLFPSSEF2U128 const pfn = g_aSseBinaryR64[iFn].pfnNative ? g_aSseBinaryR64[iFn].pfnNative : g_aSseBinaryR64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR64[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_TEST_T TestData; RT_ZERO(TestData);

            /* Each lane takes its own element of the special entry.  (Lane 1
               previously reused element 0 of aVal1/aVal2.) */
            TestData.InVal1.ar64[0] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[0];
            TestData.InVal1.ar64[1] = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].aVal1[1];
            TestData.InVal2.ar64[0] = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[0];
            TestData.InVal2.ar64[1] = iTest < cTests ? RandR64Src2(iTest) : s_aSpecials[iTest - cTests].aVal2[1];

            if (   RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
                && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[0]) && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[1]))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few all-normal inputs so far and we are near the end of
                   the random range: redo this index with fresh values. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Run 1: all exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        IEMSSERESULT ResM; RT_ZERO(ResM);
                        pfn(&State, &ResM, &TestData.InVal1, &TestData.InVal2);
                        TestData.fMxcsrIn  = State.MXCSR;
                        TestData.fMxcsrOut = ResM.MXCSR;
                        TestData.OutVal    = ResM.uResult;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* Run 2: all exception mask bits cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        IEMSSERESULT ResU; RT_ZERO(ResU);
                        pfn(&State, &ResU, &TestData.InVal1, &TestData.InVal2);
                        TestData.fMxcsrIn  = State.MXCSR;
                        TestData.fMxcsrOut = ResU.MXCSR;
                        TestData.OutVal    = ResU.uResult;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* Further runs only when the first two reported exception flags. */
                        uint16_t fXcpt = (ResM.MXCSR | ResU.MXCSR) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Run 3: mask bits cleared, reported flags ORed into MXCSR. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            IEMSSERESULT Res1; RT_ZERO(Res1);
                            pfn(&State, &Res1, &TestData.InVal1, &TestData.InVal2);
                            TestData.fMxcsrIn  = State.MXCSR;
                            TestData.fMxcsrOut = Res1.MXCSR;
                            TestData.OutVal    = Res1.uResult;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                            /* Run 4: only if run 3 reported flags not seen before;
                               set the mask bits for the accumulated flag set. */
                            if (((Res1.MXCSR & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (Res1.MXCSR & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= Res1.MXCSR & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                IEMSSERESULT Res2; RT_ZERO(Res2);
                                pfn(&State, &Res2, &TestData.InVal1, &TestData.InVal2);
                                TestData.fMxcsrIn  = State.MXCSR;
                                TestData.fMxcsrOut = Res2.MXCSR;
                                TestData.OutVal    = Res2.uResult;
                                RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                            }

                            /* With more than one flag collected: one run per flag,
                               leaving that flag's mask bit clear and setting the others. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        IEMSSERESULT Res3; RT_ZERO(Res3);
                                        pfn(&State, &Res3, &TestData.InVal1, &TestData.InVal2);
                                        TestData.fMxcsrIn  = State.MXCSR;
                                        TestData.fMxcsrOut = Res3.MXCSR;
                                        TestData.OutVal    = Res3.uResult;
                                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
4836
4837
4838static void SseBinaryR64Test(void)
4839{
4840 X86FXSTATE State;
4841 RT_ZERO(State);
4842 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64); iFn++)
4843 {
4844 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR64[iFn].pszName))
4845 continue;
4846
4847 uint32_t const cTests = *g_aSseBinaryR64[iFn].pcTests;
4848 SSE_BINARY_TEST_T const * const paTests = g_aSseBinaryR64[iFn].paTests;
4849 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseBinaryR64[iFn].pfn;
4850 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64[iFn]);
4851 if (!cTests) RTTestSkipped(g_hTest, "no tests");
4852 for (uint32_t iVar = 0; iVar < cVars; iVar++)
4853 {
4854 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_TEST_T); iTest++)
4855 {
4856 IEMSSERESULT Res; RT_ZERO(Res);
4857
4858 State.MXCSR = paTests[iTest].fMxcsrIn;
4859 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].InVal2);
4860 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
4861 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
4862 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
4863 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s'%s\n"
4864 "%s -> mxcsr=%#08x %s'%s\n"
4865 "%s expected %#08x %s'%s%s%s (%s)\n",
4866 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
4867 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
4868 FormatR64(&paTests[iTest].InVal2.ar64[0]), FormatR64(&paTests[iTest].InVal2.ar64[1]),
4869 iVar ? " " : "", Res.MXCSR,
4870 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
4871 iVar ? " " : "", paTests[iTest].fMxcsrOut,
4872 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
4873 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
4874 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
4875 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
4876 ? " - val" : "",
4877 FormatMxcsr(paTests[iTest].fMxcsrIn) );
4878 }
4879 pfn = g_aSseBinaryR64[iFn].pfnNative;
4880 }
4881 }
4882}
4883
4884
/*
 * Binary SSE operations on scalar single precision floating point values (xxxss xmm1, r/m32).
 */
/* Sub-test descriptor type pairing SSE_BINARY_U128_R32_TEST_T records with PFNIEMAIMPLFPSSEF2U128R32 workers. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R32_T, SSE_BINARY_U128_R32_TEST_T, PFNIEMAIMPLFPSSEF2U128R32);

/* Workers iterated by SseBinaryU128R32Generate() and SseBinaryU128R32Test();
   each is called with a 128-bit first operand and a single 32-bit float as
   second operand. */
static const SSE_BINARY_U128_R32_T g_aSseBinaryU128R32[] =
{
    ENTRY_BIN(addss_u128_r32),
    ENTRY_BIN(mulss_u128_r32),
    ENTRY_BIN(subss_u128_r32),
    ENTRY_BIN(minss_u128_r32),
    ENTRY_BIN(divss_u128_r32),
    ENTRY_BIN(maxss_u128_r32),
    ENTRY_BIN(cvtss2sd_u128_r32),
    ENTRY_BIN(sqrtss_u128_r32),
};
4901
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Invokes the worker once with the MXCSR currently held in @a pState and
 * appends the resulting record to the data file.
 *
 * @returns The MXCSR value the worker reported in its result.
 * @param   pfnWorker   The worker to invoke.
 * @param   pState      FPU state supplying the input MXCSR.
 * @param   pTestData   Record holding the inputs; its MXCSR fields and output
 *                      value are filled in before it is written.
 * @param   pStrmOut    The data file stream.
 */
static uint32_t SseBinaryU128R32GenerateOne(PFNIEMAIMPLFPSSEF2U128R32 pfnWorker, X86FXSTATE *pState,
                                            SSE_BINARY_U128_R32_TEST_T *pTestData, PRTSTREAM pStrmOut)
{
    IEMSSERESULT Res;
    RT_ZERO(Res);
    pfnWorker(pState, &Res, &pTestData->InVal1, &pTestData->r32Val2);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = Res.MXCSR;
    pTestData->OutVal    = Res.uResult;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return Res.MXCSR;
}


/**
 * Generates the binary test data files for the g_aSseBinaryU128R32 workers.
 *
 * One data file is written per worker.  Every input set (random ones first,
 * then the entries of s_aSpecials) is run through all rounding mode, DAZ and
 * FZ combinations, each with several settings of the MXCSR exception mask
 * bits; every worker invocation yields one SSE_BINARY_U128_R32_TEST_T record.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the worker
 *                          name is passed as its argument.
 * @param   cTests          Number of random input sets per worker (raised to
 *                          at least 192).
 */
static RTEXITCODE SseBinaryU128R32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    static struct { RTFLOAT32U aVal1[4], Val2; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), }, RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
          /** @todo More specials. */
    };

    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
    uint32_t const cMinNormalPairs = (cTests - 144) / 4;
    size_t const   cTotalInputs    = cTests + RT_ELEMENTS(s_aSpecials);

    X86FXSTATE State;
    RT_ZERO(State);

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
    {
        const char * const pszName = g_aSseBinaryU128R32[iFn].pszName;

        /* Use the native worker when the table entry has one. */
        PFNIEMAIMPLFPSSEF2U128R32 const pfn = g_aSseBinaryU128R32[iFn].pfnNative
                                            ? g_aSseBinaryU128R32[iFn].pfnNative
                                            : g_aSseBinaryU128R32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTotalInputs; iTest += 1)
        {
            SSE_BINARY_U128_R32_TEST_T TestData;
            RT_ZERO(TestData);

            if (iTest < cTests)
            {
                /* Random inputs: four lanes of the first operand, then the second operand. */
                for (unsigned iLane = 0; iLane < 4; iLane++)
                    TestData.InVal1.ar32[iLane] = RandR32Src(iTest);
                TestData.r32Val2 = RandR32Src2(iTest);
            }
            else
            {
                /* Past the random range: take the inputs from the specials table. */
                for (unsigned iLane = 0; iLane < 4; iLane++)
                    TestData.InVal1.ar32[iLane] = s_aSpecials[iTest - cTests].aVal1[iLane];
                TestData.r32Val2 = s_aSpecials[iTest - cTests].Val2;
            }

            bool const fAllNormal = RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[0])
                                 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[1])
                                 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[2])
                                 && RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[3])
                                 && RTFLOAT32U_IS_NORMAL(&TestData.r32Val2);
            if (fAllNormal)
                cNormalInputPairs++;
            else if (   iTest < cTests
                     && cNormalInputPairs < cMinNormalPairs
                     && iTest + cMinNormalPairs >= cTests)
            {
                /* Too few all-normal inputs so far and we are near the end of
                   the random range: redo this index with fresh values. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Run 1: all exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryU128R32GenerateOne(pfn, &State, &TestData, pStrmOut);

                        /* Run 2: all exception mask bits cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryU128R32GenerateOne(pfn, &State, &TestData, pStrmOut);

                        /* Further runs only when the first two reported exception flags. */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Run 3: mask bits cleared, reported flags ORed into MXCSR. */
                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1 = SseBinaryU128R32GenerateOne(pfn, &State, &TestData, pStrmOut);

                        /* Run 4: only if run 3 reported flags not seen before. */
                        if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                        {
                            fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseBinaryU128R32GenerateOne(pfn, &State, &TestData, pStrmOut);
                        }

                        /* With more than one flag collected: one run per flag,
                           leaving that flag's mask bit clear and setting the others. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                    SseBinaryU128R32GenerateOne(pfn, &State, &TestData, pStrmOut);
                                }
                    }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
5025
5026static void SseBinaryU128R32Test(void)
5027{
5028 X86FXSTATE State;
5029 RT_ZERO(State);
5030 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R32); iFn++)
5031 {
5032 if (!SubTestAndCheckIfEnabled(g_aSseBinaryU128R32[iFn].pszName))
5033 continue;
5034
5035 uint32_t const cTests = *g_aSseBinaryU128R32[iFn].pcTests;
5036 SSE_BINARY_U128_R32_TEST_T const * const paTests = g_aSseBinaryU128R32[iFn].paTests;
5037 PFNIEMAIMPLFPSSEF2U128R32 pfn = g_aSseBinaryU128R32[iFn].pfn;
5038 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R32[iFn]);
5039 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5040 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5041 {
5042 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_TEST_T); iTest++)
5043 {
5044 IEMSSERESULT Res; RT_ZERO(Res);
5045
5046 State.MXCSR = paTests[iTest].fMxcsrIn;
5047 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].r32Val2);
5048 bool fValsIdentical = RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
5049 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
5050 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
5051 && RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]);
5052 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5053 || !fValsIdentical)
5054 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s\n"
5055 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
5056 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
5057 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5058 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
5059 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
5060 FormatR32(&paTests[iTest].r32Val2),
5061 iVar ? " " : "", Res.MXCSR,
5062 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
5063 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
5064 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5065 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
5066 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
5067 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5068 !fValsIdentical ? " - val" : "",
5069 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5070 }
5071 }
5072 }
5073}
5074
5075
/*
 * Binary SSE operations on scalar double precision floating point values (xxxsd xmm1, r/m64).
 */
/* Sub-test descriptor type pairing SSE_BINARY_U128_R64_TEST_T records with PFNIEMAIMPLFPSSEF2U128R64 workers. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_U128_R64_T, SSE_BINARY_U128_R64_TEST_T, PFNIEMAIMPLFPSSEF2U128R64);

/* Workers iterated by SseBinaryU128R64Generate() and SseBinaryU128R64Test();
   each is called with a 128-bit first operand and a single 64-bit float as
   second operand. */
static const SSE_BINARY_U128_R64_T g_aSseBinaryU128R64[] =
{
    ENTRY_BIN(addsd_u128_r64),
    ENTRY_BIN(mulsd_u128_r64),
    ENTRY_BIN(subsd_u128_r64),
    ENTRY_BIN(minsd_u128_r64),
    ENTRY_BIN(divsd_u128_r64),
    ENTRY_BIN(maxsd_u128_r64),
    ENTRY_BIN(cvtsd2ss_u128_r64),
    ENTRY_BIN(sqrtsd_u128_r64),
};
5092
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Invokes the worker once with the MXCSR currently held in @a pState and
 * appends the resulting record to the data file.
 *
 * @returns The MXCSR value the worker reported in its result.
 * @param   pfnWorker   The worker to invoke.
 * @param   pState      FPU state supplying the input MXCSR.
 * @param   pTestData   Record holding the inputs; its MXCSR fields and output
 *                      value are filled in before it is written.
 * @param   pStrmOut    The data file stream.
 */
static uint32_t SseBinaryU128R64GenerateOne(PFNIEMAIMPLFPSSEF2U128R64 pfnWorker, X86FXSTATE *pState,
                                            SSE_BINARY_U128_R64_TEST_T *pTestData, PRTSTREAM pStrmOut)
{
    IEMSSERESULT Res;
    RT_ZERO(Res);
    pfnWorker(pState, &Res, &pTestData->InVal1, &pTestData->r64Val2);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = Res.MXCSR;
    pTestData->OutVal    = Res.uResult;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return Res.MXCSR;
}


/**
 * Generates the binary test data files for the g_aSseBinaryU128R64 workers.
 *
 * One data file is written per worker.  Every input set (random ones first,
 * then the entries of s_aSpecials) is run through all rounding mode, DAZ and
 * FZ combinations, each with several settings of the MXCSR exception mask
 * bits; every worker invocation yields one SSE_BINARY_U128_R64_TEST_T record.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the worker
 *                          name is passed as its argument.
 * @param   cTests          Number of random input sets per worker (raised to
 *                          at least 192).
 */
static RTEXITCODE SseBinaryU128R64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    static struct { RTFLOAT64U aVal1[2], Val2; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) }, RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
          /** @todo More specials. */
    };

    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
    uint32_t const cMinNormalPairs = (cTests - 144) / 4;
    size_t const   cTotalInputs    = cTests + RT_ELEMENTS(s_aSpecials);

    X86FXSTATE State;
    RT_ZERO(State);

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
    {
        const char * const pszName = g_aSseBinaryU128R64[iFn].pszName;

        /* Use the native worker when the table entry has one. */
        PFNIEMAIMPLFPSSEF2U128R64 const pfn = g_aSseBinaryU128R64[iFn].pfnNative
                                            ? g_aSseBinaryU128R64[iFn].pfnNative
                                            : g_aSseBinaryU128R64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTotalInputs; iTest += 1)
        {
            SSE_BINARY_U128_R64_TEST_T TestData;
            RT_ZERO(TestData);

            if (iTest < cTests)
            {
                /* Random inputs: both lanes of the first operand, then the second operand. */
                TestData.InVal1.ar64[0] = RandR64Src(iTest);
                TestData.InVal1.ar64[1] = RandR64Src(iTest);
                TestData.r64Val2        = RandR64Src2(iTest);
            }
            else
            {
                /* Past the random range: take the inputs from the specials table. */
                TestData.InVal1.ar64[0] = s_aSpecials[iTest - cTests].aVal1[0];
                TestData.InVal1.ar64[1] = s_aSpecials[iTest - cTests].aVal1[1];
                TestData.r64Val2        = s_aSpecials[iTest - cTests].Val2;
            }

            bool const fAllNormal = RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0])
                                 && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
                                 && RTFLOAT64U_IS_NORMAL(&TestData.r64Val2);
            if (fAllNormal)
                cNormalInputPairs++;
            else if (   iTest < cTests
                     && cNormalInputPairs < cMinNormalPairs
                     && iTest + cMinNormalPairs >= cTests)
            {
                /* Too few all-normal inputs so far and we are near the end of
                   the random range: redo this index with fresh values. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Run 1: all exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryU128R64GenerateOne(pfn, &State, &TestData, pStrmOut);

                        /* Run 2: all exception mask bits cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryU128R64GenerateOne(pfn, &State, &TestData, pStrmOut);

                        /* Further runs only when the first two reported exception flags. */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Run 3: mask bits cleared, reported flags ORed into MXCSR. */
                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1 = SseBinaryU128R64GenerateOne(pfn, &State, &TestData, pStrmOut);

                        /* Run 4: only if run 3 reported flags not seen before. */
                        if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                        {
                            fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseBinaryU128R64GenerateOne(pfn, &State, &TestData, pStrmOut);
                        }

                        /* With more than one flag collected: one run per flag,
                           leaving that flag's mask bit clear and setting the others. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                    SseBinaryU128R64GenerateOne(pfn, &State, &TestData, pStrmOut);
                                }
                    }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
5210
5211
5212static void SseBinaryU128R64Test(void)
5213{
5214 X86FXSTATE State;
5215 RT_ZERO(State);
5216 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryU128R64); iFn++)
5217 {
5218 if (!SubTestAndCheckIfEnabled(g_aSseBinaryU128R64[iFn].pszName))
5219 continue;
5220
5221 uint32_t const cTests = *g_aSseBinaryU128R64[iFn].pcTests;
5222 SSE_BINARY_U128_R64_TEST_T const * const paTests = g_aSseBinaryU128R64[iFn].paTests;
5223 PFNIEMAIMPLFPSSEF2U128R64 pfn = g_aSseBinaryU128R64[iFn].pfn;
5224 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryU128R64[iFn]);
5225 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5226 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5227 {
5228 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_U128_R64_TEST_T); iTest++)
5229 {
5230 IEMSSERESULT Res; RT_ZERO(Res);
5231
5232 State.MXCSR = paTests[iTest].fMxcsrIn;
5233 pfn(&State, &Res, &paTests[iTest].InVal1, &paTests[iTest].r64Val2);
5234 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
5235 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5236 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5237 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s\n"
5238 "%s -> mxcsr=%#08x %s'%s\n"
5239 "%s expected %#08x %s'%s%s%s (%s)\n",
5240 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5241 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
5242 FormatR64(&paTests[iTest].r64Val2),
5243 iVar ? " " : "", Res.MXCSR,
5244 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
5245 iVar ? " " : "", paTests[iTest].fMxcsrOut,
5246 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
5247 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
5248 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
5249 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
5250 ? " - val" : "",
5251 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5252 }
5253 }
5254 }
5255}
5256
5257
5258/*
5259 * SSE operations converting single double-precision floating point values to signed double-word integers (cvttsd2si and friends).
5260 */
/* Sub-test descriptor type pairing SSE_BINARY_I32_R64_TEST_T records with PFNIEMAIMPLSSEF2I32U64 workers. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I32_R64_T, SSE_BINARY_I32_R64_TEST_T, PFNIEMAIMPLSSEF2I32U64);

/* Workers iterated by SseBinaryI32R64Generate() and SseBinaryI32R64Test();
   each takes one 64-bit floating point input and yields a 32-bit signed
   integer plus an output MXCSR. */
static const SSE_BINARY_I32_R64_T g_aSseBinaryI32R64[] =
{
    ENTRY_BIN(cvttsd2si_i32_r64),
    ENTRY_BIN(cvtsd2si_i32_r64),
};
5268
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Invokes the worker once with the MXCSR currently held in @a pState and
 * appends the resulting record to the data file.
 *
 * The output locals are zero-initialised before the call (mirroring
 * SseBinaryI32R64Test) so that no indeterminate value can end up in the data
 * file should a worker leave an output untouched.
 *
 * @returns The MXCSR value the worker reported.
 * @param   pfnWorker   The worker to invoke.
 * @param   pState      FPU state supplying the input MXCSR.
 * @param   pTestData   Record holding the input; its MXCSR fields and output
 *                      value are filled in before it is written.
 * @param   pStrmOut    The data file stream.
 */
static uint32_t SseBinaryI32R64GenerateOne(PFNIEMAIMPLSSEF2I32U64 pfnWorker, X86FXSTATE *pState,
                                           SSE_BINARY_I32_R64_TEST_T *pTestData, PRTSTREAM pStrmOut)
{
    uint32_t fMxcsrOut = 0;
    int32_t  i32Out    = 0;
    pfnWorker(pState, &fMxcsrOut, &i32Out, &pTestData->r64ValIn.u);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->i32ValOut = i32Out;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}


/**
 * Generates the binary test data files for the g_aSseBinaryI32R64 workers.
 *
 * One data file is written per worker.  Every input value (random ones first,
 * then the entries of s_aSpecials) is run through all rounding mode, DAZ and
 * FZ combinations, each with several settings of the MXCSR exception mask
 * bits; every worker invocation yields one SSE_BINARY_I32_R64_TEST_T record.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the worker
 *                          name is passed as its argument.
 * @param   cTests          Number of random input values per worker (raised
 *                          to at least 192).
 */
static RTEXITCODE SseBinaryI32R64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U Val; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
          /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R64); iFn++)
    {
        /* Use the native worker when the table entry has one. */
        PFNIEMAIMPLSSEF2I32U64 const pfn = g_aSseBinaryI32R64[iFn].pfnNative ? g_aSseBinaryI32R64[iFn].pfnNative : g_aSseBinaryI32R64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryI32R64[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryI32R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_I32_R64_TEST_T TestData; RT_ZERO(TestData);

            TestData.r64ValIn = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val;

            if (RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few normal inputs so far and we are near the end of the
                   random range: redo this index with a fresh value. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Run 1: all exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryI32R64GenerateOne(pfn, &State, &TestData, pStrmOut);

                        /* Run 2: all exception mask bits cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryI32R64GenerateOne(pfn, &State, &TestData, pStrmOut);

                        /* Further runs only when the first two reported exception flags. */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Run 3: mask bits cleared, reported flags ORed into MXCSR. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1 = SseBinaryI32R64GenerateOne(pfn, &State, &TestData, pStrmOut);

                            /* Run 4: only if run 3 reported flags not seen before. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseBinaryI32R64GenerateOne(pfn, &State, &TestData, pStrmOut);
                            }

                            /* With more than one flag collected: one run per flag,
                               leaving that flag's mask bit clear and setting the others. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        SseBinaryI32R64GenerateOne(pfn, &State, &TestData, pStrmOut);
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryI32R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
5383
5384
5385static void SseBinaryI32R64Test(void)
5386{
5387 X86FXSTATE State;
5388 RT_ZERO(State);
5389 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R64); iFn++)
5390 {
5391 if (!SubTestAndCheckIfEnabled(g_aSseBinaryI32R64[iFn].pszName))
5392 continue;
5393
5394 uint32_t const cTests = *g_aSseBinaryI32R64[iFn].pcTests;
5395 SSE_BINARY_I32_R64_TEST_T const * const paTests = g_aSseBinaryI32R64[iFn].paTests;
5396 PFNIEMAIMPLSSEF2I32U64 pfn = g_aSseBinaryI32R64[iFn].pfn;
5397 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R64[iFn]);
5398 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5399 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5400 {
5401 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_I32_R64_TEST_T); iTest++)
5402 {
5403 uint32_t fMxcsr = 0;
5404 int32_t i32Dst = 0;
5405
5406 State.MXCSR = paTests[iTest].fMxcsrIn;
5407 pfn(&State, &fMxcsr, &i32Dst, &paTests[iTest].r64ValIn.u);
5408 if ( fMxcsr != paTests[iTest].fMxcsrOut
5409 || i32Dst != paTests[iTest].i32ValOut)
5410 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
5411 "%s -> mxcsr=%#08x %RI32\n"
5412 "%s expected %#08x %RI32%s%s (%s)\n",
5413 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5414 FormatR64(&paTests[iTest].r64ValIn),
5415 iVar ? " " : "", fMxcsr, i32Dst,
5416 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i32ValOut,
5417 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
5418 i32Dst != paTests[iTest].i32ValOut
5419 ? " - val" : "",
5420 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5421 }
5422 }
5423 }
5424}
5425
5426
5427/*
5428 * SSE operations converting single double-precision floating point values to signed quad-word integers (cvttsd2si and friends).
5429 */
/* Sub-test descriptor type pairing SSE_BINARY_I64_R64_TEST_T records with PFNIEMAIMPLSSEF2I64U64 workers. */
TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I64_R64_T, SSE_BINARY_I64_R64_TEST_T, PFNIEMAIMPLSSEF2I64U64);

/* Workers iterated by SseBinaryI64R64Generate() and SseBinaryI64R64Test();
   each takes one 64-bit floating point input and yields a 64-bit signed
   integer plus an output MXCSR. */
static const SSE_BINARY_I64_R64_T g_aSseBinaryI64R64[] =
{
    ENTRY_BIN(cvttsd2si_i64_r64),
    ENTRY_BIN(cvtsd2si_i64_r64),
};
5437
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Invokes the worker once with the MXCSR currently held in @a pState and
 * appends the resulting record to the data file.
 *
 * The output locals are zero-initialised before the call (mirroring
 * SseBinaryI64R64Test) so that no indeterminate value can end up in the data
 * file should a worker leave an output untouched.
 *
 * @returns The MXCSR value the worker reported.
 * @param   pfnWorker   The worker to invoke.
 * @param   pState      FPU state supplying the input MXCSR.
 * @param   pTestData   Record holding the input; its MXCSR fields and output
 *                      value are filled in before it is written.
 * @param   pStrmOut    The data file stream.
 */
static uint32_t SseBinaryI64R64GenerateOne(PFNIEMAIMPLSSEF2I64U64 pfnWorker, X86FXSTATE *pState,
                                           SSE_BINARY_I64_R64_TEST_T *pTestData, PRTSTREAM pStrmOut)
{
    uint32_t fMxcsrOut = 0;
    int64_t  i64Out    = 0;
    pfnWorker(pState, &fMxcsrOut, &i64Out, &pTestData->r64ValIn.u);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->i64ValOut = i64Out;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}


/**
 * Generates the binary test data files for the g_aSseBinaryI64R64 workers.
 *
 * One data file is written per worker.  Every input value (random ones first,
 * then the entries of s_aSpecials) is run through all rounding mode, DAZ and
 * FZ combinations, each with several settings of the MXCSR exception mask
 * bits; every worker invocation yields one SSE_BINARY_I64_R64_TEST_T record.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the worker
 *                          name is passed as its argument.
 * @param   cTests          Number of random input values per worker (raised
 *                          to at least 192).
 */
static RTEXITCODE SseBinaryI64R64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U Val; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_C(0, 8388607, RTFLOAT64U_EXP_MAX - 1) },
          /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R64); iFn++)
    {
        /* Use the native worker when the table entry has one. */
        PFNIEMAIMPLSSEF2I64U64 const pfn = g_aSseBinaryI64R64[iFn].pfnNative ? g_aSseBinaryI64R64[iFn].pfnNative : g_aSseBinaryI64R64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryI64R64[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryI64R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_I64_R64_TEST_T TestData; RT_ZERO(TestData);

            TestData.r64ValIn = iTest < cTests ? RandR64Src(iTest) : s_aSpecials[iTest - cTests].Val;

            if (RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few normal inputs so far and we are near the end of the
                   random range: redo this index with a fresh value. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Run 1: all exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryI64R64GenerateOne(pfn, &State, &TestData, pStrmOut);

                        /* Run 2: all exception mask bits cleared. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryI64R64GenerateOne(pfn, &State, &TestData, pStrmOut);

                        /* Further runs only when the first two reported exception flags. */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Run 3: mask bits cleared, reported flags ORed into MXCSR. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1 = SseBinaryI64R64GenerateOne(pfn, &State, &TestData, pStrmOut);

                            /* Run 4: only if run 3 reported flags not seen before. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseBinaryI64R64GenerateOne(pfn, &State, &TestData, pStrmOut);
                            }

                            /* With more than one flag collected: one run per flag,
                               leaving that flag's mask bit clear and setting the others. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        SseBinaryI64R64GenerateOne(pfn, &State, &TestData, pStrmOut);
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryI64R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
5552
5553
5554static void SseBinaryI64R64Test(void)
5555{
5556 X86FXSTATE State;
5557 RT_ZERO(State);
5558 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R64); iFn++)
5559 {
5560 if (!SubTestAndCheckIfEnabled(g_aSseBinaryI64R64[iFn].pszName))
5561 continue;
5562
5563 uint32_t const cTests = *g_aSseBinaryI64R64[iFn].pcTests;
5564 SSE_BINARY_I64_R64_TEST_T const * const paTests = g_aSseBinaryI64R64[iFn].paTests;
5565 PFNIEMAIMPLSSEF2I64U64 pfn = g_aSseBinaryI64R64[iFn].pfn;
5566 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R64[iFn]);
5567 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5568 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5569 {
5570 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_I64_R64_TEST_T); iTest++)
5571 {
5572 uint32_t fMxcsr = 0;
5573 int64_t i64Dst = 0;
5574
5575 State.MXCSR = paTests[iTest].fMxcsrIn;
5576 pfn(&State, &fMxcsr, &i64Dst, &paTests[iTest].r64ValIn.u);
5577 if ( fMxcsr != paTests[iTest].fMxcsrOut
5578 || i64Dst != paTests[iTest].i64ValOut)
5579 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
5580 "%s -> mxcsr=%#08x %RI64\n"
5581 "%s expected %#08x %RI64%s%s (%s)\n",
5582 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5583 FormatR64(&paTests[iTest].r64ValIn),
5584 iVar ? " " : "", fMxcsr, i64Dst,
5585 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i64ValOut,
5586 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
5587 i64Dst != paTests[iTest].i64ValOut
5588 ? " - val" : "",
5589 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5590 }
5591 }
5592 }
5593}
5594
5595
5596/*
5597 * SSE operations converting single single-precision floating point values to signed double-word integers (cvttss2si and friends).
5598 */
5599TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I32_R32_T, SSE_BINARY_I32_R32_TEST_T, PFNIEMAIMPLSSEF2I32U32);
5600
5601static const SSE_BINARY_I32_R32_T g_aSseBinaryI32R32[] =
5602{
5603 ENTRY_BIN(cvttss2si_i32_r32),
5604 ENTRY_BIN(cvtss2si_i32_r32),
5605};
5606
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the worker once with the MXCSR currently in @a pState and appends the
 * resulting test record to the data file.
 *
 * The outputs are zero-initialised before the call (like SseBinaryI32R32Test
 * does), so the record never contains indeterminate values should the worker
 * not store a result.
 *
 * @returns The MXCSR value produced by the worker.
 * @param   pfn         The worker to call.
 * @param   pState      FPU state holding the input MXCSR.
 * @param   pTestData   Test record; r32ValIn must be set, the MXCSR and output
 *                      members are filled in here.
 * @param   pStrmOut    The output stream.
 * @param   prcWrite    Receives the first write failure; otherwise left untouched.
 */
static uint32_t SseBinaryI32R32GenerateOne(PFNIEMAIMPLSSEF2I32U32 pfn, X86FXSTATE *pState,
                                           SSE_BINARY_I32_R32_TEST_T *pTestData, PRTSTREAM pStrmOut, int *prcWrite)
{
    uint32_t fMxcsrOut = 0;
    int32_t  i32Out    = 0;
    pfn(pState, &fMxcsrOut, &i32Out, &pTestData->r32ValIn.u);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->i32ValOut = i32Out;
    int const rc = RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    if (RT_FAILURE(rc) && RT_SUCCESS(*prcWrite))
        *prcWrite = rc;
    return fMxcsrOut;
}


/**
 * Generates the test data files for the g_aSseBinaryI32R32 sub-tests.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened, written or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the sub-test
 *                          name is passed as its argument.
 * @param   cTests          Number of random inputs (raised to at least 192).
 */
static RTEXITCODE SseBinaryI32R32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U Val; } const s_aSpecials[] =
    {
        { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t const cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R32); iFn++)
    {
        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLSSEF2I32U32 const pfn = g_aSseBinaryI32R32[iFn].pfnNative ? g_aSseBinaryI32R32[iFn].pfnNative : g_aSseBinaryI32R32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryI32R32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryI32R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        int      rcWrite           = VINF_SUCCESS;
        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_I32_R32_TEST_T TestData; RT_ZERO(TestData);

            TestData.r32ValIn = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val;

            /* Redo this iteration with a new random input when too few normal
               values have been produced towards the end of the random range. */
            if (RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* All exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryI32R32GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);

                        /* All exception mask bits clear. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryI32R32GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Mask bits clear, the observed exception flags OR'ed into the input. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1 = SseBinaryI32R32GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);

                            /* New flags showed up: mask exactly the accumulated set. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseBinaryI32R32GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);
                            }

                            /* More than one flag: leave each one unmasked in turn. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        SseBinaryI32R32GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rcWrite))
        {
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSseBinaryI32R32[iFn].pszName, rcWrite);
            return RTEXITCODE_FAILURE;
        }
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryI32R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
5721
5722
5723static void SseBinaryI32R32Test(void)
5724{
5725 X86FXSTATE State;
5726 RT_ZERO(State);
5727 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI32R32); iFn++)
5728 {
5729 if (!SubTestAndCheckIfEnabled(g_aSseBinaryI32R32[iFn].pszName))
5730 continue;
5731
5732 uint32_t const cTests = *g_aSseBinaryI32R32[iFn].pcTests;
5733 SSE_BINARY_I32_R32_TEST_T const * const paTests = g_aSseBinaryI32R32[iFn].paTests;
5734 PFNIEMAIMPLSSEF2I32U32 pfn = g_aSseBinaryI32R32[iFn].pfn;
5735 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI32R32[iFn]);
5736 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5737 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5738 {
5739 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_I32_R32_TEST_T); iTest++)
5740 {
5741 uint32_t fMxcsr = 0;
5742 int32_t i32Dst = 0;
5743
5744 State.MXCSR = paTests[iTest].fMxcsrIn;
5745 pfn(&State, &fMxcsr, &i32Dst, &paTests[iTest].r32ValIn.u);
5746 if ( fMxcsr != paTests[iTest].fMxcsrOut
5747 || i32Dst != paTests[iTest].i32ValOut)
5748 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
5749 "%s -> mxcsr=%#08x %RI32\n"
5750 "%s expected %#08x %RI32%s%s (%s)\n",
5751 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5752 FormatR32(&paTests[iTest].r32ValIn),
5753 iVar ? " " : "", fMxcsr, i32Dst,
5754 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i32ValOut,
5755 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
5756 i32Dst != paTests[iTest].i32ValOut
5757 ? " - val" : "",
5758 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5759 }
5760 }
5761 }
5762}
5763
5764
5765/*
5766 * SSE operations converting single single-precision floating point values to signed quad-word integers (cvttss2si and friends).
5767 */
5768TYPEDEF_SUBTEST_TYPE(SSE_BINARY_I64_R32_T, SSE_BINARY_I64_R32_TEST_T, PFNIEMAIMPLSSEF2I64U32);
5769
5770static const SSE_BINARY_I64_R32_T g_aSseBinaryI64R32[] =
5771{
5772 ENTRY_BIN(cvttss2si_i64_r32),
5773 ENTRY_BIN(cvtss2si_i64_r32),
5774};
5775
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the worker once with the MXCSR currently in @a pState and appends the
 * resulting test record to the data file.
 *
 * The outputs are zero-initialised before the call (like SseBinaryI64R32Test
 * does), so the record never contains indeterminate values should the worker
 * not store a result.
 *
 * @returns The MXCSR value produced by the worker.
 * @param   pfn         The worker to call.
 * @param   pState      FPU state holding the input MXCSR.
 * @param   pTestData   Test record; r32ValIn must be set, the MXCSR and output
 *                      members are filled in here.
 * @param   pStrmOut    The output stream.
 * @param   prcWrite    Receives the first write failure; otherwise left untouched.
 */
static uint32_t SseBinaryI64R32GenerateOne(PFNIEMAIMPLSSEF2I64U32 pfn, X86FXSTATE *pState,
                                           SSE_BINARY_I64_R32_TEST_T *pTestData, PRTSTREAM pStrmOut, int *prcWrite)
{
    uint32_t fMxcsrOut = 0;
    int64_t  i64Out    = 0;
    pfn(pState, &fMxcsrOut, &i64Out, &pTestData->r32ValIn.u);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->i64ValOut = i64Out;
    int const rc = RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    if (RT_FAILURE(rc) && RT_SUCCESS(*prcWrite))
        *prcWrite = rc;
    return fMxcsrOut;
}


/**
 * Generates the test data files for the g_aSseBinaryI64R32 sub-tests.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened, written or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the sub-test
 *                          name is passed as its argument.
 * @param   cTests          Number of random inputs (raised to at least 192).
 */
static RTEXITCODE SseBinaryI64R32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U Val; } const s_aSpecials[] =
    {
        { RTFLOAT32U_INIT_C(0, 8388607, RTFLOAT32U_EXP_MAX - 1) },
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t const cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R32); iFn++)
    {
        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLSSEF2I64U32 const pfn = g_aSseBinaryI64R32[iFn].pfnNative ? g_aSseBinaryI64R32[iFn].pfnNative : g_aSseBinaryI64R32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryI64R32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryI64R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        int      rcWrite           = VINF_SUCCESS;
        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_I64_R32_TEST_T TestData; RT_ZERO(TestData);

            TestData.r32ValIn = iTest < cTests ? RandR32Src(iTest) : s_aSpecials[iTest - cTests].Val;

            /* Redo this iteration with a new random input when too few normal
               values have been produced towards the end of the random range. */
            if (RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* All exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryI64R32GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);

                        /* All exception mask bits clear. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryI64R32GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Mask bits clear, the observed exception flags OR'ed into the input. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1 = SseBinaryI64R32GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);

                            /* New flags showed up: mask exactly the accumulated set. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseBinaryI64R32GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);
                            }

                            /* More than one flag: leave each one unmasked in turn. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        SseBinaryI64R32GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rcWrite))
        {
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSseBinaryI64R32[iFn].pszName, rcWrite);
            return RTEXITCODE_FAILURE;
        }
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryI64R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
5890
5891
5892static void SseBinaryI64R32Test(void)
5893{
5894 X86FXSTATE State;
5895 RT_ZERO(State);
5896 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryI64R32); iFn++)
5897 {
5898 if (!SubTestAndCheckIfEnabled(g_aSseBinaryI64R32[iFn].pszName))
5899 continue;
5900
5901 uint32_t const cTests = *g_aSseBinaryI64R32[iFn].pcTests;
5902 SSE_BINARY_I64_R32_TEST_T const * const paTests = g_aSseBinaryI64R32[iFn].paTests;
5903 PFNIEMAIMPLSSEF2I64U32 pfn = g_aSseBinaryI64R32[iFn].pfn;
5904 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryI64R32[iFn]);
5905 if (!cTests) RTTestSkipped(g_hTest, "no tests");
5906 for (uint32_t iVar = 0; iVar < cVars; iVar++)
5907 {
5908 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_I64_R32_TEST_T); iTest++)
5909 {
5910 uint32_t fMxcsr = 0;
5911 int64_t i64Dst = 0;
5912
5913 State.MXCSR = paTests[iTest].fMxcsrIn;
5914 pfn(&State, &fMxcsr, &i64Dst, &paTests[iTest].r32ValIn.u);
5915 if ( fMxcsr != paTests[iTest].fMxcsrOut
5916 || i64Dst != paTests[iTest].i64ValOut)
5917 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s\n"
5918 "%s -> mxcsr=%#08x %RI64\n"
5919 "%s expected %#08x %RI64%s%s (%s)\n",
5920 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
5921 FormatR32(&paTests[iTest].r32ValIn),
5922 iVar ? " " : "", fMxcsr, i64Dst,
5923 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].i64ValOut,
5924 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
5925 i64Dst != paTests[iTest].i64ValOut
5926 ? " - val" : "",
5927 FormatMxcsr(paTests[iTest].fMxcsrIn) );
5928 }
5929 }
5930 }
5931}
5932
5933
5934/*
5935 * SSE operations converting single signed double-word integers to double-precision floating point values (probably only cvtsi2sd).
5936 */
5937TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_I32_T, SSE_BINARY_R64_I32_TEST_T, PFNIEMAIMPLSSEF2R64I32);
5938
5939static const SSE_BINARY_R64_I32_T g_aSseBinaryR64I32[] =
5940{
5941 ENTRY_BIN(cvtsi2sd_r64_i32)
5942};
5943
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the worker once with the MXCSR currently in @a pState and appends the
 * resulting test record to the data file.
 *
 * The outputs are zero-initialised before the call (like SseBinaryR64I32Test
 * does), so the record never contains indeterminate values should the worker
 * not store a result.
 *
 * @returns The MXCSR value produced by the worker.
 * @param   pfn         The worker to call.
 * @param   pState      FPU state holding the input MXCSR.
 * @param   pTestData   Test record; i32ValIn must be set, the MXCSR and output
 *                      members are filled in here.
 * @param   pStrmOut    The output stream.
 * @param   prcWrite    Receives the first write failure; otherwise left untouched.
 */
static uint32_t SseBinaryR64I32GenerateOne(PFNIEMAIMPLSSEF2R64I32 pfn, X86FXSTATE *pState,
                                           SSE_BINARY_R64_I32_TEST_T *pTestData, PRTSTREAM pStrmOut, int *prcWrite)
{
    uint32_t   fMxcsrOut = 0;
    RTFLOAT64U r64Out;   RT_ZERO(r64Out);
    pfn(pState, &fMxcsrOut, &r64Out, &pTestData->i32ValIn);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->r64ValOut = r64Out;
    int const rc = RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    if (RT_FAILURE(rc) && RT_SUCCESS(*prcWrite))
        *prcWrite = rc;
    return fMxcsrOut;
}


/**
 * Generates the test data files for the g_aSseBinaryR64I32 sub-tests.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened, written or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the sub-test
 *                          name is passed as its argument.
 * @param   cTests          Number of random inputs (raised to at least 192).
 */
static RTEXITCODE SseBinaryR64I32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int32_t const s_aSpecials[] =
    {
        INT32_MIN,
        INT32_MAX,
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I32); iFn++)
    {
        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLSSEF2R64I32 const pfn = g_aSseBinaryR64I32[iFn].pfnNative ? g_aSseBinaryR64I32[iFn].pfnNative : g_aSseBinaryR64I32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR64I32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR64I32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        int rcWrite = VINF_SUCCESS;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_R64_I32_TEST_T TestData; RT_ZERO(TestData);

            TestData.i32ValIn = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* All exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryR64I32GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);

                        /* All exception mask bits clear. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryR64I32GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Mask bits clear, the observed exception flags OR'ed into the input. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1 = SseBinaryR64I32GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);

                            /* New flags showed up: mask exactly the accumulated set. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseBinaryR64I32GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);
                            }

                            /* More than one flag: leave each one unmasked in turn. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        SseBinaryR64I32GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rcWrite))
        {
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSseBinaryR64I32[iFn].pszName, rcWrite);
            return RTEXITCODE_FAILURE;
        }
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR64I32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
6049
6050
6051static void SseBinaryR64I32Test(void)
6052{
6053 X86FXSTATE State;
6054 RT_ZERO(State);
6055 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I32); iFn++)
6056 {
6057 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR64I32[iFn].pszName))
6058 continue;
6059
6060 uint32_t const cTests = *g_aSseBinaryR64I32[iFn].pcTests;
6061 SSE_BINARY_R64_I32_TEST_T const * const paTests = g_aSseBinaryR64I32[iFn].paTests;
6062 PFNIEMAIMPLSSEF2R64I32 pfn = g_aSseBinaryR64I32[iFn].pfn;
6063 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64I32[iFn]);
6064 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6065 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6066 {
6067 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_R64_I32_TEST_T); iTest++)
6068 {
6069 uint32_t fMxcsr = 0;
6070 RTFLOAT64U r64Dst; RT_ZERO(r64Dst);
6071
6072 State.MXCSR = paTests[iTest].fMxcsrIn;
6073 pfn(&State, &fMxcsr, &r64Dst, &paTests[iTest].i32ValIn);
6074 if ( fMxcsr != paTests[iTest].fMxcsrOut
6075 || !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut))
6076 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32\n"
6077 "%s -> mxcsr=%#08x %s\n"
6078 "%s expected %#08x %s%s%s (%s)\n",
6079 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6080 &paTests[iTest].i32ValIn,
6081 iVar ? " " : "", fMxcsr, FormatR64(&r64Dst),
6082 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR64(&paTests[iTest].r64ValOut),
6083 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6084 !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut)
6085 ? " - val" : "",
6086 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6087 }
6088 }
6089 }
6090}
6091
6092
6093/*
6094 * SSE operations converting single signed quad-word integers to double-precision floating point values (probably only cvtsi2sd).
6095 */
6096TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R64_I64_T, SSE_BINARY_R64_I64_TEST_T, PFNIEMAIMPLSSEF2R64I64);
6097
6098static const SSE_BINARY_R64_I64_T g_aSseBinaryR64I64[] =
6099{
6100 ENTRY_BIN(cvtsi2sd_r64_i64),
6101};
6102
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the worker once with the MXCSR currently in @a pState and appends the
 * resulting test record to the data file.
 *
 * The outputs are zero-initialised before the call (like SseBinaryR64I64Test
 * does), so the record never contains indeterminate values should the worker
 * not store a result.
 *
 * @returns The MXCSR value produced by the worker.
 * @param   pfn         The worker to call.
 * @param   pState      FPU state holding the input MXCSR.
 * @param   pTestData   Test record; i64ValIn must be set, the MXCSR and output
 *                      members are filled in here.
 * @param   pStrmOut    The output stream.
 * @param   prcWrite    Receives the first write failure; otherwise left untouched.
 */
static uint32_t SseBinaryR64I64GenerateOne(PFNIEMAIMPLSSEF2R64I64 pfn, X86FXSTATE *pState,
                                           SSE_BINARY_R64_I64_TEST_T *pTestData, PRTSTREAM pStrmOut, int *prcWrite)
{
    uint32_t   fMxcsrOut = 0;
    RTFLOAT64U r64Out;   RT_ZERO(r64Out);
    pfn(pState, &fMxcsrOut, &r64Out, &pTestData->i64ValIn);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->r64ValOut = r64Out;
    int const rc = RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    if (RT_FAILURE(rc) && RT_SUCCESS(*prcWrite))
        *prcWrite = rc;
    return fMxcsrOut;
}


/**
 * Generates the test data files for the g_aSseBinaryR64I64 sub-tests.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened, written or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the sub-test
 *                          name is passed as its argument.
 * @param   cTests          Number of random inputs (raised to at least 192).
 */
static RTEXITCODE SseBinaryR64I64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int64_t const s_aSpecials[] =
    {
        INT64_MIN,
        INT64_MAX
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I64); iFn++)
    {
        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLSSEF2R64I64 const pfn = g_aSseBinaryR64I64[iFn].pfnNative ? g_aSseBinaryR64I64[iFn].pfnNative : g_aSseBinaryR64I64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR64I64[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR64I64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        int rcWrite = VINF_SUCCESS;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_R64_I64_TEST_T TestData; RT_ZERO(TestData);

            TestData.i64ValIn = iTest < cTests ? RandI64Src(iTest) : s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* All exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryR64I64GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);

                        /* All exception mask bits clear. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryR64I64GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Mask bits clear, the observed exception flags OR'ed into the input. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1 = SseBinaryR64I64GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);

                            /* New flags showed up: mask exactly the accumulated set. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseBinaryR64I64GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);
                            }

                            /* More than one flag: leave each one unmasked in turn. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        SseBinaryR64I64GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rcWrite))
        {
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSseBinaryR64I64[iFn].pszName, rcWrite);
            return RTEXITCODE_FAILURE;
        }
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR64I64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
6208
6209
6210static void SseBinaryR64I64Test(void)
6211{
6212 X86FXSTATE State;
6213 RT_ZERO(State);
6214 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR64I64); iFn++)
6215 {
6216 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR64I64[iFn].pszName))
6217 continue;
6218
6219 uint32_t const cTests = *g_aSseBinaryR64I64[iFn].pcTests;
6220 SSE_BINARY_R64_I64_TEST_T const * const paTests = g_aSseBinaryR64I64[iFn].paTests;
6221 PFNIEMAIMPLSSEF2R64I64 pfn = g_aSseBinaryR64I64[iFn].pfn;
6222 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR64I64[iFn]);
6223 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6224 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6225 {
6226 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_R64_I64_TEST_T); iTest++)
6227 {
6228 uint32_t fMxcsr = 0;
6229 RTFLOAT64U r64Dst; RT_ZERO(r64Dst);
6230
6231 State.MXCSR = paTests[iTest].fMxcsrIn;
6232 pfn(&State, &fMxcsr, &r64Dst, &paTests[iTest].i64ValIn);
6233 if ( fMxcsr != paTests[iTest].fMxcsrOut
6234 || !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut))
6235 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI64\n"
6236 "%s -> mxcsr=%#08x %s\n"
6237 "%s expected %#08x %s%s%s (%s)\n",
6238 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6239 &paTests[iTest].i64ValIn,
6240 iVar ? " " : "", fMxcsr, FormatR64(&r64Dst),
6241 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR64(&paTests[iTest].r64ValOut),
6242 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6243 !RTFLOAT64U_ARE_IDENTICAL(&r64Dst, &paTests[iTest].r64ValOut)
6244 ? " - val" : "",
6245 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6246 }
6247 }
6248 }
6249}
6250
6251
6252/*
6253 * SSE operations converting single signed double-word integers to single-precision floating point values (probably only cvtsi2ss).
6254 */
6255TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_I32_T, SSE_BINARY_R32_I32_TEST_T, PFNIEMAIMPLSSEF2R32I32);
6256
6257static const SSE_BINARY_R32_I32_T g_aSseBinaryR32I32[] =
6258{
6259 ENTRY_BIN(cvtsi2ss_r32_i32),
6260};
6261
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs the worker once with the MXCSR currently in @a pState and appends the
 * resulting test record to the data file.
 *
 * The outputs are zero-initialised before the call (like SseBinaryR32I32Test
 * does), so the record never contains indeterminate values should the worker
 * not store a result.
 *
 * @returns The MXCSR value produced by the worker.
 * @param   pfn         The worker to call.
 * @param   pState      FPU state holding the input MXCSR.
 * @param   pTestData   Test record; i32ValIn must be set, the MXCSR and output
 *                      members are filled in here.
 * @param   pStrmOut    The output stream.
 * @param   prcWrite    Receives the first write failure; otherwise left untouched.
 */
static uint32_t SseBinaryR32I32GenerateOne(PFNIEMAIMPLSSEF2R32I32 pfn, X86FXSTATE *pState,
                                           SSE_BINARY_R32_I32_TEST_T *pTestData, PRTSTREAM pStrmOut, int *prcWrite)
{
    uint32_t   fMxcsrOut = 0;
    RTFLOAT32U r32Out;   RT_ZERO(r32Out);
    pfn(pState, &fMxcsrOut, &r32Out, &pTestData->i32ValIn);
    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->r32ValOut = r32Out;
    int const rc = RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    if (RT_FAILURE(rc) && RT_SUCCESS(*prcWrite))
        *prcWrite = rc;
    return fMxcsrOut;
}


/**
 * Generates the test data files for the g_aSseBinaryR32I32 sub-tests.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened, written or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the sub-test
 *                          name is passed as its argument.
 * @param   cTests          Number of random inputs (raised to at least 192).
 */
static RTEXITCODE SseBinaryR32I32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int32_t const s_aSpecials[] =
    {
        INT32_MIN,
        INT32_MAX,
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I32); iFn++)
    {
        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLSSEF2R32I32 const pfn = g_aSseBinaryR32I32[iFn].pfnNative ? g_aSseBinaryR32I32[iFn].pfnNative : g_aSseBinaryR32I32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseBinaryR32I32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseBinaryR32I32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        int rcWrite = VINF_SUCCESS;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_BINARY_R32_I32_TEST_T TestData; RT_ZERO(TestData);

            TestData.i32ValIn = iTest < cTests ? RandI32Src2(iTest) : s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* All exception mask bits set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseBinaryR32I32GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);

                        /* All exception mask bits clear. */
                        State.MXCSR = State.MXCSR & ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseBinaryR32I32GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (fXcpt)
                        {
                            /* Mask bits clear, the observed exception flags OR'ed into the input. */
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1 = SseBinaryR32I32GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);

                            /* New flags showed up: mask exactly the accumulated set. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseBinaryR32I32GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);
                            }

                            /* More than one flag: leave each one unmasked in turn. */
                            if (!RT_IS_POWER_OF_TWO(fXcpt))
                                for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                    if (fUnmasked & fXcpt)
                                    {
                                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                        SseBinaryR32I32GenerateOne(pfn, &State, &TestData, pStrmOut, &rcWrite);
                                    }
                        }
                    }
        }
        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rcWrite))
        {
            RTMsgError("Failed to write data file for %s: %Rrc", g_aSseBinaryR32I32[iFn].pszName, rcWrite);
            return RTEXITCODE_FAILURE;
        }
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseBinaryR32I32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
6367
6368
6369static void SseBinaryR32I32Test(void)
6370{
6371 X86FXSTATE State;
6372 RT_ZERO(State);
6373 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I32); iFn++)
6374 {
6375 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR32I32[iFn].pszName))
6376 continue;
6377
6378 uint32_t const cTests = *g_aSseBinaryR32I32[iFn].pcTests;
6379 SSE_BINARY_R32_I32_TEST_T const * const paTests = g_aSseBinaryR32I32[iFn].paTests;
6380 PFNIEMAIMPLSSEF2R32I32 pfn = g_aSseBinaryR32I32[iFn].pfn;
6381 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32I32[iFn]);
6382 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6383 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6384 {
6385 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_R32_I32_TEST_T); iTest++)
6386 {
6387 uint32_t fMxcsr = 0;
6388 RTFLOAT32U r32Dst; RT_ZERO(r32Dst);
6389
6390 State.MXCSR = paTests[iTest].fMxcsrIn;
6391 pfn(&State, &fMxcsr, &r32Dst, &paTests[iTest].i32ValIn);
6392 if ( fMxcsr != paTests[iTest].fMxcsrOut
6393 || !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut))
6394 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32\n"
6395 "%s -> mxcsr=%#08x %RI32\n"
6396 "%s expected %#08x %RI32%s%s (%s)\n",
6397 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6398 &paTests[iTest].i32ValIn,
6399 iVar ? " " : "", fMxcsr, FormatR32(&r32Dst),
6400 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR32(&paTests[iTest].r32ValOut),
6401 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6402 !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut)
6403 ? " - val" : "",
6404 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6405 }
6406 }
6407 }
6408}
6409
6410
6411/*
6412 * SSE operations converting single signed quad-word integers to single-precision floating point values (probably only cvtsi2ss).
6413 */
6414TYPEDEF_SUBTEST_TYPE(SSE_BINARY_R32_I64_T, SSE_BINARY_R32_I64_TEST_T, PFNIEMAIMPLSSEF2R32I64);
6415
6416static const SSE_BINARY_R32_I64_T g_aSseBinaryR32I64[] =
6417{
6418 ENTRY_BIN(cvtsi2ss_r32_i64),
6419};
6420
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the binary test data files for the sub-tests in g_aSseBinaryR32I64.
 *
 * Each input value is run through every rounding mode / DAZ / FZ combination,
 * first with the exception mask bits set and then with them cleared.  If any
 * exception flag came back set, further records are produced with those flags
 * pre-set in the input MXCSR and with different subsets of them masked.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE when a data file could
 *          not be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the sub-test
 *                          name is supplied as its argument.
 * @param   cTests          Number of random inputs (raised to at least 192);
 *                          the special values are appended after them.
 */
static RTEXITCODE SseBinaryR32I64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    static int64_t const s_aSpecials[] =
    {
        INT64_MIN,
        INT64_MAX
        /** @todo More specials. */
    };

    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
    size_t const cInputs = cTests + RT_ELEMENTS(s_aSpecials);

    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I64); ++iFn)
    {
        SSE_BINARY_R32_I64_T const * const pEntry = &g_aSseBinaryR32I64[iFn];

        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLSSEF2R32I64 const pfn = pEntry->pfnNative ? pEntry->pfnNative : pEntry->pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, pEntry->pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", pEntry->pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        for (uint32_t iTest = 0; iTest < cInputs; ++iTest)
        {
            SSE_BINARY_R32_I64_TEST_T TestData;
            RT_ZERO(TestData);

            /* Random inputs first, the special values last. */
            if (iTest < cTests)
                TestData.i64ValIn = RandI64Src(iTest);
            else
                TestData.i64ValIn = s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                {
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1) All exception mask bits set. */
                        uint32_t fMxcsrStart = fMxcsr & ~X86_MXCSR_RC_MASK;
                        fMxcsrStart |= iRounding << X86_MXCSR_RC_SHIFT;
                        if (iDaz)
                            fMxcsrStart |= X86_MXCSR_DAZ;
                        if (iFz)
                            fMxcsrStart |= X86_MXCSR_FZ;
                        fMxcsrStart |= X86_MXCSR_XCPT_MASK;
                        State.MXCSR = fMxcsrStart;

                        uint32_t   fMxcsrMasked;
                        RTFLOAT32U r32Masked;
                        pfn(&State, &fMxcsrMasked, &r32Masked, &TestData.i64ValIn);
                        /* The input MXCSR is read back from the state only after the call. */
                        TestData.fMxcsrIn  = State.MXCSR;
                        TestData.fMxcsrOut = fMxcsrMasked;
                        TestData.r32ValOut = r32Masked;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* 2) All exception mask bits cleared. */
                        State.MXCSR &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t   fMxcsrUnmasked;
                        RTFLOAT32U r32Unmasked;
                        pfn(&State, &fMxcsrUnmasked, &r32Unmasked, &TestData.i64ValIn);
                        TestData.fMxcsrIn  = State.MXCSR;
                        TestData.fMxcsrOut = fMxcsrUnmasked;
                        TestData.r32ValOut = r32Unmasked;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (fMxcsrMasked | fMxcsrUnmasked) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* 3) The flags seen so far pre-set in the input, mask bits cleared. */
                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t   fMxcsrPreset;
                        RTFLOAT32U r32Preset;
                        pfn(&State, &fMxcsrPreset, &r32Preset, &TestData.i64ValIn);
                        TestData.fMxcsrIn  = State.MXCSR;
                        TestData.fMxcsrOut = fMxcsrPreset;
                        TestData.r32ValOut = r32Preset;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* 4) Step 3 produced flags not in fXcpt yet: add them and mask the whole set. */
                        uint32_t const fFlagsPreset = fMxcsrPreset & X86_MXCSR_XCPT_FLAGS;
                        if ((fFlagsPreset & fXcpt) != fFlagsPreset)
                        {
                            fXcpt |= fFlagsPreset;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            uint32_t   fMxcsrAllMasked;
                            RTFLOAT32U r32AllMasked;
                            pfn(&State, &fMxcsrAllMasked, &r32AllMasked, &TestData.i64ValIn);
                            TestData.fMxcsrIn  = State.MXCSR;
                            TestData.fMxcsrOut = fMxcsrAllMasked;
                            TestData.r32ValOut = r32AllMasked;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                        }

                        /* 5) More than one flag: leave each one unmasked in turn, masking the rest. */
                        if (RT_IS_POWER_OF_TWO(fXcpt))
                            continue;
                        for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                        {
                            if (!(fUnmasked & fXcpt))
                                continue;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                            uint32_t   fMxcsrOne;
                            RTFLOAT32U r32One;
                            pfn(&State, &fMxcsrOne, &r32One, &TestData.i64ValIn);
                            TestData.fMxcsrIn  = State.MXCSR;
                            TestData.fMxcsrOut = fMxcsrOne;
                            TestData.r32ValOut = r32One;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                        }
                    }
                }
            }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", pEntry->pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
6526
6527
6528static void SseBinaryR32I64Test(void)
6529{
6530 X86FXSTATE State;
6531 RT_ZERO(State);
6532 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseBinaryR32I64); iFn++)
6533 {
6534 if (!SubTestAndCheckIfEnabled(g_aSseBinaryR32I64[iFn].pszName))
6535 continue;
6536
6537 uint32_t const cTests = *g_aSseBinaryR32I64[iFn].pcTests;
6538 SSE_BINARY_R32_I64_TEST_T const * const paTests = g_aSseBinaryR32I64[iFn].paTests;
6539 PFNIEMAIMPLSSEF2R32I64 pfn = g_aSseBinaryR32I64[iFn].pfn;
6540 uint32_t const cVars = COUNT_VARIATIONS(g_aSseBinaryR32I64[iFn]);
6541 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6542 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6543 {
6544 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_BINARY_R32_I64_TEST_T); iTest++)
6545 {
6546 uint32_t fMxcsr = 0;
6547 RTFLOAT32U r32Dst; RT_ZERO(r32Dst);
6548
6549 State.MXCSR = paTests[iTest].fMxcsrIn;
6550 pfn(&State, &fMxcsr, &r32Dst, &paTests[iTest].i64ValIn);
6551 if ( fMxcsr != paTests[iTest].fMxcsrOut
6552 || !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut))
6553 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI64\n"
6554 "%s -> mxcsr=%#08x %RI32\n"
6555 "%s expected %#08x %RI32%s%s (%s)\n",
6556 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
6557 &paTests[iTest].i64ValIn,
6558 iVar ? " " : "", fMxcsr, FormatR32(&r32Dst),
6559 iVar ? " " : "", paTests[iTest].fMxcsrOut, FormatR32(&paTests[iTest].r32ValOut),
6560 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6561 !RTFLOAT32U_ARE_IDENTICAL(&r32Dst, &paTests[iTest].r32ValOut)
6562 ? " - val" : "",
6563 FormatMxcsr(paTests[iTest].fMxcsrIn) );
6564 }
6565 }
6566 }
6567}
6568
6569
6570/*
6571 * Compare SSE operations on single single-precision floating point values - outputting only EFLAGS.
6572 */
6573TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_EFL_R32_R32_T, SSE_COMPARE_EFL_R32_R32_TEST_T, PFNIEMAIMPLF2EFLMXCSR128);
6574
6575static const SSE_COMPARE_EFL_R32_R32_T g_aSseCompareEflR32R32[] =
6576{
6577 ENTRY_BIN(ucomiss_u128),
6578 ENTRY_BIN(comiss_u128),
6579 ENTRY_BIN_AVX(vucomiss_u128),
6580 ENTRY_BIN_AVX(vcomiss_u128),
6581};
6582
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the binary test data files for the sub-tests in g_aSseCompareEflR32R32.
 *
 * Each input pair is run through every rounding mode / DAZ / FZ combination,
 * first with the exception mask bits set and then with them cleared.  If any
 * exception flag came back set, further records are produced with those flags
 * pre-set in the input MXCSR and with different subsets of them masked.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE when a data file could
 *          not be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the sub-test
 *                          name is supplied as its argument.
 * @param   cTests          Number of random input pairs (raised to at least
 *                          192); the special pairs are appended after them.
 */
static RTEXITCODE SseCompareEflR32R32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    static struct { RTFLOAT32U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) },
        { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) },
        { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(0) },
        { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) },
        { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0) },
        { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(1) },
        { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(0) },
        { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1) },
        /** @todo More specials. */
    };

    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
    uint32_t const cMinNormalPairs = (cTests - 144) / 4;
    size_t const   cInputs         = cTests + RT_ELEMENTS(s_aSpecials);

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR32R32); ++iFn)
    {
        SSE_COMPARE_EFL_R32_R32_T const * const pEntry = &g_aSseCompareEflR32R32[iFn];

        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLF2EFLMXCSR128 const pfn = pEntry->pfnNative ? pEntry->pfnNative : pEntry->pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, pEntry->pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", pEntry->pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cInputs; ++iTest)
        {
            SSE_COMPARE_EFL_R32_R32_TEST_T TestData;
            RT_ZERO(TestData);
            X86XMMREG ValIn1;
            RT_ZERO(ValIn1);
            X86XMMREG ValIn2;
            RT_ZERO(ValIn2);

            /* Random inputs first, the special pairs last. */
            if (iTest < cTests)
            {
                TestData.r32ValIn1 = RandR32Src(iTest);
                TestData.r32ValIn2 = RandR32Src(iTest);
            }
            else
            {
                TestData.r32ValIn1 = s_aSpecials[iTest - cTests].Val1;
                TestData.r32ValIn2 = s_aSpecials[iTest - cTests].Val2;
            }
            ValIn1.ar32[0] = TestData.r32ValIn1;
            ValIn2.ar32[0] = TestData.r32ValIn2;

            /* Near the end of the random range, redo the slot until enough
               all-normal pairs have been produced. */
            if (   RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn1)
                && RTFLOAT32U_IS_NORMAL(&TestData.r32ValIn2))
                cNormalInputPairs++;
            else if (   cNormalInputPairs < cMinNormalPairs
                     && iTest + cMinNormalPairs >= cTests
                     && iTest < cTests)
            {
                iTest--;
                continue;
            }

            uint32_t const fMxcsr  = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            uint32_t const fEFlags = RandEFlags();
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                {
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1) All exception mask bits set. */
                        uint32_t fMxcsrIn = fMxcsr & ~X86_MXCSR_RC_MASK;
                        fMxcsrIn |= iRounding << X86_MXCSR_RC_SHIFT;
                        if (iDaz)
                            fMxcsrIn |= X86_MXCSR_DAZ;
                        if (iFz)
                            fMxcsrIn |= X86_MXCSR_FZ;
                        fMxcsrIn |= X86_MXCSR_XCPT_MASK;

                        uint32_t fMxcsrMasked  = fMxcsrIn;
                        uint32_t fEFlagsMasked = fEFlags;
                        pfn(&fMxcsrMasked, &fEFlagsMasked, &ValIn1, &ValIn2);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrMasked;
                        TestData.fEflIn    = fEFlags;
                        TestData.fEflOut   = fEFlagsMasked;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* 2) All exception mask bits cleared. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrUnmasked  = fMxcsrIn;
                        uint32_t fEFlagsUnmasked = fEFlags;
                        pfn(&fMxcsrUnmasked, &fEFlagsUnmasked, &ValIn1, &ValIn2);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrUnmasked;
                        TestData.fEflIn    = fEFlags;
                        TestData.fEflOut   = fEFlagsUnmasked;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (fMxcsrMasked | fMxcsrUnmasked) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* 3) The flags seen so far pre-set in the input, mask bits cleared. */
                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t fMxcsrPreset  = fMxcsrIn;
                        uint32_t fEFlagsPreset = fEFlags;
                        pfn(&fMxcsrPreset, &fEFlagsPreset, &ValIn1, &ValIn2);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrPreset;
                        TestData.fEflIn    = fEFlags;
                        TestData.fEflOut   = fEFlagsPreset;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* 4) Step 3 produced flags not in fXcpt yet: add them and mask the whole set. */
                        uint32_t const fFlagsPreset = fMxcsrPreset & X86_MXCSR_XCPT_FLAGS;
                        if ((fFlagsPreset & fXcpt) != fFlagsPreset)
                        {
                            fXcpt |= fFlagsPreset;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            uint32_t fMxcsrAllMasked  = fMxcsrIn;
                            uint32_t fEFlagsAllMasked = fEFlags;
                            pfn(&fMxcsrAllMasked, &fEFlagsAllMasked, &ValIn1, &ValIn2);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrAllMasked;
                            TestData.fEflIn    = fEFlags;
                            TestData.fEflOut   = fEFlagsAllMasked;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                        }

                        /* 5) More than one flag: leave each one unmasked in turn, masking the rest. */
                        if (RT_IS_POWER_OF_TWO(fXcpt))
                            continue;
                        for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                        {
                            if (!(fUnmasked & fXcpt))
                                continue;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                            uint32_t fMxcsrOne  = fMxcsrIn;
                            uint32_t fEFlagsOne = fEFlags;
                            pfn(&fMxcsrOne, &fEFlagsOne, &ValIn1, &ValIn2);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrOne;
                            TestData.fEflIn    = fEFlags;
                            TestData.fEflOut   = fEFlagsOne;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                        }
                    }
                }
            }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", pEntry->pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
6720
6721static void SseCompareEflR32R32Test(void)
6722{
6723 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR32R32); iFn++)
6724 {
6725 if (!SubTestAndCheckIfEnabled(g_aSseCompareEflR32R32[iFn].pszName))
6726 continue;
6727
6728 uint32_t const cTests = *g_aSseCompareEflR32R32[iFn].pcTests;
6729 SSE_COMPARE_EFL_R32_R32_TEST_T const * const paTests = g_aSseCompareEflR32R32[iFn].paTests;
6730 PFNIEMAIMPLF2EFLMXCSR128 pfn = g_aSseCompareEflR32R32[iFn].pfn;
6731 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareEflR32R32[iFn]);
6732 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6733 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6734 {
6735 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_COMPARE_EFL_R32_R32_TEST_T); iTest++)
6736 {
6737 X86XMMREG ValIn1; RT_ZERO(ValIn1);
6738 X86XMMREG ValIn2; RT_ZERO(ValIn2);
6739
6740 ValIn1.ar32[0] = paTests[iTest].r32ValIn1;
6741 ValIn2.ar32[0] = paTests[iTest].r32ValIn2;
6742 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
6743 uint32_t fEFlags = paTests[iTest].fEflIn;
6744 pfn(&fMxcsr, &fEFlags, &ValIn1, &ValIn2);
6745 if ( fMxcsr != paTests[iTest].fMxcsrOut
6746 || fEFlags != paTests[iTest].fEflOut)
6747 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x efl=%#08x in1=%s in2=%s\n"
6748 "%s -> mxcsr=%#08x %#08x\n"
6749 "%s expected %#08x %#08x%s (%s) (EFL: %s)\n",
6750 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn, paTests[iTest].fEflIn,
6751 FormatR32(&paTests[iTest].r32ValIn1), FormatR32(&paTests[iTest].r32ValIn2),
6752 iVar ? " " : "", fMxcsr, fEFlags,
6753 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].fEflOut,
6754 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6755 FormatMxcsr(paTests[iTest].fMxcsrIn),
6756 EFlagsDiff(fEFlags, paTests[iTest].fEflOut));
6757 }
6758 }
6759 }
6760}
6761
6762
6763/*
 * Compare SSE operations on single double-precision floating point values - outputting only EFLAGS.
6765 */
6766TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_EFL_R64_R64_T, SSE_COMPARE_EFL_R64_R64_TEST_T, PFNIEMAIMPLF2EFLMXCSR128);
6767
6768static const SSE_COMPARE_EFL_R64_R64_T g_aSseCompareEflR64R64[] =
6769{
6770 ENTRY_BIN(ucomisd_u128),
6771 ENTRY_BIN(comisd_u128),
6772 ENTRY_BIN_AVX(vucomisd_u128),
6773 ENTRY_BIN_AVX(vcomisd_u128)
6774};
6775
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the binary test data files for the sub-tests in g_aSseCompareEflR64R64.
 *
 * Each input pair is run through every rounding mode / DAZ / FZ combination,
 * first with the exception mask bits set and then with them cleared.  If any
 * exception flag came back set, further records are produced with those flags
 * pre-set in the input MXCSR and with different subsets of them masked.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE when a data file could
 *          not be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the sub-test
 *                          name is supplied as its argument.
 * @param   cTests          Number of random input pairs (raised to at least
 *                          192); the special pairs are appended after them.
 */
static RTEXITCODE SseCompareEflR64R64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    static struct { RTFLOAT64U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(0) },
        { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(1) },
        { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(0) },
        { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(1) },
        /** @todo More specials. */
    };

    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
    uint32_t const cMinNormalPairs = (cTests - 144) / 4;
    size_t const   cInputs         = cTests + RT_ELEMENTS(s_aSpecials);

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR64R64); ++iFn)
    {
        SSE_COMPARE_EFL_R64_R64_T const * const pEntry = &g_aSseCompareEflR64R64[iFn];

        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLF2EFLMXCSR128 const pfn = pEntry->pfnNative ? pEntry->pfnNative : pEntry->pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, pEntry->pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", pEntry->pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cInputs; ++iTest)
        {
            SSE_COMPARE_EFL_R64_R64_TEST_T TestData;
            RT_ZERO(TestData);
            X86XMMREG ValIn1;
            RT_ZERO(ValIn1);
            X86XMMREG ValIn2;
            RT_ZERO(ValIn2);

            /* Random inputs first, the special pairs last. */
            if (iTest < cTests)
            {
                TestData.r64ValIn1 = RandR64Src(iTest);
                TestData.r64ValIn2 = RandR64Src(iTest);
            }
            else
            {
                TestData.r64ValIn1 = s_aSpecials[iTest - cTests].Val1;
                TestData.r64ValIn2 = s_aSpecials[iTest - cTests].Val2;
            }
            ValIn1.ar64[0] = TestData.r64ValIn1;
            ValIn2.ar64[0] = TestData.r64ValIn2;

            /* Near the end of the random range, redo the slot until enough
               all-normal pairs have been produced. */
            if (   RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn1)
                && RTFLOAT64U_IS_NORMAL(&TestData.r64ValIn2))
                cNormalInputPairs++;
            else if (   cNormalInputPairs < cMinNormalPairs
                     && iTest + cMinNormalPairs >= cTests
                     && iTest < cTests)
            {
                iTest--;
                continue;
            }

            uint32_t const fMxcsr  = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            uint32_t const fEFlags = RandEFlags();
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                {
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* 1) All exception mask bits set. */
                        uint32_t fMxcsrIn = fMxcsr & ~X86_MXCSR_RC_MASK;
                        fMxcsrIn |= iRounding << X86_MXCSR_RC_SHIFT;
                        if (iDaz)
                            fMxcsrIn |= X86_MXCSR_DAZ;
                        if (iFz)
                            fMxcsrIn |= X86_MXCSR_FZ;
                        fMxcsrIn |= X86_MXCSR_XCPT_MASK;

                        uint32_t fMxcsrMasked  = fMxcsrIn;
                        uint32_t fEFlagsMasked = fEFlags;
                        pfn(&fMxcsrMasked, &fEFlagsMasked, &ValIn1, &ValIn2);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrMasked;
                        TestData.fEflIn    = fEFlags;
                        TestData.fEflOut   = fEFlagsMasked;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* 2) All exception mask bits cleared. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t fMxcsrUnmasked  = fMxcsrIn;
                        uint32_t fEFlagsUnmasked = fEFlags;
                        pfn(&fMxcsrUnmasked, &fEFlagsUnmasked, &ValIn1, &ValIn2);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrUnmasked;
                        TestData.fEflIn    = fEFlags;
                        TestData.fEflOut   = fEFlagsUnmasked;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        uint16_t fXcpt = (fMxcsrMasked | fMxcsrUnmasked) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* 3) The flags seen so far pre-set in the input, mask bits cleared. */
                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t fMxcsrPreset  = fMxcsrIn;
                        uint32_t fEFlagsPreset = fEFlags;
                        pfn(&fMxcsrPreset, &fEFlagsPreset, &ValIn1, &ValIn2);
                        TestData.fMxcsrIn  = fMxcsrIn;
                        TestData.fMxcsrOut = fMxcsrPreset;
                        TestData.fEflIn    = fEFlags;
                        TestData.fEflOut   = fEFlagsPreset;
                        RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                        /* 4) Step 3 produced flags not in fXcpt yet: add them and mask the whole set. */
                        uint32_t const fFlagsPreset = fMxcsrPreset & X86_MXCSR_XCPT_FLAGS;
                        if ((fFlagsPreset & fXcpt) != fFlagsPreset)
                        {
                            fXcpt |= fFlagsPreset;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            uint32_t fMxcsrAllMasked  = fMxcsrIn;
                            uint32_t fEFlagsAllMasked = fEFlags;
                            pfn(&fMxcsrAllMasked, &fEFlagsAllMasked, &ValIn1, &ValIn2);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrAllMasked;
                            TestData.fEflIn    = fEFlags;
                            TestData.fEflOut   = fEFlagsAllMasked;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                        }

                        /* 5) More than one flag: leave each one unmasked in turn, masking the rest. */
                        if (RT_IS_POWER_OF_TWO(fXcpt))
                            continue;
                        for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                        {
                            if (!(fUnmasked & fXcpt))
                                continue;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                            uint32_t fMxcsrOne  = fMxcsrIn;
                            uint32_t fEFlagsOne = fEFlags;
                            pfn(&fMxcsrOne, &fEFlagsOne, &ValIn1, &ValIn2);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrOne;
                            TestData.fEflIn    = fEFlags;
                            TestData.fEflOut   = fEFlagsOne;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                        }
                    }
                }
            }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", pEntry->pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
6913
6914static void SseCompareEflR64R64Test(void)
6915{
6916 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareEflR64R64); iFn++)
6917 {
6918 if (!SubTestAndCheckIfEnabled(g_aSseCompareEflR64R64[iFn].pszName))
6919 continue;
6920
6921 uint32_t const cTests = *g_aSseCompareEflR64R64[iFn].pcTests;
6922 SSE_COMPARE_EFL_R64_R64_TEST_T const * const paTests = g_aSseCompareEflR64R64[iFn].paTests;
6923 PFNIEMAIMPLF2EFLMXCSR128 pfn = g_aSseCompareEflR64R64[iFn].pfn;
6924 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareEflR64R64[iFn]);
6925 if (!cTests) RTTestSkipped(g_hTest, "no tests");
6926 for (uint32_t iVar = 0; iVar < cVars; iVar++)
6927 {
6928 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_COMPARE_EFL_R64_R64_TEST_T); iTest++)
6929 {
6930 X86XMMREG ValIn1; RT_ZERO(ValIn1);
6931 X86XMMREG ValIn2; RT_ZERO(ValIn2);
6932
6933 ValIn1.ar64[0] = paTests[iTest].r64ValIn1;
6934 ValIn2.ar64[0] = paTests[iTest].r64ValIn2;
6935 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
6936 uint32_t fEFlags = paTests[iTest].fEflIn;
6937 pfn(&fMxcsr, &fEFlags, &ValIn1, &ValIn2);
6938 if ( fMxcsr != paTests[iTest].fMxcsrOut
6939 || fEFlags != paTests[iTest].fEflOut)
6940 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x efl=%#08x in1=%s in2=%s\n"
6941 "%s -> mxcsr=%#08x %#08x\n"
6942 "%s expected %#08x %#08x%s (%s) (EFL: %s)\n",
6943 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn, paTests[iTest].fEflIn,
6944 FormatR64(&paTests[iTest].r64ValIn1), FormatR64(&paTests[iTest].r64ValIn2),
6945 iVar ? " " : "", fMxcsr, fEFlags,
6946 iVar ? " " : "", paTests[iTest].fMxcsrOut, paTests[iTest].fEflOut,
6947 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
6948 FormatMxcsr(paTests[iTest].fMxcsrIn),
6949 EFlagsDiff(fEFlags, paTests[iTest].fEflOut));
6950 }
6951 }
6952 }
6953}
6954
6955
6956/*
6957 * Compare SSE operations on packed and single single-precision floating point values - outputting a mask.
6958 */
6959/** Maximum immediate to try to keep the testdata size under control (at least a little bit)- */
6960#define SSE_COMPARE_F2_XMM_IMM8_MAX 0x1f
6961
6962TYPEDEF_SUBTEST_TYPE(SSE_COMPARE_F2_XMM_IMM8_T, SSE_COMPARE_F2_XMM_IMM8_TEST_T, PFNIEMAIMPLMXCSRF2XMMIMM8);
6963
6964static const SSE_COMPARE_F2_XMM_IMM8_T g_aSseCompareF2XmmR32Imm8[] =
6965{
6966 ENTRY_BIN(cmpps_u128),
6967 ENTRY_BIN(cmpss_u128)
6968};
6969
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates the binary test data files for the sub-tests in g_aSseCompareF2XmmR32Imm8.
 *
 * Each pair of four-element inputs is run through every immediate in the range
 * 0..SSE_COMPARE_F2_XMM_IMM8_MAX and every rounding mode / DAZ / FZ
 * combination, first with the exception mask bits set and then with them
 * cleared.  If any exception flag came back set, further records are produced
 * with those flags pre-set in the input MXCSR and with different subsets of
 * them masked.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE when a data file could
 *          not be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the sub-test
 *                          name is supplied as its argument.
 * @param   cTests          Number of random input sets (raised to at least
 *                          192); the special pairs are appended after them.
 */
static RTEXITCODE SseCompareF2XmmR32Imm8Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    static struct { RTFLOAT32U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) },
        { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(1) },
        { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(0) },
        { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) },
        { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0) },
        { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(1) },
        { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(0) },
        { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1) },
        /** @todo More specials. */
    };

    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
    uint32_t const cMinNormalPairs = (cTests - 144) / 4;
    size_t const   cInputs         = cTests + RT_ELEMENTS(s_aSpecials);

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR32Imm8); ++iFn)
    {
        SSE_COMPARE_F2_XMM_IMM8_T const * const pEntry = &g_aSseCompareF2XmmR32Imm8[iFn];

        /* Prefer the native worker when the entry has one. */
        PFNIEMAIMPLMXCSRF2XMMIMM8 const pfn = pEntry->pfnNative ? pEntry->pfnNative : pEntry->pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, pEntry->pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", pEntry->pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cInputs; ++iTest)
        {
            SSE_COMPARE_F2_XMM_IMM8_TEST_T TestData;
            RT_ZERO(TestData);

            /* Random inputs first (all of operand 1, then all of operand 2);
               a special pair is replicated into all four elements. */
            if (iTest < cTests)
            {
                for (unsigned iElem = 0; iElem < 4; iElem++)
                    TestData.InVal1.ar32[iElem] = RandR32Src(iTest);
                for (unsigned iElem = 0; iElem < 4; iElem++)
                    TestData.InVal2.ar32[iElem] = RandR32Src(iTest);
            }
            else
            {
                for (unsigned iElem = 0; iElem < 4; iElem++)
                {
                    TestData.InVal1.ar32[iElem] = s_aSpecials[iTest - cTests].Val1;
                    TestData.InVal2.ar32[iElem] = s_aSpecials[iTest - cTests].Val2;
                }
            }

            /* Near the end of the random range, redo the slot until enough
               all-normal input sets have been produced. */
            bool fAllNormal = true;
            for (unsigned iElem = 0; iElem < 4; iElem++)
                if (   !RTFLOAT32U_IS_NORMAL(&TestData.InVal1.ar32[iElem])
                    || !RTFLOAT32U_IS_NORMAL(&TestData.InVal2.ar32[iElem]))
                    fAllNormal = false;
            if (fAllNormal)
                cNormalInputPairs++;
            else if (   cNormalInputPairs < cMinNormalPairs
                     && iTest + cMinNormalPairs >= cTests
                     && iTest < cTests)
            {
                iTest--;
                continue;
            }

            IEMMEDIAF2XMMSRC Src;
            Src.uSrc1 = TestData.InVal1;
            Src.uSrc2 = TestData.InVal2;
            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint8_t bImm = 0; bImm <= SSE_COMPARE_F2_XMM_IMM8_MAX; bImm++)
            {
                for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                {
                    for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    {
                        for (uint8_t iFz = 0; iFz < 2; iFz++)
                        {
                            /* 1) All exception mask bits set. */
                            uint32_t fMxcsrIn = fMxcsr & ~X86_MXCSR_RC_MASK;
                            fMxcsrIn |= iRounding << X86_MXCSR_RC_SHIFT;
                            if (iDaz)
                                fMxcsrIn |= X86_MXCSR_DAZ;
                            if (iFz)
                                fMxcsrIn |= X86_MXCSR_FZ;
                            fMxcsrIn |= X86_MXCSR_XCPT_MASK;

                            uint32_t  fMxcsrMasked = fMxcsrIn;
                            X86XMMREG ResMasked;
                            pfn(&fMxcsrMasked, &ResMasked, &Src, bImm);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrMasked;
                            TestData.bImm      = bImm;
                            TestData.OutVal    = ResMasked;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                            /* 2) All exception mask bits cleared. */
                            fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                            uint32_t  fMxcsrUnmasked = fMxcsrIn;
                            X86XMMREG ResUnmasked;
                            pfn(&fMxcsrUnmasked, &ResUnmasked, &Src, bImm);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrUnmasked;
                            TestData.bImm      = bImm;
                            TestData.OutVal    = ResUnmasked;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                            uint16_t fXcpt = (fMxcsrMasked | fMxcsrUnmasked) & X86_MXCSR_XCPT_FLAGS;
                            if (!fXcpt)
                                continue;

                            /* 3) The flags seen so far pre-set in the input, mask bits cleared. */
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t  fMxcsrPreset = fMxcsrIn;
                            X86XMMREG ResPreset;
                            pfn(&fMxcsrPreset, &ResPreset, &Src, bImm);
                            TestData.fMxcsrIn  = fMxcsrIn;
                            TestData.fMxcsrOut = fMxcsrPreset;
                            TestData.bImm      = bImm;
                            TestData.OutVal    = ResPreset;
                            RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));

                            /* 4) Step 3 produced flags not in fXcpt yet: add them and mask the whole set. */
                            uint32_t const fFlagsPreset = fMxcsrPreset & X86_MXCSR_XCPT_FLAGS;
                            if ((fFlagsPreset & fXcpt) != fFlagsPreset)
                            {
                                fXcpt |= fFlagsPreset;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t  fMxcsrAllMasked = fMxcsrIn;
                                X86XMMREG ResAllMasked;
                                pfn(&fMxcsrAllMasked, &ResAllMasked, &Src, bImm);
                                TestData.fMxcsrIn  = fMxcsrIn;
                                TestData.fMxcsrOut = fMxcsrAllMasked;
                                TestData.bImm      = bImm;
                                TestData.OutVal    = ResAllMasked;
                                RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                            }

                            /* 5) More than one flag: leave each one unmasked in turn, masking the rest. */
                            if (RT_IS_POWER_OF_TWO(fXcpt))
                                continue;
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                            {
                                if (!(fUnmasked & fXcpt))
                                    continue;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                uint32_t  fMxcsrOne = fMxcsrIn;
                                X86XMMREG ResOne;
                                pfn(&fMxcsrOne, &ResOne, &Src, bImm);
                                TestData.fMxcsrIn  = fMxcsrIn;
                                TestData.fMxcsrOut = fMxcsrOne;
                                TestData.bImm      = bImm;
                                TestData.OutVal    = ResOne;
                                RTStrmWrite(pStrmOut, &TestData, sizeof(TestData));
                            }
                        }
                    }
                }
            }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", pEntry->pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7118
7119static void SseCompareF2XmmR32Imm8Test(void)
7120{
7121 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR32Imm8); iFn++)
7122 {
7123 if (!SubTestAndCheckIfEnabled(g_aSseCompareF2XmmR32Imm8[iFn].pszName))
7124 continue;
7125
7126 uint32_t const cTests = *g_aSseCompareF2XmmR32Imm8[iFn].pcTests;
7127 SSE_COMPARE_F2_XMM_IMM8_TEST_T const * const paTests = g_aSseCompareF2XmmR32Imm8[iFn].paTests;
7128 PFNIEMAIMPLMXCSRF2XMMIMM8 pfn = g_aSseCompareF2XmmR32Imm8[iFn].pfn;
7129 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareF2XmmR32Imm8[iFn]);
7130 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7131 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7132 {
7133 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_COMPARE_F2_XMM_IMM8_TEST_T); iTest++)
7134 {
7135 IEMMEDIAF2XMMSRC Src;
7136 X86XMMREG ValOut;
7137
7138 Src.uSrc1 = paTests[iTest].InVal1;
7139 Src.uSrc2 = paTests[iTest].InVal2;
7140 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
7141 pfn(&fMxcsr, &ValOut, &Src, paTests[iTest].bImm);
7142 if ( fMxcsr != paTests[iTest].fMxcsrOut
7143 || ValOut.au32[0] != paTests[iTest].OutVal.au32[0]
7144 || ValOut.au32[1] != paTests[iTest].OutVal.au32[1]
7145 || ValOut.au32[2] != paTests[iTest].OutVal.au32[2]
7146 || ValOut.au32[3] != paTests[iTest].OutVal.au32[3])
7147 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s in2=%s'%s'%s'%s imm8=%x\n"
7148 "%s -> mxcsr=%#08x %RX32'%RX32'%RX32'%RX32\n"
7149 "%s expected %#08x %RX32'%RX32'%RX32'%RX32%s%s (%s)\n",
7150 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7151 FormatR32(&paTests[iTest].InVal1.ar32[0]), FormatR32(&paTests[iTest].InVal1.ar32[1]),
7152 FormatR32(&paTests[iTest].InVal1.ar32[2]), FormatR32(&paTests[iTest].InVal1.ar32[3]),
7153 FormatR32(&paTests[iTest].InVal2.ar32[0]), FormatR32(&paTests[iTest].InVal2.ar32[1]),
7154 FormatR32(&paTests[iTest].InVal2.ar32[2]), FormatR32(&paTests[iTest].InVal2.ar32[3]),
7155 paTests[iTest].bImm,
7156 iVar ? " " : "", fMxcsr, ValOut.au32[0], ValOut.au32[1], ValOut.au32[2], ValOut.au32[3],
7157 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7158 paTests[iTest].OutVal.au32[0], paTests[iTest].OutVal.au32[1],
7159 paTests[iTest].OutVal.au32[2], paTests[iTest].OutVal.au32[3],
7160 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7161 ( ValOut.au32[0] != paTests[iTest].OutVal.au32[0]
7162 || ValOut.au32[1] != paTests[iTest].OutVal.au32[1]
7163 || ValOut.au32[2] != paTests[iTest].OutVal.au32[2]
7164 || ValOut.au32[3] != paTests[iTest].OutVal.au32[3])
7165 ? " - val" : "",
7166 FormatMxcsr(paTests[iTest].fMxcsrIn));
7167 }
7168 }
7169 }
7170}
7171
7172
7173/*
7174 * Compare SSE operations on packed and single double-precision floating point values - outputting a mask.
7175 */
7176static const SSE_COMPARE_F2_XMM_IMM8_T g_aSseCompareF2XmmR64Imm8[] =
7177{
7178 ENTRY_BIN(cmppd_u128),
7179 ENTRY_BIN(cmpsd_u128)
7180};
7181
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Calls the compare worker once and appends the outcome to the data file as one record.
 *
 * @returns The MXCSR value as left behind by the worker.
 * @param   pStrmOut    The data file output stream.
 * @param   pfn         The worker to call.
 * @param   pTestData   Record with the input values already set; the MXCSR
 *                      in/out, immediate and result fields are filled in here
 *                      before the record is written.
 * @param   pSrc        The two source operands handed to the worker.
 * @param   fMxcsrIn    The MXCSR value the worker starts out with.
 * @param   bImm        The immediate byte handed to the worker.
 */
static uint32_t SseCompareF2XmmR64Imm8GenWriteOne(PRTSTREAM pStrmOut, PFNIEMAIMPLMXCSRF2XMMIMM8 pfn,
                                                  SSE_COMPARE_F2_XMM_IMM8_TEST_T *pTestData,
                                                  IEMMEDIAF2XMMSRC *pSrc, uint32_t fMxcsrIn, uint8_t bImm)
{
    uint32_t  fMxcsrOut = fMxcsrIn;
    X86XMMREG Result;
    pfn(&fMxcsrOut, &Result, pSrc, bImm);

    pTestData->fMxcsrIn  = fMxcsrIn;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->bImm      = bImm;
    pTestData->OutVal    = Result;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}

/**
 * Generates the binary test data files for the cmppd/cmpsd sub-tests.
 *
 * One data file is written per table entry.  Each input (random or special)
 * is run through the worker (the native one when the table has it) for every
 * immediate value, rounding mode, DAZ and FZ combination, and for several
 * exception mask settings derived from the exceptions actually raised.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE when a data file cannot
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name, taking the
 *                          sub-test name as its only argument.
 * @param   cTests          Number of random inputs, raised to at least 192.
 */
static RTEXITCODE SseCompareF2XmmR64Imm8Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U Val1, Val2; } const s_aSpecials[] =
    {
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(0) },
        { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) },
        { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(0) },
        { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(1) },
        { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(0) },
        { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(1) },
        /** @todo More specials. */
    };

    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR64Imm8); iFn++)
    {
        /* Prefer the native worker when the table provides one. */
        PFNIEMAIMPLMXCSRF2XMMIMM8 const pfnWorker = g_aSseCompareF2XmmR64Imm8[iFn].pfnNative
                                                  ? g_aSseCompareF2XmmR64Imm8[iFn].pfnNative
                                                  : g_aSseCompareF2XmmR64Imm8[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseCompareF2XmmR64Imm8[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseCompareF2XmmR64Imm8[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_COMPARE_F2_XMM_IMM8_TEST_T TestData; RT_ZERO(TestData);

            /* The first cTests inputs are random, the remainder come from the specials table. */
            if (iTest < cTests)
            {
                TestData.InVal1.ar64[0] = RandR64Src(iTest);
                TestData.InVal1.ar64[1] = RandR64Src(iTest);
                TestData.InVal2.ar64[0] = RandR64Src(iTest);
                TestData.InVal2.ar64[1] = RandR64Src(iTest);
            }
            else
            {
                TestData.InVal1.ar64[0] = s_aSpecials[iTest - cTests].Val1;
                TestData.InVal1.ar64[1] = s_aSpecials[iTest - cTests].Val1;
                TestData.InVal2.ar64[0] = s_aSpecials[iTest - cTests].Val2;
                TestData.InVal2.ar64[1] = s_aSpecials[iTest - cTests].Val2;
            }

            bool const fAllNormal = RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[0])
                                 && RTFLOAT64U_IS_NORMAL(&TestData.InVal1.ar64[1])
                                 && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[0])
                                 && RTFLOAT64U_IS_NORMAL(&TestData.InVal2.ar64[1]);
            if (fAllNormal)
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few all-normal inputs so far and the random slots are running out: redo this slot. */
                iTest -= 1;
                continue;
            }

            IEMMEDIAF2XMMSRC Src;
            Src.uSrc1 = TestData.InVal1;
            Src.uSrc2 = TestData.InVal2;
            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint8_t bImm = 0; bImm <= SSE_COMPARE_F2_XMM_IMM8_MAX; bImm++)
            {
                for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                {
                    for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    {
                        for (uint8_t iFz = 0; iFz < 2; iFz++)
                        {
                            /* First with every exception mask bit set. */
                            uint32_t fMxcsrIn = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                              | (iRounding << X86_MXCSR_RC_SHIFT)
                                              | (iDaz ? X86_MXCSR_DAZ : 0)
                                              | (iFz  ? X86_MXCSR_FZ  : 0)
                                              | X86_MXCSR_XCPT_MASK;
                            uint32_t const fMxcsrM = SseCompareF2XmmR64Imm8GenWriteOne(pStrmOut, pfnWorker, &TestData,
                                                                                       &Src, fMxcsrIn, bImm);

                            /* Then with every exception mask bit clear. */
                            fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                            uint32_t const fMxcsrU = SseCompareF2XmmR64Imm8GenWriteOne(pStrmOut, pfnWorker, &TestData,
                                                                                       &Src, fMxcsrIn, bImm);

                            /* The remaining records only apply when one of the runs raised exception flags. */
                            uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                            if (!fXcpt)
                                continue;

                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                            uint32_t const fMxcsr1 = SseCompareF2XmmR64Imm8GenWriteOne(pStrmOut, pfnWorker, &TestData,
                                                                                       &Src, fMxcsrIn, bImm);

                            /* Did that run raise flags not seen before?  Then mask all flags seen so far. */
                            if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                            {
                                fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseCompareF2XmmR64Imm8GenWriteOne(pStrmOut, pfnWorker, &TestData, &Src, fMxcsrIn, bImm);
                            }

                            /* With more than one flag raised, leave each raised one unmasked in turn. */
                            if (RT_IS_POWER_OF_TWO(fXcpt))
                                continue;
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                            {
                                if (!(fUnmasked & fXcpt))
                                    continue;
                                fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK)
                                         | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                SseCompareF2XmmR64Imm8GenWriteOne(pStrmOut, pfnWorker, &TestData, &Src, fMxcsrIn, bImm);
                            }
                        }
                    }
                }
            }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseCompareF2XmmR64Imm8[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7322
7323static void SseCompareF2XmmR64Imm8Test(void)
7324{
7325 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseCompareF2XmmR64Imm8); iFn++)
7326 {
7327 if (!SubTestAndCheckIfEnabled(g_aSseCompareF2XmmR64Imm8[iFn].pszName))
7328 continue;
7329
7330 uint32_t const cTests = *g_aSseCompareF2XmmR64Imm8[iFn].pcTests;
7331 SSE_COMPARE_F2_XMM_IMM8_TEST_T const * const paTests = g_aSseCompareF2XmmR64Imm8[iFn].paTests;
7332 PFNIEMAIMPLMXCSRF2XMMIMM8 pfn = g_aSseCompareF2XmmR64Imm8[iFn].pfn;
7333 uint32_t const cVars = COUNT_VARIATIONS(g_aSseCompareF2XmmR64Imm8[iFn]);
7334 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7335 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7336 {
7337 for (uint32_t iTest = 0; iTest < cTests / sizeof(SSE_COMPARE_F2_XMM_IMM8_TEST_T); iTest++)
7338 {
7339 IEMMEDIAF2XMMSRC Src;
7340 X86XMMREG ValOut;
7341
7342 Src.uSrc1 = paTests[iTest].InVal1;
7343 Src.uSrc2 = paTests[iTest].InVal2;
7344 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
7345 pfn(&fMxcsr, &ValOut, &Src, paTests[iTest].bImm);
7346 if ( fMxcsr != paTests[iTest].fMxcsrOut
7347 || ValOut.au64[0] != paTests[iTest].OutVal.au64[0]
7348 || ValOut.au64[1] != paTests[iTest].OutVal.au64[1])
7349 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s in2=%s'%s imm8=%x\n"
7350 "%s -> mxcsr=%#08x %RX64'%RX64\n"
7351 "%s expected %#08x %RX64'%RX64%s%s (%s)\n",
7352 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7353 FormatR64(&paTests[iTest].InVal1.ar64[0]), FormatR64(&paTests[iTest].InVal1.ar64[1]),
7354 FormatR64(&paTests[iTest].InVal2.ar64[0]), FormatR64(&paTests[iTest].InVal2.ar64[1]),
7355 paTests[iTest].bImm,
7356 iVar ? " " : "", fMxcsr, ValOut.au64[0], ValOut.au64[1],
7357 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7358 paTests[iTest].OutVal.au64[0], paTests[iTest].OutVal.au64[1],
7359 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
7360 ( ValOut.au64[0] != paTests[iTest].OutVal.au64[0]
7361 || ValOut.au64[1] != paTests[iTest].OutVal.au64[1])
7362 ? " - val" : "",
7363 FormatMxcsr(paTests[iTest].fMxcsrIn));
7364 }
7365 }
7366 }
7367}
7368
7369
7370/*
7371 * Convert SSE operations converting signed double-words to single-precision floating point values.
7372 */
7373TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_T, SSE_CONVERT_XMM_TEST_T, PFNIEMAIMPLFPSSEF2U128);
7374
7375static const SSE_CONVERT_XMM_T g_aSseConvertXmmI32R32[] =
7376{
7377 ENTRY_BIN(cvtdq2ps_u128)
7378};
7379
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Calls the conversion worker with the current state and appends the outcome
 * to the data file as one record.
 *
 * @returns The MXCSR value from the worker's result.
 * @param   pStrmOut    The data file output stream.
 * @param   pfn         The worker to call.
 * @param   pState      State structure whose MXCSR member is the input MXCSR.
 * @param   pTestData   Record with InVal already set; fMxcsrIn, fMxcsrOut and
 *                      OutVal are filled in here before the record is written.
 */
static uint32_t SseConvertXmmI32R32GenWriteOne(PRTSTREAM pStrmOut, PFNIEMAIMPLFPSSEF2U128 pfn,
                                               X86FXSTATE *pState, SSE_CONVERT_XMM_TEST_T *pTestData)
{
    IEMSSERESULT Res; RT_ZERO(Res);
    pfn(pState, &Res, &Res.uResult, &pTestData->InVal);

    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = Res.MXCSR;
    pTestData->OutVal    = Res.uResult;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return Res.MXCSR;
}

/**
 * Generates the binary test data file for the cvtdq2ps sub-test.
 *
 * Each input (random or special) is run through the worker (the native one
 * when the table has it) for every rounding mode, DAZ and FZ combination, and
 * for several exception mask settings derived from the exceptions raised.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE when a data file cannot
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name, taking the
 *                          sub-test name as its only argument.
 * @param   cTests          Number of random inputs, raised to at least 192.
 */
static RTEXITCODE SseConvertXmmI32R32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int32_t const s_aSpecials[] =
    {
        INT32_MIN,
        INT32_MIN / 2,
        0,
        INT32_MAX / 2,
        INT32_MAX,
        (int32_t)0x80000000
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R32); iFn++)
    {
        /* Prefer the native worker when the table provides one. */
        PFNIEMAIMPLFPSSEF2U128 const pfnWorker = g_aSseConvertXmmI32R32[iFn].pfnNative
                                               ? g_aSseConvertXmmI32R32[iFn].pfnNative
                                               : g_aSseConvertXmmI32R32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertXmmI32R32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertXmmI32R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        X86FXSTATE State;
        RT_ZERO(State);
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);

            /* The first cTests inputs are random, the remainder replicate one special value in all four lanes. */
            if (iTest < cTests)
                for (unsigned iLane = 0; iLane < 4; iLane++)
                    TestData.InVal.ai32[iLane] = RandI32Src2(iTest);
            else
                for (unsigned iLane = 0; iLane < 4; iLane++)
                    TestData.InVal.ai32[iLane] = s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                {
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* First with every exception mask bit set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseConvertXmmI32R32GenWriteOne(pStrmOut, pfnWorker, &State, &TestData);

                        /* Then with every exception mask bit clear. */
                        State.MXCSR &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseConvertXmmI32R32GenWriteOne(pStrmOut, pfnWorker, &State, &TestData);

                        /* The remaining records only apply when one of the runs raised exception flags. */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1 = SseConvertXmmI32R32GenWriteOne(pStrmOut, pfnWorker, &State, &TestData);

                        /* Did that run raise flags not seen before?  Then mask all flags seen so far. */
                        if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                        {
                            fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseConvertXmmI32R32GenWriteOne(pStrmOut, pfnWorker, &State, &TestData);
                        }

                        /* With more than one flag raised, leave each raised one unmasked in turn. */
                        if (RT_IS_POWER_OF_TWO(fXcpt))
                            continue;
                        for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                        {
                            if (!(fUnmasked & fXcpt))
                                continue;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK)
                                        | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseConvertXmmI32R32GenWriteOne(pStrmOut, pfnWorker, &State, &TestData);
                        }
                    }
                }
            }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertXmmI32R32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7492
7493static void SseConvertXmmI32R32Test(void)
7494{
7495 X86FXSTATE State;
7496 RT_ZERO(State);
7497
7498 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R32); iFn++)
7499 {
7500 if (!SubTestAndCheckIfEnabled(g_aSseConvertXmmI32R32[iFn].pszName))
7501 continue;
7502
7503 uint32_t const cTests = *g_aSseConvertXmmI32R32[iFn].pcTests;
7504 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmI32R32[iFn].paTests;
7505 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmI32R32[iFn].pfn;
7506 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmI32R32[iFn]);
7507 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7508 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7509 {
7510 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
7511 {
7512 IEMSSERESULT Res; RT_ZERO(Res);
7513
7514 State.MXCSR = paTests[iTest].fMxcsrIn;
7515 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
7516 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
7517 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
7518 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
7519 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
7520 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]))
7521 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32'%RI32'%RI32 \n"
7522 "%s -> mxcsr=%#08x %s'%s'%s'%s\n"
7523 "%s expected %#08x %s'%s'%s'%s%s%s (%s)\n",
7524 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7525 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
7526 paTests[iTest].InVal.ai32[2], paTests[iTest].InVal.ai32[3],
7527 iVar ? " " : "", Res.MXCSR,
7528 FormatR32(&Res.uResult.ar32[0]), FormatR32(&Res.uResult.ar32[1]),
7529 FormatR32(&Res.uResult.ar32[2]), FormatR32(&Res.uResult.ar32[3]),
7530 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7531 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
7532 FormatR32(&paTests[iTest].OutVal.ar32[2]), FormatR32(&paTests[iTest].OutVal.ar32[3]),
7533 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
7534 ( !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[0], &paTests[iTest].OutVal.ar32[0])
7535 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[1], &paTests[iTest].OutVal.ar32[1])
7536 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[2], &paTests[iTest].OutVal.ar32[2])
7537 || !RTFLOAT32U_ARE_IDENTICAL(&Res.uResult.ar32[3], &paTests[iTest].OutVal.ar32[3]))
7538 ? " - val" : "",
7539 FormatMxcsr(paTests[iTest].fMxcsrIn));
7540 }
7541 }
7542 }
7543}
7544
7545
7546/*
7547 * Convert SSE operations converting signed double-words to single-precision floating point values.
7548 */
7549static const SSE_CONVERT_XMM_T g_aSseConvertXmmR32I32[] =
7550{
7551 ENTRY_BIN(cvtps2dq_u128),
7552 ENTRY_BIN(cvttps2dq_u128)
7553};
7554
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Calls the conversion worker with the current state and appends the outcome
 * to the data file as one record.
 *
 * @returns The MXCSR value from the worker's result.
 * @param   pStrmOut    The data file output stream.
 * @param   pfn         The worker to call.
 * @param   pState      State structure whose MXCSR member is the input MXCSR.
 * @param   pTestData   Record with InVal already set; fMxcsrIn, fMxcsrOut and
 *                      OutVal are filled in here before the record is written.
 */
static uint32_t SseConvertXmmR32I32GenWriteOne(PRTSTREAM pStrmOut, PFNIEMAIMPLFPSSEF2U128 pfn,
                                               X86FXSTATE *pState, SSE_CONVERT_XMM_TEST_T *pTestData)
{
    IEMSSERESULT Res; RT_ZERO(Res);
    pfn(pState, &Res, &Res.uResult, &pTestData->InVal);

    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = Res.MXCSR;
    pTestData->OutVal    = Res.uResult;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return Res.MXCSR;
}

/**
 * Generates the binary test data files for the cvtps2dq/cvttps2dq sub-tests.
 *
 * Each input (random or special) is run through the worker (the native one
 * when the table has it) for every rounding mode, DAZ and FZ combination, and
 * for several exception mask settings derived from the exceptions raised.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE when a data file cannot
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name, taking the
 *                          sub-test name as its only argument.
 * @param   cTests          Number of random inputs, raised to at least 192.
 */
static RTEXITCODE SseConvertXmmR32I32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U aVal1[4]; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) } },
        { { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) } },
        { { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0)  } },
        { { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1)  } }
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32I32); iFn++)
    {
        /* Prefer the native worker when the table provides one. */
        PFNIEMAIMPLFPSSEF2U128 const pfnWorker = g_aSseConvertXmmR32I32[iFn].pfnNative
                                               ? g_aSseConvertXmmR32I32[iFn].pfnNative
                                               : g_aSseConvertXmmR32I32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertXmmR32I32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertXmmR32I32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);

            /* The first cTests inputs are random, the remainder come from the specials table. */
            if (iTest < cTests)
                for (unsigned iLane = 0; iLane < 4; iLane++)
                    TestData.InVal.ar32[iLane] = RandR32Src(iTest);
            else
                for (unsigned iLane = 0; iLane < 4; iLane++)
                    TestData.InVal.ar32[iLane] = s_aSpecials[iTest - cTests].aVal1[iLane];

            bool const fAllNormal = RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[0])
                                 && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[1])
                                 && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[2])
                                 && RTFLOAT32U_IS_NORMAL(&TestData.InVal.ar32[3]);
            if (fAllNormal)
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few all-normal inputs so far and the random slots are running out: redo this slot. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                {
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* First with every exception mask bit set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseConvertXmmR32I32GenWriteOne(pStrmOut, pfnWorker, &State, &TestData);

                        /* Then with every exception mask bit clear. */
                        State.MXCSR &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseConvertXmmR32I32GenWriteOne(pStrmOut, pfnWorker, &State, &TestData);

                        /* The remaining records only apply when one of the runs raised exception flags. */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1 = SseConvertXmmR32I32GenWriteOne(pStrmOut, pfnWorker, &State, &TestData);

                        /* Did that run raise flags not seen before?  Then mask all flags seen so far. */
                        if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                        {
                            fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseConvertXmmR32I32GenWriteOne(pStrmOut, pfnWorker, &State, &TestData);
                        }

                        /* With more than one flag raised, leave each raised one unmasked in turn. */
                        if (RT_IS_POWER_OF_TWO(fXcpt))
                            continue;
                        for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                        {
                            if (!(fUnmasked & fXcpt))
                                continue;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK)
                                        | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseConvertXmmR32I32GenWriteOne(pStrmOut, pfnWorker, &State, &TestData);
                        }
                    }
                }
            }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertXmmR32I32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7678
7679static void SseConvertXmmR32I32Test(void)
7680{
7681 X86FXSTATE State;
7682 RT_ZERO(State);
7683
7684 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32I32); iFn++)
7685 {
7686 if (!SubTestAndCheckIfEnabled(g_aSseConvertXmmR32I32[iFn].pszName))
7687 continue;
7688
7689 uint32_t const cTests = *g_aSseConvertXmmR32I32[iFn].pcTests;
7690 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmR32I32[iFn].paTests;
7691 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmR32I32[iFn].pfn;
7692 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR32I32[iFn]);
7693 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7694 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7695 {
7696 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
7697 {
7698 IEMSSERESULT Res; RT_ZERO(Res);
7699
7700 State.MXCSR = paTests[iTest].fMxcsrIn;
7701 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
7702 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
7703 || Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
7704 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
7705 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
7706 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
7707 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s'%s'%s \n"
7708 "%s -> mxcsr=%#08x %RI32'%RI32'%RI32'%RI32\n"
7709 "%s expected %#08x %RI32'%RI32'%RI32'%RI32%s%s (%s)\n",
7710 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7711 FormatR32(&paTests[iTest].InVal.ar32[0]), FormatR32(&paTests[iTest].InVal.ar32[1]),
7712 FormatR32(&paTests[iTest].InVal.ar32[2]), FormatR32(&paTests[iTest].InVal.ar32[3]),
7713 iVar ? " " : "", Res.MXCSR,
7714 Res.uResult.ai32[0], Res.uResult.ai32[1],
7715 Res.uResult.ai32[2], Res.uResult.ai32[3],
7716 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7717 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
7718 paTests[iTest].OutVal.ai32[2], paTests[iTest].OutVal.ai32[3],
7719 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
7720 ( Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
7721 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
7722 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
7723 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
7724 ? " - val" : "",
7725 FormatMxcsr(paTests[iTest].fMxcsrIn));
7726 }
7727 }
7728 }
7729}
7730
7731
7732/*
7733 * Convert SSE operations converting signed double-words to double-precision floating point values.
7734 */
7735static const SSE_CONVERT_XMM_T g_aSseConvertXmmI32R64[] =
7736{
7737 ENTRY_BIN(cvtdq2pd_u128)
7738};
7739
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Calls the conversion worker with the current state and appends the outcome
 * to the data file as one record.
 *
 * @returns The MXCSR value from the worker's result.
 * @param   pStrmOut    The data file output stream.
 * @param   pfn         The worker to call.
 * @param   pState      State structure whose MXCSR member is the input MXCSR.
 * @param   pTestData   Record with InVal already set; fMxcsrIn, fMxcsrOut and
 *                      OutVal are filled in here before the record is written.
 */
static uint32_t SseConvertXmmI32R64GenWriteOne(PRTSTREAM pStrmOut, PFNIEMAIMPLFPSSEF2U128 pfn,
                                               X86FXSTATE *pState, SSE_CONVERT_XMM_TEST_T *pTestData)
{
    IEMSSERESULT Res; RT_ZERO(Res);
    pfn(pState, &Res, &Res.uResult, &pTestData->InVal);

    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = Res.MXCSR;
    pTestData->OutVal    = Res.uResult;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return Res.MXCSR;
}

/**
 * Generates the binary test data file for the cvtdq2pd sub-test.
 *
 * Each input (random or special) is run through the worker (the native one
 * when the table has it) for every rounding mode, DAZ and FZ combination, and
 * for several exception mask settings derived from the exceptions raised.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE when a data file cannot
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name, taking the
 *                          sub-test name as its only argument.
 * @param   cTests          Number of random inputs, raised to at least 192.
 */
static RTEXITCODE SseConvertXmmI32R64Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static int32_t const s_aSpecials[] =
    {
        INT32_MIN,
        INT32_MIN / 2,
        0,
        INT32_MAX / 2,
        INT32_MAX,
        (int32_t)0x80000000
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R64); iFn++)
    {
        /* Prefer the native worker when the table provides one. */
        PFNIEMAIMPLFPSSEF2U128 const pfnWorker = g_aSseConvertXmmI32R64[iFn].pfnNative
                                               ? g_aSseConvertXmmI32R64[iFn].pfnNative
                                               : g_aSseConvertXmmI32R64[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertXmmI32R64[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertXmmI32R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        X86FXSTATE State;
        RT_ZERO(State);
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);

            /* The first cTests inputs are random, the remainder replicate one special value in all four lanes. */
            if (iTest < cTests)
                for (unsigned iLane = 0; iLane < 4; iLane++)
                    TestData.InVal.ai32[iLane] = RandI32Src2(iTest);
            else
                for (unsigned iLane = 0; iLane < 4; iLane++)
                    TestData.InVal.ai32[iLane] = s_aSpecials[iTest - cTests];

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                {
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* First with every exception mask bit set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseConvertXmmI32R64GenWriteOne(pStrmOut, pfnWorker, &State, &TestData);

                        /* Then with every exception mask bit clear. */
                        State.MXCSR &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseConvertXmmI32R64GenWriteOne(pStrmOut, pfnWorker, &State, &TestData);

                        /* The remaining records only apply when one of the runs raised exception flags. */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1 = SseConvertXmmI32R64GenWriteOne(pStrmOut, pfnWorker, &State, &TestData);

                        /* Did that run raise flags not seen before?  Then mask all flags seen so far. */
                        if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                        {
                            fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseConvertXmmI32R64GenWriteOne(pStrmOut, pfnWorker, &State, &TestData);
                        }

                        /* With more than one flag raised, leave each raised one unmasked in turn. */
                        if (RT_IS_POWER_OF_TWO(fXcpt))
                            continue;
                        for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                        {
                            if (!(fUnmasked & fXcpt))
                                continue;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK)
                                        | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseConvertXmmI32R64GenWriteOne(pStrmOut, pfnWorker, &State, &TestData);
                        }
                    }
                }
            }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertXmmI32R64[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
7852
7853static void SseConvertXmmI32R64Test(void)
7854{
7855 X86FXSTATE State;
7856 RT_ZERO(State);
7857
7858 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmI32R64); iFn++)
7859 {
7860 if (!SubTestAndCheckIfEnabled(g_aSseConvertXmmI32R64[iFn].pszName))
7861 continue;
7862
7863 uint32_t const cTests = *g_aSseConvertXmmI32R64[iFn].pcTests;
7864 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmI32R64[iFn].paTests;
7865 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmI32R64[iFn].pfn;
7866 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmI32R64[iFn]);
7867 if (!cTests) RTTestSkipped(g_hTest, "no tests");
7868 for (uint32_t iVar = 0; iVar < cVars; iVar++)
7869 {
7870 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
7871 {
7872 IEMSSERESULT Res; RT_ZERO(Res);
7873
7874 State.MXCSR = paTests[iTest].fMxcsrIn;
7875 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
7876 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
7877 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
7878 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
7879 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32'%RI32'%RI32 \n"
7880 "%s -> mxcsr=%#08x %s'%s\n"
7881 "%s expected %#08x %s'%s%s%s (%s)\n",
7882 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
7883 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
7884 paTests[iTest].InVal.ai32[2], paTests[iTest].InVal.ai32[3],
7885 iVar ? " " : "", Res.MXCSR,
7886 FormatR64(&Res.uResult.ar64[0]), FormatR64(&Res.uResult.ar64[1]),
7887 iVar ? " " : "", paTests[iTest].fMxcsrOut,
7888 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
7889 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
7890 ( !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[0], &paTests[iTest].OutVal.ar64[0])
7891 || !RTFLOAT64U_ARE_IDENTICAL(&Res.uResult.ar64[1], &paTests[iTest].OutVal.ar64[1]))
7892 ? " - val" : "",
7893 FormatMxcsr(paTests[iTest].fMxcsrIn));
7894 }
7895 }
7896 }
7897}
7898
7899
7900/*
7901 * Convert SSE operations converting signed double-words to double-precision floating point values.
7902 */
7903static const SSE_CONVERT_XMM_T g_aSseConvertXmmR64I32[] =
7904{
7905 ENTRY_BIN(cvtpd2dq_u128),
7906 ENTRY_BIN(cvttpd2dq_u128)
7907};
7908
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Calls the conversion worker with the current state and appends the outcome
 * to the data file as one record.
 *
 * @returns The MXCSR value from the worker's result.
 * @param   pStrmOut    The data file output stream.
 * @param   pfn         The worker to call.
 * @param   pState      State structure whose MXCSR member is the input MXCSR.
 * @param   pTestData   Record with InVal already set; fMxcsrIn, fMxcsrOut and
 *                      OutVal are filled in here before the record is written.
 */
static uint32_t SseConvertXmmR64I32GenWriteOne(PRTSTREAM pStrmOut, PFNIEMAIMPLFPSSEF2U128 pfn,
                                               X86FXSTATE *pState, SSE_CONVERT_XMM_TEST_T *pTestData)
{
    IEMSSERESULT Res; RT_ZERO(Res);
    pfn(pState, &Res, &Res.uResult, &pTestData->InVal);

    pTestData->fMxcsrIn  = pState->MXCSR;
    pTestData->fMxcsrOut = Res.MXCSR;
    pTestData->OutVal    = Res.uResult;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return Res.MXCSR;
}

/**
 * Generates the binary test data files for the cvtpd2dq/cvttpd2dq sub-tests.
 *
 * Each input (random or special) is run through the worker (the native one
 * when the table has it) for every rounding mode, DAZ and FZ combination, and
 * for several exception mask settings derived from the exceptions raised.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE when a data file cannot
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name, taking the
 *                          sub-test name as its only argument.
 * @param   cTests          Number of random inputs, raised to at least 192.
 */
static RTEXITCODE SseConvertXmmR64I32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U aVal1[2]; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
        { { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) } },
        { { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(0)  } },
        { { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(1)  } }
        /** @todo More specials. */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64I32); iFn++)
    {
        /* Prefer the native worker when the table provides one. */
        PFNIEMAIMPLFPSSEF2U128 const pfnWorker = g_aSseConvertXmmR64I32[iFn].pfnNative
                                               ? g_aSseConvertXmmR64I32[iFn].pfnNative
                                               : g_aSseConvertXmmR64I32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertXmmR64I32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertXmmR64I32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_TEST_T TestData; RT_ZERO(TestData);

            /* The first cTests inputs are random, the remainder come from the specials table. */
            if (iTest < cTests)
                for (unsigned iLane = 0; iLane < 2; iLane++)
                    TestData.InVal.ar64[iLane] = RandR64Src(iTest);
            else
                for (unsigned iLane = 0; iLane < 2; iLane++)
                    TestData.InVal.ar64[iLane] = s_aSpecials[iTest - cTests].aVal1[iLane];

            bool const fAllNormal = RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[0])
                                 && RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[1]);
            if (fAllNormal)
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few all-normal inputs so far and the random slots are running out: redo this slot. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsr = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                {
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* First with every exception mask bit set. */
                        State.MXCSR = (fMxcsr & ~X86_MXCSR_RC_MASK)
                                    | (iRounding << X86_MXCSR_RC_SHIFT)
                                    | (iDaz ? X86_MXCSR_DAZ : 0)
                                    | (iFz  ? X86_MXCSR_FZ  : 0)
                                    | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseConvertXmmR64I32GenWriteOne(pStrmOut, pfnWorker, &State, &TestData);

                        /* Then with every exception mask bit clear. */
                        State.MXCSR &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseConvertXmmR64I32GenWriteOne(pStrmOut, pfnWorker, &State, &TestData);

                        /* The remaining records only apply when one of the runs raised exception flags. */
                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1 = SseConvertXmmR64I32GenWriteOne(pStrmOut, pfnWorker, &State, &TestData);

                        /* Did that run raise flags not seen before?  Then mask all flags seen so far. */
                        if (((fMxcsr1 & X86_MXCSR_XCPT_FLAGS) & fXcpt) != (fMxcsr1 & X86_MXCSR_XCPT_FLAGS))
                        {
                            fXcpt |= fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseConvertXmmR64I32GenWriteOne(pStrmOut, pfnWorker, &State, &TestData);
                        }

                        /* With more than one flag raised, leave each raised one unmasked in turn. */
                        if (RT_IS_POWER_OF_TWO(fXcpt))
                            continue;
                        for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                        {
                            if (!(fUnmasked & fXcpt))
                                continue;
                            State.MXCSR = (State.MXCSR & ~X86_MXCSR_XCPT_MASK)
                                        | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseConvertXmmR64I32GenWriteOne(pStrmOut, pfnWorker, &State, &TestData);
                        }
                    }
                }
            }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertXmmR64I32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8028
8029static void SseConvertXmmR64I32Test(void)
8030{
8031 X86FXSTATE State;
8032 RT_ZERO(State);
8033
8034 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64I32); iFn++)
8035 {
8036 if (!SubTestAndCheckIfEnabled(g_aSseConvertXmmR64I32[iFn].pszName))
8037 continue;
8038
8039 uint32_t const cTests = *g_aSseConvertXmmR64I32[iFn].pcTests;
8040 SSE_CONVERT_XMM_TEST_T const * const paTests = g_aSseConvertXmmR64I32[iFn].paTests;
8041 PFNIEMAIMPLFPSSEF2U128 pfn = g_aSseConvertXmmR64I32[iFn].pfn;
8042 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR64I32[iFn]);
8043 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8044 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8045 {
8046 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8047 {
8048 IEMSSERESULT Res; RT_ZERO(Res);
8049
8050 State.MXCSR = paTests[iTest].fMxcsrIn;
8051 pfn(&State, &Res, &Res.uResult, &paTests[iTest].InVal);
8052 if ( Res.MXCSR != paTests[iTest].fMxcsrOut
8053 || Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
8054 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
8055 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
8056 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
8057 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s \n"
8058 "%s -> mxcsr=%#08x %RI32'%RI32'%RI32'%RI32\n"
8059 "%s expected %#08x %RI32'%RI32'%RI32'%RI32%s%s (%s)\n",
8060 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8061 FormatR64(&paTests[iTest].InVal.ar64[0]), FormatR64(&paTests[iTest].InVal.ar64[1]),
8062 iVar ? " " : "", Res.MXCSR,
8063 Res.uResult.ai32[0], Res.uResult.ai32[1],
8064 Res.uResult.ai32[2], Res.uResult.ai32[3],
8065 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8066 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8067 paTests[iTest].OutVal.ai32[2], paTests[iTest].OutVal.ai32[3],
8068 MxcsrDiff(Res.MXCSR, paTests[iTest].fMxcsrOut),
8069 ( Res.uResult.ai32[0] != paTests[iTest].OutVal.ai32[0]
8070 || Res.uResult.ai32[1] != paTests[iTest].OutVal.ai32[1]
8071 || Res.uResult.ai32[2] != paTests[iTest].OutVal.ai32[2]
8072 || Res.uResult.ai32[3] != paTests[iTest].OutVal.ai32[3])
8073 ? " - val" : "",
8074 FormatMxcsr(paTests[iTest].fMxcsrIn));
8075 }
8076 }
8077 }
8078}
8079
8080
8081/*
8082 * Convert SSE operations converting double-precision floating point values to signed double-word values.
8083 */
/* Sub-test type SSE_CONVERT_MM_XMM_T: SSE_CONVERT_MM_XMM_TEST_T records driven
   through PFNIEMAIMPLMXCSRU64U128 workers (presumably declared by the macro
   below - see its definition for the exact layout). */
TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_MM_XMM_T, SSE_CONVERT_MM_XMM_TEST_T, PFNIEMAIMPLMXCSRU64U128);

/** The workers iterated by SseConvertMmXmmGenerate and SseConvertMmXmmTest. */
static const SSE_CONVERT_MM_XMM_T g_aSseConvertMmXmm[] =
{
    ENTRY_BIN(cvtpd2pi_u128),   /* rounding variant (per the instruction name) */
    ENTRY_BIN(cvttpd2pi_u128)   /* truncating variant (per the instruction name) */
};
8091
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs @a pfn once with @a fMxcsrIn as MXCSR input and appends the resulting
 * test record to @a pStrmOut.
 *
 * @returns The MXCSR value the worker left behind.
 * @param   pStrmOut    Stream receiving the binary record.
 * @param   pfn         The worker to invoke.
 * @param   pTestData   Record to complete; InVal must already be filled in.
 * @param   fMxcsrIn    The MXCSR value handed to the worker.
 */
static uint32_t SseConvertMmXmmGenerateOne(PRTSTREAM pStrmOut, PFNIEMAIMPLMXCSRU64U128 pfn,
                                           SSE_CONVERT_MM_XMM_TEST_T *pTestData, uint32_t fMxcsrIn)
{
    uint32_t fMxcsrOut = fMxcsrIn;
    uint64_t u64Result;
    pfn(&fMxcsrOut, &u64Result, &pTestData->InVal);
    pTestData->fMxcsrIn  = fMxcsrIn;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->OutVal.u  = u64Result;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}

/**
 * Generates the binary test data files for the g_aSseConvertMmXmm workers.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the worker
 *                          name is passed as its argument.
 * @param   cTests          Requested number of random inputs (raised to at
 *                          least 192); the special inputs come on top.
 */
static RTEXITCODE SseConvertMmXmmGenerate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT64U aVal1[2]; } const s_aSpecials[] =
    {
        { { RTFLOAT64U_INIT_ZERO(0), RTFLOAT64U_INIT_ZERO(0) } },
        { { RTFLOAT64U_INIT_ZERO(1), RTFLOAT64U_INIT_ZERO(1) } },
        { { RTFLOAT64U_INIT_INF(0),  RTFLOAT64U_INIT_INF(0)  } },
        { { RTFLOAT64U_INIT_INF(1),  RTFLOAT64U_INIT_INF(1)  } }
        /** @todo More specials. */
    };

    uint32_t const cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmXmm); iFn++)
    {
        /* Prefer the native worker when the table provides one. */
        PFNIEMAIMPLMXCSRU64U128 const pfnWorker = g_aSseConvertMmXmm[iFn].pfnNative
                                                ? g_aSseConvertMmXmm[iFn].pfnNative
                                                : g_aSseConvertMmXmm[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertMmXmm[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertMmXmm[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalPairsSeen = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_MM_XMM_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, the special values after them. */
            if (iTest < cTests)
            {
                TestData.InVal.ar64[0] = RandR64Src(iTest);
                TestData.InVal.ar64[1] = RandR64Src(iTest);
            }
            else
            {
                TestData.InVal.ar64[0] = s_aSpecials[iTest - cTests].aVal1[0];
                TestData.InVal.ar64[1] = s_aSpecials[iTest - cTests].aVal1[1];
            }

            bool const fBothNormal = RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[0])
                                  && RTFLOAT64U_IS_NORMAL(&TestData.InVal.ar64[1]);
            if (fBothNormal)
                cNormalPairsSeen++;
            else if (   cNormalPairsSeen < cMinNormalPairs
                     && iTest + cMinNormalPairs >= cTests
                     && iTest < cTests)
            {
                /* Running short on normal pairs: redo this slot with fresh random input. */
                iTest -= 1;
                continue;
            }

            uint32_t const fMxcsrRand = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Record 1: all exception mask bits set. */
                        uint32_t fMxcsrIn = (fMxcsrRand & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz  ? X86_MXCSR_FZ  : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseConvertMmXmmGenerateOne(pStrmOut, pfnWorker, &TestData, fMxcsrIn);

                        /* Record 2: all exception mask bits clear. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseConvertMmXmmGenerateOne(pStrmOut, pfnWorker, &TestData, fMxcsrIn);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Record 3: mask bits clear, the observed exception flags pre-set. */
                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1    = SseConvertMmXmmGenerateOne(pStrmOut, pfnWorker, &TestData, fMxcsrIn);
                        uint32_t const fRaisedNow = fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                        if ((fRaisedNow & fXcpt) != fRaisedNow)
                        {
                            /* New flags showed up: mask exactly the accumulated set. */
                            fXcpt |= fRaisedNow;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseConvertMmXmmGenerateOne(pStrmOut, pfnWorker, &TestData, fMxcsrIn);
                        }

                        /* Several flags involved: leave each of them unmasked in turn. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK)
                                             | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                    SseConvertMmXmmGenerateOne(pStrmOut, pfnWorker, &TestData, fMxcsrIn);
                                }
                    }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertMmXmm[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8214
8215static void SseConvertMmXmmTest(void)
8216{
8217 X86FXSTATE State;
8218 RT_ZERO(State);
8219
8220 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmXmm); iFn++)
8221 {
8222 if (!SubTestAndCheckIfEnabled(g_aSseConvertMmXmm[iFn].pszName))
8223 continue;
8224
8225 uint32_t const cTests = *g_aSseConvertMmXmm[iFn].pcTests;
8226 SSE_CONVERT_MM_XMM_TEST_T const * const paTests = g_aSseConvertMmXmm[iFn].paTests;
8227 PFNIEMAIMPLMXCSRU64U128 pfn = g_aSseConvertMmXmm[iFn].pfn;
8228 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertMmXmm[iFn]);
8229 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8230 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8231 {
8232 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8233 {
8234 RTUINT64U ValOut;
8235 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
8236 pfn(&fMxcsr, &ValOut.u, &paTests[iTest].InVal);
8237 if ( fMxcsr != paTests[iTest].fMxcsrOut
8238 || ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8239 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8240 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s\n"
8241 "%s -> mxcsr=%#08x %RI32'%RI32\n"
8242 "%s expected %#08x %RI32'%RI32%s%s (%s)\n",
8243 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8244 FormatR64(&paTests[iTest].InVal.ar64[0]), FormatR64(&paTests[iTest].InVal.ar64[1]),
8245 iVar ? " " : "", fMxcsr, ValOut.ai32[0], ValOut.ai32[1],
8246 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8247 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8248 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8249 ( ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8250 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8251 ? " - val" : "",
8252 FormatMxcsr(paTests[iTest].fMxcsrIn));
8253 }
8254 }
8255 }
8256}
8257
8258
8259/*
8260 * Convert SSE operations converting signed double-word values to double precision floating-point values (probably only cvtpi2pd).
8261 */
/* Sub-test type SSE_CONVERT_XMM_R64_MM_T: SSE_CONVERT_XMM_MM_TEST_T records
   driven through PFNIEMAIMPLMXCSRU128U64 workers (presumably declared by the
   macro below - see its definition for the exact layout). */
TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_R64_MM_T, SSE_CONVERT_XMM_MM_TEST_T, PFNIEMAIMPLMXCSRU128U64);

/** The workers iterated by SseConvertXmmR64MmGenerate and SseConvertXmmR64MmTest. */
static const SSE_CONVERT_XMM_R64_MM_T g_aSseConvertXmmR64Mm[] =
{
    ENTRY_BIN(cvtpi2pd_u128)
};
8268
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs @a pfn once with @a fMxcsrIn as MXCSR input, storing the result in the
 * record, and appends the record to @a pStrmOut.
 *
 * @returns The MXCSR value the worker left behind.
 * @param   pStrmOut    Stream receiving the binary record.
 * @param   pfn         The worker to invoke.
 * @param   pTestData   Record to complete; InVal must already be filled in.
 * @param   fMxcsrIn    The MXCSR value handed to the worker.
 */
static uint32_t SseConvertXmmR64MmGenerateOne(PRTSTREAM pStrmOut, PFNIEMAIMPLMXCSRU128U64 pfn,
                                              SSE_CONVERT_XMM_MM_TEST_T *pTestData, uint32_t fMxcsrIn)
{
    uint32_t fMxcsrOut = fMxcsrIn;
    pfn(&fMxcsrOut, &pTestData->OutVal, pTestData->InVal.u);
    pTestData->fMxcsrIn  = fMxcsrIn;
    pTestData->fMxcsrOut = fMxcsrOut;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}

/**
 * Generates the binary test data files for the g_aSseConvertXmmR64Mm workers.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the worker
 *                          name is passed as its argument.
 * @param   cTests          Requested number of random inputs (raised to at
 *                          least 192); the special inputs come on top.
 */
static RTEXITCODE SseConvertXmmR64MmGenerate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { int32_t aVal[2]; } const s_aSpecials[] =
    {
        { { INT32_MIN, INT32_MIN } },
        { { INT32_MAX, INT32_MAX } }
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64Mm); iFn++)
    {
        /* Prefer the native worker when the table provides one. */
        PFNIEMAIMPLMXCSRU128U64 const pfnWorker = g_aSseConvertXmmR64Mm[iFn].pfnNative
                                                ? g_aSseConvertXmmR64Mm[iFn].pfnNative
                                                : g_aSseConvertXmmR64Mm[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertXmmR64Mm[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertXmmR64Mm[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_MM_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, the special values after them. */
            if (iTest < cTests)
            {
                TestData.InVal.ai32[0] = RandI32Src2(iTest);
                TestData.InVal.ai32[1] = RandI32Src2(iTest);
            }
            else
            {
                TestData.InVal.ai32[0] = s_aSpecials[iTest - cTests].aVal[0];
                TestData.InVal.ai32[1] = s_aSpecials[iTest - cTests].aVal[1];
            }

            uint32_t const fMxcsrRand = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Record 1: all exception mask bits set. */
                        uint32_t fMxcsrIn = (fMxcsrRand & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz  ? X86_MXCSR_FZ  : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseConvertXmmR64MmGenerateOne(pStrmOut, pfnWorker, &TestData, fMxcsrIn);

                        /* Record 2: all exception mask bits clear. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseConvertXmmR64MmGenerateOne(pStrmOut, pfnWorker, &TestData, fMxcsrIn);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Record 3: mask bits clear, the observed exception flags pre-set. */
                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1    = SseConvertXmmR64MmGenerateOne(pStrmOut, pfnWorker, &TestData, fMxcsrIn);
                        uint32_t const fRaisedNow = fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                        if ((fRaisedNow & fXcpt) != fRaisedNow)
                        {
                            /* New flags showed up: mask exactly the accumulated set. */
                            fXcpt |= fRaisedNow;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseConvertXmmR64MmGenerateOne(pStrmOut, pfnWorker, &TestData, fMxcsrIn);
                        }

                        /* Several flags involved: leave each of them unmasked in turn. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK)
                                             | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                    SseConvertXmmR64MmGenerateOne(pStrmOut, pfnWorker, &TestData, fMxcsrIn);
                                }
                    }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertXmmR64Mm[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8368
8369static void SseConvertXmmR64MmTest(void)
8370{
8371 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR64Mm); iFn++)
8372 {
8373 if (!SubTestAndCheckIfEnabled(g_aSseConvertXmmR64Mm[iFn].pszName))
8374 continue;
8375
8376 uint32_t const cTests = *g_aSseConvertXmmR64Mm[iFn].pcTests;
8377 SSE_CONVERT_XMM_MM_TEST_T const * const paTests = g_aSseConvertXmmR64Mm[iFn].paTests;
8378 PFNIEMAIMPLMXCSRU128U64 pfn = g_aSseConvertXmmR64Mm[iFn].pfn;
8379 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR64Mm[iFn]);
8380 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8381 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8382 {
8383 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8384 {
8385 X86XMMREG ValOut;
8386 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
8387 pfn(&fMxcsr, &ValOut, paTests[iTest].InVal.u);
8388 if ( fMxcsr != paTests[iTest].fMxcsrOut
8389 || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[0], &paTests[iTest].OutVal.ar64[0])
8390 || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[1], &paTests[iTest].OutVal.ar64[1]))
8391 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32\n"
8392 "%s -> mxcsr=%#08x %s'%s\n"
8393 "%s expected %#08x %s'%s%s%s (%s)\n",
8394 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8395 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
8396 iVar ? " " : "", fMxcsr,
8397 FormatR64(&ValOut.ar64[0]), FormatR64(&ValOut.ar64[1]),
8398 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8399 FormatR64(&paTests[iTest].OutVal.ar64[0]), FormatR64(&paTests[iTest].OutVal.ar64[1]),
8400 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8401 ( !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[0], &paTests[iTest].OutVal.ar64[0])
8402 || !RTFLOAT64U_ARE_IDENTICAL(&ValOut.ar64[1], &paTests[iTest].OutVal.ar64[1]))
8403 ? " - val" : "",
8404 FormatMxcsr(paTests[iTest].fMxcsrIn));
8405 }
8406 }
8407 }
8408}
8409
8410
8411/*
 * Convert SSE operations converting signed double-word values to single-precision floating-point values (probably only cvtpi2ps).
8413 */
/* Sub-test type SSE_CONVERT_XMM_R32_MM_T: SSE_CONVERT_XMM_MM_TEST_T records
   driven through PFNIEMAIMPLMXCSRU128U64 workers (presumably declared by the
   macro below - see its definition for the exact layout). */
TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_XMM_R32_MM_T, SSE_CONVERT_XMM_MM_TEST_T, PFNIEMAIMPLMXCSRU128U64);

/** The workers iterated by SseConvertXmmR32MmGenerate and SseConvertXmmR32MmTest. */
static const SSE_CONVERT_XMM_R32_MM_T g_aSseConvertXmmR32Mm[] =
{
    ENTRY_BIN(cvtpi2ps_u128)
};
8420
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs @a pfn once with @a fMxcsrIn as MXCSR input, storing the result in the
 * record, and appends the record to @a pStrmOut.
 *
 * @returns The MXCSR value the worker left behind.
 * @param   pStrmOut    Stream receiving the binary record.
 * @param   pfn         The worker to invoke.
 * @param   pTestData   Record to complete; InVal must already be filled in.
 * @param   fMxcsrIn    The MXCSR value handed to the worker.
 */
static uint32_t SseConvertXmmR32MmGenerateOne(PRTSTREAM pStrmOut, PFNIEMAIMPLMXCSRU128U64 pfn,
                                              SSE_CONVERT_XMM_MM_TEST_T *pTestData, uint32_t fMxcsrIn)
{
    uint32_t fMxcsrOut = fMxcsrIn;
    pfn(&fMxcsrOut, &pTestData->OutVal, pTestData->InVal.u);
    pTestData->fMxcsrIn  = fMxcsrIn;
    pTestData->fMxcsrOut = fMxcsrOut;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}

/**
 * Generates the binary test data files for the g_aSseConvertXmmR32Mm workers.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the worker
 *                          name is passed as its argument.
 * @param   cTests          Requested number of random inputs (raised to at
 *                          least 192); the special inputs come on top.
 */
static RTEXITCODE SseConvertXmmR32MmGenerate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { int32_t aVal[2]; } const s_aSpecials[] =
    {
        { { INT32_MIN, INT32_MIN } },
        { { INT32_MAX, INT32_MAX } }
        /** @todo More specials. */
    };

    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32Mm); iFn++)
    {
        /* Prefer the native worker when the table provides one. */
        PFNIEMAIMPLMXCSRU128U64 const pfnWorker = g_aSseConvertXmmR32Mm[iFn].pfnNative
                                                ? g_aSseConvertXmmR32Mm[iFn].pfnNative
                                                : g_aSseConvertXmmR32Mm[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertXmmR32Mm[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertXmmR32Mm[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_XMM_MM_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, the special values after them. */
            if (iTest < cTests)
            {
                TestData.InVal.ai32[0] = RandI32Src2(iTest);
                TestData.InVal.ai32[1] = RandI32Src2(iTest);
            }
            else
            {
                TestData.InVal.ai32[0] = s_aSpecials[iTest - cTests].aVal[0];
                TestData.InVal.ai32[1] = s_aSpecials[iTest - cTests].aVal[1];
            }

            uint32_t const fMxcsrRand = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Record 1: all exception mask bits set. */
                        uint32_t fMxcsrIn = (fMxcsrRand & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz  ? X86_MXCSR_FZ  : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseConvertXmmR32MmGenerateOne(pStrmOut, pfnWorker, &TestData, fMxcsrIn);

                        /* Record 2: all exception mask bits clear. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseConvertXmmR32MmGenerateOne(pStrmOut, pfnWorker, &TestData, fMxcsrIn);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Record 3: mask bits clear, the observed exception flags pre-set. */
                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1    = SseConvertXmmR32MmGenerateOne(pStrmOut, pfnWorker, &TestData, fMxcsrIn);
                        uint32_t const fRaisedNow = fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                        if ((fRaisedNow & fXcpt) != fRaisedNow)
                        {
                            /* New flags showed up: mask exactly the accumulated set. */
                            fXcpt |= fRaisedNow;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseConvertXmmR32MmGenerateOne(pStrmOut, pfnWorker, &TestData, fMxcsrIn);
                        }

                        /* Several flags involved: leave each of them unmasked in turn. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK)
                                             | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                    SseConvertXmmR32MmGenerateOne(pStrmOut, pfnWorker, &TestData, fMxcsrIn);
                                }
                    }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertXmmR32Mm[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8520
8521static void SseConvertXmmR32MmTest(void)
8522{
8523 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertXmmR32Mm); iFn++)
8524 {
8525 if (!SubTestAndCheckIfEnabled(g_aSseConvertXmmR32Mm[iFn].pszName))
8526 continue;
8527
8528 uint32_t const cTests = *g_aSseConvertXmmR32Mm[iFn].pcTests;
8529 SSE_CONVERT_XMM_MM_TEST_T const * const paTests = g_aSseConvertXmmR32Mm[iFn].paTests;
8530 PFNIEMAIMPLMXCSRU128U64 pfn = g_aSseConvertXmmR32Mm[iFn].pfn;
8531 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertXmmR32Mm[iFn]);
8532 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8533 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8534 {
8535 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8536 {
8537 X86XMMREG ValOut;
8538 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
8539 pfn(&fMxcsr, &ValOut, paTests[iTest].InVal.u);
8540 if ( fMxcsr != paTests[iTest].fMxcsrOut
8541 || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[0], &paTests[iTest].OutVal.ar32[0])
8542 || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[1], &paTests[iTest].OutVal.ar32[1]))
8543 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%RI32'%RI32\n"
8544 "%s -> mxcsr=%#08x %s'%s\n"
8545 "%s expected %#08x %s'%s%s%s (%s)\n",
8546 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8547 paTests[iTest].InVal.ai32[0], paTests[iTest].InVal.ai32[1],
8548 iVar ? " " : "", fMxcsr,
8549 FormatR32(&ValOut.ar32[0]), FormatR32(&ValOut.ar32[1]),
8550 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8551 FormatR32(&paTests[iTest].OutVal.ar32[0]), FormatR32(&paTests[iTest].OutVal.ar32[1]),
8552 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8553 ( !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[0], &paTests[iTest].OutVal.ar32[0])
8554 || !RTFLOAT32U_ARE_IDENTICAL(&ValOut.ar32[1], &paTests[iTest].OutVal.ar32[1]))
8555 ? " - val" : "",
8556 FormatMxcsr(paTests[iTest].fMxcsrIn));
8557 }
8558 }
8559 }
8560}
8561
8562
8563/*
8564 * Convert SSE operations converting single-precision floating point values to signed double-word values.
8565 */
/* Sub-test type SSE_CONVERT_MM_I32_XMM_R32_T: SSE_CONVERT_MM_R32_TEST_T records
   driven through PFNIEMAIMPLMXCSRU64U64 workers (presumably declared by the
   macro below - see its definition for the exact layout). */
TYPEDEF_SUBTEST_TYPE(SSE_CONVERT_MM_I32_XMM_R32_T, SSE_CONVERT_MM_R32_TEST_T, PFNIEMAIMPLMXCSRU64U64);

/** The workers iterated by SseConvertMmI32XmmR32Generate and SseConvertMmI32XmmR32Test. */
static const SSE_CONVERT_MM_I32_XMM_R32_T g_aSseConvertMmI32XmmR32[] =
{
    ENTRY_BIN(cvtps2pi_u128),   /* rounding variant (per the instruction name) */
    ENTRY_BIN(cvttps2pi_u128)   /* truncating variant (per the instruction name) */
};
8573
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Runs @a pfn once on @a u64Src with @a fMxcsrIn as MXCSR input and appends
 * the resulting test record to @a pStrmOut.
 *
 * @returns The MXCSR value the worker left behind.
 * @param   pStrmOut    Stream receiving the binary record.
 * @param   pfn         The worker to invoke.
 * @param   pTestData   Record to complete; ar32InVal must already be filled in.
 * @param   u64Src      The two 32-bit inputs packed into one 64-bit value.
 * @param   fMxcsrIn    The MXCSR value handed to the worker.
 */
static uint32_t SseConvertMmI32XmmR32GenerateOne(PRTSTREAM pStrmOut, PFNIEMAIMPLMXCSRU64U64 pfn,
                                                 SSE_CONVERT_MM_R32_TEST_T *pTestData, uint64_t u64Src,
                                                 uint32_t fMxcsrIn)
{
    uint32_t fMxcsrOut = fMxcsrIn;
    uint64_t u64Result;
    pfn(&fMxcsrOut, &u64Result, u64Src);
    pTestData->fMxcsrIn  = fMxcsrIn;
    pTestData->fMxcsrOut = fMxcsrOut;
    pTestData->OutVal.u  = u64Result;
    RTStrmWrite(pStrmOut, pTestData, sizeof(*pTestData));
    return fMxcsrOut;
}

/**
 * Generates the binary test data files for the g_aSseConvertMmI32XmmR32 workers.
 *
 * @returns RTEXITCODE_SUCCESS, or RTEXITCODE_FAILURE if a data file could not
 *          be opened or closed.
 * @param   pszDataFileFmt  Format string for the data file name; the worker
 *                          name is passed as its argument.
 * @param   cTests          Requested number of random inputs (raised to at
 *                          least 192); the special inputs come on top.
 */
static RTEXITCODE SseConvertMmI32XmmR32Generate(const char *pszDataFileFmt, uint32_t cTests)
{
    cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */

    static struct { RTFLOAT32U aVal1[2]; } const s_aSpecials[] =
    {
        { { RTFLOAT32U_INIT_ZERO(0), RTFLOAT32U_INIT_ZERO(0) } },
        { { RTFLOAT32U_INIT_ZERO(1), RTFLOAT32U_INIT_ZERO(1) } },
        { { RTFLOAT32U_INIT_INF(0),  RTFLOAT32U_INIT_INF(0)  } },
        { { RTFLOAT32U_INIT_INF(1),  RTFLOAT32U_INIT_INF(1)  } }
        /** @todo More specials. */
    };

    uint32_t const cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmI32XmmR32); iFn++)
    {
        /* Prefer the native worker when the table provides one. */
        PFNIEMAIMPLMXCSRU64U64 const pfnWorker = g_aSseConvertMmI32XmmR32[iFn].pfnNative
                                               ? g_aSseConvertMmI32XmmR32[iFn].pfnNative
                                               : g_aSseConvertMmI32XmmR32[iFn].pfn;

        PRTSTREAM pStrmOut = NULL;
        int rc = RTStrmOpenF("wb", &pStrmOut, pszDataFileFmt, g_aSseConvertMmI32XmmR32[iFn].pszName);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to open data file for %s for writing: %Rrc", g_aSseConvertMmI32XmmR32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }

        uint32_t cNormalPairsSeen = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            SSE_CONVERT_MM_R32_TEST_T TestData; RT_ZERO(TestData);

            /* Random inputs first, the special values after them. */
            if (iTest < cTests)
            {
                TestData.ar32InVal[0] = RandR32Src(iTest);
                TestData.ar32InVal[1] = RandR32Src(iTest);
            }
            else
            {
                TestData.ar32InVal[0] = s_aSpecials[iTest - cTests].aVal1[0];
                TestData.ar32InVal[1] = s_aSpecials[iTest - cTests].aVal1[1];
            }

            bool const fBothNormal = RTFLOAT32U_IS_NORMAL(&TestData.ar32InVal[0])
                                  && RTFLOAT32U_IS_NORMAL(&TestData.ar32InVal[1]);
            if (fBothNormal)
                cNormalPairsSeen++;
            else if (   cNormalPairsSeen < cMinNormalPairs
                     && iTest + cMinNormalPairs >= cTests
                     && iTest < cTests)
            {
                /* Running short on normal pairs: redo this slot with fresh random input. */
                iTest -= 1;
                continue;
            }

            /* The worker takes both 32-bit inputs packed into a single 64-bit argument. */
            RTFLOAT64U PackedSrc;
            PackedSrc.au32[0] = TestData.ar32InVal[0].u;
            PackedSrc.au32[1] = TestData.ar32InVal[1].u;

            uint32_t const fMxcsrRand = RandMxcsr() & X86_MXCSR_XCPT_FLAGS;
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                for (uint8_t iDaz = 0; iDaz < 2; iDaz++)
                    for (uint8_t iFz = 0; iFz < 2; iFz++)
                    {
                        /* Record 1: all exception mask bits set. */
                        uint32_t fMxcsrIn = (fMxcsrRand & ~X86_MXCSR_RC_MASK)
                                          | (iRounding << X86_MXCSR_RC_SHIFT)
                                          | (iDaz ? X86_MXCSR_DAZ : 0)
                                          | (iFz  ? X86_MXCSR_FZ  : 0)
                                          | X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrM = SseConvertMmI32XmmR32GenerateOne(pStrmOut, pfnWorker, &TestData,
                                                                                  PackedSrc.u, fMxcsrIn);

                        /* Record 2: all exception mask bits clear. */
                        fMxcsrIn &= ~X86_MXCSR_XCPT_MASK;
                        uint32_t const fMxcsrU = SseConvertMmI32XmmR32GenerateOne(pStrmOut, pfnWorker, &TestData,
                                                                                  PackedSrc.u, fMxcsrIn);

                        uint16_t fXcpt = (fMxcsrM | fMxcsrU) & X86_MXCSR_XCPT_FLAGS;
                        if (!fXcpt)
                            continue;

                        /* Record 3: mask bits clear, the observed exception flags pre-set. */
                        fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | fXcpt;
                        uint32_t const fMxcsr1    = SseConvertMmI32XmmR32GenerateOne(pStrmOut, pfnWorker, &TestData,
                                                                                     PackedSrc.u, fMxcsrIn);
                        uint32_t const fRaisedNow = fMxcsr1 & X86_MXCSR_XCPT_FLAGS;
                        if ((fRaisedNow & fXcpt) != fRaisedNow)
                        {
                            /* New flags showed up: mask exactly the accumulated set. */
                            fXcpt |= fRaisedNow;
                            fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK) | (fXcpt << X86_MXCSR_XCPT_MASK_SHIFT);
                            SseConvertMmI32XmmR32GenerateOne(pStrmOut, pfnWorker, &TestData, PackedSrc.u, fMxcsrIn);
                        }

                        /* Several flags involved: leave each of them unmasked in turn. */
                        if (!RT_IS_POWER_OF_TWO(fXcpt))
                            for (uint16_t fUnmasked = 1; fUnmasked <= X86_MXCSR_PE; fUnmasked <<= 1)
                                if (fUnmasked & fXcpt)
                                {
                                    fMxcsrIn = (fMxcsrIn & ~X86_MXCSR_XCPT_MASK)
                                             | ((fXcpt & ~fUnmasked) << X86_MXCSR_XCPT_MASK_SHIFT);
                                    SseConvertMmI32XmmR32GenerateOne(pStrmOut, pfnWorker, &TestData, PackedSrc.u, fMxcsrIn);
                                }
                    }
        }

        rc = RTStrmClose(pStrmOut);
        if (RT_FAILURE(rc))
        {
            RTMsgError("Failed to close data file for %s: %Rrc", g_aSseConvertMmI32XmmR32[iFn].pszName, rc);
            return RTEXITCODE_FAILURE;
        }
    }

    return RTEXITCODE_SUCCESS;
}
#endif
8700
8701static void SseConvertMmI32XmmR32Test(void)
8702{
8703 X86FXSTATE State;
8704 RT_ZERO(State);
8705
8706 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aSseConvertMmI32XmmR32); iFn++)
8707 {
8708 if (!SubTestAndCheckIfEnabled(g_aSseConvertMmI32XmmR32[iFn].pszName))
8709 continue;
8710
8711 uint32_t const cTests = *g_aSseConvertMmI32XmmR32[iFn].pcTests;
8712 SSE_CONVERT_MM_R32_TEST_T const * const paTests = g_aSseConvertMmI32XmmR32[iFn].paTests;
8713 PFNIEMAIMPLMXCSRU64U64 pfn = g_aSseConvertMmI32XmmR32[iFn].pfn;
8714 uint32_t const cVars = COUNT_VARIATIONS(g_aSseConvertMmI32XmmR32[iFn]);
8715 if (!cTests) RTTestSkipped(g_hTest, "no tests");
8716 for (uint32_t iVar = 0; iVar < cVars; iVar++)
8717 {
8718 for (uint32_t iTest = 0; iTest < cTests / sizeof(*paTests); iTest++)
8719 {
8720 RTUINT64U ValOut;
8721 RTUINT64U ValIn;
8722
8723 ValIn.au32[0] = paTests[iTest].ar32InVal[0].u;
8724 ValIn.au32[1] = paTests[iTest].ar32InVal[1].u;
8725
8726 uint32_t fMxcsr = paTests[iTest].fMxcsrIn;
8727 pfn(&fMxcsr, &ValOut.u, ValIn.u);
8728 if ( fMxcsr != paTests[iTest].fMxcsrOut
8729 || ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8730 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8731 RTTestFailed(g_hTest, "#%04u%s: mxcsr=%#08x in1=%s'%s \n"
8732 "%s -> mxcsr=%#08x %RI32'%RI32\n"
8733 "%s expected %#08x %RI32'%RI32%s%s (%s)\n",
8734 iTest, iVar ? "/n" : "", paTests[iTest].fMxcsrIn,
8735 FormatR32(&paTests[iTest].ar32InVal[0]), FormatR32(&paTests[iTest].ar32InVal[1]),
8736 iVar ? " " : "", fMxcsr,
8737 ValOut.ai32[0], ValOut.ai32[1],
8738 iVar ? " " : "", paTests[iTest].fMxcsrOut,
8739 paTests[iTest].OutVal.ai32[0], paTests[iTest].OutVal.ai32[1],
8740 MxcsrDiff(fMxcsr, paTests[iTest].fMxcsrOut),
8741 ( ValOut.ai32[0] != paTests[iTest].OutVal.ai32[0]
8742 || ValOut.ai32[1] != paTests[iTest].OutVal.ai32[1])
8743 ? " - val" : "",
8744 FormatMxcsr(paTests[iTest].fMxcsrIn));
8745 }
8746 }
8747 }
8748}
8749
8750
8751
8752int main(int argc, char **argv)
8753{
8754 int rc = RTR3InitExe(argc, &argv, 0);
8755 if (RT_FAILURE(rc))
8756 return RTMsgInitFailure(rc);
8757
8758 /*
8759 * Determin the host CPU.
8760 * If not using the IEMAllAImpl.asm code, this will be set to Intel.
8761 */
8762#if (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && !defined(IEM_WITHOUT_ASSEMBLY)
8763 g_idxCpuEflFlavour = ASMIsAmdCpu() || ASMIsHygonCpu()
8764 ? IEMTARGETCPU_EFL_BEHAVIOR_AMD
8765 : IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
8766#else
8767 g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
8768#endif
8769
8770 /*
8771 * Parse arguments.
8772 */
8773 enum { kModeNotSet, kModeTest, kModeGenerate }
8774 enmMode = kModeNotSet;
8775 bool fInt = true;
8776 bool fFpuLdSt = true;
8777 bool fFpuBinary1 = true;
8778 bool fFpuBinary2 = true;
8779 bool fFpuOther = true;
8780 bool fCpuData = true;
8781 bool fCommonData = true;
8782 bool fSseFpBinary = true;
8783 bool fSseFpOther = true;
8784 uint32_t const cDefaultTests = 96;
8785 uint32_t cTests = cDefaultTests;
8786 RTGETOPTDEF const s_aOptions[] =
8787 {
8788 // mode:
8789 { "--generate", 'g', RTGETOPT_REQ_NOTHING },
8790 { "--test", 't', RTGETOPT_REQ_NOTHING },
8791 // test selection (both)
8792 { "--all", 'a', RTGETOPT_REQ_NOTHING },
8793 { "--none", 'z', RTGETOPT_REQ_NOTHING },
8794 { "--zap", 'z', RTGETOPT_REQ_NOTHING },
8795 { "--fpu-ld-st", 'F', RTGETOPT_REQ_NOTHING }, /* FPU stuff is upper case */
8796 { "--fpu-load-store", 'F', RTGETOPT_REQ_NOTHING },
8797 { "--fpu-binary-1", 'B', RTGETOPT_REQ_NOTHING },
8798 { "--fpu-binary-2", 'P', RTGETOPT_REQ_NOTHING },
8799 { "--fpu-other", 'O', RTGETOPT_REQ_NOTHING },
8800 { "--sse-fp-binary", 'S', RTGETOPT_REQ_NOTHING },
8801 { "--sse-fp-other", 'T', RTGETOPT_REQ_NOTHING },
8802 { "--int", 'i', RTGETOPT_REQ_NOTHING },
8803 { "--include", 'I', RTGETOPT_REQ_STRING },
8804 { "--exclude", 'X', RTGETOPT_REQ_STRING },
8805 // generation parameters
8806 { "--common", 'm', RTGETOPT_REQ_NOTHING },
8807 { "--cpu", 'c', RTGETOPT_REQ_NOTHING },
8808 { "--number-of-tests", 'n', RTGETOPT_REQ_UINT32 },
8809 { "--verbose", 'v', RTGETOPT_REQ_NOTHING },
8810 { "--quiet", 'q', RTGETOPT_REQ_NOTHING },
8811 };
8812
8813 RTGETOPTSTATE State;
8814 rc = RTGetOptInit(&State, argc, argv, s_aOptions, RT_ELEMENTS(s_aOptions), 1, 0);
8815 AssertRCReturn(rc, RTEXITCODE_FAILURE);
8816
8817 RTGETOPTUNION ValueUnion;
8818 while ((rc = RTGetOpt(&State, &ValueUnion)))
8819 {
8820 switch (rc)
8821 {
8822 case 'g':
8823 enmMode = kModeGenerate;
8824 break;
8825 case 't':
8826 enmMode = kModeTest;
8827 break;
8828
8829 case 'a':
8830 fCpuData = true;
8831 fCommonData = true;
8832 fInt = true;
8833 fFpuLdSt = true;
8834 fFpuBinary1 = true;
8835 fFpuBinary2 = true;
8836 fFpuOther = true;
8837 fSseFpBinary = true;
8838 fSseFpOther = true;
8839 break;
8840 case 'z':
8841 fCpuData = false;
8842 fCommonData = false;
8843 fInt = false;
8844 fFpuLdSt = false;
8845 fFpuBinary1 = false;
8846 fFpuBinary2 = false;
8847 fFpuOther = false;
8848 fSseFpBinary = false;
8849 fSseFpOther = false;
8850 break;
8851
8852 case 'F':
8853 fFpuLdSt = true;
8854 break;
8855 case 'O':
8856 fFpuOther = true;
8857 break;
8858 case 'B':
8859 fFpuBinary1 = true;
8860 break;
8861 case 'P':
8862 fFpuBinary2 = true;
8863 break;
8864 case 'S':
8865 fSseFpBinary = true;
8866 break;
8867 case 'T':
8868 fSseFpOther = true;
8869 break;
8870 case 'i':
8871 fInt = true;
8872 break;
8873
8874 case 'I':
8875 if (g_cIncludeTestPatterns >= RT_ELEMENTS(g_apszIncludeTestPatterns))
8876 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many include patterns (max %zu)",
8877 RT_ELEMENTS(g_apszIncludeTestPatterns));
8878 g_apszIncludeTestPatterns[g_cIncludeTestPatterns++] = ValueUnion.psz;
8879 break;
8880 case 'X':
8881 if (g_cExcludeTestPatterns >= RT_ELEMENTS(g_apszExcludeTestPatterns))
8882 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many exclude patterns (max %zu)",
8883 RT_ELEMENTS(g_apszExcludeTestPatterns));
8884 g_apszExcludeTestPatterns[g_cExcludeTestPatterns++] = ValueUnion.psz;
8885 break;
8886
8887 case 'm':
8888 fCommonData = true;
8889 break;
8890 case 'c':
8891 fCpuData = true;
8892 break;
8893 case 'n':
8894 cTests = ValueUnion.u32;
8895 break;
8896
8897 case 'q':
8898 g_cVerbosity = 0;
8899 break;
8900 case 'v':
8901 g_cVerbosity++;
8902 break;
8903
8904 case 'h':
8905 RTPrintf("usage: %s <-g|-t> [options]\n"
8906 "\n"
8907 "Mode:\n"
8908 " -g, --generate\n"
8909 " Generate test data.\n"
8910 " -t, --test\n"
8911 " Execute tests.\n"
8912 "\n"
8913 "Test selection (both modes):\n"
8914 " -a, --all\n"
8915 " Enable all tests and generated test data. (default)\n"
8916 " -z, --zap, --none\n"
8917 " Disable all tests and test data types.\n"
8918 " -i, --int\n"
8919 " Enable non-FPU tests.\n"
8920 " -F, --fpu-ld-st\n"
8921 " Enable FPU load and store tests.\n"
8922 " -B, --fpu-binary-1\n"
8923 " Enable FPU binary 80-bit FP tests.\n"
8924 " -P, --fpu-binary-2\n"
8925 " Enable FPU binary 64- and 32-bit FP tests.\n"
8926 " -O, --fpu-other\n"
8927 " Enable FPU binary 64- and 32-bit FP tests.\n"
8928 " -S, --sse-fp-binary\n"
8929 " Enable SSE binary 64- and 32-bit FP tests.\n"
8930 " -T, --sse-fp-other\n"
8931 " Enable misc SSE 64- and 32-bit FP tests.\n"
8932 " -I,--include=<test-patter>\n"
8933 " Enable tests matching the given pattern.\n"
8934 " -X,--exclude=<test-patter>\n"
8935 " Skip tests matching the given pattern (overrides --include).\n"
8936 "\n"
8937 "Generation:\n"
8938 " -m, --common\n"
8939 " Enable generating common test data.\n"
8940 " -c, --only-cpu\n"
8941 " Enable generating CPU specific test data.\n"
8942 " -n, --number-of-test <count>\n"
8943 " Number of tests to generate. Default: %u\n"
8944 "\n"
8945 "Other:\n"
8946 " -v, --verbose\n"
8947 " -q, --quiet\n"
8948 " Noise level. Default: --quiet\n"
8949 , argv[0], cDefaultTests);
8950 return RTEXITCODE_SUCCESS;
8951 default:
8952 return RTGetOptPrintError(rc, &ValueUnion);
8953 }
8954 }
8955
8956 /*
8957 * Generate data?
8958 */
8959 if (enmMode == kModeGenerate)
8960 {
8961#ifdef TSTIEMAIMPL_WITH_GENERATOR
8962 char szCpuDesc[256] = {0};
8963 RTMpGetDescription(NIL_RTCPUID, szCpuDesc, sizeof(szCpuDesc));
8964 const char * const pszCpuType = g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD ? "Amd" : "Intel";
8965# if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
8966 const char * const pszBitBucket = "NUL";
8967# else
8968 const char * const pszBitBucket = "/dev/null";
8969# endif
8970
8971 if (cTests == 0)
8972 cTests = cDefaultTests;
8973 g_cZeroDstTests = RT_MIN(cTests / 16, 32);
8974 g_cZeroSrcTests = g_cZeroDstTests * 2;
8975
8976 if (fInt)
8977 {
8978 const char *pszDataFile = fCommonData ? "tstIEMAImplDataInt.cpp" : pszBitBucket;
8979 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
8980 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
8981 ? "tstIEMAImplDataInt-Amd.cpp" : "tstIEMAImplDataInt-Intel.cpp";
8982 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
8983 if (!pStrmData || !pStrmDataCpu)
8984 return RTEXITCODE_FAILURE;
8985
8986 BinU8Generate( pStrmData, pStrmDataCpu, cTests);
8987 BinU16Generate(pStrmData, pStrmDataCpu, cTests);
8988 BinU32Generate(pStrmData, pStrmDataCpu, cTests);
8989 BinU64Generate(pStrmData, pStrmDataCpu, cTests);
8990 ShiftDblGenerate(pStrmDataCpu, RT_MAX(cTests, 128));
8991 UnaryGenerate(pStrmData, cTests);
8992 ShiftGenerate(pStrmDataCpu, cTests);
8993 MulDivGenerate(pStrmDataCpu, cTests);
8994
8995 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
8996 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
8997 if (rcExit != RTEXITCODE_SUCCESS)
8998 return rcExit;
8999 }
9000
9001 if (fFpuLdSt)
9002 {
9003 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuLdSt.cpp" : pszBitBucket;
9004 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
9005 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
9006 ? "tstIEMAImplDataFpuLdSt-Amd.cpp" : "tstIEMAImplDataFpuLdSt-Intel.cpp";
9007 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
9008 if (!pStrmData || !pStrmDataCpu)
9009 return RTEXITCODE_FAILURE;
9010
9011 FpuLdConstGenerate(pStrmData, cTests);
9012 FpuLdIntGenerate(pStrmData, cTests);
9013 FpuLdD80Generate(pStrmData, cTests);
9014 FpuStIntGenerate(pStrmData, pStrmDataCpu, cTests);
9015 FpuStD80Generate(pStrmData, cTests);
9016 uint32_t const cTests2 = RT_MAX(cTests, 384); /* need better coverage for the next ones. */
9017 FpuLdMemGenerate(pStrmData, cTests2);
9018 FpuStMemGenerate(pStrmData, cTests2);
9019
9020 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
9021 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
9022 if (rcExit != RTEXITCODE_SUCCESS)
9023 return rcExit;
9024 }
9025
9026 if (fFpuBinary1)
9027 {
9028 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary1.cpp" : pszBitBucket;
9029 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
9030 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
9031 ? "tstIEMAImplDataFpuBinary1-Amd.cpp" : "tstIEMAImplDataFpuBinary1-Intel.cpp";
9032 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
9033 if (!pStrmData || !pStrmDataCpu)
9034 return RTEXITCODE_FAILURE;
9035
9036 FpuBinaryR80Generate(pStrmData, pStrmDataCpu, cTests);
9037 FpuBinaryFswR80Generate(pStrmData, cTests);
9038 FpuBinaryEflR80Generate(pStrmData, cTests);
9039
9040 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
9041 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
9042 if (rcExit != RTEXITCODE_SUCCESS)
9043 return rcExit;
9044 }
9045
9046 if (fFpuBinary2)
9047 {
9048 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary2.cpp" : pszBitBucket;
9049 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
9050 const char *pszDataCpuFile = pszBitBucket; /*!fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
9051 ? "tstIEMAImplDataFpuBinary2-Amd.cpp" : "tstIEMAImplDataFpuBinary2-Intel.cpp"; */
9052 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
9053 if (!pStrmData || !pStrmDataCpu)
9054 return RTEXITCODE_FAILURE;
9055
9056 FpuBinaryR64Generate(pStrmData, cTests);
9057 FpuBinaryR32Generate(pStrmData, cTests);
9058 FpuBinaryI32Generate(pStrmData, cTests);
9059 FpuBinaryI16Generate(pStrmData, cTests);
9060 FpuBinaryFswR64Generate(pStrmData, cTests);
9061 FpuBinaryFswR32Generate(pStrmData, cTests);
9062 FpuBinaryFswI32Generate(pStrmData, cTests);
9063 FpuBinaryFswI16Generate(pStrmData, cTests);
9064
9065 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
9066 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
9067 if (rcExit != RTEXITCODE_SUCCESS)
9068 return rcExit;
9069 }
9070
9071 if (fFpuOther)
9072 {
9073 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuOther.cpp" : pszBitBucket;
9074 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
9075 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
9076 ? "tstIEMAImplDataFpuOther-Amd.cpp" : "tstIEMAImplDataFpuOther-Intel.cpp";
9077 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
9078 if (!pStrmData || !pStrmDataCpu)
9079 return RTEXITCODE_FAILURE;
9080
9081 FpuUnaryR80Generate(pStrmData, pStrmDataCpu, cTests);
9082 FpuUnaryFswR80Generate(pStrmData, pStrmDataCpu, cTests);
9083 FpuUnaryTwoR80Generate(pStrmData, pStrmDataCpu, cTests);
9084
9085 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
9086 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
9087 if (rcExit != RTEXITCODE_SUCCESS)
9088 return rcExit;
9089 }
9090
9091 if (fSseFpBinary)
9092 {
9093 const char *pszDataFileFmt = fCommonData ? "tstIEMAImplDataSseBinary-%s.bin" : pszBitBucket;
9094
9095 RTEXITCODE rcExit = SseBinaryR32Generate(pszDataFileFmt, cTests);
9096 if (rcExit == RTEXITCODE_SUCCESS)
9097 rcExit = SseBinaryR64Generate(pszDataFileFmt, cTests);
9098 if (rcExit == RTEXITCODE_SUCCESS)
9099 rcExit = SseBinaryU128R32Generate(pszDataFileFmt, cTests);
9100 if (rcExit == RTEXITCODE_SUCCESS)
9101 rcExit = SseBinaryU128R64Generate(pszDataFileFmt, cTests);
9102
9103 if (rcExit == RTEXITCODE_SUCCESS)
9104 rcExit = SseBinaryI32R64Generate(pszDataFileFmt, cTests);
9105 if (rcExit == RTEXITCODE_SUCCESS)
9106 rcExit = SseBinaryI64R64Generate(pszDataFileFmt, cTests);
9107 if (rcExit == RTEXITCODE_SUCCESS)
9108 rcExit = SseBinaryI32R32Generate(pszDataFileFmt, cTests);
9109 if (rcExit == RTEXITCODE_SUCCESS)
9110 rcExit = SseBinaryI64R32Generate(pszDataFileFmt, cTests);
9111
9112 if (rcExit == RTEXITCODE_SUCCESS)
9113 rcExit = SseBinaryR64I32Generate(pszDataFileFmt, cTests);
9114 if (rcExit == RTEXITCODE_SUCCESS)
9115 rcExit = SseBinaryR64I64Generate(pszDataFileFmt, cTests);
9116 if (rcExit == RTEXITCODE_SUCCESS)
9117 rcExit = SseBinaryR32I32Generate(pszDataFileFmt, cTests);
9118 if (rcExit == RTEXITCODE_SUCCESS)
9119 rcExit = SseBinaryR32I64Generate(pszDataFileFmt, cTests);
9120 if (rcExit != RTEXITCODE_SUCCESS)
9121 return rcExit;
9122 }
9123
9124 if (fSseFpOther)
9125 {
9126 const char *pszDataFileFmtCmp = fCommonData ? "tstIEMAImplDataSseCompare-%s.bin" : pszBitBucket;
9127 const char *pszDataFileFmtConv = fCommonData ? "tstIEMAImplDataSseConvert-%s.bin" : pszBitBucket;
9128
9129 RTEXITCODE rcExit = SseCompareEflR32R32Generate(pszDataFileFmtCmp, cTests);
9130 if (rcExit == RTEXITCODE_SUCCESS)
9131 rcExit = SseCompareEflR64R64Generate(pszDataFileFmtCmp, cTests);
9132 if (rcExit == RTEXITCODE_SUCCESS)
9133 rcExit = SseCompareF2XmmR32Imm8Generate(pszDataFileFmtCmp, cTests);
9134 if (rcExit == RTEXITCODE_SUCCESS)
9135 rcExit = SseCompareF2XmmR64Imm8Generate(pszDataFileFmtCmp, cTests);
9136 if (rcExit == RTEXITCODE_SUCCESS)
9137 rcExit = SseConvertXmmI32R32Generate(pszDataFileFmtConv, cTests);
9138 if (rcExit == RTEXITCODE_SUCCESS)
9139 rcExit = SseConvertXmmR32I32Generate(pszDataFileFmtConv, cTests);
9140 if (rcExit == RTEXITCODE_SUCCESS)
9141 rcExit = SseConvertXmmI32R64Generate(pszDataFileFmtConv, cTests);
9142 if (rcExit == RTEXITCODE_SUCCESS)
9143 rcExit = SseConvertXmmR64I32Generate(pszDataFileFmtConv, cTests);
9144 if (rcExit == RTEXITCODE_SUCCESS)
9145 rcExit = SseConvertMmXmmGenerate(pszDataFileFmtConv, cTests);
9146 if (rcExit == RTEXITCODE_SUCCESS)
9147 rcExit = SseConvertXmmR32MmGenerate(pszDataFileFmtConv, cTests);
9148 if (rcExit == RTEXITCODE_SUCCESS)
9149 rcExit = SseConvertXmmR64MmGenerate(pszDataFileFmtConv, cTests);
9150 if (rcExit == RTEXITCODE_SUCCESS)
9151 rcExit = SseConvertMmI32XmmR32Generate(pszDataFileFmtConv, cTests);
9152 if (rcExit != RTEXITCODE_SUCCESS)
9153 return rcExit;
9154 }
9155
9156 return RTEXITCODE_SUCCESS;
9157#else
9158 return RTMsgErrorExitFailure("Test data generator not compiled in!");
9159#endif
9160 }
9161
9162 /*
9163 * Do testing. Currently disabled by default as data needs to be checked
9164 * on both Intel and AMD systems first.
9165 */
9166 rc = RTTestCreate("tstIEMAimpl", &g_hTest);
9167 AssertRCReturn(rc, RTEXITCODE_FAILURE);
9168 if (enmMode == kModeTest)
9169 {
9170 RTTestBanner(g_hTest);
9171
9172 /* Allocate guarded memory for use in the tests. */
9173#define ALLOC_GUARDED_VAR(a_puVar) do { \
9174 rc = RTTestGuardedAlloc(g_hTest, sizeof(*a_puVar), sizeof(*a_puVar), false /*fHead*/, (void **)&a_puVar); \
9175 if (RT_FAILURE(rc)) RTTestFailed(g_hTest, "Failed to allocate guarded mem: " #a_puVar); \
9176 } while (0)
9177 ALLOC_GUARDED_VAR(g_pu8);
9178 ALLOC_GUARDED_VAR(g_pu16);
9179 ALLOC_GUARDED_VAR(g_pu32);
9180 ALLOC_GUARDED_VAR(g_pu64);
9181 ALLOC_GUARDED_VAR(g_pu128);
9182 ALLOC_GUARDED_VAR(g_pu8Two);
9183 ALLOC_GUARDED_VAR(g_pu16Two);
9184 ALLOC_GUARDED_VAR(g_pu32Two);
9185 ALLOC_GUARDED_VAR(g_pu64Two);
9186 ALLOC_GUARDED_VAR(g_pu128Two);
9187 ALLOC_GUARDED_VAR(g_pfEfl);
9188 if (RTTestErrorCount(g_hTest) == 0)
9189 {
9190 if (fInt)
9191 {
9192 BinU8Test();
9193 BinU16Test();
9194 BinU32Test();
9195 BinU64Test();
9196 XchgTest();
9197 XaddTest();
9198 CmpXchgTest();
9199 CmpXchg8bTest();
9200 CmpXchg16bTest();
9201 ShiftDblTest();
9202 UnaryTest();
9203 ShiftTest();
9204 MulDivTest();
9205 BswapTest();
9206 }
9207
9208 if (fFpuLdSt)
9209 {
9210 FpuLoadConstTest();
9211 FpuLdMemTest();
9212 FpuLdIntTest();
9213 FpuLdD80Test();
9214 FpuStMemTest();
9215 FpuStIntTest();
9216 FpuStD80Test();
9217 }
9218
9219 if (fFpuBinary1)
9220 {
9221 FpuBinaryR80Test();
9222 FpuBinaryFswR80Test();
9223 FpuBinaryEflR80Test();
9224 }
9225
9226 if (fFpuBinary2)
9227 {
9228 FpuBinaryR64Test();
9229 FpuBinaryR32Test();
9230 FpuBinaryI32Test();
9231 FpuBinaryI16Test();
9232 FpuBinaryFswR64Test();
9233 FpuBinaryFswR32Test();
9234 FpuBinaryFswI32Test();
9235 FpuBinaryFswI16Test();
9236 }
9237
9238 if (fFpuOther)
9239 {
9240 FpuUnaryR80Test();
9241 FpuUnaryFswR80Test();
9242 FpuUnaryTwoR80Test();
9243 }
9244
9245 if (fSseFpBinary)
9246 {
9247 SseBinaryR32Test();
9248 SseBinaryR64Test();
9249 SseBinaryU128R32Test();
9250 SseBinaryU128R64Test();
9251
9252 SseBinaryI32R64Test();
9253 SseBinaryI64R64Test();
9254 SseBinaryI32R32Test();
9255 SseBinaryI64R32Test();
9256
9257 SseBinaryR64I32Test();
9258 SseBinaryR64I64Test();
9259 SseBinaryR32I32Test();
9260 SseBinaryR32I64Test();
9261 }
9262
9263 if (fSseFpOther)
9264 {
9265 SseCompareEflR32R32Test();
9266 SseCompareEflR64R64Test();
9267 SseCompareEflR64R64Test();
9268 SseCompareF2XmmR32Imm8Test();
9269 SseCompareF2XmmR64Imm8Test();
9270 SseConvertXmmI32R32Test();
9271 SseConvertXmmR32I32Test();
9272 SseConvertXmmI32R64Test();
9273 SseConvertXmmR64I32Test();
9274 SseConvertMmXmmTest();
9275 SseConvertXmmR32MmTest();
9276 SseConvertXmmR64MmTest();
9277 SseConvertMmI32XmmR32Test();
9278 }
9279 }
9280 return RTTestSummaryAndDestroy(g_hTest);
9281 }
9282 return RTTestSkipAndDestroy(g_hTest, "unfinished testcase");
9283}
9284
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette