VirtualBox

source: vbox/trunk/src/VBox/VMM/testcase/tstIEMAImpl.cpp@ 94991

Last change on this file since 94991 was 94695, checked in by vboxsync, 3 years ago

tstIEMAImpl: verbose/quiet options. bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 211.1 KB
Line 
1/* $Id: tstIEMAImpl.cpp 94695 2022-04-22 23:13:12Z vboxsync $ */
2/** @file
3 * IEM Assembly Instruction Helper Testcase.
4 */
5
6/*
7 * Copyright (C) 2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#include "../include/IEMInternal.h"
23
24#include <iprt/errcore.h>
25#include <VBox/log.h>
26#include <iprt/assert.h>
27#include <iprt/ctype.h>
28#include <iprt/getopt.h>
29#include <iprt/initterm.h>
30#include <iprt/message.h>
31#include <iprt/mp.h>
32#include <iprt/rand.h>
33#include <iprt/stream.h>
34#include <iprt/string.h>
35#include <iprt/test.h>
36
37#include "tstIEMAImpl.h"
38
39
40/*********************************************************************************************************************************
41* Defined Constants And Macros *
42*********************************************************************************************************************************/
/** Subtest table entry using the native EFLAGS behaviour (no separate
 *  per-vendor reference implementation). */
#define ENTRY(a_Name) ENTRY_EX(a_Name, 0)
#define ENTRY_EX(a_Name, a_uExtra) \
    { RT_XSTR(a_Name), iemAImpl_ ## a_Name, NULL, \
      g_aTests_ ## a_Name, &g_cTests_ ## a_Name, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_NATIVE /* means same for all here */ }

/** Subtest table entry for the Intel EFLAGS flavour; the plain worker is
 *  recorded as the native fallback (pfnNative). */
#define ENTRY_INTEL(a_Name, a_fEflUndef) ENTRY_INTEL_EX(a_Name, a_fEflUndef, 0)
#define ENTRY_INTEL_EX(a_Name, a_fEflUndef, a_uExtra) \
    { RT_XSTR(a_Name) "_intel", iemAImpl_ ## a_Name ## _intel, iemAImpl_ ## a_Name, \
      g_aTests_ ## a_Name ## _intel, &g_cTests_ ## a_Name ## _intel, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_INTEL }

/** Subtest table entry for the AMD EFLAGS flavour; the plain worker is
 *  recorded as the native fallback (pfnNative). */
#define ENTRY_AMD(a_Name, a_fEflUndef) ENTRY_AMD_EX(a_Name, a_fEflUndef, 0)
#define ENTRY_AMD_EX(a_Name, a_fEflUndef, a_uExtra) \
    { RT_XSTR(a_Name) "_amd", iemAImpl_ ## a_Name ## _amd, iemAImpl_ ## a_Name, \
      g_aTests_ ## a_Name ## _amd, &g_cTests_ ## a_Name ## _amd, \
      a_uExtra, IEMTARGETCPU_EFL_BEHAVIOR_AMD }

/** Declares a subtest descriptor type binding a worker function pointer type
 *  to its pre-generated test data and EFLAGS flavour. */
#define TYPEDEF_SUBTEST_TYPE(a_TypeName, a_TestType, a_FunctionPtrType) \
    typedef struct a_TypeName \
    { \
        const char *pszName; /**< Subtest name (as given to RTTestSub). */ \
        a_FunctionPtrType pfn; /**< The worker under test. */ \
        a_FunctionPtrType pfnNative; /**< Plain/native variant, NULL if pfn is it. */ \
        a_TestType const *paTests; /**< Pre-generated test data array. */ \
        uint32_t const *pcTests; /**< Number of entries in paTests. */ \
        uint32_t uExtra; /**< Extra per-entry data (meaning depends on subtest). */ \
        uint8_t idxCpuEflFlavour; /**< IEMTARGETCPU_EFL_BEHAVIOR_XXX value. */ \
    } a_TypeName

/** Number of variations to run for a subtest: 2 when the entry also has a
 *  native worker matching the host CPU's EFLAGS flavour, otherwise 1. */
#define COUNT_VARIATIONS(a_SubTest) \
    (1 + ((a_SubTest).idxCpuEflFlavour == g_idxCpuEflFlavour && (a_SubTest).pfnNative) )
75
76
77/*********************************************************************************************************************************
78* Global Variables *
79*********************************************************************************************************************************/
80static RTTEST g_hTest;
81static uint8_t g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
82#ifdef TSTIEMAIMPL_WITH_GENERATOR
83static uint32_t g_cZeroDstTests = 2;
84static uint32_t g_cZeroSrcTests = 4;
85#endif
86static uint8_t *g_pu8, *g_pu8Two;
87static uint16_t *g_pu16, *g_pu16Two;
88static uint32_t *g_pu32, *g_pu32Two, *g_pfEfl;
89static uint64_t *g_pu64, *g_pu64Two;
90static RTUINT128U *g_pu128, *g_pu128Two;
91
92static char g_aszBuf[16][256];
93static unsigned g_idxBuf = 0;
94
95static uint32_t g_cIncludeTestPatterns;
96static uint32_t g_cExcludeTestPatterns;
97static const char *g_apszIncludeTestPatterns[64];
98static const char *g_apszExcludeTestPatterns[64];
99
100static unsigned g_cVerbosity = 0;
101
102
103/*********************************************************************************************************************************
104* Internal Functions *
105*********************************************************************************************************************************/
106static const char *FormatR80(PCRTFLOAT80U pr80);
107static const char *FormatR64(PCRTFLOAT64U pr64);
108static const char *FormatR32(PCRTFLOAT32U pr32);
109
110
111/*
112 * Random helpers.
113 */
114
115static uint32_t RandEFlags(void)
116{
117 uint32_t fEfl = RTRandU32();
118 return (fEfl & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK;
119}
120
121#ifdef TSTIEMAIMPL_WITH_GENERATOR
122
123static uint8_t RandU8(void)
124{
125 return RTRandU32Ex(0, 0xff);
126}
127
128
129static uint16_t RandU16(void)
130{
131 return RTRandU32Ex(0, 0xffff);
132}
133
134
135static uint32_t RandU32(void)
136{
137 return RTRandU32();
138}
139
140#endif
141
142static uint64_t RandU64(void)
143{
144 return RTRandU64();
145}
146
147
148static RTUINT128U RandU128(void)
149{
150 RTUINT128U Ret;
151 Ret.s.Hi = RTRandU64();
152 Ret.s.Lo = RTRandU64();
153 return Ret;
154}
155
156#ifdef TSTIEMAIMPL_WITH_GENERATOR
157
158static uint8_t RandU8Dst(uint32_t iTest)
159{
160 if (iTest < g_cZeroDstTests)
161 return 0;
162 return RandU8();
163}
164
165
166static uint8_t RandU8Src(uint32_t iTest)
167{
168 if (iTest < g_cZeroSrcTests)
169 return 0;
170 return RandU8();
171}
172
173
174static uint16_t RandU16Dst(uint32_t iTest)
175{
176 if (iTest < g_cZeroDstTests)
177 return 0;
178 return RandU16();
179}
180
181
182static uint16_t RandU16Src(uint32_t iTest)
183{
184 if (iTest < g_cZeroSrcTests)
185 return 0;
186 return RandU16();
187}
188
189
190static uint32_t RandU32Dst(uint32_t iTest)
191{
192 if (iTest < g_cZeroDstTests)
193 return 0;
194 return RandU32();
195}
196
197
198static uint32_t RandU32Src(uint32_t iTest)
199{
200 if (iTest < g_cZeroSrcTests)
201 return 0;
202 return RandU32();
203}
204
205
206static uint64_t RandU64Dst(uint32_t iTest)
207{
208 if (iTest < g_cZeroDstTests)
209 return 0;
210 return RandU64();
211}
212
213
214static uint64_t RandU64Src(uint32_t iTest)
215{
216 if (iTest < g_cZeroSrcTests)
217 return 0;
218 return RandU64();
219}
220
221
222/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
223static int16_t RandI16Src2(uint32_t iTest)
224{
225 if (iTest < 18 * 4)
226 switch (iTest % 4)
227 {
228 case 0: return 0;
229 case 1: return INT16_MAX;
230 case 2: return INT16_MIN;
231 case 3: break;
232 }
233 return (int16_t)RandU16();
234}
235
236
237/** 2nd operand for and FPU instruction, pairing with RandR80Src1. */
238static int32_t RandI32Src2(uint32_t iTest)
239{
240 if (iTest < 18 * 4)
241 switch (iTest % 4)
242 {
243 case 0: return 0;
244 case 1: return INT32_MAX;
245 case 2: return INT32_MIN;
246 case 3: break;
247 }
248 return (int32_t)RandU32();
249}
250
251
#if 0 /* currently unused */
/** Random 64-bit signed source operand. */
static int64_t RandI64Src(uint32_t iTest)
{
    RT_NOREF(iTest);
    return (int64_t)RandU64();
}
#endif
259
260
261static uint16_t RandFcw(void)
262{
263 return RandU16() & ~X86_FCW_ZERO_MASK;
264}
265
266
267static uint16_t RandFsw(void)
268{
269 AssertCompile((X86_FSW_C_MASK | X86_FSW_XCPT_ES_MASK | X86_FSW_TOP_MASK | X86_FSW_B) == 0xffff);
270 return RandU16();
271}
272
273
274static void SafeR80FractionShift(PRTFLOAT80U pr80, uint8_t cShift)
275{
276 if (pr80->sj64.uFraction >= RT_BIT_64(cShift))
277 pr80->sj64.uFraction >>= cShift;
278 else
279 pr80->sj64.uFraction = (cShift % 19) + 1;
280}
281
282
283
/**
 * Generates a random 80-bit floating point value of the basic value type
 * selected by @a bType (masked to 0..31):
 *  - 0..3:   zero, pseudo-infinity, infinity, indefinite;
 *  - 4..7:   denormals (4,5) and pseudo-denormals (6,7);
 *  - 8..9:   pseudo NaNs;
 *  - 10..13: quiet (10,11) and signalling (12,13) NaNs;
 *  - 14..15: unnormals;
 *  - 16..25: normals, where 16 targets rounding edge cases;
 *  - 26..31: whatever the initial random bit pattern happened to be.
 *
 * @param   bType       Value type selector (only the low 5 bits are used).
 * @param   cTarget     Width the value is destined to be converted to; used to
 *                      keep normal values inside the target's exponent range.
 * @param   fIntTarget  Whether @a cTarget refers to an integer type.
 */
static RTFLOAT80U RandR80Ex(uint8_t bType, unsigned cTarget = 80, bool fIntTarget = false)
{
    Assert(cTarget == (!fIntTarget ? 80U : 16U) || cTarget == 64U || cTarget == 32U || (cTarget == 59U && fIntTarget));

    /* Start from a completely random 80-bit pattern. */
    RTFLOAT80U r80;
    r80.au64[0] = RandU64();
    r80.au16[4] = RandU16();

    /*
     * Adjust the random stuff according to bType.
     */
    bType &= 0x1f;
    if (bType == 0 || bType == 1 || bType == 2 || bType == 3)
    {
        /* Zero (0), Pseudo-Infinity (1), Infinity (2), Indefinite (3). We only keep fSign here. */
        r80.sj64.uExponent = bType == 0 ? 0 : 0x7fff;
        r80.sj64.uFraction = bType <= 2 ? 0 : RT_BIT_64(62);
        r80.sj64.fInteger  = bType >= 2 ? 1 : 0;
        AssertMsg(bType != 0 || RTFLOAT80U_IS_ZERO(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType != 1 || RTFLOAT80U_IS_PSEUDO_INF(&r80), ("%s\n", FormatR80(&r80)));
        Assert(   bType != 1 || RTFLOAT80U_IS_387_INVALID(&r80));
        AssertMsg(bType != 2 || RTFLOAT80U_IS_INF(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType != 3 || RTFLOAT80U_IS_INDEFINITE(&r80), ("%s\n", FormatR80(&r80)));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* Denormals (4,5) and Pseudo denormals (6,7) */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0 && bType < 6)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0;
        r80.sj64.fInteger  = bType >= 6; /* pseudo-denormals have the integer bit set */
        AssertMsg(bType >= 6 || RTFLOAT80U_IS_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        AssertMsg(bType < 6  || RTFLOAT80U_IS_PSEUDO_DENORMAL(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
    }
    else if (bType == 8 || bType == 9)
    {
        /* Pseudo NaN. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0 && !r80.sj64.fInteger)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0x7fff;
        /* Keep the fraction non-zero with the integer bit clear (pseudo NaN shape). */
        if (r80.sj64.fInteger)
            r80.sj64.uFraction |= RT_BIT_64(62);
        else
            r80.sj64.uFraction &= ~RT_BIT_64(62);
        r80.sj64.fInteger = 0;
        AssertMsg(RTFLOAT80U_IS_PSEUDO_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s bType=%#x\n", FormatR80(&r80), bType));
        Assert(RTFLOAT80U_IS_387_INVALID(&r80));
    }
    else if (bType == 10 || bType == 11 || bType == 12 || bType == 13)
    {
        /* Quiet and signalling NaNs. */
        if (bType & 1)
            SafeR80FractionShift(&r80, r80.sj64.uExponent % 62);
        else if (r80.sj64.uFraction == 0)
            r80.sj64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT80U_FRACTION_BITS) - 1);
        r80.sj64.uExponent = 0x7fff;
        if (bType < 12)
            r80.sj64.uFraction |= RT_BIT_64(62); /* quiet */
        else
            r80.sj64.uFraction &= ~RT_BIT_64(62); /* signaling */
        r80.sj64.fInteger = 1;
        AssertMsg(bType >= 12 || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(bType < 12 || RTFLOAT80U_IS_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_SIGNALLING_NAN(&r80) || RTFLOAT80U_IS_QUIET_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(&r80), ("%s\n", FormatR80(&r80)));
        AssertMsg(RTFLOAT80U_IS_NAN(&r80), ("%s\n", FormatR80(&r80)));
    }
    else if (bType == 14 || bType == 15)
    {
        /* Unnormals: integer bit clear with an in-range, non-zero exponent. */
        if (bType & 1)
            SafeR80FractionShift(&r80, RandU8() % 62);
        r80.sj64.fInteger = 0;
        if (r80.sj64.uExponent == RTFLOAT80U_EXP_MAX || r80.sj64.uExponent == 0)
            r80.sj64.uExponent = (uint16_t)RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 1);
        AssertMsg(RTFLOAT80U_IS_UNNORMAL(&r80), ("%s\n", FormatR80(&r80)));
        Assert(RTFLOAT80U_IS_387_INVALID(&r80));
    }
    else if (bType < 26)
    {
        /* Make sure we have lots of normalized values. */
        if (!fIntTarget)
        {
            /* Clamp the exponent so the value survives conversion to the
               target float width without leaving the normal range. */
            const unsigned uMinExp = cTarget == 64 ? RTFLOAT80U_EXP_BIAS - RTFLOAT64U_EXP_BIAS
                                   : cTarget == 32 ? RTFLOAT80U_EXP_BIAS - RTFLOAT32U_EXP_BIAS : 0;
            const unsigned uMaxExp = cTarget == 64 ? uMinExp + RTFLOAT64U_EXP_MAX
                                   : cTarget == 32 ? uMinExp + RTFLOAT32U_EXP_MAX : RTFLOAT80U_EXP_MAX;
            r80.sj64.fInteger = 1;
            if (r80.sj64.uExponent <= uMinExp)
                r80.sj64.uExponent = uMinExp + 1;
            else if (r80.sj64.uExponent >= uMaxExp)
                r80.sj64.uExponent = uMaxExp - 1;

            if (bType == 16)
            {   /* All 1s is useful to testing rounding. Also try trigger special
                   behaviour by sometimes rounding out of range, while we're at it. */
                r80.sj64.uFraction = RT_BIT_64(63) - 1;
                uint8_t bExp = RandU8();
                if ((bExp & 3) == 0)
                    r80.sj64.uExponent = uMaxExp - 1;
                else if ((bExp & 3) == 1)
                    r80.sj64.uExponent = uMinExp + 1;
                else if ((bExp & 3) == 2)
                    r80.sj64.uExponent = uMinExp - (bExp & 15); /* (small numbers are mapped to subnormal values) */
            }
        }
        else
        {
            /* integer target: keep the exponent inside the integer's bit range. */
            const unsigned uMinExp = RTFLOAT80U_EXP_BIAS;
            const unsigned uMaxExp = RTFLOAT80U_EXP_BIAS + cTarget - 2;
            r80.sj64.fInteger = 1;
            if (r80.sj64.uExponent < uMinExp)
                r80.sj64.uExponent = uMinExp;
            else if (r80.sj64.uExponent > uMaxExp)
                r80.sj64.uExponent = uMaxExp;

            if (bType == 16)
            {   /* All 1s is useful to testing rounding. Also try trigger special
                   behaviour by sometimes rounding out of range, while we're at it. */
                r80.sj64.uFraction = RT_BIT_64(63) - 1;
                uint8_t bExp = RandU8();
                if ((bExp & 3) == 0)
                    r80.sj64.uExponent = uMaxExp;
                else if ((bExp & 3) == 1)
                    r80.sj64.uFraction &= ~(RT_BIT_64(cTarget - 1 - r80.sj64.uExponent) - 1); /* no rounding */
            }
        }

        AssertMsg(RTFLOAT80U_IS_NORMAL(&r80), ("%s\n", FormatR80(&r80)));
    }
    return r80;
}
422
423
424static RTFLOAT80U RandR80(unsigned cTarget = 80, bool fIntTarget = false)
425{
426 /*
427 * Make it more likely that we get a good selection of special values.
428 */
429 return RandR80Ex(RandU8(), cTarget, fIntTarget);
430
431}
432
433
434static RTFLOAT80U RandR80Src(uint32_t iTest, unsigned cTarget = 80, bool fIntTarget = false)
435{
436 /* Make sure we cover all the basic types first before going for random selection: */
437 if (iTest <= 18)
438 return RandR80Ex(18 - iTest, cTarget, fIntTarget); /* Starting with 3 normals. */
439 return RandR80(cTarget, fIntTarget);
440}
441
442
443/**
444 * Helper for RandR80Src1 and RandR80Src2 that converts bType from a 0..11 range
445 * to a 0..17, covering all basic value types.
446 */
447static uint8_t RandR80Src12RemapType(uint8_t bType)
448{
449 switch (bType)
450 {
451 case 0: return 18; /* normal */
452 case 1: return 16; /* normal extreme rounding */
453 case 2: return 14; /* unnormal */
454 case 3: return 12; /* Signalling NaN */
455 case 4: return 10; /* Quiet NaN */
456 case 5: return 8; /* PseudoNaN */
457 case 6: return 6; /* Pseudo Denormal */
458 case 7: return 4; /* Denormal */
459 case 8: return 3; /* Indefinite */
460 case 9: return 2; /* Infinity */
461 case 10: return 1; /* Pseudo-Infinity */
462 case 11: return 0; /* Zero */
463 default: AssertFailedReturn(18);
464 }
465}
466
467
468/**
469 * This works in tandem with RandR80Src2 to make sure we cover all operand
470 * type mixes first before we venture into regular random testing.
471 *
472 * There are 11 basic variations, when we leave out the five odd ones using
473 * SafeR80FractionShift. Because of the special normalized value targetting at
474 * rounding, we make it an even 12. So 144 combinations for two operands.
475 */
476static RTFLOAT80U RandR80Src1(uint32_t iTest, unsigned cPartnerBits = 80, bool fPartnerInt = false)
477{
478 if (cPartnerBits == 80)
479 {
480 Assert(!fPartnerInt);
481 if (iTest < 12 * 12)
482 return RandR80Ex(RandR80Src12RemapType(iTest / 12));
483 }
484 else if ((cPartnerBits == 64 || cPartnerBits == 32) && !fPartnerInt)
485 {
486 if (iTest < 12 * 10)
487 return RandR80Ex(RandR80Src12RemapType(iTest / 10));
488 }
489 else if (iTest < 18 * 4 && fPartnerInt)
490 return RandR80Ex(iTest / 4);
491 return RandR80();
492}
493
494
495/** Partner to RandR80Src1. */
496static RTFLOAT80U RandR80Src2(uint32_t iTest)
497{
498 if (iTest < 12 * 12)
499 return RandR80Ex(RandR80Src12RemapType(iTest % 12));
500 return RandR80();
501}
502
503
504static void SafeR64FractionShift(PRTFLOAT64U pr64, uint8_t cShift)
505{
506 if (pr64->s64.uFraction >= RT_BIT_64(cShift))
507 pr64->s64.uFraction >>= cShift;
508 else
509 pr64->s64.uFraction = (cShift % 19) + 1;
510}
511
512
/**
 * Generates a random 64-bit floating point value of the basic value type
 * selected by @a bType (masked to 0..15):
 *  - 0,1:    zero / infinity (only the sign is kept from the random pattern);
 *  - 2,3:    subnormals;
 *  - 4..7:   quiet (4,5) and signalling (6,7) NaNs;
 *  - 8..11:  normals (exponent clamped into the normal range);
 *  - 12..15: the unmodified random pattern.
 */
static RTFLOAT64U RandR64Ex(uint8_t bType)
{
    RTFLOAT64U r64;
    r64.u = RandU64();

    /*
     * Make it more likely that we get a good selection of special values.
     * On average 6 out of 16 calls should return a special value.
     */
    bType &= 0xf;
    if (bType == 0 || bType == 1)
    {
        /* 0 or Infinity. We only keep fSign here. */
        r64.s.uExponent     = bType == 0 ? 0 : 0x7ff;
        r64.s.uFractionHigh = 0;
        r64.s.uFractionLow  = 0;
        AssertMsg(bType != 0 || RTFLOAT64U_IS_ZERO(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(bType != 1 || RTFLOAT64U_IS_INF(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType == 2 || bType == 3)
    {
        /* Subnormals */
        if (bType == 3)
            SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
        else if (r64.s64.uFraction == 0)
            r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
        r64.s64.uExponent = 0;
        AssertMsg(RTFLOAT64U_IS_SUBNORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* NaNs */
        if (bType & 1)
            SafeR64FractionShift(&r64, r64.s64.uExponent % 51);
        else if (r64.s64.uFraction == 0)
            r64.s64.uFraction = RTRandU64Ex(1, RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1);
        r64.s64.uExponent = 0x7ff;
        if (bType < 6)
            r64.s64.uFraction |= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* quiet */
        else
            r64.s64.uFraction &= ~RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1); /* signalling */
        AssertMsg(bType >= 6 || RTFLOAT64U_IS_QUIET_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(bType < 6 || RTFLOAT64U_IS_SIGNALLING_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
        AssertMsg(RTFLOAT64U_IS_NAN(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    else if (bType < 12)
    {
        /* Make sure we have lots of normalized values. */
        if (r64.s.uExponent == 0)
            r64.s.uExponent = 1;
        else if (r64.s.uExponent == 0x7ff)
            r64.s.uExponent = 0x7fe;
        AssertMsg(RTFLOAT64U_IS_NORMAL(&r64), ("%s bType=%#x\n", FormatR64(&r64), bType));
    }
    return r64;
}
569
570
571static RTFLOAT64U RandR64Src(uint32_t iTest)
572{
573 if (iTest < 16)
574 return RandR64Ex(iTest);
575 return RandR64Ex(RandU8());
576}
577
578
579/** Pairing with a 80-bit floating point arg. */
580static RTFLOAT64U RandR64Src2(uint32_t iTest)
581{
582 if (iTest < 12 * 10)
583 return RandR64Ex(9 - iTest % 10); /* start with normal values */
584 return RandR64Ex(RandU8());
585}
586
587
588static void SafeR32FractionShift(PRTFLOAT32U pr32, uint8_t cShift)
589{
590 if (pr32->s.uFraction >= RT_BIT_32(cShift))
591 pr32->s.uFraction >>= cShift;
592 else
593 pr32->s.uFraction = (cShift % 19) + 1;
594}
595
596
/**
 * Generates a random 32-bit floating point value of the basic value type
 * selected by @a bType (masked to 0..15):
 *  - 0,1:    zero / infinity (only the sign is kept from the random pattern);
 *  - 2,3:    subnormals;
 *  - 4..7:   quiet (4,5) and signalling (6,7) NaNs;
 *  - 8..11:  normals (exponent clamped into the normal range);
 *  - 12..15: the unmodified random pattern.
 */
static RTFLOAT32U RandR32Ex(uint8_t bType)
{
    RTFLOAT32U r32;
    r32.u = RandU32();

    /*
     * Make it more likely that we get a good selection of special values.
     * On average 6 out of 16 calls should return a special value.
     */
    bType &= 0xf;
    if (bType == 0 || bType == 1)
    {
        /* 0 or Infinity. We only keep fSign here. */
        r32.s.uExponent = bType == 0 ? 0 : 0xff;
        r32.s.uFraction = 0;
        AssertMsg(bType != 0 || RTFLOAT32U_IS_ZERO(&r32), ("%s\n", FormatR32(&r32)));
        AssertMsg(bType != 1 || RTFLOAT32U_IS_INF(&r32), ("%s\n", FormatR32(&r32)));
    }
    else if (bType == 2 || bType == 3)
    {
        /* Subnormals */
        if (bType == 3)
            SafeR32FractionShift(&r32, r32.s.uExponent % 22);
        else if (r32.s.uFraction == 0)
            r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
        r32.s.uExponent = 0;
        AssertMsg(RTFLOAT32U_IS_SUBNORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    else if (bType == 4 || bType == 5 || bType == 6 || bType == 7)
    {
        /* NaNs */
        if (bType & 1)
            SafeR32FractionShift(&r32, r32.s.uExponent % 22);
        else if (r32.s.uFraction == 0)
            r32.s.uFraction = RTRandU32Ex(1, RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1);
        r32.s.uExponent = 0xff;
        if (bType < 6)
            r32.s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* quiet */
        else
            r32.s.uFraction &= ~RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1); /* signalling */
        AssertMsg(bType >= 6 || RTFLOAT32U_IS_QUIET_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
        AssertMsg(bType < 6 || RTFLOAT32U_IS_SIGNALLING_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
        AssertMsg(RTFLOAT32U_IS_NAN(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    else if (bType < 12)
    {
        /* Make sure we have lots of normalized values. */
        if (r32.s.uExponent == 0)
            r32.s.uExponent = 1;
        else if (r32.s.uExponent == 0xff)
            r32.s.uExponent = 0xfe;
        AssertMsg(RTFLOAT32U_IS_NORMAL(&r32), ("%s bType=%#x\n", FormatR32(&r32), bType));
    }
    return r32;
}
652
653
654static RTFLOAT32U RandR32Src(uint32_t iTest)
655{
656 if (iTest < 16)
657 return RandR32Ex(iTest);
658 return RandR32Ex(RandU8());
659}
660
661
662/** Pairing with a 80-bit floating point arg. */
663static RTFLOAT32U RandR32Src2(uint32_t iTest)
664{
665 if (iTest < 12 * 10)
666 return RandR32Ex(9 - iTest % 10); /* start with normal values */
667 return RandR32Ex(RandU8());
668}
669
670
/**
 * Generates a random 80-bit packed BCD value.
 *
 * The first tests are deterministic: iTest 0..2 yield zero with alternating
 * sign, iTest 3..4 yield the indefinite value.  Thereafter, tests where
 * (iTest & 7) >= 6 produce illegal encodings (random pair bytes, and for
 * (iTest & 7) == 7 also non-zero padding), while the rest are well-formed
 * values with all digits in the 0..9 range.
 */
static RTPBCD80U RandD80Src(uint32_t iTest)
{
    if (iTest < 3)
    {
        RTPBCD80U d80Zero = RTPBCD80U_INIT_ZERO(!(iTest & 1));
        return d80Zero;
    }
    if (iTest < 5)
    {
        RTPBCD80U d80Ind = RTPBCD80U_INIT_INDEFINITE();
        return d80Ind;
    }

    RTPBCD80U d80;
    uint8_t b = RandU8();
    d80.s.fSign = b & 1;

    if ((iTest & 7) >= 6)
    {
        /* Illegal */
        d80.s.uPad = (iTest & 7) == 7 ? b >> 1 : 0;
        for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
            d80.s.abPairs[iPair] = RandU8();
    }
    else
    {
        /* Normal */
        d80.s.uPad = 0;
        for (size_t iPair = 0; iPair < RT_ELEMENTS(d80.s.abPairs); iPair++)
        {
            uint8_t const uLo = (uint8_t)RTRandU32Ex(0, 9);
            uint8_t const uHi = (uint8_t)RTRandU32Ex(0, 9);
            d80.s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(uHi, uLo);
        }
    }
    return d80;
}
708
709
/**
 * Formats an 80-bit float as C source for the generated test data, preferring
 * the symbolic RTFLOAT80U_INIT_* macros for recognized special values and
 * falling back on RTFLOAT80U_INIT_C.
 *
 * @returns Pointer into the rotating g_aszBuf pool or a string literal.
 */
const char *GenFormatR80(PCRTFLOAT80U plrd)
{
    if (RTFLOAT80U_IS_ZERO(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_ZERO(1)" : "RTFLOAT80U_INIT_ZERO(0)";
    if (RTFLOAT80U_IS_INF(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_INF(1)" : "RTFLOAT80U_INIT_INF(0)";
    if (RTFLOAT80U_IS_INDEFINITE(plrd))
        return plrd->s.fSign ? "RTFLOAT80U_INIT_IND(1)" : "RTFLOAT80U_INIT_IND(0)";
    /* Only use the QNAN/SNAN macros when the payload matches what they produce
       (low 62 mantissa bits equal to 1). */
    if (RTFLOAT80U_IS_QUIET_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
        return plrd->s.fSign ? "RTFLOAT80U_INIT_QNAN(1)" : "RTFLOAT80U_INIT_QNAN(0)";
    if (RTFLOAT80U_IS_SIGNALLING_NAN(plrd) && (plrd->s.uMantissa & (RT_BIT_64(62) - 1)) == 1)
        return plrd->s.fSign ? "RTFLOAT80U_INIT_SNAN(1)" : "RTFLOAT80U_INIT_SNAN(0)";

    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT80U_INIT_C(%d,%#RX64,%u)",
                plrd->s.fSign, plrd->s.uMantissa, plrd->s.uExponent);
    return pszBuf;
}
728
729const char *GenFormatR64(PCRTFLOAT64U prd)
730{
731 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
732 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT64U_INIT_C(%d,%#RX64,%u)",
733 prd->s.fSign, RT_MAKE_U64(prd->s.uFractionLow, prd->s.uFractionHigh), prd->s.uExponent);
734 return pszBuf;
735}
736
737
738const char *GenFormatR32(PCRTFLOAT32U pr)
739{
740 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
741 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTFLOAT32U_INIT_C(%d,%#RX32,%u)", pr->s.fSign, pr->s.uFraction, pr->s.uExponent);
742 return pszBuf;
743}
744
745
/**
 * Formats an 80-bit packed BCD value as an RTPBCD80U_INIT_C() /
 * RTPBCD80U_INIT_EX_C() source expression (the latter when the padding byte
 * is non-zero), returning a pointer into the rotating g_aszBuf pool.
 */
const char *GenFormatD80(PCRTPBCD80U pd80)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t off;
    if (pd80->s.uPad == 0)
        off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_C(%d", pd80->s.fSign);
    else
        off = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "RTPBCD80U_INIT_EX_C(%#x,%d", pd80->s.uPad, pd80->s.fSign);
    /* Emit the digits most significant pair first, as the INIT macros expect. */
    size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
    while (iPair-- > 0)
        off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, ",%d,%d",
                           RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair]),
                           RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair]));
    pszBuf[off++] = ')';
    pszBuf[off++] = '\0';
    return pszBuf;
}
763
764
765const char *GenFormatI64(int64_t i64)
766{
767 if (i64 == INT64_MIN) /* This one is problematic */
768 return "INT64_MIN";
769 if (i64 == INT64_MAX)
770 return "INT64_MAX";
771 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
772 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT64_C(%RI64)", i64);
773 return pszBuf;
774}
775
776
777const char *GenFormatI64(int64_t const *pi64)
778{
779 return GenFormatI64(*pi64);
780}
781
782
783const char *GenFormatI32(int32_t i32)
784{
785 if (i32 == INT32_MIN) /* This one is problematic */
786 return "INT32_MIN";
787 if (i32 == INT32_MAX)
788 return "INT32_MAX";
789 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
790 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT32_C(%RI32)", i32);
791 return pszBuf;
792}
793
794
795const char *GenFormatI32(int32_t const *pi32)
796{
797 return GenFormatI32(*pi32);
798}
799
800
801const char *GenFormatI16(int16_t i16)
802{
803 if (i16 == INT16_MIN) /* This one is problematic */
804 return "INT16_MIN";
805 if (i16 == INT16_MAX)
806 return "INT16_MAX";
807 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
808 RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), "INT16_C(%RI16)", i16);
809 return pszBuf;
810}
811
812
813const char *GenFormatI16(int16_t const *pi16)
814{
815 return GenFormatI16(*pi16);
816}
817
818
/**
 * Writes the standard header (Id line, copyright, #include) of a generated
 * test data file to @a pOut.
 *
 * @param   pOut        The output stream.
 * @param   pszCpuDesc  CPU description string worked into the @file comment.
 * @param   pszCpuType  Optional CPU type string for the @file comment; NULL
 *                      if not applicable.
 */
static void GenerateHeader(PRTSTREAM pOut, const char *pszCpuDesc, const char *pszCpuType)
{
    /* We want to tag the generated source code with the revision that produced it. */
    static char s_szRev[] = "$Revision: 94695 $";
    const char *pszRev = RTStrStripL(strchr(s_szRev, ':') + 1);
    size_t cchRev = 0;
    while (RT_C_IS_DIGIT(pszRev[cchRev]))
        cchRev++;

    RTStrmPrintf(pOut,
                 "/* $Id: tstIEMAImpl.cpp 94695 2022-04-22 23:13:12Z vboxsync $ */\n"
                 "/** @file\n"
                 " * IEM Assembly Instruction Helper Testcase Data%s%s - r%.*s on %s.\n"
                 " */\n"
                 "\n"
                 "/*\n"
                 " * Copyright (C) 2022 Oracle Corporation\n"
                 " *\n"
                 " * This file is part of VirtualBox Open Source Edition (OSE), as\n"
                 " * available from http://www.virtualbox.org. This file is free software;\n"
                 " * you can redistribute it and/or modify it under the terms of the GNU\n"
                 " * General Public License (GPL) as published by the Free Software\n"
                 " * Foundation, in version 2 as it comes in the \"COPYING\" file of the\n"
                 " * VirtualBox OSE distribution. VirtualBox OSE is distributed in the\n"
                 " * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.\n"
                 " */\n"
                 "\n"
                 "#include \"tstIEMAImpl.h\"\n"
                 "\n"
                 ,
                 pszCpuType ? " " : "", pszCpuType ? pszCpuType : "", cchRev, pszRev, pszCpuDesc);
}
851
852
853static PRTSTREAM GenerateOpenWithHdr(const char *pszFilename, const char *pszCpuDesc, const char *pszCpuType)
854{
855 PRTSTREAM pOut = NULL;
856 int rc = RTStrmOpen(pszFilename, "w", &pOut);
857 if (RT_SUCCESS(rc))
858 {
859 GenerateHeader(pOut, pszCpuDesc, pszCpuType);
860 return pOut;
861 }
862 RTMsgError("Failed to open %s for writing: %Rrc", pszFilename, rc);
863 return NULL;
864}
865
866
867static RTEXITCODE GenerateFooterAndClose(PRTSTREAM pOut, const char *pszFilename, RTEXITCODE rcExit)
868{
869 RTStrmPrintf(pOut,
870 "\n"
871 "/* end of file */\n");
872 int rc = RTStrmClose(pOut);
873 if (RT_SUCCESS(rc))
874 return rcExit;
875 return RTMsgErrorExitFailure("RTStrmClose failed on %s: %Rrc", pszFilename, rc);
876}
877
878
/** Emits the opening of a test data array definition
 *  ("<type> const g_aTests_<name>[] = {") to @a pOut. */
static void GenerateArrayStart(PRTSTREAM pOut, const char *pszName, const char *pszType)
{
    RTStrmPrintf(pOut, "%s const g_aTests_%s[] =\n{\n", pszType, pszName);
}
883
884
/** Emits the closing brace of a test data array plus the matching
 *  g_cTests_<name> element-count definition to @a pOut. */
static void GenerateArrayEnd(PRTSTREAM pOut, const char *pszName)
{
    RTStrmPrintf(pOut,
                 "};\n"
                 "uint32_t const g_cTests_%s = RT_ELEMENTS(g_aTests_%s);\n"
                 "\n",
                 pszName, pszName);
}
893
894#endif /* TSTIEMAIMPL_WITH_GENERATOR */
895
896
897/*
898 * Test helpers.
899 */
900static bool IsTestEnabled(const char *pszName)
901{
902 /* Process excludes first: */
903 uint32_t i = g_cExcludeTestPatterns;
904 while (i-- > 0)
905 if (RTStrSimplePatternMultiMatch(g_apszExcludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
906 return false;
907
908 /* If no include patterns, everything is included: */
909 i = g_cIncludeTestPatterns;
910 if (!i)
911 return true;
912
913 /* Otherwise only tests in the include patters gets tested: */
914 while (i-- > 0)
915 if (RTStrSimplePatternMultiMatch(g_apszIncludeTestPatterns[i], RTSTR_MAX, pszName, RTSTR_MAX, NULL))
916 return true;
917
918 return false;
919}
920
921
922static bool SubTestAndCheckIfEnabled(const char *pszName)
923{
924 RTTestSub(g_hTest, pszName);
925 if (IsTestEnabled(pszName))
926 return true;
927 RTTestSkipped(g_hTest, g_cVerbosity > 0 ? "excluded" : NULL);
928 return false;
929}
930
931
/**
 * Formats the difference between two EFLAGS values for test failure messages.
 *
 * @returns Empty string when equal, otherwise " - <xor-mask>" followed by
 *          "/<flag>" (set in @a fActual) or "/!<flag>" (clear in @a fActual)
 *          for each differing flag; pointer into the rotating g_aszBuf pool.
 */
static const char *EFlagsDiff(uint32_t fActual, uint32_t fExpected)
{
    if (fActual == fExpected)
        return "";

    uint32_t const fXor = fActual ^ fExpected;
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);

    /* Name/bit table for all the EFLAGS bits we may report on. */
    static struct
    {
        const char *pszName;
        uint32_t    fFlag;
    } const s_aFlags[] =
    {
#define EFL_ENTRY(a_Flags) { #a_Flags, X86_EFL_ ## a_Flags }
        EFL_ENTRY(CF),
        EFL_ENTRY(PF),
        EFL_ENTRY(AF),
        EFL_ENTRY(ZF),
        EFL_ENTRY(SF),
        EFL_ENTRY(TF),
        EFL_ENTRY(IF),
        EFL_ENTRY(DF),
        EFL_ENTRY(OF),
        EFL_ENTRY(IOPL),
        EFL_ENTRY(NT),
        EFL_ENTRY(RF),
        EFL_ENTRY(VM),
        EFL_ENTRY(AC),
        EFL_ENTRY(VIF),
        EFL_ENTRY(VIP),
        EFL_ENTRY(ID),
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
        if (s_aFlags[i].fFlag & fXor)
            cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
                               s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
    RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, ""); /* write the terminator */
    return pszBuf;
}
973
974
/**
 * Formats the difference between two FPU status word values for test failure
 * messages.
 *
 * @returns Empty string when equal, otherwise " - <xor-mask>" followed by
 *          "/<flag>" or "/!<flag>" per differing bit and "/TOPa!e" when the
 *          TOP fields differ; pointer into the rotating g_aszBuf pool.
 */
static const char *FswDiff(uint16_t fActual, uint16_t fExpected)
{
    if (fActual == fExpected)
        return "";

    uint16_t const fXor = fActual ^ fExpected;
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t cch = RTStrPrintf(pszBuf, sizeof(g_aszBuf[0]), " - %#x", fXor);

    /* Name/bit table for the single-bit FSW fields. */
    static struct
    {
        const char *pszName;
        uint32_t    fFlag;
    } const s_aFlags[] =
    {
#define FSW_ENTRY(a_Flags) { #a_Flags, X86_FSW_ ## a_Flags }
        FSW_ENTRY(IE),
        FSW_ENTRY(DE),
        FSW_ENTRY(ZE),
        FSW_ENTRY(OE),
        FSW_ENTRY(UE),
        FSW_ENTRY(PE),
        FSW_ENTRY(SF),
        FSW_ENTRY(ES),
        FSW_ENTRY(C0),
        FSW_ENTRY(C1),
        FSW_ENTRY(C2),
        FSW_ENTRY(C3),
        FSW_ENTRY(B),
    };
    for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
        if (s_aFlags[i].fFlag & fXor)
            cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch,
                               s_aFlags[i].fFlag & fActual ? "/%s" : "/!%s", s_aFlags[i].pszName);
    /* The TOP field is multi-bit, so report both values when it differs. */
    if (fXor & X86_FSW_TOP_MASK)
        cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "/TOP%u!%u",
                           X86_FSW_TOP_GET(fActual), X86_FSW_TOP_GET(fExpected));
#if 0 /* For debugging fprem & fprem1 */
    cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " - Q=%d (vs %d)",
                       X86_FSW_CX_TO_QUOTIENT(fActual), X86_FSW_CX_TO_QUOTIENT(fExpected));
#endif
    RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, ""); /* write the terminator */
    return pszBuf;
}
1019
1020
1021static const char *FormatFcw(uint16_t fFcw)
1022{
1023 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1024
1025 const char *pszPC = NULL; /* (msc+gcc are too stupid) */
1026 switch (fFcw & X86_FCW_PC_MASK)
1027 {
1028 case X86_FCW_PC_24: pszPC = "PC24"; break;
1029 case X86_FCW_PC_RSVD: pszPC = "PCRSVD!"; break;
1030 case X86_FCW_PC_53: pszPC = "PC53"; break;
1031 case X86_FCW_PC_64: pszPC = "PC64"; break;
1032 }
1033
1034 const char *pszRC = NULL; /* (msc+gcc are too stupid) */
1035 switch (fFcw & X86_FCW_RC_MASK)
1036 {
1037 case X86_FCW_RC_NEAREST: pszRC = "NEAR"; break;
1038 case X86_FCW_RC_DOWN: pszRC = "DOWN"; break;
1039 case X86_FCW_RC_UP: pszRC = "UP"; break;
1040 case X86_FCW_RC_ZERO: pszRC = "ZERO"; break;
1041 }
1042 size_t cch = RTStrPrintf(&pszBuf[0], sizeof(g_aszBuf[0]), "%s %s", pszPC, pszRC);
1043
1044 static struct
1045 {
1046 const char *pszName;
1047 uint32_t fFlag;
1048 } const s_aFlags[] =
1049 {
1050#define FCW_ENTRY(a_Flags) { #a_Flags, X86_FCW_ ## a_Flags }
1051 FCW_ENTRY(IM),
1052 FCW_ENTRY(DM),
1053 FCW_ENTRY(ZM),
1054 FCW_ENTRY(OM),
1055 FCW_ENTRY(UM),
1056 FCW_ENTRY(PM),
1057 { "6M", 64 },
1058 };
1059 for (size_t i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1060 if (fFcw & s_aFlags[i].fFlag)
1061 cch += RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, " %s", s_aFlags[i].pszName);
1062
1063 RTStrPrintf(&pszBuf[cch], sizeof(g_aszBuf[0]) - cch, "");
1064 return pszBuf;
1065}
1066
1067
1068static const char *FormatR80(PCRTFLOAT80U pr80)
1069{
1070 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1071 RTStrFormatR80(pszBuf, sizeof(g_aszBuf[0]), pr80, 0, 0, RTSTR_F_SPECIAL);
1072 return pszBuf;
1073}
1074
1075
1076static const char *FormatR64(PCRTFLOAT64U pr64)
1077{
1078 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1079 RTStrFormatR64(pszBuf, sizeof(g_aszBuf[0]), pr64, 0, 0, RTSTR_F_SPECIAL);
1080 return pszBuf;
1081}
1082
1083
1084static const char *FormatR32(PCRTFLOAT32U pr32)
1085{
1086 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1087 RTStrFormatR32(pszBuf, sizeof(g_aszBuf[0]), pr32, 0, 0, RTSTR_F_SPECIAL);
1088 return pszBuf;
1089}
1090
1091
/**
 * Formats an 80-bit packed BCD value into one of the rotating global scratch
 * buffers.
 *
 * The output is a sign character followed by the 18 digits, most significant
 * pair first.  Invalid nibbles (&gt; 9) are rendered as hex digits and, together
 * with a non-zero padding byte, reported via a trailing "[cBadDigits,uPad]"
 * suffix so that malformed encodings are visible in failure logs.
 */
static const char *FormatD80(PCRTPBCD80U pd80)
{
    /* There is only one indefinite encoding (same as for 80-bit
       floating point), so get it out of the way first: */
    if (RTPBCD80U_IS_INDEFINITE(pd80))
        return "Ind";

    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    size_t off = 0;
    pszBuf[off++] = pd80->s.fSign ? '-' : '+';
    unsigned cBadDigits = 0;
    size_t iPair = RT_ELEMENTS(pd80->s.abPairs);
    /* Walk the pairs from the most significant one down. */
    while (iPair-- > 0)
    {
        static const char s_szDigits[] = "0123456789abcdef";
        static const uint8_t s_bBadDigits[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1 }; /* nibbles 0xa..0xf are invalid BCD */
        pszBuf[off++] = s_szDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])];
        pszBuf[off++] = s_szDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
        cBadDigits += s_bBadDigits[RTPBCD80U_HI_DIGIT(pd80->s.abPairs[iPair])]
                    + s_bBadDigits[RTPBCD80U_LO_DIGIT(pd80->s.abPairs[iPair])];
    }
    /* Flag anything that is not a canonical encoding. */
    if (cBadDigits || pd80->s.uPad != 0)
        off += RTStrPrintf(&pszBuf[off], sizeof(g_aszBuf[0]) - off, "[%u,%#x]", cBadDigits, pd80->s.uPad);
    pszBuf[off] = '\0';
    return pszBuf;
}
1118
1119
#if 0
/** Formats a signed 64-bit value as special-notation hex into one of the
 *  rotating global scratch buffers.  Currently unused; kept to match the
 *  FormatI32/FormatI16 siblings below. */
static const char *FormatI64(int64_t const *piVal)
{
    char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
    RTStrFormatU64(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
    return pszBuf;
}
#endif
1128
1129
1130static const char *FormatI32(int32_t const *piVal)
1131{
1132 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1133 RTStrFormatU32(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1134 return pszBuf;
1135}
1136
1137
1138static const char *FormatI16(int16_t const *piVal)
1139{
1140 char *pszBuf = g_aszBuf[g_idxBuf++ % RT_ELEMENTS(g_aszBuf)];
1141 RTStrFormatU16(pszBuf, sizeof(g_aszBuf[0]), *piVal, 16, 0, 0, RTSTR_F_SPECIAL | RTSTR_F_VALSIGNED);
1142 return pszBuf;
1143}
1144
1145
1146/*
1147 * Binary operations.
1148 */
1149TYPEDEF_SUBTEST_TYPE(BINU8_T, BINU8_TEST_T, PFNIEMAIMPLBINU8);
1150TYPEDEF_SUBTEST_TYPE(BINU16_T, BINU16_TEST_T, PFNIEMAIMPLBINU16);
1151TYPEDEF_SUBTEST_TYPE(BINU32_T, BINU32_TEST_T, PFNIEMAIMPLBINU32);
1152TYPEDEF_SUBTEST_TYPE(BINU64_T, BINU64_TEST_T, PFNIEMAIMPLBINU64);
1153
1154#ifdef TSTIEMAIMPL_WITH_GENERATOR
1155# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
1156static void BinU ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
1157{ \
1158 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aBinU ## a_cBits); iFn++) \
1159 { \
1160 PFNIEMAIMPLBINU ## a_cBits const pfn = g_aBinU ## a_cBits[iFn].pfnNative \
1161 ? g_aBinU ## a_cBits[iFn].pfnNative : g_aBinU ## a_cBits[iFn].pfn; \
1162 PRTSTREAM pOutFn = pOut; \
1163 if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
1164 { \
1165 if (g_aBinU ## a_cBits[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
1166 continue; \
1167 pOutFn = pOutCpu; \
1168 } \
1169 \
1170 GenerateArrayStart(pOutFn, g_aBinU ## a_cBits[iFn].pszName, #a_TestType); \
1171 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1172 { \
1173 a_TestType Test; \
1174 Test.fEflIn = RandEFlags(); \
1175 Test.fEflOut = Test.fEflIn; \
1176 Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
1177 Test.uDstOut = Test.uDstIn; \
1178 Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
1179 if (g_aBinU ## a_cBits[iFn].uExtra) \
1180 Test.uSrcIn &= a_cBits - 1; /* Restrict bit index according to operand width */ \
1181 Test.uMisc = 0; \
1182 pfn(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut); \
1183 RTStrmPrintf(pOutFn, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %#x }, /* #%u */\n", \
1184 Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
1185 } \
1186 GenerateArrayEnd(pOutFn, g_aBinU ## a_cBits[iFn].pszName); \
1187 } \
1188}
1189#else
1190# define GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType)
1191#endif
1192
/**
 * Instantiates the generator (when enabled) and the test driver function
 * BinU&lt;cBits&gt;Test() for one binary operand width.
 *
 * The driver replays each recorded test case against the implementation and,
 * when it passes, repeats the call using the global test variables
 * (g_pu&lt;cBits&gt;/g_pfEfl) as operands.  If the subtest has a native variant
 * (pfnNative), a second pass (iVar=1, failures tagged with "/n") runs the
 * same checks against it.
 */
#define TEST_BINARY_OPS(a_cBits, a_uType, a_Fmt, a_TestType, a_aSubTests) \
GEN_BINARY_TESTS(a_cBits, a_Fmt, a_TestType) \
\
static void BinU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        PFNIEMAIMPLBINU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_uType uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uSrcIn, &fEfl); \
                if ( uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut) \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s - %s\n", \
                                 iTest, !iVar ? "" : "/n", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), \
                                 uDst == paTests[iTest].uDstOut ? "eflags" : fEfl == paTests[iTest].fEflOut ? "dst" : "both"); \
                else \
                { \
                    /* Passed: re-run the test on the global test variables. */ \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
1233
1234
1235/*
1236 * 8-bit binary operations.
1237 */
1238static const BINU8_T g_aBinU8[] =
1239{
1240 ENTRY(add_u8),
1241 ENTRY(add_u8_locked),
1242 ENTRY(adc_u8),
1243 ENTRY(adc_u8_locked),
1244 ENTRY(sub_u8),
1245 ENTRY(sub_u8_locked),
1246 ENTRY(sbb_u8),
1247 ENTRY(sbb_u8_locked),
1248 ENTRY(or_u8),
1249 ENTRY(or_u8_locked),
1250 ENTRY(xor_u8),
1251 ENTRY(xor_u8_locked),
1252 ENTRY(and_u8),
1253 ENTRY(and_u8_locked),
1254 ENTRY(cmp_u8),
1255 ENTRY(test_u8),
1256};
1257TEST_BINARY_OPS(8, uint8_t, "%#04x", BINU8_TEST_T, g_aBinU8)
1258
1259
1260/*
1261 * 16-bit binary operations.
1262 */
1263static const BINU16_T g_aBinU16[] =
1264{
1265 ENTRY(add_u16),
1266 ENTRY(add_u16_locked),
1267 ENTRY(adc_u16),
1268 ENTRY(adc_u16_locked),
1269 ENTRY(sub_u16),
1270 ENTRY(sub_u16_locked),
1271 ENTRY(sbb_u16),
1272 ENTRY(sbb_u16_locked),
1273 ENTRY(or_u16),
1274 ENTRY(or_u16_locked),
1275 ENTRY(xor_u16),
1276 ENTRY(xor_u16_locked),
1277 ENTRY(and_u16),
1278 ENTRY(and_u16_locked),
1279 ENTRY(cmp_u16),
1280 ENTRY(test_u16),
1281 ENTRY_EX(bt_u16, 1),
1282 ENTRY_EX(btc_u16, 1),
1283 ENTRY_EX(btc_u16_locked, 1),
1284 ENTRY_EX(btr_u16, 1),
1285 ENTRY_EX(btr_u16_locked, 1),
1286 ENTRY_EX(bts_u16, 1),
1287 ENTRY_EX(bts_u16_locked, 1),
1288 ENTRY_AMD( bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1289 ENTRY_INTEL(bsf_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1290 ENTRY_AMD( bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1291 ENTRY_INTEL(bsr_u16, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1292 ENTRY_AMD( imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1293 ENTRY_INTEL(imul_two_u16, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1294 ENTRY(arpl),
1295};
1296TEST_BINARY_OPS(16, uint16_t, "%#06x", BINU16_TEST_T, g_aBinU16)
1297
1298
1299/*
1300 * 32-bit binary operations.
1301 */
1302static const BINU32_T g_aBinU32[] =
1303{
1304 ENTRY(add_u32),
1305 ENTRY(add_u32_locked),
1306 ENTRY(adc_u32),
1307 ENTRY(adc_u32_locked),
1308 ENTRY(sub_u32),
1309 ENTRY(sub_u32_locked),
1310 ENTRY(sbb_u32),
1311 ENTRY(sbb_u32_locked),
1312 ENTRY(or_u32),
1313 ENTRY(or_u32_locked),
1314 ENTRY(xor_u32),
1315 ENTRY(xor_u32_locked),
1316 ENTRY(and_u32),
1317 ENTRY(and_u32_locked),
1318 ENTRY(cmp_u32),
1319 ENTRY(test_u32),
1320 ENTRY_EX(bt_u32, 1),
1321 ENTRY_EX(btc_u32, 1),
1322 ENTRY_EX(btc_u32_locked, 1),
1323 ENTRY_EX(btr_u32, 1),
1324 ENTRY_EX(btr_u32_locked, 1),
1325 ENTRY_EX(bts_u32, 1),
1326 ENTRY_EX(bts_u32_locked, 1),
1327 ENTRY_AMD( bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1328 ENTRY_INTEL(bsf_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1329 ENTRY_AMD( bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1330 ENTRY_INTEL(bsr_u32, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1331 ENTRY_AMD( imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1332 ENTRY_INTEL(imul_two_u32, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1333};
1334TEST_BINARY_OPS(32, uint32_t, "%#010RX32", BINU32_TEST_T, g_aBinU32)
1335
1336
1337/*
1338 * 64-bit binary operations.
1339 */
1340static const BINU64_T g_aBinU64[] =
1341{
1342 ENTRY(add_u64),
1343 ENTRY(add_u64_locked),
1344 ENTRY(adc_u64),
1345 ENTRY(adc_u64_locked),
1346 ENTRY(sub_u64),
1347 ENTRY(sub_u64_locked),
1348 ENTRY(sbb_u64),
1349 ENTRY(sbb_u64_locked),
1350 ENTRY(or_u64),
1351 ENTRY(or_u64_locked),
1352 ENTRY(xor_u64),
1353 ENTRY(xor_u64_locked),
1354 ENTRY(and_u64),
1355 ENTRY(and_u64_locked),
1356 ENTRY(cmp_u64),
1357 ENTRY(test_u64),
1358 ENTRY_EX(bt_u64, 1),
1359 ENTRY_EX(btc_u64, 1),
1360 ENTRY_EX(btc_u64_locked, 1),
1361 ENTRY_EX(btr_u64, 1),
1362 ENTRY_EX(btr_u64_locked, 1),
1363 ENTRY_EX(bts_u64, 1),
1364 ENTRY_EX(bts_u64_locked, 1),
1365 ENTRY_AMD( bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1366 ENTRY_INTEL(bsf_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1367 ENTRY_AMD( bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1368 ENTRY_INTEL(bsr_u64, X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF),
1369 ENTRY_AMD( imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1370 ENTRY_INTEL(imul_two_u64, X86_EFL_PF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF),
1371};
1372TEST_BINARY_OPS(64, uint64_t, "%#018RX64", BINU64_TEST_T, g_aBinU64)
1373
1374
1375/*
1376 * XCHG
1377 */
1378static void XchgTest(void)
1379{
1380 if (!SubTestAndCheckIfEnabled("xchg"))
1381 return;
1382 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU8, (uint8_t *pu8Mem, uint8_t *pu8Reg));
1383 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU16,(uint16_t *pu16Mem, uint16_t *pu16Reg));
1384 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU32,(uint32_t *pu32Mem, uint32_t *pu32Reg));
1385 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXCHGU64,(uint64_t *pu64Mem, uint64_t *pu64Reg));
1386
1387 static struct
1388 {
1389 uint8_t cb; uint64_t fMask;
1390 union
1391 {
1392 uintptr_t pfn;
1393 FNIEMAIMPLXCHGU8 *pfnU8;
1394 FNIEMAIMPLXCHGU16 *pfnU16;
1395 FNIEMAIMPLXCHGU32 *pfnU32;
1396 FNIEMAIMPLXCHGU64 *pfnU64;
1397 } u;
1398 }
1399 s_aXchgWorkers[] =
1400 {
1401 { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_locked } },
1402 { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_locked } },
1403 { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_locked } },
1404 { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_locked } },
1405 { 1, UINT8_MAX, { (uintptr_t)iemAImpl_xchg_u8_unlocked } },
1406 { 2, UINT16_MAX, { (uintptr_t)iemAImpl_xchg_u16_unlocked } },
1407 { 4, UINT32_MAX, { (uintptr_t)iemAImpl_xchg_u32_unlocked } },
1408 { 8, UINT64_MAX, { (uintptr_t)iemAImpl_xchg_u64_unlocked } },
1409 };
1410 for (size_t i = 0; i < RT_ELEMENTS(s_aXchgWorkers); i++)
1411 {
1412 RTUINT64U uIn1, uIn2, uMem, uDst;
1413 uMem.u = uIn1.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
1414 uDst.u = uIn2.u = RTRandU64Ex(0, s_aXchgWorkers[i].fMask);
1415 if (uIn1.u == uIn2.u)
1416 uDst.u = uIn2.u = ~uIn2.u;
1417
1418 switch (s_aXchgWorkers[i].cb)
1419 {
1420 case 1:
1421 s_aXchgWorkers[i].u.pfnU8(g_pu8, g_pu8Two);
1422 s_aXchgWorkers[i].u.pfnU8(&uMem.au8[0], &uDst.au8[0]);
1423 break;
1424 case 2:
1425 s_aXchgWorkers[i].u.pfnU16(g_pu16, g_pu16Two);
1426 s_aXchgWorkers[i].u.pfnU16(&uMem.Words.w0, &uDst.Words.w0);
1427 break;
1428 case 4:
1429 s_aXchgWorkers[i].u.pfnU32(g_pu32, g_pu32Two);
1430 s_aXchgWorkers[i].u.pfnU32(&uMem.DWords.dw0, &uDst.DWords.dw0);
1431 break;
1432 case 8:
1433 s_aXchgWorkers[i].u.pfnU64(g_pu64, g_pu64Two);
1434 s_aXchgWorkers[i].u.pfnU64(&uMem.u, &uDst.u);
1435 break;
1436 default: RTTestFailed(g_hTest, "%d\n", s_aXchgWorkers[i].cb); break;
1437 }
1438
1439 if (uMem.u != uIn2.u || uDst.u != uIn1.u)
1440 RTTestFailed(g_hTest, "i=%u: %#RX64, %#RX64 -> %#RX64, %#RX64\n", i, uIn1.u, uIn2.u, uMem.u, uDst.u);
1441 }
1442}
1443
1444
1445/*
1446 * XADD
1447 */
1448static void XaddTest(void)
1449{
1450#define TEST_XADD(a_cBits, a_Type, a_Fmt) do { \
1451 typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLXADDU ## a_cBits, (a_Type *, a_Type *, uint32_t *)); \
1452 static struct \
1453 { \
1454 const char *pszName; \
1455 FNIEMAIMPLXADDU ## a_cBits *pfn; \
1456 BINU ## a_cBits ## _TEST_T const *paTests; \
1457 uint32_t const *pcTests; \
1458 } const s_aFuncs[] = \
1459 { \
1460 { "xadd_u" # a_cBits, iemAImpl_xadd_u ## a_cBits, \
1461 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1462 { "xadd_u" # a_cBits "8_locked", iemAImpl_xadd_u ## a_cBits ## _locked, \
1463 g_aTests_add_u ## a_cBits, &g_cTests_add_u ## a_cBits }, \
1464 }; \
1465 for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
1466 { \
1467 if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
1468 uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
1469 BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
1470 if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
1471 for (uint32_t iTest = 0; iTest < cTests; iTest++) \
1472 { \
1473 uint32_t fEfl = paTests[iTest].fEflIn; \
1474 a_Type uSrc = paTests[iTest].uSrcIn; \
1475 *g_pu ## a_cBits = paTests[iTest].uDstIn; \
1476 s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uSrc, &fEfl); \
1477 if ( fEfl != paTests[iTest].fEflOut \
1478 || *g_pu ## a_cBits != paTests[iTest].uDstOut \
1479 || uSrc != paTests[iTest].uDstIn) \
1480 RTTestFailed(g_hTest, "%s/#%u: efl=%#08x dst=" a_Fmt " src=" a_Fmt " -> efl=%#08x dst=" a_Fmt " src=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
1481 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn, \
1482 fEfl, *g_pu ## a_cBits, uSrc, paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].uDstIn, \
1483 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
1484 } \
1485 } \
1486 } while(0)
1487 TEST_XADD(8, uint8_t, "%#04x");
1488 TEST_XADD(16, uint16_t, "%#06x");
1489 TEST_XADD(32, uint32_t, "%#010RX32");
1490 TEST_XADD(64, uint64_t, "%#010RX64");
1491}
1492
1493
1494/*
1495 * CMPXCHG
1496 */
1497
/**
 * Tests cmpxchg_uXX (plain and locked) using the CMP test value tables.
 *
 * Each recorded CMP test is replayed twice: first as-is, where the
 * destination and compare values almost always differ (miss path: memory is
 * loaded into the compare register, EFLAGS match the recorded CMP result),
 * then with matching values (hit path: uNew is stored).  The hit-path EFLAGS
 * are derived by running the matching SUB implementation, since CMPXCHG sets
 * flags as a compare/subtract of dst and the compare operand.
 */
static void CmpXchgTest(void)
{
#define TEST_CMPXCHG(a_cBits, a_Type, a_Fmt) do {\
        typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHGU ## a_cBits, (a_Type *, a_Type *, a_Type, uint32_t *)); \
        static struct \
        { \
            const char *pszName; \
            FNIEMAIMPLCMPXCHGU ## a_cBits *pfn; \
            PFNIEMAIMPLBINU ## a_cBits pfnSub; \
            BINU ## a_cBits ## _TEST_T const *paTests; \
            uint32_t const *pcTests; \
        } const s_aFuncs[] = \
        { \
            { "cmpxchg_u" # a_cBits, iemAImpl_cmpxchg_u ## a_cBits, iemAImpl_sub_u ## a_cBits, \
              g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
            { "cmpxchg_u" # a_cBits "_locked", iemAImpl_cmpxchg_u ## a_cBits ## _locked, iemAImpl_sub_u ## a_cBits, \
              g_aTests_cmp_u ## a_cBits, &g_cTests_cmp_u ## a_cBits }, \
        }; \
        for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++) \
        { \
            if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName)) continue; \
            BINU ## a_cBits ## _TEST_T const * const paTests = s_aFuncs[iFn].paTests; \
            uint32_t const cTests = *s_aFuncs[iFn].pcTests; \
            if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                /* as is (99% likely to be negative). */ \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_Type const uNew = paTests[iTest].uSrcIn + 0x42; \
                a_Type uA = paTests[iTest].uDstIn; \
                *g_pu ## a_cBits = paTests[iTest].uSrcIn; \
                a_Type const uExpect = uA != paTests[iTest].uSrcIn ? paTests[iTest].uSrcIn : uNew; \
                s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
                if ( fEfl != paTests[iTest].fEflOut \
                    || *g_pu ## a_cBits != uExpect \
                    || uA != paTests[iTest].uSrcIn) \
                    RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
                                 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uSrcIn, paTests[iTest].uDstIn, \
                                 uNew, fEfl, *g_pu ## a_cBits, uA, paTests[iTest].fEflOut, uExpect, paTests[iTest].uSrcIn, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
                /* positive */ \
                uint32_t fEflExpect = paTests[iTest].fEflIn; \
                uA = paTests[iTest].uDstIn; \
                s_aFuncs[iFn].pfnSub(&uA, uA, &fEflExpect); /* reference EFLAGS: SUB of equal values */ \
                fEfl = paTests[iTest].fEflIn; \
                uA = paTests[iTest].uDstIn; \
                *g_pu ## a_cBits = uA; \
                s_aFuncs[iFn].pfn(g_pu ## a_cBits, &uA, uNew, &fEfl); \
                if ( fEfl != fEflExpect \
                    || *g_pu ## a_cBits != uNew \
                    || uA != paTests[iTest].uDstIn) \
                    RTTestFailed(g_hTest, "%s/#%ua: efl=%#08x dst=" a_Fmt " cmp=" a_Fmt " new=" a_Fmt " -> efl=%#08x dst=" a_Fmt " old=" a_Fmt ", expected %#08x, " a_Fmt ", " a_Fmt "%s\n", \
                                 s_aFuncs[iFn].pszName, iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uDstIn, \
                                 uNew, fEfl, *g_pu ## a_cBits, uA, fEflExpect, uNew, paTests[iTest].uDstIn, \
                                 EFlagsDiff(fEfl, fEflExpect)); \
            } \
        } \
    } while(0)
    TEST_CMPXCHG(8, uint8_t, "%#04RX8");
    TEST_CMPXCHG(16, uint16_t, "%#06x");
    TEST_CMPXCHG(32, uint32_t, "%#010RX32");
#if ARCH_BITS != 32 /* calling convention issue, skipping as it's an unsupported host */
    TEST_CMPXCHG(64, uint64_t, "%#010RX64");
#endif
}
1563
/**
 * Tests cmpxchg8b (plain and locked) with random values.
 *
 * Each iteration does one positive round - memory equals the compare value,
 * so the new value must be stored and ZF set - and one negative round -
 * memory differs, so it must be loaded into the compare value and ZF cleared.
 * All other EFLAGS must be left untouched in both cases.
 */
static void CmpXchg8bTest(void)
{
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG8B,(uint64_t *, PRTUINT64U, PRTUINT64U, uint32_t *));
    static struct
    {
        const char *pszName;
        FNIEMAIMPLCMPXCHG8B *pfn;
    } const s_aFuncs[] =
    {
        { "cmpxchg8b", iemAImpl_cmpxchg8b },
        { "cmpxchg8b_locked", iemAImpl_cmpxchg8b_locked },
    };
    for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
            continue;
        for (uint32_t iTest = 0; iTest < 4; iTest += 2)
        {
            uint64_t const uOldValue = RandU64();
            uint64_t const uNewValue = RandU64();

            /* positive test. */
            RTUINT64U uA, uB;
            uB.u = uNewValue;
            uA.u = uOldValue;
            *g_pu64 = uOldValue;
            uint32_t fEflIn = RandEFlags();
            uint32_t fEfl = fEflIn;
            s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
            if ( fEfl != (fEflIn | X86_EFL_ZF)
                || *g_pu64 != uNewValue
                || uA.u != uOldValue)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
                             iTest, fEflIn, uOldValue, uOldValue, uNewValue,
                             fEfl, *g_pu64, uA.u,
                             (fEflIn | X86_EFL_ZF), uNewValue, uOldValue, EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.u == uNewValue); /* the EBX:ECX input must not be modified */

            /* negative */
            uint64_t const uExpect = ~uOldValue; /* guaranteed to differ from the compare value */
            *g_pu64 = uExpect;
            uA.u = uOldValue;
            uB.u = uNewValue;
            fEfl = fEflIn = RandEFlags();
            s_aFuncs[iFn].pfn(g_pu64, &uA, &uB, &fEfl);
            if ( fEfl != (fEflIn & ~X86_EFL_ZF)
                || *g_pu64 != uExpect
                || uA.u != uExpect)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64 cmp=%#018RX64 new=%#018RX64\n -> efl=%#08x dst=%#018RX64 old=%#018RX64,\n wanted %#08x, %#018RX64, %#018RX64%s\n",
                             iTest + 1, fEflIn, uExpect, uOldValue, uNewValue,
                             fEfl, *g_pu64, uA.u,
                             (fEflIn & ~X86_EFL_ZF), uExpect, uExpect, EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.u == uNewValue);
        }
    }
}
1620
/**
 * Tests cmpxchg16b (plain, locked, and - on non-ARM64 hosts - the C fallback)
 * with random 128-bit values.
 *
 * Same scheme as CmpXchg8bTest: a positive round (match, new value stored,
 * ZF set) and a negative round (mismatch, memory loaded into the compare
 * value, ZF clear) per iteration.  Skipped on AMD64 hosts without the CX16
 * CPUID feature when the assembly implementation is used.
 */
static void CmpXchg16bTest(void)
{
    typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLCMPXCHG16B,(PRTUINT128U, PRTUINT128U, PRTUINT128U, uint32_t *));
    static struct
    {
        const char *pszName;
        FNIEMAIMPLCMPXCHG16B *pfn;
    } const s_aFuncs[] =
    {
        { "cmpxchg16b", iemAImpl_cmpxchg16b },
        { "cmpxchg16b_locked", iemAImpl_cmpxchg16b_locked },
#if !defined(RT_ARCH_ARM64)
        { "cmpxchg16b_fallback", iemAImpl_cmpxchg16b_fallback },
#endif
    };
    for (size_t iFn = 0; iFn < RT_ELEMENTS(s_aFuncs); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(s_aFuncs[iFn].pszName))
            continue;
#if !defined(IEM_WITHOUT_ASSEMBLY) && defined(RT_ARCH_AMD64)
        if (!(ASMCpuId_ECX(1) & X86_CPUID_FEATURE_ECX_CX16))
        {
            RTTestSkipped(g_hTest, "no hardware cmpxchg16b");
            continue;
        }
#endif
        for (uint32_t iTest = 0; iTest < 4; iTest += 2)
        {
            RTUINT128U const uOldValue = RandU128();
            RTUINT128U const uNewValue = RandU128();

            /* positive test. */
            RTUINT128U uA, uB;
            uB = uNewValue;
            uA = uOldValue;
            *g_pu128 = uOldValue;
            uint32_t fEflIn = RandEFlags();
            uint32_t fEfl = fEflIn;
            s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
            if ( fEfl != (fEflIn | X86_EFL_ZF)
                || g_pu128->s.Lo != uNewValue.s.Lo
                || g_pu128->s.Hi != uNewValue.s.Hi
                || uA.s.Lo != uOldValue.s.Lo
                || uA.s.Hi != uOldValue.s.Hi)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
                             " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
                             " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
                             iTest, fEflIn, uOldValue.s.Hi, uOldValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
                             fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
                             (fEflIn | X86_EFL_ZF), uNewValue.s.Hi, uNewValue.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo,
                             EFlagsDiff(fEfl, fEflIn | X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi); /* RBX:RCX input untouched */

            /* negative */
            RTUINT128U const uExpect = RTUINT128_INIT(~uOldValue.s.Hi, ~uOldValue.s.Lo); /* guaranteed mismatch */
            *g_pu128 = uExpect;
            uA = uOldValue;
            uB = uNewValue;
            fEfl = fEflIn = RandEFlags();
            s_aFuncs[iFn].pfn(g_pu128, &uA, &uB, &fEfl);
            if ( fEfl != (fEflIn & ~X86_EFL_ZF)
                || g_pu128->s.Lo != uExpect.s.Lo
                || g_pu128->s.Hi != uExpect.s.Hi
                || uA.s.Lo != uExpect.s.Lo
                || uA.s.Hi != uExpect.s.Hi)
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=%#018RX64'%016RX64 cmp=%#018RX64'%016RX64 new=%#018RX64'%016RX64\n"
                             " -> efl=%#08x dst=%#018RX64'%016RX64 old=%#018RX64'%016RX64,\n"
                             " wanted %#08x, %#018RX64'%016RX64, %#018RX64'%016RX64%s\n",
                             iTest + 1, fEflIn, uExpect.s.Hi, uExpect.s.Lo, uOldValue.s.Hi, uOldValue.s.Lo, uNewValue.s.Hi, uNewValue.s.Lo,
                             fEfl, g_pu128->s.Hi, g_pu128->s.Lo, uA.s.Hi, uA.s.Lo,
                             (fEflIn & ~X86_EFL_ZF), uExpect.s.Hi, uExpect.s.Lo, uExpect.s.Hi, uExpect.s.Lo,
                             EFlagsDiff(fEfl, fEflIn & ~X86_EFL_ZF));
            RTTEST_CHECK(g_hTest, uB.s.Lo == uNewValue.s.Lo && uB.s.Hi == uNewValue.s.Hi);
        }
    }
}
1697
1698
1699/*
1700 * Double shifts.
1701 *
1702 * Note! We use BINUxx_TEST_T with the shift value in the uMisc field.
1703 */
1704#ifdef TSTIEMAIMPL_WITH_GENERATOR
1705# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
1706void ShiftDblU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
1707{ \
1708 for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
1709 { \
1710 if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
1711 && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
1712 continue; \
1713 GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
1714 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1715 { \
1716 a_TestType Test; \
1717 Test.fEflIn = RandEFlags(); \
1718 Test.fEflOut = Test.fEflIn; \
1719 Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
1720 Test.uDstOut = Test.uDstIn; \
1721 Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
1722 Test.uMisc = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
1723 a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, Test.uMisc, &Test.fEflOut); \
1724 RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", %2u }, /* #%u */\n", \
1725 Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.uMisc, iTest); \
1726 } \
1727 GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
1728 } \
1729}
1730#else
1731# define GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests)
1732#endif
1733
/**
 * Instantiates the subtest table, generator and test driver for one double
 * shift (shld/shrd) operand width.
 *
 * All subtests come in AMD and Intel flavours with an OF|CF mask (presumably
 * the flags whose behaviour differs between the vendors - confirm against the
 * ENTRY_AMD/ENTRY_INTEL definitions).  The driver replays the recorded tests,
 * repeats passing cases on the global test variables, and runs a second pass
 * ("/n" tag) for the native variant when present.
 */
#define TEST_SHIFT_DBL(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTDBLU ## a_cBits); \
\
static a_SubTestType const a_aSubTests[] = \
{ \
    ENTRY_AMD(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_INTEL(shld_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_AMD(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
    ENTRY_INTEL(shrd_u ## a_cBits, X86_EFL_OF | X86_EFL_CF), \
}; \
\
GEN_SHIFT_DBL(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
\
static void ShiftDblU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLSHIFTDBLU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_Type uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uSrcIn, paTests[iTest].uMisc, &fEfl); \
                if ( uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut) \
                    RTTestFailed(g_hTest, "#%03u%s: efl=%#08x dst=" a_Fmt " src=" a_Fmt " shift=%-2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s%s\n", \
                                 iTest, iVar == 0 ? "" : "/n", paTests[iTest].fEflIn, \
                                 paTests[iTest].uDstIn, paTests[iTest].uSrcIn, (unsigned)paTests[iTest].uMisc, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut), uDst == paTests[iTest].uDstOut ? "" : " dst!"); \
                else \
                { \
                    /* Passed: re-run the test on the global test variables. */ \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uSrcIn, paTests[iTest].uMisc, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
TEST_SHIFT_DBL(16, uint16_t, "%#06RX16", BINU16_TEST_T, SHIFT_DBL_U16_T, g_aShiftDblU16)
TEST_SHIFT_DBL(32, uint32_t, "%#010RX32", BINU32_TEST_T, SHIFT_DBL_U32_T, g_aShiftDblU32)
TEST_SHIFT_DBL(64, uint64_t, "%#018RX64", BINU64_TEST_T, SHIFT_DBL_U64_T, g_aShiftDblU64)
1787
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Generates test value tables for all three double shift operand widths. */
static void ShiftDblGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    ShiftDblU16Generate(pOut, cTests);
    ShiftDblU32Generate(pOut, cTests);
    ShiftDblU64Generate(pOut, cTests);
}
#endif
1796
/** Runs the double shift (shld/shrd) tests for all three operand widths. */
static void ShiftDblTest(void)
{
    ShiftDblU16Test();
    ShiftDblU32Test();
    ShiftDblU64Test();
}
1803
1804
1805/*
1806 * Unary operators.
1807 *
1808 * Note! We use BINUxx_TEST_T ignoreing uSrcIn and uMisc.
1809 */
1810#ifdef TSTIEMAIMPL_WITH_GENERATOR
1811# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
1812void UnaryU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
1813{ \
1814 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
1815 { \
1816 GenerateArrayStart(pOut, g_aUnaryU ## a_cBits[iFn].pszName, #a_TestType); \
1817 for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
1818 { \
1819 a_TestType Test; \
1820 Test.fEflIn = RandEFlags(); \
1821 Test.fEflOut = Test.fEflIn; \
1822 Test.uDstIn = RandU ## a_cBits(); \
1823 Test.uDstOut = Test.uDstIn; \
1824 Test.uSrcIn = 0; \
1825 Test.uMisc = 0; \
1826 g_aUnaryU ## a_cBits[iFn].pfn(&Test.uDstOut, &Test.fEflOut); \
1827 RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, 0 }, /* #%u */\n", \
1828 Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, iTest); \
1829 } \
1830 GenerateArrayEnd(pOut, g_aUnaryU ## a_cBits[iFn].pszName); \
1831 } \
1832}
1833#else
1834# define GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType)
1835#endif
1836
/**
 * Emits the sub-test table (g_aUnaryU<cBits>), the data generator (via
 * GEN_UNARY) and the test worker (UnaryU<cBits>Test) for the unary
 * inc/dec/not/neg operators, including their _locked variants.
 *
 * Each passing test entry is re-run against the global, page-boundary
 * allocated g_pu<cBits>/g_pfEfl buffers as an extra sanity check.
 */
#define TEST_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLUNARYU ## a_cBits); \
static a_SubTestType const g_aUnaryU ## a_cBits [] = \
{ \
    ENTRY(inc_u ## a_cBits), \
    ENTRY(inc_u ## a_cBits ## _locked), \
    ENTRY(dec_u ## a_cBits), \
    ENTRY(dec_u ## a_cBits ## _locked), \
    ENTRY(not_u ## a_cBits), \
    ENTRY(not_u ## a_cBits ## _locked), \
    ENTRY(neg_u ## a_cBits), \
    ENTRY(neg_u ## a_cBits ## _locked), \
}; \
\
GEN_UNARY(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType) \
\
static void UnaryU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aUnaryU ## a_cBits); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(g_aUnaryU ## a_cBits[iFn].pszName)) continue; \
        a_TestType const * const paTests = g_aUnaryU ## a_cBits[iFn].paTests; \
        uint32_t const cTests = *g_aUnaryU ## a_cBits[iFn].pcTests; \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            uint32_t fEfl = paTests[iTest].fEflIn; \
            a_Type uDst = paTests[iTest].uDstIn; \
            g_aUnaryU ## a_cBits[iFn].pfn(&uDst, &fEfl); \
            if ( uDst != paTests[iTest].uDstOut \
                || fEfl != paTests[iTest].fEflOut) \
                RTTestFailed(g_hTest, "#%u: efl=%#08x dst=" a_Fmt " -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
                             iTest, paTests[iTest].fEflIn, paTests[iTest].uDstIn, \
                             fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                             EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
            else \
            { \
                *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                *g_pfEfl = paTests[iTest].fEflIn; \
                g_aUnaryU ## a_cBits[iFn].pfn(g_pu ## a_cBits, g_pfEfl); \
                RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
            } \
        } \
    } \
}
/* Instantiate the unary operator sub-tests for all four operand widths. */
TEST_UNARY(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_UNARY_U8_T)
TEST_UNARY(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_UNARY_U16_T)
TEST_UNARY(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_UNARY_U32_T)
TEST_UNARY(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_UNARY_U64_T)
1887
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits unary operator test data for all operand widths to @a pOut. */
static void UnaryGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    UnaryU8Generate(pOut, cTests);
    UnaryU16Generate(pOut, cTests);
    UnaryU32Generate(pOut, cTests);
    UnaryU64Generate(pOut, cTests);
}
#endif
1897
/** Runs the unary operator tests for all operand widths. */
static void UnaryTest(void)
{
    UnaryU8Test();
    UnaryU16Test();
    UnaryU32Test();
    UnaryU64Test();
}
1905
1906
1907/*
1908 * Shifts.
1909 *
1910 * Note! We use BINUxx_TEST_T with the shift count in uMisc and uSrcIn unused.
1911 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits a ShiftU<cBits>Generate function producing test data for the shift
 * and rotate workers.  Implementations whose EFLAGS flavour (Intel vs AMD)
 * does not match the host CPU are skipped since the reference results come
 * from running the native worker.  Each test is emitted twice, the second
 * time ('b' variant) with all live EFLAGS bits inverted.
 */
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
void ShiftU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn = RandEFlags(); \
            Test.fEflOut = Test.fEflIn; \
            Test.uDstIn = RandU ## a_cBits ## Dst(iTest); \
            Test.uDstOut = Test.uDstIn; \
            Test.uSrcIn = 0; \
            Test.uMisc = RandU8() & (a_cBits * 4 - 1); /* need to go way beyond the a_cBits limit */ \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uMisc, iTest); \
            \
            Test.fEflIn = (~Test.fEflIn & X86_EFL_LIVE_MASK) | X86_EFL_RA1_MASK; \
            Test.fEflOut = Test.fEflIn; \
            Test.uDstOut = Test.uDstIn; \
            a_aSubTests[iFn].pfnNative(&Test.uDstOut, Test.uMisc, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", 0, %-2u }, /* #%u b */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uMisc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
1948
/**
 * Emits the sub-test table, the data generator (via GEN_SHIFT) and the test
 * worker (ShiftU<cBits>Test) for the rotate/shift workers (rol, ror, rcl,
 * rcr, shl, shr, sar), with both AMD and Intel EFLAGS flavours.  The shift
 * count lives in uMisc.  When both variations exist, the first pass runs
 * pfn and the second pass pfnNative.
 */
#define TEST_SHIFT(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLSHIFTU ## a_cBits); \
static a_SubTestType const a_aSubTests[] = \
{ \
    ENTRY_AMD( rol_u ## a_cBits, X86_EFL_OF), \
    ENTRY_INTEL(rol_u ## a_cBits, X86_EFL_OF), \
    ENTRY_AMD( ror_u ## a_cBits, X86_EFL_OF), \
    ENTRY_INTEL(ror_u ## a_cBits, X86_EFL_OF), \
    ENTRY_AMD( rcl_u ## a_cBits, X86_EFL_OF), \
    ENTRY_INTEL(rcl_u ## a_cBits, X86_EFL_OF), \
    ENTRY_AMD( rcr_u ## a_cBits, X86_EFL_OF), \
    ENTRY_INTEL(rcr_u ## a_cBits, X86_EFL_OF), \
    ENTRY_AMD( shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_INTEL(shl_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_AMD( shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_INTEL(shr_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_AMD( sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
    ENTRY_INTEL(sar_u ## a_cBits, X86_EFL_OF | X86_EFL_AF), \
}; \
\
GEN_SHIFT(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
\
static void ShiftU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        PFNIEMAIMPLSHIFTU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_Type uDst = paTests[iTest].uDstIn; \
                pfn(&uDst, paTests[iTest].uMisc, &fEfl); \
                if ( uDst != paTests[iTest].uDstOut \
                    || fEfl != paTests[iTest].fEflOut ) \
                    RTTestFailed(g_hTest, "#%u%s: efl=%#08x dst=" a_Fmt " shift=%2u -> efl=%#08x dst=" a_Fmt ", expected %#08x & " a_Fmt "%s\n", \
                                 iTest, iVar == 0 ? "" : "/n", \
                                 paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uMisc, \
                                 fEfl, uDst, paTests[iTest].fEflOut, paTests[iTest].uDstOut, \
                                 EFlagsDiff(fEfl, paTests[iTest].fEflOut)); \
                else \
                { \
                    *g_pu ## a_cBits = paTests[iTest].uDstIn; \
                    *g_pfEfl = paTests[iTest].fEflIn; \
                    pfn(g_pu ## a_cBits, paTests[iTest].uMisc, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDstOut); \
                    RTTEST_CHECK(g_hTest, *g_pfEfl == paTests[iTest].fEflOut); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
/* Instantiate the shift/rotate sub-tests for all four operand widths. */
TEST_SHIFT(8, uint8_t, "%#04RX8", BINU8_TEST_T, INT_BINARY_U8_T, g_aShiftU8)
TEST_SHIFT(16, uint16_t, "%#06RX16", BINU16_TEST_T, INT_BINARY_U16_T, g_aShiftU16)
TEST_SHIFT(32, uint32_t, "%#010RX32", BINU32_TEST_T, INT_BINARY_U32_T, g_aShiftU32)
TEST_SHIFT(64, uint64_t, "%#018RX64", BINU64_TEST_T, INT_BINARY_U64_T, g_aShiftU64)
2012
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits shift/rotate test data for all operand widths to @a pOut. */
static void ShiftGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    ShiftU8Generate(pOut, cTests);
    ShiftU16Generate(pOut, cTests);
    ShiftU32Generate(pOut, cTests);
    ShiftU64Generate(pOut, cTests);
}
#endif
2022
/** Runs the shift/rotate tests for all operand widths. */
static void ShiftTest(void)
{
    ShiftU8Test();
    ShiftU16Test();
    ShiftU32Test();
    ShiftU64Test();
}
2030
2031
2032/*
2033 * Multiplication and division.
2034 *
 * Note! The 8-bit functions have a different format, so we need to duplicate things.
2036 * Note! Currently ignoring undefined bits.
2037 */
2038
2039/* U8 */
/** 8-bit mul/div sub-test descriptor type. */
TYPEDEF_SUBTEST_TYPE(INT_MULDIV_U8_T, MULDIVU8_TEST_T, PFNIEMAIMPLMULDIVU8);
/** The 8-bit mul/div sub-tests; the uExtra value is the EFLAGS mask to ignore
 *  when comparing results (currently the undefined bits). */
static INT_MULDIV_U8_T const g_aMulDivU8[] =
{
    ENTRY_AMD_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
                                      X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
    ENTRY_INTEL_EX(mul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
    ENTRY_AMD_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF,
                                       X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF),
    ENTRY_INTEL_EX(imul_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0),
    ENTRY_AMD_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    ENTRY_INTEL_EX(div_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    ENTRY_AMD_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
    ENTRY_INTEL_EX(idiv_u8, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0),
};
2054
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the 8-bit multiplication/division workers.
 *
 * Implementations with an EFLAGS flavour (Intel vs AMD) different from the
 * host CPU are skipped, since the reference results come from running the
 * native worker (pfnNative) on this machine.
 */
static void MulDivU8Generate(PRTSTREAM pOut, uint32_t cTests)
{
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
    {
        if (   g_aMulDivU8[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE
            && g_aMulDivU8[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
            continue;
        /* Note: dropped a stray '\' line continuation here, leftover from macro-ification. */
        GenerateArrayStart(pOut, g_aMulDivU8[iFn].pszName, "MULDIVU8_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest++ )
        {
            MULDIVU8_TEST_T Test;
            Test.fEflIn  = RandEFlags();
            Test.fEflOut = Test.fEflIn;
            Test.uDstIn  = RandU16Dst(iTest);   /* 8-bit mul/div uses a 16-bit AX destination. */
            Test.uDstOut = Test.uDstIn;
            Test.uSrcIn  = RandU8Src(iTest);
            Test.rc      = g_aMulDivU8[iFn].pfnNative(&Test.uDstOut, Test.uSrcIn, &Test.fEflOut);
            RTStrmPrintf(pOut, " { %#08x, %#08x, %#06RX16, %#06RX16, %#04RX8, %d }, /* #%u */\n",
                         Test.fEflIn, Test.fEflOut, Test.uDstIn, Test.uDstOut, Test.uSrcIn, Test.rc, iTest);
        }
        GenerateArrayEnd(pOut, g_aMulDivU8[iFn].pszName);
    }
}
#endif
2080
2081static void MulDivU8Test(void)
2082{
2083 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aMulDivU8); iFn++)
2084 {
2085 if (!SubTestAndCheckIfEnabled(g_aMulDivU8[iFn].pszName)) continue; \
2086 MULDIVU8_TEST_T const * const paTests = g_aMulDivU8[iFn].paTests;
2087 uint32_t const cTests = *g_aMulDivU8[iFn].pcTests;
2088 uint32_t const fEflIgn = g_aMulDivU8[iFn].uExtra;
2089 PFNIEMAIMPLMULDIVU8 pfn = g_aMulDivU8[iFn].pfn;
2090 uint32_t const cVars = COUNT_VARIATIONS(g_aMulDivU8[iFn]); \
2091 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2092 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2093 {
2094 for (uint32_t iTest = 0; iTest < cTests; iTest++ )
2095 {
2096 uint32_t fEfl = paTests[iTest].fEflIn;
2097 uint16_t uDst = paTests[iTest].uDstIn;
2098 int rc = g_aMulDivU8[iFn].pfn(&uDst, paTests[iTest].uSrcIn, &fEfl);
2099 if ( uDst != paTests[iTest].uDstOut
2100 || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)
2101 || rc != paTests[iTest].rc)
2102 RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst=%#06RX16 src=%#04RX8\n"
2103 " %s-> efl=%#08x dst=%#06RX16 rc=%d\n"
2104 "%sexpected %#08x %#06RX16 %d%s\n",
2105 iTest, iVar ? "/n" : "", paTests[iTest].fEflIn, paTests[iTest].uDstIn, paTests[iTest].uSrcIn,
2106 iVar ? " " : "", fEfl, uDst, rc,
2107 iVar ? " " : "", paTests[iTest].fEflOut, paTests[iTest].uDstOut, paTests[iTest].rc,
2108 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn));
2109 else
2110 {
2111 *g_pu16 = paTests[iTest].uDstIn;
2112 *g_pfEfl = paTests[iTest].fEflIn;
2113 rc = g_aMulDivU8[iFn].pfn(g_pu16, paTests[iTest].uSrcIn, g_pfEfl);
2114 RTTEST_CHECK(g_hTest, *g_pu16 == paTests[iTest].uDstOut);
2115 RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn));
2116 RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc);
2117 }
2118 }
2119 pfn = g_aMulDivU8[iFn].pfnNative;
2120 }
2121 }
2122}
2123
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits a MulDivU<cBits>Generate function producing test data for the 16, 32
 * and 64-bit mul/div workers (two destination operands, e.g. DX:AX), skipping
 * implementations whose EFLAGS flavour does not match the host CPU.
 */
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
void MulDivU ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if ( a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE \
            && a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
            continue; \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
        { \
            a_TestType Test; \
            Test.fEflIn = RandEFlags(); \
            Test.fEflOut = Test.fEflIn; \
            Test.uDst1In = RandU ## a_cBits ## Dst(iTest); \
            Test.uDst1Out = Test.uDst1In; \
            Test.uDst2In = RandU ## a_cBits ## Dst(iTest); \
            Test.uDst2Out = Test.uDst2In; \
            Test.uSrcIn = RandU ## a_cBits ## Src(iTest); \
            Test.rc = a_aSubTests[iFn].pfnNative(&Test.uDst1Out, &Test.uDst2Out, Test.uSrcIn, &Test.fEflOut); \
            RTStrmPrintf(pOut, " { %#08x, %#08x, " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", " a_Fmt ", %d }, /* #%u */\n", \
                         Test.fEflIn, Test.fEflOut, Test.uDst1In, Test.uDst1Out, Test.uDst2In, Test.uDst2Out, Test.uSrcIn, \
                         Test.rc, iTest); \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests)
#endif
2155
/**
 * Emits the sub-test table, the data generator (via GEN_MULDIV) and the test
 * worker (MulDivU<cBits>Test) for the 16, 32 and 64-bit mul/imul/div/idiv
 * workers.  Undefined EFLAGS bits (uExtra) are masked out when comparing.
 * The first variation pass runs pfn, the second (if present) pfnNative.
 */
#define TEST_MULDIV(a_cBits, a_Type, a_Fmt, a_TestType, a_SubTestType, a_aSubTests) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLMULDIVU ## a_cBits); \
static a_SubTestType const a_aSubTests [] = \
{ \
    ENTRY_AMD_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_INTEL_EX(mul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_AMD_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_INTEL_EX(imul_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, 0), \
    ENTRY_AMD_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    ENTRY_INTEL_EX(div_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    ENTRY_AMD_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
    ENTRY_INTEL_EX(idiv_u ## a_cBits, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF | X86_EFL_OF, 0), \
}; \
\
GEN_MULDIV(a_cBits, a_Fmt, a_TestType, a_aSubTests) \
\
static void MulDivU ## a_cBits ## Test(void) \
{ \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        uint32_t const fEflIgn = a_aSubTests[iFn].uExtra; \
        PFNIEMAIMPLMULDIVU ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++ ) \
            { \
                uint32_t fEfl = paTests[iTest].fEflIn; \
                a_Type uDst1 = paTests[iTest].uDst1In; \
                a_Type uDst2 = paTests[iTest].uDst2In; \
                int rc = pfn(&uDst1, &uDst2, paTests[iTest].uSrcIn, &fEfl); \
                if ( uDst1 != paTests[iTest].uDst1Out \
                    || uDst2 != paTests[iTest].uDst2Out \
                    || (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn)\
                    || rc != paTests[iTest].rc) \
                    RTTestFailed(g_hTest, "#%02u%s: efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " src=" a_Fmt "\n" \
                                 " -> efl=%#08x dst1=" a_Fmt " dst2=" a_Fmt " rc=%d\n" \
                                 "expected %#08x " a_Fmt " " a_Fmt " %d%s -%s%s%s\n", \
                                 iTest, iVar == 0 ? "" : "/n", \
                                 paTests[iTest].fEflIn, paTests[iTest].uDst1In, paTests[iTest].uDst2In, paTests[iTest].uSrcIn, \
                                 fEfl, uDst1, uDst2, rc, \
                                 paTests[iTest].fEflOut, paTests[iTest].uDst1Out, paTests[iTest].uDst2Out, paTests[iTest].rc, \
                                 EFlagsDiff(fEfl | fEflIgn, paTests[iTest].fEflOut | fEflIgn), \
                                 uDst1 != paTests[iTest].uDst1Out ? " dst1" : "", uDst2 != paTests[iTest].uDst2Out ? " dst2" : "", \
                                 (fEfl | fEflIgn) != (paTests[iTest].fEflOut | fEflIgn) ? " eflags" : ""); \
                else \
                { \
                    *g_pu ## a_cBits = paTests[iTest].uDst1In; \
                    *g_pu ## a_cBits ## Two = paTests[iTest].uDst2In; \
                    *g_pfEfl = paTests[iTest].fEflIn; \
                    rc = pfn(g_pu ## a_cBits, g_pu ## a_cBits ## Two, paTests[iTest].uSrcIn, g_pfEfl); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits == paTests[iTest].uDst1Out); \
                    RTTEST_CHECK(g_hTest, *g_pu ## a_cBits ## Two == paTests[iTest].uDst2Out); \
                    RTTEST_CHECK(g_hTest, (*g_pfEfl | fEflIgn) == (paTests[iTest].fEflOut | fEflIgn)); \
                    RTTEST_CHECK(g_hTest, rc == paTests[iTest].rc); \
                } \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
/* Instantiate the mul/div sub-tests for the 16, 32 and 64-bit widths (8-bit is hand-written above). */
TEST_MULDIV(16, uint16_t, "%#06RX16", MULDIVU16_TEST_T, INT_MULDIV_U16_T, g_aMulDivU16)
TEST_MULDIV(32, uint32_t, "%#010RX32", MULDIVU32_TEST_T, INT_MULDIV_U32_T, g_aMulDivU32)
TEST_MULDIV(64, uint64_t, "%#018RX64", MULDIVU64_TEST_T, INT_MULDIV_U64_T, g_aMulDivU64)
2224
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits mul/div test data for all operand widths to @a pOut. */
static void MulDivGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    MulDivU8Generate(pOut, cTests);
    MulDivU16Generate(pOut, cTests);
    MulDivU32Generate(pOut, cTests);
    MulDivU64Generate(pOut, cTests);
}
#endif
2234
/** Runs the mul/div tests for all operand widths. */
static void MulDivTest(void)
{
    MulDivU8Test();
    MulDivU16Test();
    MulDivU32Test();
    MulDivU64Test();
}
2242
2243
2244/*
2245 * BSWAP
2246 */
/**
 * Sanity-checks the iemAImpl_bswap_u16/u32/u64 workers with fixed values.
 *
 * For bswap_u16 the expected result zeroes the low 16 bits rather than byte
 * swapping them (the #if 0 alternative); presumably this matches observed
 * CPU behavior since BSWAP with a 16-bit operand is undefined -- confirm
 * against the implementation/SDM if this ever changes.
 */
static void BswapTest(void)
{
    if (SubTestAndCheckIfEnabled("bswap_u16"))
    {
        *g_pu32 = UINT32_C(0x12345678);
        iemAImpl_bswap_u16(g_pu32);
#if 0
        /* Low word byte-swapped: */
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12347856), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#else
        /* Low word zeroed: */
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0x12340000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#endif
        *g_pu32 = UINT32_C(0xffff1122);
        iemAImpl_bswap_u16(g_pu32);
#if 0
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff2211), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#else
        RTTEST_CHECK_MSG(g_hTest, *g_pu32 == UINT32_C(0xffff0000), (g_hTest, "*g_pu32=%#RX32\n", *g_pu32));
#endif
    }

    if (SubTestAndCheckIfEnabled("bswap_u32"))
    {
        *g_pu32 = UINT32_C(0x12345678);
        iemAImpl_bswap_u32(g_pu32);
        RTTEST_CHECK(g_hTest, *g_pu32 == UINT32_C(0x78563412));
    }

    if (SubTestAndCheckIfEnabled("bswap_u64"))
    {
        *g_pu64 = UINT64_C(0x0123456789abcdef);
        iemAImpl_bswap_u64(g_pu64);
        RTTEST_CHECK(g_hTest, *g_pu64 == UINT64_C(0xefcdab8967452301));
    }
}
2281
2282
2283
2284/*********************************************************************************************************************************
2285* Floating point (x87 style) *
2286*********************************************************************************************************************************/
2287
2288/*
2289 * FPU constant loading.
2290 */
/** FPU load-constant sub-test descriptor type. */
TYPEDEF_SUBTEST_TYPE(FPU_LD_CONST_T, FPU_LD_CONST_TEST_T, PFNIEMAIMPLFPUR80LDCONST);

/** The FPU constant-loading sub-tests (fld1 .. fldz). */
static const FPU_LD_CONST_T g_aFpuLdConst[] =
{
    ENTRY(fld1),
    ENTRY(fldl2t),
    ENTRY(fldl2e),
    ENTRY(fldpi),
    ENTRY(fldlg2),
    ENTRY(fldln2),
    ENTRY(fldz),
};
2303
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the FPU load-constant workers, iterating all four
 * rounding-control modes for each random FCW/FSW pair (hence iTest += 4).
 */
static void FpuLdConstGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuLdConst[iFn].pszName, "FPU_LD_CONST_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest += 4)
        {
            State.FCW = RandFcw();
            State.FSW = RandFsw();

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
                g_aFpuLdConst[iFn].pfn(&State, &Res);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s }, /* #%u */\n",
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), iTest + iRounding);
            }
        }
        GenerateArrayEnd(pOut, g_aFpuLdConst[iFn].pszName);
    }
}
#endif
2330
2331static void FpuLoadConstTest(void)
2332{
2333 /*
2334 * Inputs:
2335 * - FSW: C0, C1, C2, C3
2336 * - FCW: Exception masks, Precision control, Rounding control.
2337 *
2338 * C1 set to 1 on stack overflow, zero otherwise. C0, C2, and C3 are "undefined".
2339 */
2340 X86FXSTATE State;
2341 RT_ZERO(State);
2342 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdConst); iFn++)
2343 {
2344 if (!SubTestAndCheckIfEnabled(g_aFpuLdConst[iFn].pszName))
2345 continue;
2346
2347 uint32_t const cTests = *g_aFpuLdConst[iFn].pcTests;
2348 FPU_LD_CONST_TEST_T const *paTests = g_aFpuLdConst[iFn].paTests;
2349 PFNIEMAIMPLFPUR80LDCONST pfn = g_aFpuLdConst[iFn].pfn;
2350 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdConst[iFn]); \
2351 if (!cTests) RTTestSkipped(g_hTest, "no tests");
2352 for (uint32_t iVar = 0; iVar < cVars; iVar++)
2353 {
2354 for (uint32_t iTest = 0; iTest < cTests; iTest++)
2355 {
2356 State.FCW = paTests[iTest].fFcw;
2357 State.FSW = paTests[iTest].fFswIn;
2358 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
2359 pfn(&State, &Res);
2360 if ( Res.FSW != paTests[iTest].fFswOut
2361 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
2362 RTTestFailed(g_hTest, "#%u%s: fcw=%#06x fsw=%#06x -> fsw=%#06x %s, expected %#06x %s%s%s (%s)\n",
2363 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
2364 Res.FSW, FormatR80(&Res.r80Result),
2365 paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
2366 FswDiff(Res.FSW, paTests[iTest].fFswOut),
2367 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
2368 FormatFcw(paTests[iTest].fFcw) );
2369 }
2370 pfn = g_aFpuLdConst[iFn].pfnNative;
2371 }
2372 }
2373}
2374
2375
2376/*
2377 * Load floating point values from memory.
2378 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits an FpuLdR<cBits>Generate function producing test data for loading an
 * r32/r64/r80 memory value into an 80-bit FPU register, iterating all four
 * rounding-control modes per random input.
 */
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
static void FpuLdR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            State.FCW = RandFcw(); \
            State.FSW = RandFsw(); \
            a_rdTypeIn InVal = RandR ## a_cBits ## Src(iTest); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
                a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n", \
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), \
                             GenFormatR ## a_cBits(&InVal), iTest, iRounding); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType)
#endif
2410
/**
 * Emits the worker function typedefs, sub-test table, data generator (via
 * GEN_FPU_LOAD) and test worker (FpuLdR<cBits>Test) for the
 * fld_r80_from_r<cBits> FPU load workers.  Compares both the resulting FSW
 * and the bit-identical 80-bit result value.
 */
#define TEST_FPU_LOAD(a_cBits, a_rdTypeIn, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROM ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, PC ## a_rdTypeIn)); \
typedef FNIEMAIMPLFPULDR80FROM ## a_cBits *PFNIEMAIMPLFPULDR80FROM ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROM ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fld_r80_from_r,a_cBits)) \
}; \
GEN_FPU_LOAD(a_cBits, a_rdTypeIn, a_aSubTests, a_TestType) \
\
static void FpuLdR ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPULDR80FROM ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                a_rdTypeIn const InVal = paTests[iTest].InVal; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                pfn(&State, &Res, &InVal); \
                if ( Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                 "%s -> fsw=%#06x %s\n" \
                                 "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR ## a_cBits(&paTests[iTest].InVal), \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
2461
/* Instantiate the FPU memory-load sub-tests for r80, r64 and r32 inputs. */
TEST_FPU_LOAD(80, RTFLOAT80U, FPU_LD_R80_T, g_aFpuLdR80, FPU_R80_IN_TEST_T)
TEST_FPU_LOAD(64, RTFLOAT64U, FPU_LD_R64_T, g_aFpuLdR64, FPU_R64_IN_TEST_T)
TEST_FPU_LOAD(32, RTFLOAT32U, FPU_LD_R32_T, g_aFpuLdR32, FPU_R32_IN_TEST_T)
2465
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits FPU memory-load test data for all floating point widths to @a pOut. */
static void FpuLdMemGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    FpuLdR80Generate(pOut, cTests);
    FpuLdR64Generate(pOut, cTests);
    FpuLdR32Generate(pOut, cTests);
}
#endif
2474
/** Runs the FPU memory-load tests for all floating point widths. */
static void FpuLdMemTest(void)
{
    FpuLdR80Test();
    FpuLdR64Test();
    FpuLdR32Test();
}
2481
2482
2483/*
2484 * Load integer values from memory.
2485 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits an FpuLdI<cBits>Generate function producing test data for the
 * fild (integer to r80) workers, iterating all four rounding-control modes
 * per random input.
 */
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
static void FpuLdI ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTests; iTest++) \
        { \
            State.FCW = RandFcw(); \
            State.FSW = RandFsw(); \
            a_iTypeIn InVal = (a_iTypeIn)RandU ## a_cBits ## Src(iTest); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT); \
                a_aSubTests[iFn].pfn(&State, &Res, &InVal); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, " a_szFmtIn " }, /* #%u/%u */\n", \
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), InVal, iTest, iRounding); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType)
#endif
2516
/**
 * Emits the worker function typedefs, sub-test table, data generator (via
 * GEN_FPU_LOAD_INT) and test worker (FpuLdI<cBits>Test) for the
 * fild_r80_from_i<cBits> integer load workers.
 */
#define TEST_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMI ## a_cBits,(PCX86FXSTATE, PIEMFPURESULT, a_iTypeIn const *)); \
typedef FNIEMAIMPLFPULDR80FROMI ## a_cBits *PFNIEMAIMPLFPULDR80FROMI ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPULDR80FROMI ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fild_r80_from_i,a_cBits)) \
}; \
GEN_FPU_LOAD_INT(a_cBits, a_iTypeIn, a_szFmtIn, a_aSubTests, a_TestType) \
\
static void FpuLdI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPULDR80FROMI ## a_cBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                a_iTypeIn const iInVal = paTests[iTest].iInVal; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                pfn(&State, &Res, &iInVal); \
                if ( Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult)) \
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=" a_szFmtIn "\n" \
                                 "%s -> fsw=%#06x %s\n" \
                                 "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, paTests[iTest].iInVal, \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
2566
/* Instantiate the FPU integer-load (fild) sub-tests for i64, i32 and i16 inputs. */
TEST_FPU_LOAD_INT(64, int64_t, "%RI64", FPU_LD_I64_T, g_aFpuLdU64, FPU_I64_IN_TEST_T)
TEST_FPU_LOAD_INT(32, int32_t, "%RI32", FPU_LD_I32_T, g_aFpuLdU32, FPU_I32_IN_TEST_T)
TEST_FPU_LOAD_INT(16, int16_t, "%RI16", FPU_LD_I16_T, g_aFpuLdU16, FPU_I16_IN_TEST_T)
2570
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits FPU integer-load (fild) test data for all integer widths to @a pOut. */
static void FpuLdIntGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    FpuLdI64Generate(pOut, cTests);
    FpuLdI32Generate(pOut, cTests);
    FpuLdI16Generate(pOut, cTests);
}
#endif
2579
/** Runs the FPU integer-load (fild) tests for all integer widths. */
static void FpuLdIntTest(void)
{
    FpuLdI64Test();
    FpuLdI32Test();
    FpuLdI16Test();
}
2586
2587
2588/*
2589 * Load binary coded decimal values from memory.
2590 */
/** Worker function type for loading an 80-bit packed BCD value into an FPU register. */
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPULDR80FROMD80,(PCX86FXSTATE, PIEMFPURESULT, PCRTPBCD80U));
typedef FNIEMAIMPLFPULDR80FROMD80 *PFNIEMAIMPLFPULDR80FROMD80;
TYPEDEF_SUBTEST_TYPE(FPU_LD_D80_T, FPU_D80_IN_TEST_T, PFNIEMAIMPLFPULDR80FROMD80);

/** The packed BCD (fbld) load sub-test(s). */
static const FPU_LD_D80_T g_aFpuLdD80[] =
{
    ENTRY(fld_r80_from_d80)
};
2599
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the packed BCD (fbld-style) load worker, iterating
 * all four rounding-control modes per random input.
 */
static void FpuLdD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuLdD80[iFn].pszName, "FPU_D80_IN_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests; iTest++)
        {
            State.FCW = RandFcw();
            State.FSW = RandFsw();
            RTPBCD80U InVal = RandD80Src(iTest);

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                State.FCW = (State.FCW & ~X86_FCW_RC_MASK) | (iRounding << X86_FCW_RC_SHIFT);
                g_aFpuLdD80[iFn].pfn(&State, &Res, &InVal);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u */\n",
                             State.FCW, State.FSW, Res.FSW, GenFormatR80(&Res.r80Result), GenFormatD80(&InVal),
                             iTest, iRounding);
            }
        }
        GenerateArrayEnd(pOut, g_aFpuLdD80[iFn].pszName);
    }
}
#endif
2628
/**
 * Tests the BCD load worker(s) against the pre-generated data, checking both
 * the resulting FSW and the 80-bit result for an exact (bit-identical) match.
 */
static void FpuLdD80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuLdD80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuLdD80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuLdD80[iFn].pcTests;
        FPU_D80_IN_TEST_T const * const paTests = g_aFpuLdD80[iFn].paTests;
        PFNIEMAIMPLFPULDR80FROMD80 pfn = g_aFpuLdD80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuLdD80[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTPBCD80U const InVal = paTests[iTest].InVal;
                State.FCW = paTests[iTest].fFcw;
                State.FSW = paTests[iTest].fFswIn;
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                pfn(&State, &Res, &InVal);
                if (   Res.FSW != paTests[iTest].fFswOut
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult))
                    /* " - val" marks a value mismatch (as opposed to FSW-only). */
                    RTTestFailed(g_hTest, "#%03u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x    %s\n"
                                          "%s expected %#06x    %s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatD80(&paTests[iTest].InVal),
                                 iVar ? "  " : "", Res.FSW, FormatR80(&Res.r80Result),
                                 iVar ? "  " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].rdResult),
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].rdResult) ? " - val" : "",
                                 FormatFcw(paTests[iTest].fFcw) );
            }
            /* Second variation (if any) uses the assembly/native worker. */
            pfn = g_aFpuLdD80[iFn].pfnNative;
        }
    }
}
2669
2670
/*
 * Store floating point values to memory.
 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Hand-picked r80 inputs exercising r32 rounding/carry corner cases. */
static const RTFLOAT80U g_aFpuStR32Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(1, 0xffffff8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(0, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(1, 0xfffffe8000000000, RTFLOAT80U_EXP_BIAS), /* near rounding */
};
/** Hand-picked r80 inputs exercising r64 rounding/carry corner cases. */
static const RTFLOAT80U g_aFpuStR64Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffc00, RTFLOAT80U_EXP_BIAS), /* near rounding with carry */
    RTFLOAT80U_INIT_C(0, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffff400, RTFLOAT80U_EXP_BIAS), /* near rounding */
    RTFLOAT80U_INIT_C(0, 0xd0b9e6fdda887400, 687 + RTFLOAT80U_EXP_BIAS), /* random example for this */
};
/** r80->r80 store needs no real specials; single placeholder entry. */
static const RTFLOAT80U g_aFpuStR80Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* placeholder */
};
/**
 * Emits a generator function FpuStR<a_cBits>Generate() producing test data for
 * an r80 -> r<a_cBits> store worker.  Random inputs plus the matching
 * g_aFpuStR<a_cBits>Specials[] are run through all 4 rounding modes and 8
 * OM/UM/PM mask combinations (PC is left as-is since it does not influence
 * stores).  @a a_rdType is the destination float type, @a a_TestType the
 * generated table entry type.
 */
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
static void FpuStR ## a_cBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStR ## a_cBits ## Specials); \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
        { \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits) \
                                   : g_aFpuStR ## a_cBits ## Specials[iTest - cTests]; \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t uFswOut = 0; \
                    a_rdType OutVal; \
                    RT_ZERO(OutVal); \
                    memset(&OutVal, 0xfe, sizeof(OutVal)); \
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                              | (iRounding  << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    a_aSubTests[iFn].pfn(&State, &uFswOut, &OutVal, &InVal); \
                    RTStrmPrintf(pOut, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
                                 GenFormatR ## a_cBits(&OutVal), iTest, iRounding, iMask); \
                } \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType)
#endif
2737
/**
 * Emits the whole sub-test bundle for an r80 -> r<a_cBits> store worker:
 * worker typedefs, the @a a_aSubTests table, the (optional) data generator,
 * and a FpuStR<a_cBits>Test() that replays the pre-generated data and checks
 * the output FSW and value for bit-identical matches.
 */
#define TEST_FPU_STORE(a_cBits, a_rdType, a_SubTestType, a_aSubTests, a_TestType) \
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOR ## a_cBits,(PCX86FXSTATE, uint16_t *, \
                                                                   PRTFLOAT ## a_cBits ## U, PCRTFLOAT80U)); \
typedef FNIEMAIMPLFPUSTR80TOR ## a_cBits *PFNIEMAIMPLFPUSTR80TOR ## a_cBits; \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPUSTR80TOR ## a_cBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT(fst_r80_to_r,a_cBits)) \
}; \
GEN_FPU_STORE(a_cBits, a_rdType, a_aSubTests, a_TestType) \
\
static void FpuStR ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const                    cTests  = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const          paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPUSTR80TOR ## a_cBits pfn     = a_aSubTests[iFn].pfn; \
        uint32_t const                    cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal = paTests[iTest].InVal; \
                uint16_t         uFswOut = 0; \
                a_rdType         OutVal; \
                RT_ZERO(OutVal); \
                memset(&OutVal, 0xfe, sizeof(OutVal)); \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &OutVal, &InVal); \
                if (   uFswOut != paTests[iTest].fFswOut \
                    || !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x    " a_szFmt "\n" \
                                          "%s expected %#06x    " a_szFmt "%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? "  " : "", uFswOut, FormatR ## a_cBits(&OutVal), \
                                 iVar ? "  " : "", paTests[iTest].fFswOut, FormatR ## a_cBits(&paTests[iTest].OutVal), \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 !RTFLOAT ## a_cBits ## U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
2792
/* Instantiate the float store sub-tests for all three destination widths. */
TEST_FPU_STORE(80, RTFLOAT80U, FPU_ST_R80_T, g_aFpuStR80, FPU_ST_R80_TEST_T)
TEST_FPU_STORE(64, RTFLOAT64U, FPU_ST_R64_T, g_aFpuStR64, FPU_ST_R64_TEST_T)
TEST_FPU_STORE(32, RTFLOAT32U, FPU_ST_R32_T, g_aFpuStR32, FPU_ST_R32_TEST_T)
2796
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits test data for all three float-store widths to @a pOut. */
static void FpuStMemGenerate(PRTSTREAM pOut, uint32_t cTests)
{
    FpuStR80Generate(pOut, cTests);
    FpuStR64Generate(pOut, cTests);
    FpuStR32Generate(pOut, cTests);
}
#endif
2805
/** Runs the float-store tests for all three widths, widest first. */
static void FpuStMemTest(void)
{
    FpuStR80Test();
    FpuStR64Test();
    FpuStR32Test();
}
2812
2813
/*
 * Store integer values to memory or register.
 */
TYPEDEF_SUBTEST_TYPE(FPU_ST_I16_T, FPU_ST_I16_TEST_T, PFNIEMAIMPLFPUSTR80TOI16);
TYPEDEF_SUBTEST_TYPE(FPU_ST_I32_T, FPU_ST_I32_TEST_T, PFNIEMAIMPLFPUSTR80TOI32);
TYPEDEF_SUBTEST_TYPE(FPU_ST_I64_T, FPU_ST_I64_TEST_T, PFNIEMAIMPLFPUSTR80TOI64);

/* fistt (truncating store) has CPU-vendor specific behaviour for the 16-bit
   case, hence the separate AMD and Intel entries. */
static const FPU_ST_I16_T g_aFpuStI16[] =
{
    ENTRY(fist_r80_to_i16),
    ENTRY_AMD(  fistt_r80_to_i16, 0),
    ENTRY_INTEL(fistt_r80_to_i16, 0),
};
static const FPU_ST_I32_T g_aFpuStI32[] =
{
    ENTRY(fist_r80_to_i32),
    ENTRY(fistt_r80_to_i32),
};
static const FPU_ST_I64_T g_aFpuStI64[] =
{
    ENTRY(fist_r80_to_i64),
    ENTRY(fistt_r80_to_i64),
};
2837
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Hand-picked r80 inputs probing the i16 store around the 2^13..2^32
 *  exponent range (overflow, min/max, rounding-bit patterns). */
static const RTFLOAT80U g_aFpuStI16Specials[] = /* 16-bit variant borrows properties from the 32-bit one, thus all this stuff. */
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 13 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 13 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000080000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000100000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000200000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000400000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8001000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(0, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffff800000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xffff000000000000, 14 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(1, 0xfffe000000000000, 14 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 15 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 17 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 28 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000ffffffffffff, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8001000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 32 + RTFLOAT80U_EXP_BIAS),
};
/** Hand-picked r80 inputs probing the i32 store around the 2^30..2^31
 *  exponent boundary (overflow, min/max). */
static const RTFLOAT80U g_aFpuStI32Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffff80000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffff00000000, 30 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffe00000000, 30 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 31 + RTFLOAT80U_EXP_BIAS),
};
/** Hand-picked r80 inputs probing the i64 store around the 2^61..2^63
 *  exponent boundary (overflow, min/max). */
static const RTFLOAT80U g_aFpuStI64Specials[] =
{
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 61 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 61 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffff0, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* overflow to min/nan */
    RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 62 + RTFLOAT80U_EXP_BIAS), /* min */
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0xfffffffffffffffe, 62 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000000, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000001, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(1, 0x8000000000000002, 63 + RTFLOAT80U_EXP_BIAS),
    RTFLOAT80U_INIT_C(0, 0xfffffffffffffff0, 63 + RTFLOAT80U_EXP_BIAS),
};

/**
 * Emits a generator function FpuStI<a_cBits>Generate() producing test data for
 * an r80 -> i<a_cBits> store worker.  Workers whose EFLAGS behaviour differs
 * per CPU vendor are written to @a pOutCpu (and only when the host matches
 * that flavour); vendor-neutral data goes to @a pOut.  Inputs are swept over
 * all 4 rounding modes and 8 OM/UM/PM mask combinations.
 */
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
static void FpuStI ## a_cBits ## Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits const pfn = a_aSubTests[iFn].pfnNative \
                                                    ? a_aSubTests[iFn].pfnNative : a_aSubTests[iFn].pfn; \
        PRTSTREAM pOutFn = pOut; \
        if (a_aSubTests[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE) \
        { \
            if (a_aSubTests[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour) \
                continue; \
            pOutFn = pOutCpu; \
        } \
        \
        GenerateArrayStart(pOutFn, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t const cTotalTests = cTests + RT_ELEMENTS(g_aFpuStI ## a_cBits ## Specials); \
        for (uint32_t iTest = 0; iTest < cTotalTests; iTest++) \
        { \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, a_cBits, true) \
                                   : g_aFpuStI ## a_cBits ## Specials[iTest - cTests]; \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                /* PC doesn't influence these, so leave as is. */ \
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT); \
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/) \
                { \
                    uint16_t uFswOut = 0; \
                    a_iType  iOutVal = ~(a_iType)2; \
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM)) \
                              | (iRounding  << X86_FCW_RC_SHIFT); \
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/ \
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT; \
                    pfn(&State, &uFswOut, &iOutVal, &InVal); \
                    RTStrmPrintf(pOutFn, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n", \
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal), \
                                 GenFormatI ## a_cBits(iOutVal), iTest, iRounding, iMask); \
                } \
            } \
        } \
        GenerateArrayEnd(pOutFn, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType)
#endif
2980
/**
 * Emits the (optional) generator plus a FpuStI<a_cBits>Test() that replays the
 * pre-generated data for an r80 -> i<a_cBits> store worker, checking the
 * output FSW and the integer value (formatted with @a a_szFmt on failure).
 */
#define TEST_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_SubTestType, a_aSubTests, a_TestType) \
GEN_FPU_STORE_INT(a_cBits, a_iType, a_szFmt, a_aSubTests, a_TestType) \
\
static void FpuStI ## a_cBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const                    cTests  = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const          paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPUSTR80TOI ## a_cBits pfn     = a_aSubTests[iFn].pfn; \
        uint32_t const                    cVars   = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal = paTests[iTest].InVal; \
                uint16_t         uFswOut = 0; \
                a_iType          iOutVal = ~(a_iType)2; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &uFswOut, &iOutVal, &InVal); \
                if (   uFswOut != paTests[iTest].fFswOut \
                    || iOutVal != paTests[iTest].iOutVal) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n" \
                                          "%s -> fsw=%#06x    " a_szFmt "\n" \
                                          "%s expected %#06x    " a_szFmt "%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal), \
                                 iVar ? "  " : "", uFswOut, iOutVal, \
                                 iVar ? "  " : "", paTests[iTest].fFswOut, paTests[iTest].iOutVal, \
                                 FswDiff(uFswOut, paTests[iTest].fFswOut), \
                                 iOutVal != paTests[iTest].iOutVal ? " - val" : "", FormatFcw(paTests[iTest].fFcw) ); \
            } \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}
3023
/* Note: fistt_r80_to_i16 diffs for AMD, of course :-) - see the ENTRY_AMD /
   ENTRY_INTEL pair in g_aFpuStI16 above. */

TEST_FPU_STORE_INT(64, int64_t, "%RI64", FPU_ST_I64_T, g_aFpuStI64, FPU_ST_I64_TEST_T)
TEST_FPU_STORE_INT(32, int32_t, "%RI32", FPU_ST_I32_T, g_aFpuStI32, FPU_ST_I32_TEST_T)
TEST_FPU_STORE_INT(16, int16_t, "%RI16", FPU_ST_I16_T, g_aFpuStI16, FPU_ST_I16_TEST_T)
3029
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/** Emits test data for all three integer-store widths; vendor-specific
 *  workers go to @a pOutCpu, the rest to @a pOut. */
static void FpuStIntGenerate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    FpuStI64Generate(pOut, pOutCpu, cTests);
    FpuStI32Generate(pOut, pOutCpu, cTests);
    FpuStI16Generate(pOut, pOutCpu, cTests);
}
#endif
3038
/** Runs the integer-store (fist/fistt) tests for all three widths. */
static void FpuStIntTest(void)
{
    FpuStI64Test();
    FpuStI32Test();
    FpuStI16Test();
}
3045
3046
/*
 * Store as packed BCD value (memory).
 */
/** Worker signature: stores an 80-bit FPU value as packed BCD (fbstp). */
typedef IEM_DECL_IMPL_TYPE(void, FNIEMAIMPLFPUSTR80TOD80,(PCX86FXSTATE, uint16_t *, PRTPBCD80U, PCRTFLOAT80U));
typedef FNIEMAIMPLFPUSTR80TOD80 *PFNIEMAIMPLFPUSTR80TOD80;
TYPEDEF_SUBTEST_TYPE(FPU_ST_D80_T, FPU_ST_D80_TEST_T, PFNIEMAIMPLFPUSTR80TOD80);

/** Sub-test table for the BCD store worker. */
static const FPU_ST_D80_T g_aFpuStD80[] =
{
    ENTRY(fst_r80_to_d80),
};
3058
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Emits test data for the BCD store worker(s) to @a pOut.
 *
 * The specials bracket the packed-BCD representable limit (+/-1e18 region,
 * mantissa 0xde0b6b3a763ffff0 at exponent +59) and rounding-bit patterns just
 * inside and outside it.  Each input is swept over all 4 rounding modes and
 * 8 OM/UM/PM mask combinations.
 */
static void FpuStD80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 below max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffe0, RTFLOAT80U_EXP_BIAS + 59), /* 1 above min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff0, RTFLOAT80U_EXP_BIAS + 59), /* exact min */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* max & all rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763fffff, RTFLOAT80U_EXP_BIAS + 59), /* min & all rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* max & some rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff8, RTFLOAT80U_EXP_BIAS + 59), /* min & some rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* max & some other rounded off bits set */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a763ffff1, RTFLOAT80U_EXP_BIAS + 59), /* min & some other rounded off bits set */
        RTFLOAT80U_INIT_C(0, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 above max */
        RTFLOAT80U_INIT_C(1, 0xde0b6b3a76400000, RTFLOAT80U_EXP_BIAS + 59), /* 1 below min */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuStD80[iFn].pszName, "FPU_ST_D80_TEST_T");
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest, 59, true) : s_aSpecials[iTest - cTests];

            for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
            {
                /* PC doesn't influence these, so leave as is. */
                AssertCompile(X86_FCW_OM_BIT + 1 == X86_FCW_UM_BIT && X86_FCW_UM_BIT + 1 == X86_FCW_PM_BIT);
                for (uint16_t iMask = 0; iMask < 16; iMask += 2 /*1*/)
                {
                    uint16_t uFswOut = 0;
                    RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
                    State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_OM | X86_FCW_UM | X86_FCW_PM))
                              | (iRounding  << X86_FCW_RC_SHIFT);
                    /*if (iMask & 1) State.FCW ^= X86_FCW_MASK_ALL;*/
                    State.FCW |= (iMask >> 1) << X86_FCW_OM_BIT;
                    g_aFpuStD80[iFn].pfn(&State, &uFswOut, &OutVal, &InVal);
                    RTStrmPrintf(pOut, "    { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u */\n",
                                 State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal),
                                 GenFormatD80(&OutVal), iTest, iRounding, iMask);
                }
            }
        }
        GenerateArrayEnd(pOut, g_aFpuStD80[iFn].pszName);
    }
}
#endif
3112
3113
3114static void FpuStD80Test(void)
3115{
3116 X86FXSTATE State;
3117 RT_ZERO(State);
3118 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuStD80); iFn++)
3119 {
3120 if (!SubTestAndCheckIfEnabled(g_aFpuStD80[iFn].pszName))
3121 continue;
3122
3123 uint32_t const cTests = *g_aFpuStD80[iFn].pcTests;
3124 FPU_ST_D80_TEST_T const * const paTests = g_aFpuStD80[iFn].paTests;
3125 PFNIEMAIMPLFPUSTR80TOD80 pfn = g_aFpuStD80[iFn].pfn;
3126 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuStD80[iFn]);
3127 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3128 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3129 {
3130 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3131 {
3132 RTFLOAT80U const InVal = paTests[iTest].InVal;
3133 uint16_t uFswOut = 0;
3134 RTPBCD80U OutVal = RTPBCD80U_INIT_ZERO(0);
3135 State.FCW = paTests[iTest].fFcw;
3136 State.FSW = paTests[iTest].fFswIn;
3137 pfn(&State, &uFswOut, &OutVal, &InVal);
3138 if ( uFswOut != paTests[iTest].fFswOut
3139 || !RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal))
3140 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
3141 "%s -> fsw=%#06x %s\n"
3142 "%s expected %#06x %s%s%s (%s)\n",
3143 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3144 FormatR80(&paTests[iTest].InVal),
3145 iVar ? " " : "", uFswOut, FormatD80(&OutVal),
3146 iVar ? " " : "", paTests[iTest].fFswOut, FormatD80(&paTests[iTest].OutVal),
3147 FswDiff(uFswOut, paTests[iTest].fFswOut),
3148 RTPBCD80U_ARE_IDENTICAL(&OutVal, &paTests[iTest].OutVal) ? " - val" : "",
3149 FormatFcw(paTests[iTest].fFcw) );
3150 }
3151 pfn = g_aFpuStD80[iFn].pfnNative;
3152 }
3153 }
3154}
3155
3156
3157
/*********************************************************************************************************************************
*                                                 x87 FPU Binary Operations                                                      *
*********************************************************************************************************************************/

/*
 * Binary FPU operations on two 80-bit floating point values.
 */
TYPEDEF_SUBTEST_TYPE(FPU_BINARY_R80_T, FPU_BINARY_R80_TEST_T, PFNIEMAIMPLFPUR80);
/** Per-entry hint (uExtra) telling the generator the worker is fprem-like. */
enum { kFpuBinaryHint_fprem = 1, };

/** Sub-test table for r80-by-r80 binary workers; transcendental ones get
 *  separate AMD/Intel entries as C1 and rounding differ between vendors. */
static const FPU_BINARY_R80_T g_aFpuBinaryR80[] =
{
    ENTRY(fadd_r80_by_r80),
    ENTRY(fsub_r80_by_r80),
    ENTRY(fsubr_r80_by_r80),
    ENTRY(fmul_r80_by_r80),
    ENTRY(fdiv_r80_by_r80),
    ENTRY(fdivr_r80_by_r80),
    ENTRY_EX(fprem_r80_by_r80,  kFpuBinaryHint_fprem),
    ENTRY_EX(fprem1_r80_by_r80, kFpuBinaryHint_fprem),
    ENTRY(fscale_r80_by_r80),
    ENTRY_AMD(  fpatan_r80_by_r80,  0), // C1 and rounding differs on AMD
    ENTRY_INTEL(fpatan_r80_by_r80,  0), // C1 and rounding differs on AMD
    ENTRY_AMD(  fyl2x_r80_by_r80,   0), // C1 and rounding differs on AMD
    ENTRY_INTEL(fyl2x_r80_by_r80,   0), // C1 and rounding differs on AMD
    ENTRY_AMD(  fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
    ENTRY_INTEL(fyl2xp1_r80_by_r80, 0), // C1 and rounding differs on AMD
};
3186
3187#ifdef TSTIEMAIMPL_WITH_GENERATOR
3188static void FpuBinaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
3189{
3190 cTests = RT_MAX(192, cTests); /* there are 144 standard input variations */
3191
3192 static struct { RTFLOAT80U Val1, Val2; } const s_aSpecials[] =
3193 {
3194 { RTFLOAT80U_INIT_C(1, 0xdd762f07f2e80eef, 30142), /* causes weird overflows with DOWN and NEAR rounding. */
3195 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3196 { RTFLOAT80U_INIT_ZERO(0), /* causes weird overflows with UP and NEAR rounding when precision is lower than 64. */
3197 RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3198 { RTFLOAT80U_INIT_ZERO(0), /* minus variant */
3199 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1) },
3200 { RTFLOAT80U_INIT_C(0, 0xcef238bb9a0afd86, 577 + RTFLOAT80U_EXP_BIAS), /* for fprem and fprem1, max sequence length */
3201 RTFLOAT80U_INIT_C(0, 0xf11684ec0beaad94, 1 + RTFLOAT80U_EXP_BIAS) },
3202 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, -13396 + RTFLOAT80U_EXP_BIAS), /* for fdiv. We missed PE. */
3203 RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, 16383 + RTFLOAT80U_EXP_BIAS) },
3204 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
3205 RTFLOAT80U_INIT_C(0, 0xe000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3206 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS), /* for fprem/fprem1 */
3207 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3208 /* fscale: This may seriously increase the exponent, and it turns out overflow and underflow behaviour changes
3209 once RTFLOAT80U_EXP_BIAS_ADJUST is exceeded. */
3210 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1 */
3211 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3212 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^64 */
3213 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 6 + RTFLOAT80U_EXP_BIAS) },
3214 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1024 */
3215 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 10 + RTFLOAT80U_EXP_BIAS) },
3216 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^4096 */
3217 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 12 + RTFLOAT80U_EXP_BIAS) },
3218 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16384 */
3219 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 49150 */
3220 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3221 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
3222 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
3223 RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
3224 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^32768 - result is within range on 10980XE */
3225 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 15 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 65534 */
3226 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^65536 */
3227 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 16 + RTFLOAT80U_EXP_BIAS) },
3228 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^1048576 */
3229 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 20 + RTFLOAT80U_EXP_BIAS) },
3230 { RTFLOAT80U_INIT_C(0, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^16777216 */
3231 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 24 + RTFLOAT80U_EXP_BIAS) },
3232 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3233 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24575 - within 10980XE range */
3234 { RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1), /* for fscale: max * 2^-24577 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3235 RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -24576 - outside 10980XE range, behaviour changes! */
3236 /* fscale: Negative variants for the essentials of the above. */
3237 { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3238 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57342 - within 10980XE range */
3239 { RTFLOAT80U_INIT_C(1, 0xffffffffffffffff, RTFLOAT80U_EXP_MAX - 1), /* for fscale: max * 2^24577 */
3240 RTFLOAT80U_INIT_C(0, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: 57343 - outside 10980XE range, behaviour changes! */
3241 { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: min * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3242 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57342 - within 10980XE range */
3243 { RTFLOAT80U_INIT_C(1, 0x8000000000000000, 1), /* for fscale: max * 2^-24576 (RTFLOAT80U_EXP_BIAS_ADJUST) */
3244 RTFLOAT80U_INIT_C(1, 0xc002000000000000, 14 + RTFLOAT80U_EXP_BIAS) }, /* resulting exponent: -57343 - outside 10980XE range, behaviour changes! */
3245 /* fscale: Some fun with denormals and pseudo-denormals. */
3246 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^-4 */
3247 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3248 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), /* for fscale: max * 2^+1 */
3249 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3250 { RTFLOAT80U_INIT_C(0, 0x0800000000000000, 0), RTFLOAT80U_INIT_ZERO(0) }, /* for fscale: max * 2^+0 */
3251 { RTFLOAT80U_INIT_C(0, 0x0000000000000008, 0), /* for fscale: max * 2^-4 => underflow */
3252 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3253 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
3254 { RTFLOAT80U_INIT_C(1, 0x8005000300020001, 0), RTFLOAT80U_INIT_ZERO(0) }, /* pseudo-normal number * 2^+0. */
3255 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^-4 */
3256 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 2 + RTFLOAT80U_EXP_BIAS) },
3257 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+0 */
3258 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0 + RTFLOAT80U_EXP_BIAS) },
3259 { RTFLOAT80U_INIT_C(0, 0x8005000300020001, 0), /* pseudo-normal number * 2^+1 */
3260 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 1 + RTFLOAT80U_EXP_BIAS) },
3261 };
3262
3263 X86FXSTATE State;
3264 RT_ZERO(State);
3265 uint32_t cMinNormalPairs = (cTests - 144) / 4;
3266 uint32_t cMinTargetRangeInputs = cMinNormalPairs / 2;
3267 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
3268 {
3269 PFNIEMAIMPLFPUR80 const pfn = g_aFpuBinaryR80[iFn].pfnNative ? g_aFpuBinaryR80[iFn].pfnNative : g_aFpuBinaryR80[iFn].pfn;
3270 PRTSTREAM pOutFn = pOut;
3271 if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
3272 {
3273 if (g_aFpuBinaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
3274 continue;
3275 pOutFn = pOutCpu;
3276 }
3277
3278 GenerateArrayStart(pOutFn, g_aFpuBinaryR80[iFn].pszName, "FPU_BINARY_R80_TEST_T");
3279 uint32_t iTestOutput = 0;
3280 uint32_t cNormalInputPairs = 0;
3281 uint32_t cTargetRangeInputs = 0;
3282 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
3283 {
3284 RTFLOAT80U InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aSpecials[iTest - cTests].Val1;
3285 RTFLOAT80U InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aSpecials[iTest - cTests].Val2;
3286 bool fTargetRange = false;
3287 if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
3288 {
3289 cNormalInputPairs++;
3290 if ( g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem
3291 && (uint32_t)InVal1.s.uExponent - (uint32_t)InVal2.s.uExponent - (uint32_t)64 <= (uint32_t)512)
3292 cTargetRangeInputs += fTargetRange = true;
3293 else if (cTargetRangeInputs < cMinTargetRangeInputs && iTest < cTests)
3294 if (g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
3295 { /* The aim is two values with an exponent difference between 64 and 640 so we can do the whole sequence. */
3296 InVal2.s.uExponent = RTRandU32Ex(1, RTFLOAT80U_EXP_MAX - 66);
3297 InVal1.s.uExponent = RTRandU32Ex(InVal2.s.uExponent + 64, RT_MIN(InVal2.s.uExponent + 512, RTFLOAT80U_EXP_MAX - 1));
3298 cTargetRangeInputs += fTargetRange = true;
3299 }
3300 }
3301 else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
3302 {
3303 iTest -= 1;
3304 continue;
3305 }
3306
3307 uint16_t const fFcwExtra = 0;
3308 uint16_t const fFcw = RandFcw();
3309 State.FSW = RandFsw();
3310
3311 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3312 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
3313 {
3314 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
3315 | (iRounding << X86_FCW_RC_SHIFT)
3316 | (iPrecision << X86_FCW_PC_SHIFT)
3317 | X86_FCW_MASK_ALL;
3318 IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3319 pfn(&State, &ResM, &InVal1, &InVal2);
3320 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
3321 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3322 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
3323
3324 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
3325 IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3326 pfn(&State, &ResU, &InVal1, &InVal2);
3327 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
3328 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3329 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
3330
3331 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
3332 if (fXcpt)
3333 {
3334 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
3335 IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3336 pfn(&State, &Res1, &InVal1, &InVal2);
3337 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
3338 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3339 GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
3340 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
3341 {
3342 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
3343 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
3344 IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3345 pfn(&State, &Res2, &InVal1, &InVal2);
3346 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
3347 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3348 GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
3349 }
3350 if (!RT_IS_POWER_OF_TWO(fXcpt))
3351 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
3352 if (fUnmasked & fXcpt)
3353 {
3354 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
3355 IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3356 pfn(&State, &Res3, &InVal1, &InVal2);
3357 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
3358 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal1), GenFormatR80(&InVal2),
3359 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
3360 }
3361 }
3362
3363 /* If the values are in range and caused no exceptions, do the whole series of
3364 partial reminders till we get the non-partial one or run into an exception. */
3365 if (fTargetRange && fXcpt == 0 && g_aFpuBinaryR80[iFn].uExtra == kFpuBinaryHint_fprem)
3366 {
3367 IEMFPURESULT ResPrev = ResM;
3368 for (unsigned i = 0; i < 32 && (ResPrev.FSW & (X86_FSW_C2 | X86_FSW_XCPT_MASK)) == X86_FSW_C2; i++)
3369 {
3370 State.FCW = State.FCW | X86_FCW_MASK_ALL;
3371 State.FSW = ResPrev.FSW;
3372 IEMFPURESULT ResSeq = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3373 pfn(&State, &ResSeq, &ResPrev.r80Result, &InVal2);
3374 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/seq%u = #%u */\n",
3375 State.FCW | fFcwExtra, State.FSW, ResSeq.FSW, GenFormatR80(&ResPrev.r80Result),
3376 GenFormatR80(&InVal2), GenFormatR80(&ResSeq.r80Result),
3377 iTest, iRounding, iPrecision, i + 1, iTestOutput++);
3378 ResPrev = ResSeq;
3379 }
3380 }
3381 }
3382 }
3383 GenerateArrayEnd(pOutFn, g_aFpuBinaryR80[iFn].pszName);
3384 }
3385}
3386#endif
3387
3388
3389static void FpuBinaryR80Test(void)
3390{
3391 X86FXSTATE State;
3392 RT_ZERO(State);
3393 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryR80); iFn++)
3394 {
3395 if (!SubTestAndCheckIfEnabled(g_aFpuBinaryR80[iFn].pszName))
3396 continue;
3397
3398 uint32_t const cTests = *g_aFpuBinaryR80[iFn].pcTests;
3399 FPU_BINARY_R80_TEST_T const * const paTests = g_aFpuBinaryR80[iFn].paTests;
3400 PFNIEMAIMPLFPUR80 pfn = g_aFpuBinaryR80[iFn].pfn;
3401 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryR80[iFn]);
3402 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3403 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3404 {
3405 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3406 {
3407 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3408 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3409 IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3410 State.FCW = paTests[iTest].fFcw;
3411 State.FSW = paTests[iTest].fFswIn;
3412 pfn(&State, &Res, &InVal1, &InVal2);
3413 if ( Res.FSW != paTests[iTest].fFswOut
3414 || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal))
3415 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
3416 "%s -> fsw=%#06x %s\n"
3417 "%s expected %#06x %s%s%s (%s)\n",
3418 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3419 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
3420 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
3421 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
3422 FswDiff(Res.FSW, paTests[iTest].fFswOut),
3423 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
3424 FormatFcw(paTests[iTest].fFcw) );
3425 }
3426 pfn = g_aFpuBinaryR80[iFn].pfnNative;
3427 }
3428 }
3429}
3430
3431
3432/*
3433 * Binary FPU operations on one 80-bit floating point value and one 64-bit or 32-bit one.
3434 */
3435#define int64_t_IS_NORMAL(a) 1
3436#define int32_t_IS_NORMAL(a) 1
3437#define int16_t_IS_NORMAL(a) 1
3438
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Hand-picked "special" input pairs appended after the random inputs when
   generating test data; currently just one arbitrary pair per operand type. */
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryR64Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryR32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryI32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
};
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryI16Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
};

/**
 * Emits FpuBinary<a_UpBits>Generate(), the test data generator for the
 * r80-by-<a_Type2> binary workers in a_aSubTests.
 *
 * Each input pair is run through all four rounding modes, all four precision
 * settings, and both the all-masked and all-unmasked exception configurations.
 * A minimum number of normal input pairs is enforced by re-rolling slots near
 * the end of the random range.
 *
 * Note! The RandXxx() calls are made in a fixed order per iTest so the
 *       generated data is reproducible for a given seed.
 */
# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static void FpuBinary ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
    \
    X86FXSTATE State; \
    RT_ZERO(State); \
    uint32_t cMinNormalPairs = (cTests - 144) / 4; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinary ## a_UpBits ## Specials); iTest += 1) \
        { \
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
                                    : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
                                 : s_aFpuBinary ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
            { \
                /* Too few normal pairs and we are running out of random slots: re-roll. */ \
                iTest -= 1; \
                continue; \
            } \
            \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            \
            for (uint16_t iRounding = 0; iRounding < 4; iRounding++) \
            { \
                for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++) \
                { \
                    /* iMask steps 0 -> X86_FCW_MASK_ALL: one unmasked and one fully masked run. */ \
                    for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
                    { \
                        State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL)) \
                                  | (iRounding << X86_FCW_RC_SHIFT) \
                                  | (iPrecision << X86_FCW_PC_SHIFT) \
                                  | iMask; \
                        IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                        a_aSubTests[iFn].pfn(&State, &Res, &InVal1, &InVal2); \
                        RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%c */\n", \
                                     State.FCW, State.FSW, Res.FSW, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
                                     GenFormatR80(&Res.r80Result), iTest, iRounding, iPrecision, iMask ? 'c' : 'u'); \
                    } \
                } \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
#endif
3513
/**
 * Emits the subtest table, the generator (when TSTIEMAIMPL_WITH_GENERATOR is
 * defined) and the FpuBinary<a_UpBits>Test() runner for the r80-by-<a_Type2>
 * binary operations: f(i)add, f(i)mul, f(i)sub, f(i)subr, f(i)div, f(i)divr.
 *
 * The runner replays each pre-generated record and compares the worker's FSW
 * and 80-bit result bit-for-bit against the recorded expectation.
 */
#define TEST_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_I, a_Type2, a_SubTestType, a_aSubTests, a_TestType) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    ENTRY(RT_CONCAT4(f, a_I, add_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, mul_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, sub_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, subr_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, div_r80_by_, a_LoBits)), \
    ENTRY(RT_CONCAT4(f, a_I, divr_r80_by_, a_LoBits)), \
}; \
\
GEN_FPU_BINARY_SMALL(a_fIntType, a_cBits, a_LoBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinary ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPU ## a_UpBits pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
                a_Type2 const InVal2 = paTests[iTest].InVal2; \
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 }; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &Res, &InVal1, &InVal2); \
                if (   Res.FSW != paTests[iTest].fFswOut \
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal)) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                          "%s -> fsw=%#06x %s\n" \
                                          "%s expected %#06x %s%s%s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result), \
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal), \
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), \
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "", \
                                 FormatFcw(paTests[iTest].fFcw) ); \
            } \
            /* Second variation (when present) exercises the native worker. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}

TEST_FPU_BINARY_SMALL(0, 64, r64, R64, RT_NOTHING, RTFLOAT64U, FPU_BINARY_R64_T, g_aFpuBinaryR64, FPU_BINARY_R64_TEST_T)
TEST_FPU_BINARY_SMALL(0, 32, r32, R32, RT_NOTHING, RTFLOAT32U, FPU_BINARY_R32_T, g_aFpuBinaryR32, FPU_BINARY_R32_TEST_T)
TEST_FPU_BINARY_SMALL(1, 32, i32, I32, i, int32_t, FPU_BINARY_I32_T, g_aFpuBinaryI32, FPU_BINARY_I32_TEST_T)
TEST_FPU_BINARY_SMALL(1, 16, i16, I16, i, int16_t, FPU_BINARY_I16_T, g_aFpuBinaryI16, FPU_BINARY_I16_TEST_T)
3574
3575
3576/*
3577 * Binary operations on 80-, 64- and 32-bit floating point only affecting FSW.
3578 */
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Hand-picked "special" inputs appended after the random inputs when
   generating data for the FSW-only binary operations (fcom/fucom/ficom). */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryFswR80Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT64U Val2; } const s_aFpuBinaryFswR64Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT64U_INIT_C(0, 0xfeeeeddddcccc, RTFLOAT64U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; RTFLOAT32U Val2; } const s_aFpuBinaryFswR32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT32U_INIT_C(0, 0x7fffee, RTFLOAT32U_EXP_BIAS) }, /* whatever */
};
static struct { RTFLOAT80U Val1; int32_t Val2; } const s_aFpuBinaryFswI32Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT32_MAX }, /* whatever */
};
static struct { RTFLOAT80U Val1; int16_t Val2; } const s_aFpuBinaryFswI16Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), INT16_MAX }, /* whatever */
};

/**
 * Emits FpuBinaryFsw<a_UpBits>Generate(), the test data generator for the
 * binary workers in a_aSubTests that only produce an FSW value.
 *
 * Each input pair is run with all exceptions masked and all unmasked; the
 * rounding/precision controls are left random since these operations are
 * assumed not to depend on them (see comment in the loop).
 *
 * Note! The RandXxx() calls are made in a fixed order per iTest so the
 *       generated data is reproducible for a given seed.
 */
# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
static void FpuBinaryFsw ## a_UpBits ## Generate(PRTSTREAM pOut, uint32_t cTests) \
{ \
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations for r80 by r80 */ \
    \
    X86FXSTATE State; \
    RT_ZERO(State); \
    uint32_t cMinNormalPairs = (cTests - 144) / 4; \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        GenerateArrayStart(pOut, a_aSubTests[iFn].pszName, #a_TestType); \
        uint32_t cNormalInputPairs = 0; \
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryFsw ## a_UpBits ## Specials); iTest += 1) \
        { \
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest, a_cBits, a_fIntType) \
                                    : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val1; \
            a_Type2 const InVal2 = iTest < cTests ? Rand ## a_UpBits ## Src2(iTest) \
                                 : s_aFpuBinaryFsw ## a_UpBits ## Specials[iTest - cTests].Val2; \
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && a_Type2 ## _IS_NORMAL(&InVal2)) \
                cNormalInputPairs++; \
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests) \
            { \
                /* Too few normal pairs and we are running out of random slots: re-roll. */ \
                iTest -= 1; \
                continue; \
            } \
            \
            uint16_t const fFcw = RandFcw(); \
            State.FSW = RandFsw(); \
            \
            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */ \
            for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL) \
            { \
                State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask; \
                uint16_t fFswOut = 0; \
                a_aSubTests[iFn].pfn(&State, &fFswOut, &InVal1, &InVal2); \
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%c */\n", \
                             State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal1), GenFormat ## a_UpBits(&InVal2), \
                             iTest, iMask ? 'c' : 'u'); \
            } \
        } \
        GenerateArrayEnd(pOut, a_aSubTests[iFn].pszName); \
    } \
}
#else
# define GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType)
#endif
3650
/**
 * Emits the subtest table (entries via __VA_ARGS__), the generator (when
 * enabled) and the FpuBinaryFsw<a_UpBits>Test() runner for binary operations
 * that only produce an FSW value (fcom/fucom/ficom families).
 */
#define TEST_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_SubTestType, a_aSubTests, a_TestType, ...) \
TYPEDEF_SUBTEST_TYPE(a_SubTestType, a_TestType, PFNIEMAIMPLFPU ## a_UpBits ## FSW); \
\
static const a_SubTestType a_aSubTests[] = \
{ \
    __VA_ARGS__ \
}; \
\
GEN_FPU_BINARY_FSW(a_fIntType, a_cBits, a_UpBits, a_Type2, a_aSubTests, a_TestType) \
\
static void FpuBinaryFsw ## a_UpBits ## Test(void) \
{ \
    X86FXSTATE State; \
    RT_ZERO(State); \
    for (size_t iFn = 0; iFn < RT_ELEMENTS(a_aSubTests); iFn++) \
    { \
        if (!SubTestAndCheckIfEnabled(a_aSubTests[iFn].pszName)) continue; \
        \
        uint32_t const cTests = *a_aSubTests[iFn].pcTests; \
        a_TestType const * const paTests = a_aSubTests[iFn].paTests; \
        PFNIEMAIMPLFPU ## a_UpBits ## FSW pfn = a_aSubTests[iFn].pfn; \
        uint32_t const cVars = COUNT_VARIATIONS(a_aSubTests[iFn]); \
        if (!cTests) RTTestSkipped(g_hTest, "no tests"); \
        for (uint32_t iVar = 0; iVar < cVars; iVar++) \
        { \
            for (uint32_t iTest = 0; iTest < cTests; iTest++) \
            { \
                uint16_t fFswOut = 0; \
                RTFLOAT80U const InVal1 = paTests[iTest].InVal1; \
                a_Type2 const InVal2 = paTests[iTest].InVal2; \
                State.FCW = paTests[iTest].fFcw; \
                State.FSW = paTests[iTest].fFswIn; \
                pfn(&State, &fFswOut, &InVal1, &InVal2); \
                if (fFswOut != paTests[iTest].fFswOut) \
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n" \
                                          "%s -> fsw=%#06x\n" \
                                          "%s expected %#06x %s (%s)\n", \
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn, \
                                 FormatR80(&paTests[iTest].InVal1), Format ## a_UpBits(&paTests[iTest].InVal2), \
                                 iVar ? " " : "", fFswOut, \
                                 iVar ? " " : "", paTests[iTest].fFswOut, \
                                 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) ); \
            } \
            /* Second variation (when present) exercises the native worker. */ \
            pfn = a_aSubTests[iFn].pfnNative; \
        } \
    } \
}

TEST_FPU_BINARY_FSW(0, 80, R80, RTFLOAT80U, FPU_BINARY_FSW_R80_T, g_aFpuBinaryFswR80, FPU_BINARY_R80_TEST_T, ENTRY(fcom_r80_by_r80), ENTRY(fucom_r80_by_r80))
TEST_FPU_BINARY_FSW(0, 64, R64, RTFLOAT64U, FPU_BINARY_FSW_R64_T, g_aFpuBinaryFswR64, FPU_BINARY_R64_TEST_T, ENTRY(fcom_r80_by_r64))
TEST_FPU_BINARY_FSW(0, 32, R32, RTFLOAT32U, FPU_BINARY_FSW_R32_T, g_aFpuBinaryFswR32, FPU_BINARY_R32_TEST_T, ENTRY(fcom_r80_by_r32))
TEST_FPU_BINARY_FSW(1, 32, I32, int32_t, FPU_BINARY_FSW_I32_T, g_aFpuBinaryFswI32, FPU_BINARY_I32_TEST_T, ENTRY(ficom_r80_by_i32))
TEST_FPU_BINARY_FSW(1, 16, I16, int16_t, FPU_BINARY_FSW_I16_T, g_aFpuBinaryFswI16, FPU_BINARY_I16_TEST_T, ENTRY(ficom_r80_by_i16))
3704
3705
3706/*
3707 * Binary operations on 80-bit floating point that effects only EFLAGS and possibly FSW.
3708 */
TYPEDEF_SUBTEST_TYPE(FPU_BINARY_EFL_R80_T, FPU_BINARY_EFL_R80_TEST_T, PFNIEMAIMPLFPUR80EFL);

/* Worker table: r80-by-r80 comparisons returning their result in EFLAGS
   (and possibly modifying FSW). */
static const FPU_BINARY_EFL_R80_T g_aFpuBinaryEflR80[] =
{
    ENTRY(fcomi_r80_by_r80),
    ENTRY(fucomi_r80_by_r80),
};
3716
#ifdef TSTIEMAIMPL_WITH_GENERATOR
/* Hand-picked input pair appended after the random inputs when generating
   the fcomi/fucomi test data. */
static struct { RTFLOAT80U Val1, Val2; } const s_aFpuBinaryEflR80Specials[] =
{
    { RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS),
      RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS) }, /* whatever */
};

/**
 * Generates test data for the EFLAGS-returning r80-by-r80 comparison workers
 * (fcomi/fucomi) in g_aFpuBinaryEflR80.
 *
 * @param   pOut    Stream to write the test vector arrays to.
 * @param   cTests  Number of random input pairs (raised to at least 160).
 *
 * Note! The RandXxx() calls are made in a fixed order per iTest so the output
 *       is reproducible for a given seed; do not reorder them.
 */
static void FpuBinaryEflR80Generate(PRTSTREAM pOut, uint32_t cTests)
{
    cTests = RT_MAX(160, cTests); /* there are 144 standard input variations */

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormalPairs = (cTests - 144) / 4;
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
    {
        GenerateArrayStart(pOut, g_aFpuBinaryEflR80[iFn].pszName, "FPU_BINARY_EFL_R80_TEST_T");
        uint32_t cNormalInputPairs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aFpuBinaryEflR80Specials); iTest += 1)
        {
            RTFLOAT80U const InVal1 = iTest < cTests ? RandR80Src1(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val1;
            RTFLOAT80U const InVal2 = iTest < cTests ? RandR80Src2(iTest) : s_aFpuBinaryEflR80Specials[iTest - cTests].Val2;
            if (RTFLOAT80U_IS_NORMAL(&InVal1) && RTFLOAT80U_IS_NORMAL(&InVal2))
                cNormalInputPairs++;
            else if (cNormalInputPairs < cMinNormalPairs && iTest + cMinNormalPairs >= cTests && iTest < cTests)
            {
                /* Too few normal pairs and we are running out of random slots: re-roll. */
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();

            /* Guess these aren't affected by precision or rounding, so just flip the exception mask. */
            for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
            {
                State.FCW = (fFcw & ~(X86_FCW_MASK_ALL)) | iMask;
                uint16_t uFswOut = 0;
                uint32_t fEflOut = g_aFpuBinaryEflR80[iFn].pfn(&State, &uFswOut, &InVal1, &InVal2);
                RTStrmPrintf(pOut, " { %#06x, %#06x, %#06x, %s, %s, %#08x }, /* #%u/%c */\n",
                             State.FCW, State.FSW, uFswOut, GenFormatR80(&InVal1), GenFormatR80(&InVal2), fEflOut,
                             iTest, iMask ? 'c' : 'u');
            }
        }
        GenerateArrayEnd(pOut, g_aFpuBinaryEflR80[iFn].pszName);
    }
}
#endif /*TSTIEMAIMPL_WITH_GENERATOR*/
3765
3766static void FpuBinaryEflR80Test(void)
3767{
3768 X86FXSTATE State;
3769 RT_ZERO(State);
3770 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuBinaryEflR80); iFn++)
3771 {
3772 if (!SubTestAndCheckIfEnabled(g_aFpuBinaryEflR80[iFn].pszName))
3773 continue;
3774
3775 uint32_t const cTests = *g_aFpuBinaryEflR80[iFn].pcTests;
3776 FPU_BINARY_EFL_R80_TEST_T const * const paTests = g_aFpuBinaryEflR80[iFn].paTests;
3777 PFNIEMAIMPLFPUR80EFL pfn = g_aFpuBinaryEflR80[iFn].pfn;
3778 uint32_t const cVars = COUNT_VARIATIONS(g_aFpuBinaryEflR80[iFn]);
3779 if (!cTests) RTTestSkipped(g_hTest, "no tests");
3780 for (uint32_t iVar = 0; iVar < cVars; iVar++)
3781 {
3782 for (uint32_t iTest = 0; iTest < cTests; iTest++)
3783 {
3784 RTFLOAT80U const InVal1 = paTests[iTest].InVal1;
3785 RTFLOAT80U const InVal2 = paTests[iTest].InVal2;
3786 State.FCW = paTests[iTest].fFcw;
3787 State.FSW = paTests[iTest].fFswIn;
3788 uint16_t uFswOut = 0;
3789 uint32_t fEflOut = pfn(&State, &uFswOut, &InVal1, &InVal2);
3790 if ( uFswOut != paTests[iTest].fFswOut
3791 || fEflOut != paTests[iTest].fEflOut)
3792 RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in1=%s in2=%s\n"
3793 "%s -> fsw=%#06x efl=%#08x\n"
3794 "%s expected %#06x %#08x %s%s (%s)\n",
3795 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
3796 FormatR80(&paTests[iTest].InVal1), FormatR80(&paTests[iTest].InVal2),
3797 iVar ? " " : "", uFswOut, fEflOut,
3798 iVar ? " " : "", paTests[iTest].fFswOut, paTests[iTest].fEflOut,
3799 FswDiff(uFswOut, paTests[iTest].fFswOut), EFlagsDiff(fEflOut, paTests[iTest].fEflOut),
3800 FormatFcw(paTests[iTest].fFcw));
3801 }
3802 pfn = g_aFpuBinaryEflR80[iFn].pfnNative;
3803 }
3804 }
3805}
3806
3807
3808/*********************************************************************************************************************************
3809* x87 FPU Unary Operations *
3810*********************************************************************************************************************************/
3811
3812/*
3813 * Unary FPU operations on one 80-bit floating point value.
3814 *
3815 * Note! The FCW reserved bit 7 is used to indicate whether a test may produce
3816 * a rounding error or not.
3817 */
TYPEDEF_SUBTEST_TYPE(FPU_UNARY_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARY);

/* Accuracy hints stored in the uExtra member of each sub-test entry; used by
   the generator/test code to decide when rounding differences are tolerable. */
enum { kUnary_Accurate = 0, kUnary_Accurate_Trigonometry /*probably not accurate, but need impl to know*/, kUnary_Rounding_F2xm1 };
/* Unary r80 worker table; AMD/Intel split entries exist where observed
   behaviour differs between the two (see the per-entry remarks). */
static const FPU_UNARY_R80_T g_aFpuUnaryR80[] =
{
    ENTRY_EX( fabs_r80, kUnary_Accurate),
    ENTRY_EX( fchs_r80, kUnary_Accurate),
    ENTRY_AMD_EX( f2xm1_r80, 0, kUnary_Accurate), // C1 differs for -1m0x3fb263cc2c331e15^-2654 (different ln2 constant?)
    ENTRY_INTEL_EX(f2xm1_r80, 0, kUnary_Rounding_F2xm1),
    ENTRY_EX( fsqrt_r80, kUnary_Accurate),
    ENTRY_EX( frndint_r80, kUnary_Accurate),
    ENTRY_AMD_EX( fsin_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences for pseudo denormals and others (e.g. -1m0x2b1e5683cbca5725^-3485)
    ENTRY_INTEL_EX(fsin_r80, 0, kUnary_Accurate_Trigonometry),
    ENTRY_AMD_EX( fcos_r80, 0, kUnary_Accurate_Trigonometry), // value & C1 differences
    ENTRY_INTEL_EX(fcos_r80, 0, kUnary_Accurate_Trigonometry),
};
3834
3835#ifdef TSTIEMAIMPL_WITH_GENERATOR
3836
3837static bool FpuUnaryR80MayHaveRoundingError(PCRTFLOAT80U pr80Val, int enmKind)
3838{
3839 if ( enmKind == kUnary_Rounding_F2xm1
3840 && RTFLOAT80U_IS_NORMAL(pr80Val)
3841 && pr80Val->s.uExponent < RTFLOAT80U_EXP_BIAS
3842 && pr80Val->s.uExponent >= RTFLOAT80U_EXP_BIAS - 69)
3843 return true;
3844 return false;
3845}
3846
3847static void FpuUnaryR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
3848{
3849 static RTFLOAT80U const s_aSpecials[] =
3850 {
3851 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* 0.5 (for f2xm1) */
3852 RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS - 1), /* -0.5 (for f2xm1) */
3853 RTFLOAT80U_INIT_C(0, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* 1.0 (for f2xm1) */
3854 RTFLOAT80U_INIT_C(1, 0x8000000000000000, RTFLOAT80U_EXP_BIAS), /* -1.0 (for f2xm1) */
3855 RTFLOAT80U_INIT_C(0, 0x8000000000000000, 0), /* +1.0^-16382 */
3856 RTFLOAT80U_INIT_C(1, 0x8000000000000000, 0), /* -1.0^-16382 */
3857 RTFLOAT80U_INIT_C(0, 0xc000000000000000, 0), /* +1.1^-16382 */
3858 RTFLOAT80U_INIT_C(1, 0xc000000000000000, 0), /* -1.1^-16382 */
3859 RTFLOAT80U_INIT_C(0, 0xc000100000000000, 0), /* +1.1xxx1^-16382 */
3860 RTFLOAT80U_INIT_C(1, 0xc000100000000000, 0), /* -1.1xxx1^-16382 */
3861 };
3862 X86FXSTATE State;
3863 RT_ZERO(State);
3864 uint32_t cMinNormals = cTests / 4;
3865 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
3866 {
3867 PFNIEMAIMPLFPUR80UNARY const pfn = g_aFpuUnaryR80[iFn].pfnNative ? g_aFpuUnaryR80[iFn].pfnNative : g_aFpuUnaryR80[iFn].pfn;
3868 PRTSTREAM pOutFn = pOut;
3869 if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
3870 {
3871 if (g_aFpuUnaryR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
3872 continue;
3873 pOutFn = pOutCpu;
3874 }
3875
3876 GenerateArrayStart(pOutFn, g_aFpuUnaryR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
3877 uint32_t iTestOutput = 0;
3878 uint32_t cNormalInputs = 0;
3879 uint32_t cTargetRangeInputs = 0;
3880 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
3881 {
3882 RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
3883 if (RTFLOAT80U_IS_NORMAL(&InVal))
3884 {
3885 if (g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1)
3886 {
3887 unsigned uTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1
3888 ? RTFLOAT80U_EXP_BIAS /* 2^0..2^-69 */ : RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
3889 unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
3890 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
3891 cTargetRangeInputs++;
3892 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
3893 {
3894 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
3895 cTargetRangeInputs++;
3896 }
3897 }
3898 cNormalInputs++;
3899 }
3900 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
3901 {
3902 iTest -= 1;
3903 continue;
3904 }
3905
3906 uint16_t const fFcwExtra = FpuUnaryR80MayHaveRoundingError(&InVal, g_aFpuUnaryR80[iFn].uExtra) ? 0x80 : 0;
3907 uint16_t const fFcw = RandFcw();
3908 State.FSW = RandFsw();
3909
3910 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
3911 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
3912 {
3913 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
3914 | (iRounding << X86_FCW_RC_SHIFT)
3915 | (iPrecision << X86_FCW_PC_SHIFT)
3916 | X86_FCW_MASK_ALL;
3917 IEMFPURESULT ResM = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3918 pfn(&State, &ResM, &InVal);
3919 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
3920 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal),
3921 GenFormatR80(&ResM.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
3922
3923 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
3924 IEMFPURESULT ResU = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3925 pfn(&State, &ResU, &InVal);
3926 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
3927 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal),
3928 GenFormatR80(&ResU.r80Result), iTest, iRounding, iPrecision, iTestOutput++);
3929
3930 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
3931 if (fXcpt)
3932 {
3933 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
3934 IEMFPURESULT Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3935 pfn(&State, &Res1, &InVal);
3936 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
3937 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal),
3938 GenFormatR80(&Res1.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
3939 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
3940 {
3941 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
3942 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
3943 IEMFPURESULT Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3944 pfn(&State, &Res2, &InVal);
3945 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
3946 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal),
3947 GenFormatR80(&Res2.r80Result), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
3948 }
3949 if (!RT_IS_POWER_OF_TWO(fXcpt))
3950 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
3951 if (fUnmasked & fXcpt)
3952 {
3953 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
3954 IEMFPURESULT Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0 };
3955 pfn(&State, &Res3, &InVal);
3956 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
3957 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal),
3958 GenFormatR80(&Res3.r80Result), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
3959 }
3960 }
3961 }
3962 }
3963 GenerateArrayEnd(pOutFn, g_aFpuUnaryR80[iFn].pszName);
3964 }
3965}
3966#endif
3967
3968static bool FpuIsEqualFcwMaybeIgnoreRoundErr(uint16_t fFcw1, uint16_t fFcw2, bool fRndErrOk, bool *pfRndErr)
3969{
3970 if (fFcw1 == fFcw2)
3971 return true;
3972 if (fRndErrOk && (fFcw1 & ~X86_FSW_C1) == (fFcw2 & ~X86_FSW_C1))
3973 {
3974 *pfRndErr = true;
3975 return true;
3976 }
3977 return false;
3978}
3979
3980static bool FpuIsEqualR80MaybeIgnoreRoundErr(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, bool fRndErrOk, bool *pfRndErr)
3981{
3982 if (RTFLOAT80U_ARE_IDENTICAL(pr80Val1, pr80Val2))
3983 return true;
3984 if ( fRndErrOk
3985 && pr80Val1->s.fSign == pr80Val2->s.fSign)
3986 {
3987 if ( ( pr80Val1->s.uExponent == pr80Val2->s.uExponent
3988 && ( pr80Val1->s.uMantissa > pr80Val2->s.uMantissa
3989 ? pr80Val1->s.uMantissa - pr80Val2->s.uMantissa == 1
3990 : pr80Val2->s.uMantissa - pr80Val1->s.uMantissa == 1))
3991 ||
3992 ( pr80Val1->s.uExponent + 1 == pr80Val2->s.uExponent
3993 && pr80Val1->s.uMantissa == UINT64_MAX
3994 && pr80Val2->s.uMantissa == RT_BIT_64(63))
3995 ||
3996 ( pr80Val1->s.uExponent == pr80Val2->s.uExponent + 1
3997 && pr80Val2->s.uMantissa == UINT64_MAX
3998 && pr80Val1->s.uMantissa == RT_BIT_64(63)) )
3999 {
4000 *pfRndErr = true;
4001 return true;
4002 }
4003 }
4004 return false;
4005}
4006
4007
/**
 * Tests the unary FPU operations on one 80-bit floating point value against
 * the precompiled data in g_aFpuUnaryR80.
 *
 * Bit 7 (0x80) in the stored FCW is an out-of-band marker set by the
 * generator (see FpuUnaryR80MayHaveRoundingError usage there) indicating
 * that a one-ULP value difference and/or a C1 status difference is
 * acceptable for that test; it is masked off before loading the FCW.
 */
static void FpuUnaryR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryR80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuUnaryR80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuUnaryR80[iFn].pcTests;
        FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryR80[iFn].paTests;
        PFNIEMAIMPLFPUR80UNARY pfn = g_aFpuUnaryR80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryR80[iFn]);
        uint32_t cRndErrs = 0;          /* tolerated rounding differences actually seen */
        uint32_t cPossibleRndErrs = 0;  /* tests where such differences were allowed */
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                IEMFPURESULT Res = { RTFLOAT80U_INIT(0, 0, 0), 0 };
                bool const fRndErrOk = RT_BOOL(paTests[iTest].fFcw & 0x80);
                State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80; /* strip the rounding-error marker bit */
                State.FSW = paTests[iTest].fFswIn;
                pfn(&State, &Res, &InVal);
                bool fRndErr = false;
                /* Both FSW and value comparison may flag a tolerated rounding difference. */
                if (   !FpuIsEqualFcwMaybeIgnoreRoundErr(Res.FSW, paTests[iTest].fFswOut, fRndErrOk, &fRndErr)
                    || !FpuIsEqualR80MaybeIgnoreRoundErr(&Res.r80Result, &paTests[iTest].OutVal, fRndErrOk, &fRndErr))
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x %s\n"
                                          "%s expected %#06x %s%s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result),
                                 iVar ? " " : "", paTests[iTest].fFswOut, FormatR80(&paTests[iTest].OutVal),
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result, &paTests[iTest].OutVal) ? " - val" : "",
                                 fRndErrOk ? " - rounding errors ok" : "", FormatFcw(paTests[iTest].fFcw));
                cRndErrs += fRndErr;
                cPossibleRndErrs += fRndErrOk;
            }
            pfn = g_aFpuUnaryR80[iFn].pfnNative; /* second variation (if any) runs the native implementation */
        }
        if (cPossibleRndErrs > 0)
            RTTestPrintf(g_hTest, RTTESTLVL_ALWAYS, "rounding errors: %u out of %u\n", cRndErrs, cPossibleRndErrs);
    }
}
4056
4057
4058/*
4059 * Unary FPU operations on one 80-bit floating point value, but only affects the FSW.
4060 */
4061TYPEDEF_SUBTEST_TYPE(FPU_UNARY_FSW_R80_T, FPU_UNARY_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYFSW);
4062
4063static const FPU_UNARY_FSW_R80_T g_aFpuUnaryFswR80[] =
4064{
4065 ENTRY(ftst_r80),
4066 ENTRY_EX(fxam_r80, 1),
4067};
4068
4069#ifdef TSTIEMAIMPL_WITH_GENERATOR
/**
 * Generates test data for the FSW-only unary operations (ftst, fxam).
 *
 * For fxam (uExtra == 1) only a single call per input is made, randomly
 * marking ST(0) as empty; the empty state is recorded by (ab)using the MBZ
 * bit 7 of the stored FCW.  For ftst all rounding/precision/mask
 * combinations are exercised.
 *
 * @param   pOut        Stream for test data common to all CPU flavours.
 * @param   pOutCpu     Stream for test data specific to the host CPU flavour.
 * @param   cTests      Number of random inputs to generate per function.
 */
static void FpuUnaryFswR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
{
    static RTFLOAT80U const s_aSpecials[] =
    {
        RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
    };

    X86FXSTATE State;
    RT_ZERO(State);
    uint32_t cMinNormals = cTests / 4; /* require at least a quarter of the inputs to be normal values */
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
    {
        bool const fIsFxam = g_aFpuUnaryFswR80[iFn].uExtra == 1; /* see ENTRY_EX(fxam_r80, 1) */
        PFNIEMAIMPLFPUR80UNARYFSW const pfn = g_aFpuUnaryFswR80[iFn].pfnNative ? g_aFpuUnaryFswR80[iFn].pfnNative : g_aFpuUnaryFswR80[iFn].pfn;
        PRTSTREAM pOutFn = pOut;
        if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
        {
            if (g_aFpuUnaryFswR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
                continue; /* cannot produce the other CPU flavour's data on this host */
            pOutFn = pOutCpu;
        }
        State.FTW = 0;

        GenerateArrayStart(pOutFn, g_aFpuUnaryFswR80[iFn].pszName, "FPU_UNARY_R80_TEST_T");
        uint32_t cNormalInputs = 0;
        for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
        {
            RTFLOAT80U const InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
            if (RTFLOAT80U_IS_NORMAL(&InVal))
                cNormalInputs++;
            /* Towards the end, redo draws until the normal-value quota is met. */
            else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
            {
                iTest -= 1;
                continue;
            }

            uint16_t const fFcw = RandFcw();
            State.FSW = RandFsw();
            if (!fIsFxam)
            {
                for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
                {
                    for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
                    {
                        /* Two iterations: all exceptions unmasked (0), then all masked. */
                        for (uint16_t iMask = 0; iMask <= X86_FCW_MASK_ALL; iMask += X86_FCW_MASK_ALL)
                        {
                            State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
                                      | (iRounding  << X86_FCW_RC_SHIFT)
                                      | (iPrecision << X86_FCW_PC_SHIFT)
                                      | iMask;
                            uint16_t fFswOut = 0;
                            pfn(&State, &fFswOut, &InVal);
                            RTStrmPrintf(pOutFn, "    { %#06x, %#06x, %#06x, %s }, /* #%u/%u/%u/%c */\n",
                                         State.FCW, State.FSW, fFswOut, GenFormatR80(&InVal),
                                         iTest, iRounding, iPrecision, iMask ? 'c' : 'u');
                        }
                    }
                }
            }
            else
            {
                uint16_t fFswOut = 0;
                uint16_t const fEmpty = RTRandU32Ex(0, 3) == 3 ? 0x80 : 0; /* Using MBZ bit 7 in FCW to indicate empty tag value. */
                State.FTW = !fEmpty ? 1 << X86_FSW_TOP_GET(State.FSW) : 0;
                State.FCW = fFcw;
                pfn(&State, &fFswOut, &InVal);
                RTStrmPrintf(pOutFn, "    { %#06x, %#06x, %#06x, %s }, /* #%u%s */\n",
                             fFcw | fEmpty, State.FSW, fFswOut, GenFormatR80(&InVal), iTest, fEmpty ? "/empty" : "");
            }
        }
        GenerateArrayEnd(pOutFn, g_aFpuUnaryFswR80[iFn].pszName);
    }
}
4143#endif
4144
4145
/**
 * Tests the FSW-only unary operations (ftst, fxam) against the precompiled
 * data in g_aFpuUnaryFswR80.
 *
 * Bit 7 (0x80) in the stored FCW is an out-of-band marker from the generator
 * meaning ST(0) shall be tagged as empty (FTW=0); it is masked off before
 * loading the FCW.
 */
static void FpuUnaryFswR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryFswR80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuUnaryFswR80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuUnaryFswR80[iFn].pcTests;
        FPU_UNARY_R80_TEST_T const * const paTests = g_aFpuUnaryFswR80[iFn].paTests;
        PFNIEMAIMPLFPUR80UNARYFSW pfn = g_aFpuUnaryFswR80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryFswR80[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                uint16_t fFswOut = 0;
                State.FSW = paTests[iTest].fFswIn;
                State.FCW = paTests[iTest].fFcw & ~(uint16_t)0x80; /* see generator code */
                State.FTW = paTests[iTest].fFcw & 0x80 ? 0 : 1 << X86_FSW_TOP_GET(paTests[iTest].fFswIn);
                pfn(&State, &fFswOut, &InVal);
                if (fFswOut != paTests[iTest].fFswOut)
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x\n"
                                          "%s expected %#06x %s (%s%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? " " : "", fFswOut,
                                 iVar ? " " : "", paTests[iTest].fFswOut,
                                 FswDiff(fFswOut, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw),
                                 paTests[iTest].fFcw & 0x80 ? " empty" : "");
            }
            pfn = g_aFpuUnaryFswR80[iFn].pfnNative; /* second variation (if any) runs the native implementation */
        }
    }
}
4185
4186/*
4187 * Unary FPU operations on one 80-bit floating point value, but with two outputs.
4188 */
4189TYPEDEF_SUBTEST_TYPE(FPU_UNARY_TWO_R80_T, FPU_UNARY_TWO_R80_TEST_T, PFNIEMAIMPLFPUR80UNARYTWO);
4190
4191static const FPU_UNARY_TWO_R80_T g_aFpuUnaryTwoR80[] =
4192{
4193 ENTRY(fxtract_r80_r80),
4194 ENTRY_AMD( fptan_r80_r80, 0), // rounding differences
4195 ENTRY_INTEL(fptan_r80_r80, 0),
4196 ENTRY_AMD( fsincos_r80_r80, 0), // C1 differences & value differences (e.g. -1m0x235cf2f580244a27^-1696)
4197 ENTRY_INTEL(fsincos_r80_r80, 0),
4198};
4199
4200#ifdef TSTIEMAIMPL_WITH_GENERATOR
4201static void FpuUnaryTwoR80Generate(PRTSTREAM pOut, PRTSTREAM pOutCpu, uint32_t cTests)
4202{
4203 static RTFLOAT80U const s_aSpecials[] =
4204 {
4205 RTFLOAT80U_INIT_C(0, 0xffffeeeeddddcccc, RTFLOAT80U_EXP_BIAS), /* whatever */
4206 };
4207
4208 X86FXSTATE State;
4209 RT_ZERO(State);
4210 uint32_t cMinNormals = cTests / 4;
4211 for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
4212 {
4213 PFNIEMAIMPLFPUR80UNARYTWO const pfn = g_aFpuUnaryTwoR80[iFn].pfnNative ? g_aFpuUnaryTwoR80[iFn].pfnNative : g_aFpuUnaryTwoR80[iFn].pfn;
4214 PRTSTREAM pOutFn = pOut;
4215 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != IEMTARGETCPU_EFL_BEHAVIOR_NATIVE)
4216 {
4217 if (g_aFpuUnaryTwoR80[iFn].idxCpuEflFlavour != g_idxCpuEflFlavour)
4218 continue;
4219 pOutFn = pOutCpu;
4220 }
4221
4222 GenerateArrayStart(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName, "FPU_UNARY_TWO_R80_TEST_T");
4223 uint32_t iTestOutput = 0;
4224 uint32_t cNormalInputs = 0;
4225 uint32_t cTargetRangeInputs = 0;
4226 for (uint32_t iTest = 0; iTest < cTests + RT_ELEMENTS(s_aSpecials); iTest += 1)
4227 {
4228 RTFLOAT80U InVal = iTest < cTests ? RandR80Src(iTest) : s_aSpecials[iTest - cTests];
4229 if (RTFLOAT80U_IS_NORMAL(&InVal))
4230 {
4231 if (iFn != 0)
4232 {
4233 unsigned uTargetExp = RTFLOAT80U_EXP_BIAS + 63 + 1 /* 2^64..2^-64 */;
4234 unsigned cTargetExp = g_aFpuUnaryR80[iFn].uExtra == kUnary_Rounding_F2xm1 ? 69 : 63*2 + 2;
4235 if (InVal.s.uExponent <= uTargetExp && InVal.s.uExponent >= uTargetExp - cTargetExp)
4236 cTargetRangeInputs++;
4237 else if (cTargetRangeInputs < cMinNormals / 2 && iTest + cMinNormals / 2 >= cTests && iTest < cTests)
4238 {
4239 InVal.s.uExponent = RTRandU32Ex(uTargetExp - cTargetExp, uTargetExp);
4240 cTargetRangeInputs++;
4241 }
4242 }
4243 cNormalInputs++;
4244 }
4245 else if (cNormalInputs < cMinNormals && iTest + cMinNormals >= cTests && iTest < cTests)
4246 {
4247 iTest -= 1;
4248 continue;
4249 }
4250
4251 uint16_t const fFcwExtra = 0; /* for rounding error indication */
4252 uint16_t const fFcw = RandFcw();
4253 State.FSW = RandFsw();
4254
4255 for (uint16_t iRounding = 0; iRounding < 4; iRounding++)
4256 for (uint16_t iPrecision = 0; iPrecision < 4; iPrecision++)
4257 {
4258 State.FCW = (fFcw & ~(X86_FCW_RC_MASK | X86_FCW_PC_MASK | X86_FCW_MASK_ALL))
4259 | (iRounding << X86_FCW_RC_SHIFT)
4260 | (iPrecision << X86_FCW_PC_SHIFT)
4261 | X86_FCW_MASK_ALL;
4262 IEMFPURESULTTWO ResM = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4263 pfn(&State, &ResM, &InVal);
4264 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/m = #%u */\n",
4265 State.FCW | fFcwExtra, State.FSW, ResM.FSW, GenFormatR80(&InVal), GenFormatR80(&ResM.r80Result1),
4266 GenFormatR80(&ResM.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4267
4268 State.FCW = State.FCW & ~X86_FCW_MASK_ALL;
4269 IEMFPURESULTTWO ResU = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4270 pfn(&State, &ResU, &InVal);
4271 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u = #%u */\n",
4272 State.FCW | fFcwExtra, State.FSW, ResU.FSW, GenFormatR80(&InVal), GenFormatR80(&ResU.r80Result1),
4273 GenFormatR80(&ResU.r80Result2), iTest, iRounding, iPrecision, iTestOutput++);
4274
4275 uint16_t fXcpt = (ResM.FSW | ResU.FSW) & X86_FSW_XCPT_MASK & ~X86_FSW_SF;
4276 if (fXcpt)
4277 {
4278 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4279 IEMFPURESULTTWO Res1 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4280 pfn(&State, &Res1, &InVal);
4281 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x = #%u */\n",
4282 State.FCW | fFcwExtra, State.FSW, Res1.FSW, GenFormatR80(&InVal), GenFormatR80(&Res1.r80Result1),
4283 GenFormatR80(&Res1.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4284 if (((Res1.FSW & X86_FSW_XCPT_MASK) & fXcpt) != (Res1.FSW & X86_FSW_XCPT_MASK))
4285 {
4286 fXcpt |= Res1.FSW & X86_FSW_XCPT_MASK;
4287 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | fXcpt;
4288 IEMFPURESULTTWO Res2 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4289 pfn(&State, &Res2, &InVal);
4290 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/%#x[!] = #%u */\n",
4291 State.FCW | fFcwExtra, State.FSW, Res2.FSW, GenFormatR80(&InVal), GenFormatR80(&Res2.r80Result1),
4292 GenFormatR80(&Res2.r80Result2), iTest, iRounding, iPrecision, fXcpt, iTestOutput++);
4293 }
4294 if (!RT_IS_POWER_OF_TWO(fXcpt))
4295 for (uint16_t fUnmasked = 1; fUnmasked <= X86_FCW_PM; fUnmasked <<= 1)
4296 if (fUnmasked & fXcpt)
4297 {
4298 State.FCW = (State.FCW & ~X86_FCW_MASK_ALL) | (fXcpt & ~fUnmasked);
4299 IEMFPURESULTTWO Res3 = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
4300 pfn(&State, &Res3, &InVal);
4301 RTStrmPrintf(pOutFn, " { %#06x, %#06x, %#06x, %s, %s, %s }, /* #%u/%u/%u/u%#x = #%u */\n",
4302 State.FCW | fFcwExtra, State.FSW, Res3.FSW, GenFormatR80(&InVal), GenFormatR80(&Res3.r80Result1),
4303 GenFormatR80(&Res3.r80Result2), iTest, iRounding, iPrecision, fUnmasked, iTestOutput++);
4304 }
4305 }
4306 }
4307 }
4308 GenerateArrayEnd(pOutFn, g_aFpuUnaryTwoR80[iFn].pszName);
4309 }
4310}
4311#endif
4312
4313
/**
 * Tests the two-result unary operations (fxtract, fptan, fsincos) against
 * the precompiled data in g_aFpuUnaryTwoR80.  Both result values and the
 * resulting FSW must match exactly - no rounding tolerance here.
 */
static void FpuUnaryTwoR80Test(void)
{
    X86FXSTATE State;
    RT_ZERO(State);
    for (size_t iFn = 0; iFn < RT_ELEMENTS(g_aFpuUnaryTwoR80); iFn++)
    {
        if (!SubTestAndCheckIfEnabled(g_aFpuUnaryTwoR80[iFn].pszName))
            continue;

        uint32_t const cTests = *g_aFpuUnaryTwoR80[iFn].pcTests;
        FPU_UNARY_TWO_R80_TEST_T const * const paTests = g_aFpuUnaryTwoR80[iFn].paTests;
        PFNIEMAIMPLFPUR80UNARYTWO pfn = g_aFpuUnaryTwoR80[iFn].pfn;
        uint32_t const cVars = COUNT_VARIATIONS(g_aFpuUnaryTwoR80[iFn]);
        if (!cTests) RTTestSkipped(g_hTest, "no tests");
        for (uint32_t iVar = 0; iVar < cVars; iVar++)
        {
            for (uint32_t iTest = 0; iTest < cTests; iTest++)
            {
                IEMFPURESULTTWO Res = { RTFLOAT80U_INIT(0, 0, 0), 0, RTFLOAT80U_INIT(0, 0, 0) };
                RTFLOAT80U const InVal = paTests[iTest].InVal;
                State.FCW = paTests[iTest].fFcw;
                State.FSW = paTests[iTest].fFswIn;
                pfn(&State, &Res, &InVal);
                if (   Res.FSW != paTests[iTest].fFswOut
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1)
                    || !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) )
                    RTTestFailed(g_hTest, "#%04u%s: fcw=%#06x fsw=%#06x in=%s\n"
                                          "%s -> fsw=%#06x %s %s\n"
                                          "%s expected %#06x %s %s %s%s%s (%s)\n",
                                 iTest, iVar ? "/n" : "", paTests[iTest].fFcw, paTests[iTest].fFswIn,
                                 FormatR80(&paTests[iTest].InVal),
                                 iVar ? " " : "", Res.FSW, FormatR80(&Res.r80Result1), FormatR80(&Res.r80Result2),
                                 iVar ? " " : "", paTests[iTest].fFswOut,
                                 FormatR80(&paTests[iTest].OutVal1), FormatR80(&paTests[iTest].OutVal2),
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result1, &paTests[iTest].OutVal1) ? " - val1" : "",
                                 !RTFLOAT80U_ARE_IDENTICAL(&Res.r80Result2, &paTests[iTest].OutVal2) ? " - val2" : "",
                                 FswDiff(Res.FSW, paTests[iTest].fFswOut), FormatFcw(paTests[iTest].fFcw) );
            }
            pfn = g_aFpuUnaryTwoR80[iFn].pfnNative; /* second variation (if any) runs the native implementation */
        }
    }
}
4356
4357
4358
4359int main(int argc, char **argv)
4360{
4361 int rc = RTR3InitExe(argc, &argv, 0);
4362 if (RT_FAILURE(rc))
4363 return RTMsgInitFailure(rc);
4364
4365 /*
4366 * Determin the host CPU.
4367 * If not using the IEMAllAImpl.asm code, this will be set to Intel.
4368 */
4369#if (defined(RT_ARCH_X86) || defined(RT_ARCH_AMD64)) && !defined(IEM_WITHOUT_ASSEMBLY)
4370 g_idxCpuEflFlavour = ASMIsAmdCpu() || ASMIsHygonCpu()
4371 ? IEMTARGETCPU_EFL_BEHAVIOR_AMD
4372 : IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
4373#else
4374 g_idxCpuEflFlavour = IEMTARGETCPU_EFL_BEHAVIOR_INTEL;
4375#endif
4376
4377 /*
4378 * Parse arguments.
4379 */
4380 enum { kModeNotSet, kModeTest, kModeGenerate }
4381 enmMode = kModeNotSet;
4382 bool fInt = true;
4383 bool fFpuLdSt = true;
4384 bool fFpuBinary1 = true;
4385 bool fFpuBinary2 = true;
4386 bool fFpuOther = true;
4387 bool fCpuData = true;
4388 bool fCommonData = true;
4389 uint32_t const cDefaultTests = 96;
4390 uint32_t cTests = cDefaultTests;
4391 RTGETOPTDEF const s_aOptions[] =
4392 {
4393 // mode:
4394 { "--generate", 'g', RTGETOPT_REQ_NOTHING },
4395 { "--test", 't', RTGETOPT_REQ_NOTHING },
4396 // test selection (both)
4397 { "--all", 'a', RTGETOPT_REQ_NOTHING },
4398 { "--none", 'z', RTGETOPT_REQ_NOTHING },
4399 { "--zap", 'z', RTGETOPT_REQ_NOTHING },
4400 { "--fpu-ld-st", 'F', RTGETOPT_REQ_NOTHING }, /* FPU stuff is upper case */
4401 { "--fpu-load-store", 'F', RTGETOPT_REQ_NOTHING },
4402 { "--fpu-binary-1", 'B', RTGETOPT_REQ_NOTHING },
4403 { "--fpu-binary-2", 'P', RTGETOPT_REQ_NOTHING },
4404 { "--fpu-other", 'O', RTGETOPT_REQ_NOTHING },
4405 { "--int", 'i', RTGETOPT_REQ_NOTHING },
4406 { "--include", 'I', RTGETOPT_REQ_STRING },
4407 { "--exclude", 'X', RTGETOPT_REQ_STRING },
4408 // generation parameters
4409 { "--common", 'm', RTGETOPT_REQ_NOTHING },
4410 { "--cpu", 'c', RTGETOPT_REQ_NOTHING },
4411 { "--number-of-tests", 'n', RTGETOPT_REQ_UINT32 },
4412 { "--verbose", 'v', RTGETOPT_REQ_NOTHING },
4413 { "--quiet", 'q', RTGETOPT_REQ_NOTHING },
4414 };
4415
4416 RTGETOPTSTATE State;
4417 rc = RTGetOptInit(&State, argc, argv, s_aOptions, RT_ELEMENTS(s_aOptions), 1, 0);
4418 AssertRCReturn(rc, RTEXITCODE_FAILURE);
4419
4420 RTGETOPTUNION ValueUnion;
4421 while ((rc = RTGetOpt(&State, &ValueUnion)))
4422 {
4423 switch (rc)
4424 {
4425 case 'g':
4426 enmMode = kModeGenerate;
4427 break;
4428 case 't':
4429 enmMode = kModeTest;
4430 break;
4431
4432 case 'a':
4433 fCpuData = true;
4434 fCommonData = true;
4435 fInt = true;
4436 fFpuLdSt = true;
4437 fFpuBinary1 = true;
4438 fFpuBinary2 = true;
4439 fFpuOther = true;
4440 break;
4441 case 'z':
4442 fCpuData = false;
4443 fCommonData = false;
4444 fInt = false;
4445 fFpuLdSt = false;
4446 fFpuBinary1 = false;
4447 fFpuBinary2 = false;
4448 fFpuOther = false;
4449 break;
4450
4451 case 'F':
4452 fFpuLdSt = true;
4453 break;
4454 case 'O':
4455 fFpuOther = true;
4456 break;
4457 case 'B':
4458 fFpuBinary1 = true;
4459 break;
4460 case 'P':
4461 fFpuBinary2 = true;
4462 break;
4463 case 'i':
4464 fInt = true;
4465 break;
4466
4467 case 'I':
4468 if (g_cIncludeTestPatterns >= RT_ELEMENTS(g_apszIncludeTestPatterns))
4469 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many include patterns (max %zu)",
4470 RT_ELEMENTS(g_apszIncludeTestPatterns));
4471 g_apszIncludeTestPatterns[g_cIncludeTestPatterns++] = ValueUnion.psz;
4472 break;
4473 case 'X':
4474 if (g_cExcludeTestPatterns >= RT_ELEMENTS(g_apszExcludeTestPatterns))
4475 return RTMsgErrorExit(RTEXITCODE_SYNTAX, "Too many exclude patterns (max %zu)",
4476 RT_ELEMENTS(g_apszExcludeTestPatterns));
4477 g_apszExcludeTestPatterns[g_cExcludeTestPatterns++] = ValueUnion.psz;
4478 break;
4479
4480 case 'm':
4481 fCommonData = true;
4482 break;
4483 case 'c':
4484 fCpuData = true;
4485 break;
4486 case 'n':
4487 cTests = ValueUnion.u32;
4488 break;
4489
4490 case 'q':
4491 g_cVerbosity = 0;
4492 break;
4493 case 'v':
4494 g_cVerbosity++;
4495 break;
4496
4497 case 'h':
4498 RTPrintf("usage: %s <-g|-t> [options]\n"
4499 "\n"
4500 "Mode:\n"
4501 " -g, --generate\n"
4502 " Generate test data.\n"
4503 " -t, --test\n"
4504 " Execute tests.\n"
4505 "\n"
4506 "Test selection (both modes):\n"
4507 " -a, --all\n"
4508 " Enable all tests and generated test data. (default)\n"
4509 " -z, --zap, --none\n"
4510 " Disable all tests and test data types.\n"
4511 " -i, --int\n"
4512 " Enable non-FPU tests.\n"
4513 " -F, --fpu-ld-st\n"
4514 " Enable FPU load and store tests.\n"
4515 " -B, --fpu-binary-1\n"
4516 " Enable FPU binary 80-bit FP tests.\n"
4517 " -P, --fpu-binary-2\n"
4518 " Enable FPU binary 64- and 32-bit FP tests.\n"
4519 " -O, --fpu-other\n"
4520 " Enable other FPU tests.\n"
4521 " -I,--include=<test-patter>\n"
4522 " Enable tests matching the given pattern.\n"
4523 " -X,--exclude=<test-patter>\n"
4524 " Skip tests matching the given pattern (overrides --include).\n"
4525 "\n"
4526 "Generation:\n"
4527 " -m, --common\n"
4528 " Enable generating common test data.\n"
4529 " -c, --only-cpu\n"
4530 " Enable generating CPU specific test data.\n"
4531 " -n, --number-of-test <count>\n"
4532 " Number of tests to generate. Default: %u\n"
4533 "\n"
4534 "Other:\n"
4535 " -v, --verbose\n"
4536 " -q, --quiet\n"
4537 " Noise level. Default: --quiet\n"
4538 , argv[0], cDefaultTests);
4539 return RTEXITCODE_SUCCESS;
4540 default:
4541 return RTGetOptPrintError(rc, &ValueUnion);
4542 }
4543 }
4544
4545 /*
4546 * Generate data?
4547 */
4548 if (enmMode == kModeGenerate)
4549 {
4550#ifdef TSTIEMAIMPL_WITH_GENERATOR
4551 char szCpuDesc[256] = {0};
4552 RTMpGetDescription(NIL_RTCPUID, szCpuDesc, sizeof(szCpuDesc));
4553 const char * const pszCpuType = g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD ? "Amd" : "Intel";
4554# if defined(RT_OS_WINDOWS) || defined(RT_OS_OS2)
4555 const char * const pszBitBucket = "NUL";
4556# else
4557 const char * const pszBitBucket = "/dev/null";
4558# endif
4559
4560 if (cTests == 0)
4561 cTests = cDefaultTests;
4562 g_cZeroDstTests = RT_MIN(cTests / 16, 32);
4563 g_cZeroSrcTests = g_cZeroDstTests * 2;
4564
4565 if (fInt)
4566 {
4567 const char *pszDataFile = fCommonData ? "tstIEMAImplDataInt.cpp" : pszBitBucket;
4568 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4569 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4570 ? "tstIEMAImplDataInt-Amd.cpp" : "tstIEMAImplDataInt-Intel.cpp";
4571 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4572 if (!pStrmData || !pStrmDataCpu)
4573 return RTEXITCODE_FAILURE;
4574
4575 BinU8Generate( pStrmData, pStrmDataCpu, cTests);
4576 BinU16Generate(pStrmData, pStrmDataCpu, cTests);
4577 BinU32Generate(pStrmData, pStrmDataCpu, cTests);
4578 BinU64Generate(pStrmData, pStrmDataCpu, cTests);
4579 ShiftDblGenerate(pStrmDataCpu, RT_MAX(cTests, 128));
4580 UnaryGenerate(pStrmData, cTests);
4581 ShiftGenerate(pStrmDataCpu, cTests);
4582 MulDivGenerate(pStrmDataCpu, cTests);
4583
4584 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4585 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4586 if (rcExit != RTEXITCODE_SUCCESS)
4587 return rcExit;
4588 }
4589
4590 if (fFpuLdSt)
4591 {
4592 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuLdSt.cpp" : pszBitBucket;
4593 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4594 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4595 ? "tstIEMAImplDataFpuLdSt-Amd.cpp" : "tstIEMAImplDataFpuLdSt-Intel.cpp";
4596 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4597 if (!pStrmData || !pStrmDataCpu)
4598 return RTEXITCODE_FAILURE;
4599
4600 FpuLdConstGenerate(pStrmData, cTests);
4601 FpuLdIntGenerate(pStrmData, cTests);
4602 FpuLdD80Generate(pStrmData, cTests);
4603 FpuStIntGenerate(pStrmData, pStrmDataCpu, cTests);
4604 FpuStD80Generate(pStrmData, cTests);
4605 uint32_t const cTests2 = RT_MAX(cTests, 384); /* need better coverage for the next ones. */
4606 FpuLdMemGenerate(pStrmData, cTests2);
4607 FpuStMemGenerate(pStrmData, cTests2);
4608
4609 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4610 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4611 if (rcExit != RTEXITCODE_SUCCESS)
4612 return rcExit;
4613 }
4614
4615 if (fFpuBinary1)
4616 {
4617 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary1.cpp" : pszBitBucket;
4618 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4619 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4620 ? "tstIEMAImplDataFpuBinary1-Amd.cpp" : "tstIEMAImplDataFpuBinary1-Intel.cpp";
4621 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4622 if (!pStrmData || !pStrmDataCpu)
4623 return RTEXITCODE_FAILURE;
4624
4625 FpuBinaryR80Generate(pStrmData, pStrmDataCpu, cTests);
4626 FpuBinaryFswR80Generate(pStrmData, cTests);
4627 FpuBinaryEflR80Generate(pStrmData, cTests);
4628
4629 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4630 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4631 if (rcExit != RTEXITCODE_SUCCESS)
4632 return rcExit;
4633 }
4634
4635 if (fFpuBinary2)
4636 {
4637 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuBinary2.cpp" : pszBitBucket;
4638 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4639 const char *pszDataCpuFile = pszBitBucket; /*!fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4640 ? "tstIEMAImplDataFpuBinary2-Amd.cpp" : "tstIEMAImplDataFpuBinary2-Intel.cpp"; */
4641 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4642 if (!pStrmData || !pStrmDataCpu)
4643 return RTEXITCODE_FAILURE;
4644
4645 FpuBinaryR64Generate(pStrmData, cTests);
4646 FpuBinaryR32Generate(pStrmData, cTests);
4647 FpuBinaryI32Generate(pStrmData, cTests);
4648 FpuBinaryI16Generate(pStrmData, cTests);
4649 FpuBinaryFswR64Generate(pStrmData, cTests);
4650 FpuBinaryFswR32Generate(pStrmData, cTests);
4651 FpuBinaryFswI32Generate(pStrmData, cTests);
4652 FpuBinaryFswI16Generate(pStrmData, cTests);
4653
4654 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4655 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4656 if (rcExit != RTEXITCODE_SUCCESS)
4657 return rcExit;
4658 }
4659
4660 if (fFpuOther)
4661 {
4662 const char *pszDataFile = fCommonData ? "tstIEMAImplDataFpuOther.cpp" : pszBitBucket;
4663 PRTSTREAM pStrmData = GenerateOpenWithHdr(pszDataFile, szCpuDesc, NULL);
4664 const char *pszDataCpuFile = !fCpuData ? pszBitBucket : g_idxCpuEflFlavour == IEMTARGETCPU_EFL_BEHAVIOR_AMD
4665 ? "tstIEMAImplDataFpuOther-Amd.cpp" : "tstIEMAImplDataFpuOther-Intel.cpp";
4666 PRTSTREAM pStrmDataCpu = GenerateOpenWithHdr(pszDataCpuFile, szCpuDesc, pszCpuType);
4667 if (!pStrmData || !pStrmDataCpu)
4668 return RTEXITCODE_FAILURE;
4669
4670 FpuUnaryR80Generate(pStrmData, pStrmDataCpu, cTests);
4671 FpuUnaryFswR80Generate(pStrmData, pStrmDataCpu, cTests);
4672 FpuUnaryTwoR80Generate(pStrmData, pStrmDataCpu, cTests);
4673
4674 RTEXITCODE rcExit = GenerateFooterAndClose(pStrmDataCpu, pszDataCpuFile,
4675 GenerateFooterAndClose(pStrmData, pszDataFile, RTEXITCODE_SUCCESS));
4676 if (rcExit != RTEXITCODE_SUCCESS)
4677 return rcExit;
4678 }
4679
4680 return RTEXITCODE_SUCCESS;
4681#else
4682 return RTMsgErrorExitFailure("Test data generator not compiled in!");
4683#endif
4684 }
4685
4686 /*
4687 * Do testing. Currrently disabled by default as data needs to be checked
4688 * on both intel and AMD systems first.
4689 */
4690 rc = RTTestCreate("tstIEMAimpl", &g_hTest);
4691 AssertRCReturn(rc, RTEXITCODE_FAILURE);
4692 if (enmMode == kModeTest)
4693 {
4694 RTTestBanner(g_hTest);
4695
4696 /* Allocate guarded memory for use in the tests. */
4697#define ALLOC_GUARDED_VAR(a_puVar) do { \
4698 rc = RTTestGuardedAlloc(g_hTest, sizeof(*a_puVar), sizeof(*a_puVar), false /*fHead*/, (void **)&a_puVar); \
4699 if (RT_FAILURE(rc)) RTTestFailed(g_hTest, "Failed to allocate guarded mem: " #a_puVar); \
4700 } while (0)
4701 ALLOC_GUARDED_VAR(g_pu8);
4702 ALLOC_GUARDED_VAR(g_pu16);
4703 ALLOC_GUARDED_VAR(g_pu32);
4704 ALLOC_GUARDED_VAR(g_pu64);
4705 ALLOC_GUARDED_VAR(g_pu128);
4706 ALLOC_GUARDED_VAR(g_pu8Two);
4707 ALLOC_GUARDED_VAR(g_pu16Two);
4708 ALLOC_GUARDED_VAR(g_pu32Two);
4709 ALLOC_GUARDED_VAR(g_pu64Two);
4710 ALLOC_GUARDED_VAR(g_pu128Two);
4711 ALLOC_GUARDED_VAR(g_pfEfl);
4712 if (RTTestErrorCount(g_hTest) == 0)
4713 {
4714 if (fInt)
4715 {
4716 BinU8Test();
4717 BinU16Test();
4718 BinU32Test();
4719 BinU64Test();
4720 XchgTest();
4721 XaddTest();
4722 CmpXchgTest();
4723 CmpXchg8bTest();
4724 CmpXchg16bTest();
4725 ShiftDblTest();
4726 UnaryTest();
4727 ShiftTest();
4728 MulDivTest();
4729 BswapTest();
4730 }
4731
4732 if (fFpuLdSt)
4733 {
4734 FpuLoadConstTest();
4735 FpuLdMemTest();
4736 FpuLdIntTest();
4737 FpuLdD80Test();
4738 FpuStMemTest();
4739 FpuStIntTest();
4740 FpuStD80Test();
4741 }
4742
4743 if (fFpuBinary1)
4744 {
4745 FpuBinaryR80Test();
4746 FpuBinaryFswR80Test();
4747 FpuBinaryEflR80Test();
4748 }
4749
4750 if (fFpuBinary2)
4751 {
4752 FpuBinaryR64Test();
4753 FpuBinaryR32Test();
4754 FpuBinaryI32Test();
4755 FpuBinaryI16Test();
4756 FpuBinaryFswR64Test();
4757 FpuBinaryFswR32Test();
4758 FpuBinaryFswI32Test();
4759 FpuBinaryFswI16Test();
4760 }
4761
4762 if (fFpuOther)
4763 {
4764 FpuUnaryR80Test();
4765 FpuUnaryFswR80Test();
4766 FpuUnaryTwoR80Test();
4767 }
4768 }
4769 return RTTestSummaryAndDestroy(g_hTest);
4770 }
4771 return RTTestSkipAndDestroy(g_hTest, "unfinished testcase");
4772}
4773
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette