VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImplC.cpp@94620

Last change on this file since 94620 was 94614, checked in by vboxsync, 3 years ago:

VMM/IEM: C implementation of the fdiv and fdivr instruction helpers. bugref:9898
/* $Id: IEMAllAImplC.cpp 94614 2022-04-15 01:29:02Z vboxsync $ */
/** @file
 * IEM - Instruction Implementation in Assembly, portable C variant.
 */

/*
 * Copyright (C) 2011-2022 Oracle Corporation
 *
 * This file is part of VirtualBox Open Source Edition (OSE), as
 * available from http://www.virtualbox.org. This file is free software;
 * you can redistribute it and/or modify it under the terms of the GNU
 * General Public License (GPL) as published by the Free Software
 * Foundation, in version 2 as it comes in the "COPYING" file of the
 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
 */


/*********************************************************************************************************************************
*   Header Files                                                                                                                 *
*********************************************************************************************************************************/
#include "IEMInternal.h"
#include <VBox/vmm/vmcc.h>
#include <iprt/errcore.h>
#include <iprt/x86.h>
#include <iprt/uint128.h>
#include <iprt/uint256.h>

RT_C_DECLS_BEGIN
#include <softfloat.h>
RT_C_DECLS_END


/*********************************************************************************************************************************
*   Defined Constants And Macros                                                                                                 *
*********************************************************************************************************************************/
/** @def IEM_WITHOUT_ASSEMBLY
 * Enables all the code in this file.
 */
#if !defined(IEM_WITHOUT_ASSEMBLY)
# if defined(RT_ARCH_ARM32) || defined(RT_ARCH_ARM64) || defined(DOXYGEN_RUNNING)
#  define IEM_WITHOUT_ASSEMBLY
# endif
#endif
/* IEM_WITH_ASSEMBLY trumps IEM_WITHOUT_ASSEMBLY for tstIEMAImplAsm purposes. */
#ifdef IEM_WITH_ASSEMBLY
# undef IEM_WITHOUT_ASSEMBLY
#endif
/**
 * Calculates the signed flag value given a result and its bit width.
 *
 * The signed flag (SF) is a duplication of the most significant bit in the
 * result.
 *
 * @returns X86_EFL_SF or 0.
 * @param   a_uResult       Unsigned result value.
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 */
#define X86_EFL_CALC_SF(a_uResult, a_cBitsWidth) \
    ( (uint32_t)((a_uResult) >> ((a_cBitsWidth) - X86_EFL_SF_BIT - 1)) & X86_EFL_SF )

/**
 * Calculates the zero flag value given a result.
 *
 * The zero flag (ZF) indicates whether the result is zero or not.
 *
 * @returns X86_EFL_ZF or 0.
 * @param   a_uResult       Unsigned result value.
 */
#define X86_EFL_CALC_ZF(a_uResult) \
    ( (uint32_t)((a_uResult) == 0) << X86_EFL_ZF_BIT )

/**
 * Extracts the OF flag from an OF calculation result.
 *
 * These are typically used by concatenating with a bit count. The problem is
 * that 8-bit values need shifting in the other direction than the others.
 */
#define X86_EFL_GET_OF_8(a_uValue)  (((uint32_t)(a_uValue) << (X86_EFL_OF_BIT - 8 + 1)) & X86_EFL_OF)
#define X86_EFL_GET_OF_16(a_uValue) ((uint32_t)((a_uValue) >> (16 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF)
#define X86_EFL_GET_OF_32(a_uValue) ((uint32_t)((a_uValue) >> (32 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF)
#define X86_EFL_GET_OF_64(a_uValue) ((uint32_t)((a_uValue) >> (64 - X86_EFL_OF_BIT - 1)) & X86_EFL_OF)
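
/* Illustrative note (not from the original source): X86_EFL_OF_BIT is 11, so
   for 16/32/64-bit inputs the sign-derived bit sits above bit 11 and must be
   shifted right into position, e.g. bit 15 >> (16 - 11 - 1) lands on bit 11.
   For an 8-bit input the sign-derived bit 7 sits below bit 11 and must be
   shifted left instead: bit 7 << (11 - 8 + 1) lands on bit 11. */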

/**
 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after an arithmetic op.
 *
 * @returns Status bits.
 * @param   a_pfEFlags      Pointer to the 32-bit EFLAGS value to update.
 * @param   a_uResult       Unsigned result value.
 * @param   a_uDst          The original destination value (for AF calc).
 * @param   a_uSrc          The source value (for AF calc).
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 * @param   a_CfExpr        Bool expression for the carry flag (CF).
 * @param   a_uSrcOf        The a_uSrc value to use for the overflow calculation.
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(a_pfEFlags, a_uResult, a_uDst, a_uSrc, a_cBitsWidth, a_CfExpr, a_uSrcOf) \
    do { \
        uint32_t fEflTmp = *(a_pfEFlags); \
        fEflTmp &= ~X86_EFL_STATUS_BITS; \
        fEflTmp |= (a_CfExpr) << X86_EFL_CF_BIT; \
        fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
        fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uSrc) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
        fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
        fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
        \
        /* Overflow during ADDition happens when both inputs have the same sign \
           bit value and the result has a different sign bit value. \
           \
           Since subtraction can be rewritten as addition: 2 - 1 == 2 + -1, it \
           follows that for SUBtraction the sign bit value must differ between \
           the two inputs and the result's sign bit must differ from the first \
           input's. Note! Must xor with the sign bit to convert, not do \
           (0 - a_uSrc). \
           \
           See also: http://teaching.idallen.com/dat2343/10f/notes/040_overflow.txt */ \
        fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth( (  ((uint ## a_cBitsWidth ## _t)~((a_uDst) ^ (a_uSrcOf))) \
                                                     & RT_BIT_64(a_cBitsWidth - 1)) \
                                                   & ((a_uResult) ^ (a_uDst)) ); \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)
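
/* Illustrative worked example (not from the original source), 8-bit ADD:
   0x7f + 0x01 = 0x80.  Both inputs have sign bit 0 and the result has sign
   bit 1, so ~(a_uDst ^ a_uSrcOf) has bit 7 set and so does
   (a_uResult ^ a_uDst) => OF.  CF is clear (no unsigned wrap: 0x80 >= 0x7f),
   SF is set (bit 7 of the result), ZF is clear, PF is clear (0x80 has odd
   parity) and AF is set (0x80 ^ 0x01 ^ 0x7f = 0xfe, bit 4 set). */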

/**
 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) after a logical op.
 *
 * CF and OF are defined to be 0 by logical operations. AF on the other hand is
 * undefined. We do not set AF, as that seems to make the most sense (which
 * probably makes it the most wrong in real life).
 *
 * @returns Status bits.
 * @param   a_pfEFlags      Pointer to the 32-bit EFLAGS value to update.
 * @param   a_uResult       Unsigned result value.
 * @param   a_cBitsWidth    The width of the result (8, 16, 32, 64).
 * @param   a_fExtra        Additional bits to set.
 */
#define IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(a_pfEFlags, a_uResult, a_cBitsWidth, a_fExtra) \
    do { \
        uint32_t fEflTmp = *(a_pfEFlags); \
        fEflTmp &= ~X86_EFL_STATUS_BITS; \
        fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
        fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
        fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
        fEflTmp |= (a_fExtra); \
        *(a_pfEFlags) = fEflTmp; \
    } while (0)
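
/* Illustrative worked example (not from the original source), 8-bit AND:
   0xf0 & 0x0f = 0x00.  ZF is set, PF is set (zero has even parity), SF is
   clear, and CF/OF/AF end up clear because all status bits are masked off
   first and no extras are passed in. */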


/*********************************************************************************************************************************
*   Global Variables                                                                                                             *
*********************************************************************************************************************************/
/**
 * Parity calculation table.
 *
 * This is also used by iemAllAImpl.asm.
 *
 * The generator code:
 * @code
 * #include <stdio.h>
 *
 * int main()
 * {
 *     unsigned b;
 *     for (b = 0; b < 256; b++)
 *     {
 *         int cOnes = ( b       & 1)
 *                   + ((b >> 1) & 1)
 *                   + ((b >> 2) & 1)
 *                   + ((b >> 3) & 1)
 *                   + ((b >> 4) & 1)
 *                   + ((b >> 5) & 1)
 *                   + ((b >> 6) & 1)
 *                   + ((b >> 7) & 1);
 *         printf("    /" "* %#04x = %u%u%u%u%u%u%u%ub *" "/ %s,\n",
 *                b,
 *                (b >> 7) & 1,
 *                (b >> 6) & 1,
 *                (b >> 5) & 1,
 *                (b >> 4) & 1,
 *                (b >> 3) & 1,
 *                (b >> 2) & 1,
 *                (b >> 1) & 1,
 *                b & 1,
 *                cOnes & 1 ? "0" : "X86_EFL_PF");
 *     }
 *     return 0;
 * }
 * @endcode
 */
uint8_t const g_afParity[256] =
{
    /* 0x00 = 00000000b */ X86_EFL_PF,
    /* 0x01 = 00000001b */ 0,
    /* 0x02 = 00000010b */ 0,
    /* 0x03 = 00000011b */ X86_EFL_PF,
    /* 0x04 = 00000100b */ 0,
    /* 0x05 = 00000101b */ X86_EFL_PF,
    /* 0x06 = 00000110b */ X86_EFL_PF,
    /* 0x07 = 00000111b */ 0,
    /* 0x08 = 00001000b */ 0,
    /* 0x09 = 00001001b */ X86_EFL_PF,
    /* 0x0a = 00001010b */ X86_EFL_PF,
    /* 0x0b = 00001011b */ 0,
    /* 0x0c = 00001100b */ X86_EFL_PF,
    /* 0x0d = 00001101b */ 0,
    /* 0x0e = 00001110b */ 0,
    /* 0x0f = 00001111b */ X86_EFL_PF,
    /* 0x10 = 00010000b */ 0,
    /* 0x11 = 00010001b */ X86_EFL_PF,
    /* 0x12 = 00010010b */ X86_EFL_PF,
    /* 0x13 = 00010011b */ 0,
    /* 0x14 = 00010100b */ X86_EFL_PF,
    /* 0x15 = 00010101b */ 0,
    /* 0x16 = 00010110b */ 0,
    /* 0x17 = 00010111b */ X86_EFL_PF,
    /* 0x18 = 00011000b */ X86_EFL_PF,
    /* 0x19 = 00011001b */ 0,
    /* 0x1a = 00011010b */ 0,
    /* 0x1b = 00011011b */ X86_EFL_PF,
    /* 0x1c = 00011100b */ 0,
    /* 0x1d = 00011101b */ X86_EFL_PF,
    /* 0x1e = 00011110b */ X86_EFL_PF,
    /* 0x1f = 00011111b */ 0,
    /* 0x20 = 00100000b */ 0,
    /* 0x21 = 00100001b */ X86_EFL_PF,
    /* 0x22 = 00100010b */ X86_EFL_PF,
    /* 0x23 = 00100011b */ 0,
    /* 0x24 = 00100100b */ X86_EFL_PF,
    /* 0x25 = 00100101b */ 0,
    /* 0x26 = 00100110b */ 0,
    /* 0x27 = 00100111b */ X86_EFL_PF,
    /* 0x28 = 00101000b */ X86_EFL_PF,
    /* 0x29 = 00101001b */ 0,
    /* 0x2a = 00101010b */ 0,
    /* 0x2b = 00101011b */ X86_EFL_PF,
    /* 0x2c = 00101100b */ 0,
    /* 0x2d = 00101101b */ X86_EFL_PF,
    /* 0x2e = 00101110b */ X86_EFL_PF,
    /* 0x2f = 00101111b */ 0,
    /* 0x30 = 00110000b */ X86_EFL_PF,
    /* 0x31 = 00110001b */ 0,
    /* 0x32 = 00110010b */ 0,
    /* 0x33 = 00110011b */ X86_EFL_PF,
    /* 0x34 = 00110100b */ 0,
    /* 0x35 = 00110101b */ X86_EFL_PF,
    /* 0x36 = 00110110b */ X86_EFL_PF,
    /* 0x37 = 00110111b */ 0,
    /* 0x38 = 00111000b */ 0,
    /* 0x39 = 00111001b */ X86_EFL_PF,
    /* 0x3a = 00111010b */ X86_EFL_PF,
    /* 0x3b = 00111011b */ 0,
    /* 0x3c = 00111100b */ X86_EFL_PF,
    /* 0x3d = 00111101b */ 0,
    /* 0x3e = 00111110b */ 0,
    /* 0x3f = 00111111b */ X86_EFL_PF,
    /* 0x40 = 01000000b */ 0,
    /* 0x41 = 01000001b */ X86_EFL_PF,
    /* 0x42 = 01000010b */ X86_EFL_PF,
    /* 0x43 = 01000011b */ 0,
    /* 0x44 = 01000100b */ X86_EFL_PF,
    /* 0x45 = 01000101b */ 0,
    /* 0x46 = 01000110b */ 0,
    /* 0x47 = 01000111b */ X86_EFL_PF,
    /* 0x48 = 01001000b */ X86_EFL_PF,
    /* 0x49 = 01001001b */ 0,
    /* 0x4a = 01001010b */ 0,
    /* 0x4b = 01001011b */ X86_EFL_PF,
    /* 0x4c = 01001100b */ 0,
    /* 0x4d = 01001101b */ X86_EFL_PF,
    /* 0x4e = 01001110b */ X86_EFL_PF,
    /* 0x4f = 01001111b */ 0,
    /* 0x50 = 01010000b */ X86_EFL_PF,
    /* 0x51 = 01010001b */ 0,
    /* 0x52 = 01010010b */ 0,
    /* 0x53 = 01010011b */ X86_EFL_PF,
    /* 0x54 = 01010100b */ 0,
    /* 0x55 = 01010101b */ X86_EFL_PF,
    /* 0x56 = 01010110b */ X86_EFL_PF,
    /* 0x57 = 01010111b */ 0,
    /* 0x58 = 01011000b */ 0,
    /* 0x59 = 01011001b */ X86_EFL_PF,
    /* 0x5a = 01011010b */ X86_EFL_PF,
    /* 0x5b = 01011011b */ 0,
    /* 0x5c = 01011100b */ X86_EFL_PF,
    /* 0x5d = 01011101b */ 0,
    /* 0x5e = 01011110b */ 0,
    /* 0x5f = 01011111b */ X86_EFL_PF,
    /* 0x60 = 01100000b */ X86_EFL_PF,
    /* 0x61 = 01100001b */ 0,
    /* 0x62 = 01100010b */ 0,
    /* 0x63 = 01100011b */ X86_EFL_PF,
    /* 0x64 = 01100100b */ 0,
    /* 0x65 = 01100101b */ X86_EFL_PF,
    /* 0x66 = 01100110b */ X86_EFL_PF,
    /* 0x67 = 01100111b */ 0,
    /* 0x68 = 01101000b */ 0,
    /* 0x69 = 01101001b */ X86_EFL_PF,
    /* 0x6a = 01101010b */ X86_EFL_PF,
    /* 0x6b = 01101011b */ 0,
    /* 0x6c = 01101100b */ X86_EFL_PF,
    /* 0x6d = 01101101b */ 0,
    /* 0x6e = 01101110b */ 0,
    /* 0x6f = 01101111b */ X86_EFL_PF,
    /* 0x70 = 01110000b */ 0,
    /* 0x71 = 01110001b */ X86_EFL_PF,
    /* 0x72 = 01110010b */ X86_EFL_PF,
    /* 0x73 = 01110011b */ 0,
    /* 0x74 = 01110100b */ X86_EFL_PF,
    /* 0x75 = 01110101b */ 0,
    /* 0x76 = 01110110b */ 0,
    /* 0x77 = 01110111b */ X86_EFL_PF,
    /* 0x78 = 01111000b */ X86_EFL_PF,
    /* 0x79 = 01111001b */ 0,
    /* 0x7a = 01111010b */ 0,
    /* 0x7b = 01111011b */ X86_EFL_PF,
    /* 0x7c = 01111100b */ 0,
    /* 0x7d = 01111101b */ X86_EFL_PF,
    /* 0x7e = 01111110b */ X86_EFL_PF,
    /* 0x7f = 01111111b */ 0,
    /* 0x80 = 10000000b */ 0,
    /* 0x81 = 10000001b */ X86_EFL_PF,
    /* 0x82 = 10000010b */ X86_EFL_PF,
    /* 0x83 = 10000011b */ 0,
    /* 0x84 = 10000100b */ X86_EFL_PF,
    /* 0x85 = 10000101b */ 0,
    /* 0x86 = 10000110b */ 0,
    /* 0x87 = 10000111b */ X86_EFL_PF,
    /* 0x88 = 10001000b */ X86_EFL_PF,
    /* 0x89 = 10001001b */ 0,
    /* 0x8a = 10001010b */ 0,
    /* 0x8b = 10001011b */ X86_EFL_PF,
    /* 0x8c = 10001100b */ 0,
    /* 0x8d = 10001101b */ X86_EFL_PF,
    /* 0x8e = 10001110b */ X86_EFL_PF,
    /* 0x8f = 10001111b */ 0,
    /* 0x90 = 10010000b */ X86_EFL_PF,
    /* 0x91 = 10010001b */ 0,
    /* 0x92 = 10010010b */ 0,
    /* 0x93 = 10010011b */ X86_EFL_PF,
    /* 0x94 = 10010100b */ 0,
    /* 0x95 = 10010101b */ X86_EFL_PF,
    /* 0x96 = 10010110b */ X86_EFL_PF,
    /* 0x97 = 10010111b */ 0,
    /* 0x98 = 10011000b */ 0,
    /* 0x99 = 10011001b */ X86_EFL_PF,
    /* 0x9a = 10011010b */ X86_EFL_PF,
    /* 0x9b = 10011011b */ 0,
    /* 0x9c = 10011100b */ X86_EFL_PF,
    /* 0x9d = 10011101b */ 0,
    /* 0x9e = 10011110b */ 0,
    /* 0x9f = 10011111b */ X86_EFL_PF,
    /* 0xa0 = 10100000b */ X86_EFL_PF,
    /* 0xa1 = 10100001b */ 0,
    /* 0xa2 = 10100010b */ 0,
    /* 0xa3 = 10100011b */ X86_EFL_PF,
    /* 0xa4 = 10100100b */ 0,
    /* 0xa5 = 10100101b */ X86_EFL_PF,
    /* 0xa6 = 10100110b */ X86_EFL_PF,
    /* 0xa7 = 10100111b */ 0,
    /* 0xa8 = 10101000b */ 0,
    /* 0xa9 = 10101001b */ X86_EFL_PF,
    /* 0xaa = 10101010b */ X86_EFL_PF,
    /* 0xab = 10101011b */ 0,
    /* 0xac = 10101100b */ X86_EFL_PF,
    /* 0xad = 10101101b */ 0,
    /* 0xae = 10101110b */ 0,
    /* 0xaf = 10101111b */ X86_EFL_PF,
    /* 0xb0 = 10110000b */ 0,
    /* 0xb1 = 10110001b */ X86_EFL_PF,
    /* 0xb2 = 10110010b */ X86_EFL_PF,
    /* 0xb3 = 10110011b */ 0,
    /* 0xb4 = 10110100b */ X86_EFL_PF,
    /* 0xb5 = 10110101b */ 0,
    /* 0xb6 = 10110110b */ 0,
    /* 0xb7 = 10110111b */ X86_EFL_PF,
    /* 0xb8 = 10111000b */ X86_EFL_PF,
    /* 0xb9 = 10111001b */ 0,
    /* 0xba = 10111010b */ 0,
    /* 0xbb = 10111011b */ X86_EFL_PF,
    /* 0xbc = 10111100b */ 0,
    /* 0xbd = 10111101b */ X86_EFL_PF,
    /* 0xbe = 10111110b */ X86_EFL_PF,
    /* 0xbf = 10111111b */ 0,
    /* 0xc0 = 11000000b */ X86_EFL_PF,
    /* 0xc1 = 11000001b */ 0,
    /* 0xc2 = 11000010b */ 0,
    /* 0xc3 = 11000011b */ X86_EFL_PF,
    /* 0xc4 = 11000100b */ 0,
    /* 0xc5 = 11000101b */ X86_EFL_PF,
    /* 0xc6 = 11000110b */ X86_EFL_PF,
    /* 0xc7 = 11000111b */ 0,
    /* 0xc8 = 11001000b */ 0,
    /* 0xc9 = 11001001b */ X86_EFL_PF,
    /* 0xca = 11001010b */ X86_EFL_PF,
    /* 0xcb = 11001011b */ 0,
    /* 0xcc = 11001100b */ X86_EFL_PF,
    /* 0xcd = 11001101b */ 0,
    /* 0xce = 11001110b */ 0,
    /* 0xcf = 11001111b */ X86_EFL_PF,
    /* 0xd0 = 11010000b */ 0,
    /* 0xd1 = 11010001b */ X86_EFL_PF,
    /* 0xd2 = 11010010b */ X86_EFL_PF,
    /* 0xd3 = 11010011b */ 0,
    /* 0xd4 = 11010100b */ X86_EFL_PF,
    /* 0xd5 = 11010101b */ 0,
    /* 0xd6 = 11010110b */ 0,
    /* 0xd7 = 11010111b */ X86_EFL_PF,
    /* 0xd8 = 11011000b */ X86_EFL_PF,
    /* 0xd9 = 11011001b */ 0,
    /* 0xda = 11011010b */ 0,
    /* 0xdb = 11011011b */ X86_EFL_PF,
    /* 0xdc = 11011100b */ 0,
    /* 0xdd = 11011101b */ X86_EFL_PF,
    /* 0xde = 11011110b */ X86_EFL_PF,
    /* 0xdf = 11011111b */ 0,
    /* 0xe0 = 11100000b */ 0,
    /* 0xe1 = 11100001b */ X86_EFL_PF,
    /* 0xe2 = 11100010b */ X86_EFL_PF,
    /* 0xe3 = 11100011b */ 0,
    /* 0xe4 = 11100100b */ X86_EFL_PF,
    /* 0xe5 = 11100101b */ 0,
    /* 0xe6 = 11100110b */ 0,
    /* 0xe7 = 11100111b */ X86_EFL_PF,
    /* 0xe8 = 11101000b */ X86_EFL_PF,
    /* 0xe9 = 11101001b */ 0,
    /* 0xea = 11101010b */ 0,
    /* 0xeb = 11101011b */ X86_EFL_PF,
    /* 0xec = 11101100b */ 0,
    /* 0xed = 11101101b */ X86_EFL_PF,
    /* 0xee = 11101110b */ X86_EFL_PF,
    /* 0xef = 11101111b */ 0,
    /* 0xf0 = 11110000b */ X86_EFL_PF,
    /* 0xf1 = 11110001b */ 0,
    /* 0xf2 = 11110010b */ 0,
    /* 0xf3 = 11110011b */ X86_EFL_PF,
    /* 0xf4 = 11110100b */ 0,
    /* 0xf5 = 11110101b */ X86_EFL_PF,
    /* 0xf6 = 11110110b */ X86_EFL_PF,
    /* 0xf7 = 11110111b */ 0,
    /* 0xf8 = 11111000b */ 0,
    /* 0xf9 = 11111001b */ X86_EFL_PF,
    /* 0xfa = 11111010b */ X86_EFL_PF,
    /* 0xfb = 11111011b */ 0,
    /* 0xfc = 11111100b */ X86_EFL_PF,
    /* 0xfd = 11111101b */ 0,
    /* 0xfe = 11111110b */ 0,
    /* 0xff = 11111111b */ X86_EFL_PF,
};

/* for clang: */
extern const RTFLOAT80U  g_ar80Zero[];
extern const RTFLOAT80U  g_ar80One[];
extern const RTFLOAT80U  g_r80Indefinite;
extern const RTFLOAT80U  g_ar80Infinity[];
extern const RTFLOAT128U g_r128Ln2;
extern const RTUINT128U  g_u128Ln2Mantissa;
extern const RTUINT128U  g_u128Ln2MantissaIntel;
extern const RTFLOAT128U g_ar128F2xm1HornerConsts[];

/** Zero values (indexed by fSign). */
RTFLOAT80U const g_ar80Zero[] = { RTFLOAT80U_INIT_ZERO(0), RTFLOAT80U_INIT_ZERO(1) };

/** One values (indexed by fSign). */
RTFLOAT80U const g_ar80One[] =
{ RTFLOAT80U_INIT(0, RT_BIT_64(63), RTFLOAT80U_EXP_BIAS), RTFLOAT80U_INIT(1, RT_BIT_64(63), RTFLOAT80U_EXP_BIAS) };

/** Indefinite (negative). */
RTFLOAT80U const g_r80Indefinite = RTFLOAT80U_INIT_INDEFINITE(1);

/** Infinities (indexed by fSign). */
RTFLOAT80U const g_ar80Infinity[] = { RTFLOAT80U_INIT_INF(0), RTFLOAT80U_INIT_INF(1) };

#if 0
/** 128-bit floating point constant: 2.0 */
const RTFLOAT128U g_r128Two = RTFLOAT128U_INIT_C(0, 0, 0, RTFLOAT128U_EXP_BIAS + 1);
#endif


/* The next section is generated by tools/IEMGenFpuConstants: */

/** The ln2 constant as 128-bit floating point value.
 * base-10: 6.93147180559945309417232121458176575e-1
 * base-16: b.17217f7d1cf79abc9e3b39803f30@-1
 * base-2 : 1.0110001011100100001011111110111110100011100111101111001101010111100100111100011101100111001100000000011111100110e-1 */
//const RTFLOAT128U g_r128Ln2 = RTFLOAT128U_INIT_C(0, 0x62e42fefa39e, 0xf35793c7673007e6, 0x3ffe);
const RTFLOAT128U g_r128Ln2 = RTFLOAT128U_INIT_C(0, 0x62e42fefa39e, 0xf357900000000000, 0x3ffe);
/** High precision ln2 value.
 * base-10: 6.931471805599453094172321214581765680747e-1
 * base-16: b.17217f7d1cf79abc9e3b39803f2f6af0@-1
 * base-2 : 1.0110001011100100001011111110111110100011100111101111001101010111100100111100011101100111001100000000011111100101111011010101111e-1 */
const RTUINT128U g_u128Ln2Mantissa = RTUINT128_INIT_C(0xb17217f7d1cf79ab, 0xc9e3b39803f2f6af);
/** High precision ln2 value, compatible with f2xm1 results on intel 10980XE.
 * base-10: 6.931471805599453094151379470289064954613e-1
 * base-16: b.17217f7d1cf79abc0000000000000000@-1
 * base-2 : 1.0110001011100100001011111110111110100011100111101111001101010111100000000000000000000000000000000000000000000000000000000000000e-1 */
const RTUINT128U g_u128Ln2MantissaIntel = RTUINT128_INIT_C(0xb17217f7d1cf79ab, 0xc000000000000000);

/** Horner constants for f2xm1 */
const RTFLOAT128U g_ar128F2xm1HornerConsts[] =
{
    /* a0
     * base-10: 1.00000000000000000000000000000000000e0
     * base-16: 1.0000000000000000000000000000@0
     * base-2 : 1.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e0 */
    RTFLOAT128U_INIT_C(0, 0x000000000000, 0x0000000000000000, 0x3fff),
    /* a1
     * base-10: 5.00000000000000000000000000000000000e-1
     * base-16: 8.0000000000000000000000000000@-1
     * base-2 : 1.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000e-1 */
    RTFLOAT128U_INIT_C(0, 0x000000000000, 0x0000000000000000, 0x3ffe),
    /* a2
     * base-10: 1.66666666666666666666666666666666658e-1
     * base-16: 2.aaaaaaaaaaaaaaaaaaaaaaaaaaaa@-1
     * base-2 : 1.0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101e-3 */
    RTFLOAT128U_INIT_C(0, 0x555555555555, 0x5555555555555555, 0x3ffc),
    /* a3
     * base-10: 4.16666666666666666666666666666666646e-2
     * base-16: a.aaaaaaaaaaaaaaaaaaaaaaaaaaa8@-2
     * base-2 : 1.0101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101010101e-5 */
    RTFLOAT128U_INIT_C(0, 0x555555555555, 0x5555555555555555, 0x3ffa),
    /* a4
     * base-10: 8.33333333333333333333333333333333323e-3
     * base-16: 2.2222222222222222222222222222@-2
     * base-2 : 1.0001000100010001000100010001000100010001000100010001000100010001000100010001000100010001000100010001000100010001e-7 */
    RTFLOAT128U_INIT_C(0, 0x111111111111, 0x1111111111111111, 0x3ff8),
    /* a5
     * base-10: 1.38888888888888888888888888888888874e-3
     * base-16: 5.b05b05b05b05b05b05b05b05b058@-3
     * base-2 : 1.0110110000010110110000010110110000010110110000010110110000010110110000010110110000010110110000010110110000010110e-10 */
    RTFLOAT128U_INIT_C(0, 0x6c16c16c16c1, 0x6c16c16c16c16c16, 0x3ff5),
    /* a6
     * base-10: 1.98412698412698412698412698412698412e-4
     * base-16: d.00d00d00d00d00d00d00d00d00d0@-4
     * base-2 : 1.1010000000011010000000011010000000011010000000011010000000011010000000011010000000011010000000011010000000011010e-13 */
    RTFLOAT128U_INIT_C(0, 0xa01a01a01a01, 0xa01a01a01a01a01a, 0x3ff2),
    /* a7
     * base-10: 2.48015873015873015873015873015873015e-5
     * base-16: 1.a01a01a01a01a01a01a01a01a01a@-4
     * base-2 : 1.1010000000011010000000011010000000011010000000011010000000011010000000011010000000011010000000011010000000011010e-16 */
    RTFLOAT128U_INIT_C(0, 0xa01a01a01a01, 0xa01a01a01a01a01a, 0x3fef),
    /* a8
     * base-10: 2.75573192239858906525573192239858902e-6
     * base-16: 2.e3bc74aad8e671f5583911ca002e@-5
     * base-2 : 1.0111000111011110001110100101010101101100011100110011100011111010101011000001110010001000111001010000000000010111e-19 */
    RTFLOAT128U_INIT_C(0, 0x71de3a556c73, 0x38faac1c88e50017, 0x3fec),
    /* a9
     * base-10: 2.75573192239858906525573192239858865e-7
     * base-16: 4.9f93edde27d71cbbc05b4fa999e0@-6
     * base-2 : 1.0010011111100100111110110111011110001001111101011100011100101110111100000001011011010011111010100110011001111000e-22 */
    RTFLOAT128U_INIT_C(0, 0x27e4fb7789f5, 0xc72ef016d3ea6678, 0x3fe9),
    /* a10
     * base-10: 2.50521083854417187750521083854417184e-8
     * base-16: 6.b99159fd5138e3f9d1f92e0df71c@-7
     * base-2 : 1.1010111001100100010101100111111101010100010011100011100011111110011101000111111001001011100000110111110111000111e-26 */
    RTFLOAT128U_INIT_C(0, 0xae64567f544e, 0x38fe747e4b837dc7, 0x3fe5),
    /* a11
     * base-10: 2.08767569878680989792100903212014296e-9
     * base-16: 8.f76c77fc6c4bdaa26d4c3d67f420@-8
     * base-2 : 1.0001111011101101100011101111111110001101100010010111101101010100010011011010100110000111101011001111111010000100e-29 */
    RTFLOAT128U_INIT_C(0, 0x1eed8eff8d89, 0x7b544da987acfe84, 0x3fe2),
    /* a12
     * base-10: 1.60590438368216145993923771701549472e-10
     * base-16: b.092309d43684be51c198e91d7b40@-9
     * base-2 : 1.0110000100100100011000010011101010000110110100001001011111001010001110000011001100011101001000111010111101101000e-33 */
    RTFLOAT128U_INIT_C(0, 0x6124613a86d0, 0x97ca38331d23af68, 0x3fde),
    /* a13
     * base-10: 1.14707455977297247138516979786821043e-11
     * base-16: c.9cba54603e4e905d6f8a2efd1f20@-10
     * base-2 : 1.1001001110010111010010101000110000000111110010011101001000001011101011011111000101000101110111111010001111100100e-37 */
    RTFLOAT128U_INIT_C(0, 0x93974a8c07c9, 0xd20badf145dfa3e4, 0x3fda),
    /* a14
     * base-10: 7.64716373181981647590113198578806964e-13
     * base-16: d.73f9f399dc0f88ec32b587746578@-11
     * base-2 : 1.1010111001111111001111100111001100111011100000011111000100011101100001100101011010110000111011101000110010101111e-41 */
    RTFLOAT128U_INIT_C(0, 0xae7f3e733b81, 0xf11d8656b0ee8caf, 0x3fd6),
    /* a15
     * base-10: 4.77947733238738529743820749111754352e-14
     * base-16: d.73f9f399dc0f88ec32b587746578@-12
     * base-2 : 1.1010111001111111001111100111001100111011100000011111000100011101100001100101011010110000111011101000110010101111e-45 */
    RTFLOAT128U_INIT_C(0, 0xae7f3e733b81, 0xf11d8656b0ee8caf, 0x3fd2),
    /* a16
     * base-10: 2.81145725434552076319894558301031970e-15
     * base-16: c.a963b81856a53593028cbbb8d7f8@-13
     * base-2 : 1.1001010100101100011101110000001100001010110101001010011010110010011000000101000110010111011101110001101011111111e-49 */
    RTFLOAT128U_INIT_C(0, 0x952c77030ad4, 0xa6b2605197771aff, 0x3fce),
    /* a17
     * base-10: 1.56192069685862264622163643500573321e-16
     * base-16: b.413c31dcbecbbdd8024435161550@-14
     * base-2 : 1.0110100000100111100001100011101110010111110110010111011110111011000000000100100010000110101000101100001010101010e-53 */
    RTFLOAT128U_INIT_C(0, 0x6827863b97d9, 0x77bb004886a2c2aa, 0x3fca),
    /* a18
     * base-10: 8.22063524662432971695598123687227980e-18
     * base-16: 9.7a4da340a0ab92650f61dbdcb3a0@-15
     * base-2 : 1.0010111101001001101101000110100000010100000101010111001001001100101000011110110000111011011110111001011001110100e-57 */
    RTFLOAT128U_INIT_C(0, 0x2f49b4681415, 0x724ca1ec3b7b9674, 0x3fc6),
    /* a19
     * base-10: 4.11031762331216485847799061843614006e-19
     * base-16: 7.950ae900808941ea72b4afe3c2e8@-16
     * base-2 : 1.1110010101000010101110100100000000100000001000100101000001111010100111001010110100101011111110001111000010111010e-62 */
    RTFLOAT128U_INIT_C(0, 0xe542ba402022, 0x507a9cad2bf8f0ba, 0x3fc1),
    /* a20
     * base-10: 7.04351638180413298434020229233492164e-20
     * base-16: 1.4c9ee35db1d1f3c946fdcd48fd88@-16
     * base-2 : 1.0100110010011110111000110101110110110001110100011111001111001001010001101111110111001101010010001111110110001000e-64 */
    RTFLOAT128U_INIT_C(0, 0x4c9ee35db1d1, 0xf3c946fdcd48fd88, 0x3fbf),
    /* a21
     * base-10: 5.81527769640186708776361513365257702e-20
     * base-16: 1.129e64bff606a2b9c9fc624481cd@-16
     * base-2 : 1.0001001010011110011001001011111111110110000001101010001010111001110010011111110001100010010001001000000111001101e-64 */
    RTFLOAT128U_INIT_C(0, 0x129e64bff606, 0xa2b9c9fc624481cd, 0x3fbf),
};
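
/* Illustrative note (not from the original source): the leading coefficients
   match 1/(n+1)! (1, 1/2, 1/6, 1/24, ...), i.e. the Maclaurin series of
   (e^z - 1)/z, which fits evaluating f2xm1(x) = 2^x - 1 = e^(x*ln2) - 1 as
   z * poly(z) with z = x*ln2 via Horner's rule; the last few coefficients
   deviate from 1/(n+1)!, presumably tuned for the working precision. */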


/*
 * There are a few 64-bit on 32-bit things we'd rather do in C. Actually, doing
 * it all in C is probably safer atm., optimize what's necessary later, maybe.
 */
#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)


/*********************************************************************************************************************************
*   Binary Operations                                                                                                            *
*********************************************************************************************************************************/

/*
 * ADD
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_add_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uDst    = *puDst;
    uint64_t uResult = uDst + uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult < uDst, uSrc);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_add_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uDst    = *puDst;
    uint32_t uResult = uDst + uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult < uDst, uSrc);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_add_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uDst    = *puDst;
    uint16_t uResult = uDst + uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult < uDst, uSrc);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_add_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uDst    = *puDst;
    uint8_t uResult = uDst + uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult < uDst, uSrc);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * ADC
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_add_u64(puDst, uSrc, pfEFlags);
    else
    {
        uint64_t uDst    = *puDst;
        uint64_t uResult = uDst + uSrc + 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uResult <= uDst, uSrc);
    }
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_add_u32(puDst, uSrc, pfEFlags);
    else
    {
        uint32_t uDst    = *puDst;
        uint32_t uResult = uDst + uSrc + 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uResult <= uDst, uSrc);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_add_u16(puDst, uSrc, pfEFlags);
    else
    {
        uint16_t uDst    = *puDst;
        uint16_t uResult = uDst + uSrc + 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uResult <= uDst, uSrc);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_adc_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_add_u8(puDst, uSrc, pfEFlags);
    else
    {
        uint8_t uDst    = *puDst;
        uint8_t uResult = uDst + uSrc + 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uResult <= uDst, uSrc);
    }
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * SUB
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uDst    = *puDst;
    uint64_t uResult = uDst - uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uDst < uSrc, uSrc ^ RT_BIT_64(63));
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uDst    = *puDst;
    uint32_t uResult = uDst - uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uDst < uSrc, uSrc ^ RT_BIT_32(31));
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uDst    = *puDst;
    uint16_t uResult = uDst - uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uDst < uSrc, uSrc ^ (uint16_t)0x8000);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sub_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uDst    = *puDst;
    uint8_t uResult = uDst - uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uDst < uSrc, uSrc ^ (uint8_t)0x80);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * SBB
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u64(puDst, uSrc, pfEFlags);
    else
    {
        uint64_t uDst    = *puDst;
        uint64_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 64, uDst <= uSrc, uSrc ^ RT_BIT_64(63));
    }
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u32(puDst, uSrc, pfEFlags);
    else
    {
        uint32_t uDst    = *puDst;
        uint32_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 32, uDst <= uSrc, uSrc ^ RT_BIT_32(31));
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u16(puDst, uSrc, pfEFlags);
    else
    {
        uint16_t uDst    = *puDst;
        uint16_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 16, uDst <= uSrc, uSrc ^ (uint16_t)0x8000);
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_sbb_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    if (!(*pfEFlags & X86_EFL_CF))
        iemAImpl_sub_u8(puDst, uSrc, pfEFlags);
    else
    {
        uint8_t uDst    = *puDst;
        uint8_t uResult = uDst - uSrc - 1;
        *puDst = uResult;
        IEM_EFL_UPDATE_STATUS_BITS_FOR_ARITHMETIC(pfEFlags, uResult, uDst, uSrc, 8, uDst <= uSrc, uSrc ^ (uint8_t)0x80);
    }
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * OR
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_or_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uResult = *puDst | uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_or_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uResult = *puDst | uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_or_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uResult = *puDst | uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_or_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uResult = *puDst | uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * XOR
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uResult = *puDst ^ uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uResult = *puDst ^ uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uResult = *puDst ^ uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xor_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uResult = *puDst ^ uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * AND
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_and_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uResult = *puDst & uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_and_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uResult = *puDst & uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_and_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uResult = *puDst & uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_and_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uResult = *puDst & uSrc;
    *puDst = uResult;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * CMP
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uDstTmp = *puDst;
    iemAImpl_sub_u64(&uDstTmp, uSrc, pfEFlags);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uDstTmp = *puDst;
    iemAImpl_sub_u32(&uDstTmp, uSrc, pfEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uDstTmp = *puDst;
    iemAImpl_sub_u16(&uDstTmp, uSrc, pfEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmp_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uDstTmp = *puDst;
    iemAImpl_sub_u8(&uDstTmp, uSrc, pfEFlags);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * TEST
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_test_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    uint64_t uResult = *puDst & uSrc;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 64, 0);
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_test_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    uint32_t uResult = *puDst & uSrc;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 32, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_test_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    uint16_t uResult = *puDst & uSrc;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 16, 0);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_test_u8,(uint8_t *puDst, uint8_t uSrc, uint32_t *pfEFlags))
{
    uint8_t uResult = *puDst & uSrc;
    IEM_EFL_UPDATE_STATUS_BITS_FOR_LOGIC(pfEFlags, uResult, 8, 0);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * LOCK prefixed variants of the above
 */

/** Locked binary operand operation (width-generic). */
# define DO_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth) \
    do { \
        uint ## a_cBitsWidth ## _t uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
        uint ## a_cBitsWidth ## _t uTmp; \
        uint32_t fEflTmp; \
        do \
        { \
            uTmp    = uOld; \
            fEflTmp = *pfEFlags; \
            iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth(&uTmp, uSrc, &fEflTmp); \
        } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uTmp, uOld, &uOld)); \
        *pfEFlags = fEflTmp; \
    } while (0)
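
/* Illustrative sketch (not from the original source) of what the macro
   expands to for "add"/64: a classic optimistic compare-and-swap loop.
   The non-atomic helper computes result and flags on private copies; the
   CAS publishes them only if *puDst still holds uOld, otherwise uOld is
   refreshed with the current memory value and the iteration is retried:

       uint64_t uOld = ASMAtomicUoReadU64(puDst);
       uint64_t uTmp;
       uint32_t fEflTmp;
       do
       {
           uTmp    = uOld;
           fEflTmp = *pfEFlags;
           iemAImpl_add_u64(&uTmp, uSrc, &fEflTmp);
       } while (!ASMAtomicCmpXchgExU64(puDst, uTmp, uOld, &uOld));
       *pfEFlags = fEflTmp;
*/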


#define EMIT_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth) \
    IEM_DECL_IMPL_DEF(void, iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth ## _locked,(uint ## a_cBitsWidth ## _t *puDst, \
                                                                                      uint ## a_cBitsWidth ## _t uSrc, \
                                                                                      uint32_t *pfEFlags)) \
    { \
        DO_LOCKED_BIN_OP(a_Mnemonic, a_cBitsWidth); \
    }

EMIT_LOCKED_BIN_OP(add, 64)
EMIT_LOCKED_BIN_OP(adc, 64)
EMIT_LOCKED_BIN_OP(sub, 64)
EMIT_LOCKED_BIN_OP(sbb, 64)
EMIT_LOCKED_BIN_OP(or, 64)
EMIT_LOCKED_BIN_OP(xor, 64)
EMIT_LOCKED_BIN_OP(and, 64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_LOCKED_BIN_OP(add, 32)
EMIT_LOCKED_BIN_OP(adc, 32)
EMIT_LOCKED_BIN_OP(sub, 32)
EMIT_LOCKED_BIN_OP(sbb, 32)
EMIT_LOCKED_BIN_OP(or, 32)
EMIT_LOCKED_BIN_OP(xor, 32)
EMIT_LOCKED_BIN_OP(and, 32)

EMIT_LOCKED_BIN_OP(add, 16)
EMIT_LOCKED_BIN_OP(adc, 16)
EMIT_LOCKED_BIN_OP(sub, 16)
EMIT_LOCKED_BIN_OP(sbb, 16)
EMIT_LOCKED_BIN_OP(or, 16)
EMIT_LOCKED_BIN_OP(xor, 16)
EMIT_LOCKED_BIN_OP(and, 16)

EMIT_LOCKED_BIN_OP(add, 8)
EMIT_LOCKED_BIN_OP(adc, 8)
EMIT_LOCKED_BIN_OP(sub, 8)
EMIT_LOCKED_BIN_OP(sbb, 8)
EMIT_LOCKED_BIN_OP(or, 8)
EMIT_LOCKED_BIN_OP(xor, 8)
EMIT_LOCKED_BIN_OP(and, 8)
# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * Bit operations (same signature as above).
 */

/*
 * BT
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. However, it seems they're
       not modified by either AMD (3990x) or Intel (i9-9980HK). */
    Assert(uSrc < 64);
    uint64_t uDst = *puDst;
    if (uDst & RT_BIT_64(uSrc))
        *pfEFlags |= X86_EFL_CF;
    else
        *pfEFlags &= ~X86_EFL_CF;
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. However, it seems they're
       not modified by either AMD (3990x) or Intel (i9-9980HK). */
    Assert(uSrc < 32);
    uint32_t uDst = *puDst;
    if (uDst & RT_BIT_32(uSrc))
        *pfEFlags |= X86_EFL_CF;
    else
        *pfEFlags &= ~X86_EFL_CF;
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bt_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. However, it seems they're
       not modified by either AMD (3990x) or Intel (i9-9980HK). */
    Assert(uSrc < 16);
    uint16_t uDst = *puDst;
    if (uDst & RT_BIT_32(uSrc))
        *pfEFlags |= X86_EFL_CF;
    else
        *pfEFlags &= ~X86_EFL_CF;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * BTC
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. However, it seems they're
       not modified by either AMD (3990x) or Intel (i9-9980HK). */
    Assert(uSrc < 64);
    uint64_t fMask = RT_BIT_64(uSrc);
    uint64_t uDst  = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        *pfEFlags |= X86_EFL_CF;
    }
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        *pfEFlags &= ~X86_EFL_CF;
    }
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. However, it seems they're
       not modified by either AMD (3990x) or Intel (i9-9980HK). */
    Assert(uSrc < 32);
    uint32_t fMask = RT_BIT_32(uSrc);
    uint32_t uDst  = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        *pfEFlags |= X86_EFL_CF;
    }
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        *pfEFlags &= ~X86_EFL_CF;
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_btc_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. However, it seems they're
       not modified by either AMD (3990x) or Intel (i9-9980HK). */
    Assert(uSrc < 16);
    uint16_t fMask = RT_BIT_32(uSrc);
    uint16_t uDst  = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        *pfEFlags |= X86_EFL_CF;
    }
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        *pfEFlags &= ~X86_EFL_CF;
    }
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * BTR
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 64);
    uint64_t fMask = RT_BIT_64(uSrc);
    uint64_t uDst  = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        *pfEFlags |= X86_EFL_CF;
    }
    else
        *pfEFlags &= ~X86_EFL_CF;
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 32);
    uint32_t fMask = RT_BIT_32(uSrc);
    uint32_t uDst  = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        *pfEFlags |= X86_EFL_CF;
    }
    else
        *pfEFlags &= ~X86_EFL_CF;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_btr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 16);
    uint16_t fMask = RT_BIT_32(uSrc);
    uint16_t uDst  = *puDst;
    if (uDst & fMask)
    {
        uDst &= ~fMask;
        *puDst = uDst;
        *pfEFlags |= X86_EFL_CF;
    }
    else
        *pfEFlags &= ~X86_EFL_CF;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */

/*
 * BTS
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 64);
    uint64_t fMask = RT_BIT_64(uSrc);
    uint64_t uDst  = *puDst;
    if (uDst & fMask)
        *pfEFlags |= X86_EFL_CF;
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        *pfEFlags &= ~X86_EFL_CF;
    }
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 32);
    uint32_t fMask = RT_BIT_32(uSrc);
    uint32_t uDst  = *puDst;
    if (uDst & fMask)
        *pfEFlags |= X86_EFL_CF;
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        *pfEFlags &= ~X86_EFL_CF;
    }
}


IEM_DECL_IMPL_DEF(void, iemAImpl_bts_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    /* Note! "undefined" flags: OF, SF, ZF, AF, PF. We set them as after a
       logical operation (AND/OR/whatever). */
    Assert(uSrc < 16);
    uint16_t fMask = RT_BIT_32(uSrc);
    uint32_t uDst  = *puDst;
    if (uDst & fMask)
        *pfEFlags |= X86_EFL_CF;
    else
    {
        uDst |= fMask;
        *puDst = uDst;
        *pfEFlags &= ~X86_EFL_CF;
    }
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


EMIT_LOCKED_BIN_OP(btc, 64)
EMIT_LOCKED_BIN_OP(btr, 64)
EMIT_LOCKED_BIN_OP(bts, 64)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_LOCKED_BIN_OP(btc, 32)
EMIT_LOCKED_BIN_OP(btr, 32)
EMIT_LOCKED_BIN_OP(bts, 32)

EMIT_LOCKED_BIN_OP(btc, 16)
EMIT_LOCKED_BIN_OP(btr, 16)
EMIT_LOCKED_BIN_OP(bts, 16)
# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * Helpers for BSR and BSF.
 *
 * Note! "undefined" flags: OF, SF, AF, PF, CF.
 *       Intel behavior modelled on 10980xe, AMD on 3990X. Other
 *       microarchitectures may produce different results (see
 *       https://www.sandpile.org/x86/flags.htm), but we restrict ourselves
 *       to emulating these recent microarchitectures.
 */
#define SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, a_iBit) do { \
        unsigned iBit = (a_iBit); \
        uint32_t fEfl = *pfEFlags & ~(X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF); \
        if (iBit) \
        { \
            *puDst = --iBit; \
            fEfl  |= g_afParity[iBit]; \
        } \
        else \
            fEfl  |= X86_EFL_ZF | X86_EFL_PF; \
        *pfEFlags = fEfl; \
    } while (0)
#define SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, a_iBit) do { \
        unsigned const iBit = (a_iBit); \
        if (iBit) \
        { \
            *puDst = iBit - 1; \
            *pfEFlags &= ~X86_EFL_ZF; \
        } \
        else \
            *pfEFlags |= X86_EFL_ZF; \
    } while (0)
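
/* Illustrative worked example (not from the original source): for
   BSF eax, 0x18 the IPRT search helpers used below return a 1-based index
   (ASMBitFirstSetU32(0x18) == 4, 0 meaning no bit set), so the Intel
   variant stores 4 - 1 = 3 in the destination, clears ZF and sets PF from
   g_afParity[3]; the AMD variant stores 3 too but only touches ZF.  For a
   zero source both variants leave the destination unmodified and set ZF. */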


/*
 * BSF - first (least significant) bit set
 */
IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU64(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64_intel,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU64(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u64_amd,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitFirstSetU64(uSrc));
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU32(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32_intel,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU32(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u32_amd,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitFirstSetU32(uSrc));
}


IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU16(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16_intel,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitFirstSetU16(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsf_u16_amd,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitFirstSetU16(uSrc));
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * BSR - last (most significant) bit set
 */
IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU64(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64_intel,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU64(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u64_amd,(uint64_t *puDst, uint64_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitLastSetU64(uSrc));
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU32(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32_intel,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU32(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u32_amd,(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitLastSetU32(uSrc));
}


IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU16(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16_intel,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_INTEL(puDst, pfEFlags, ASMBitLastSetU16(uSrc));
}

IEM_DECL_IMPL_DEF(void, iemAImpl_bsr_u16_amd,(uint16_t *puDst, uint16_t uSrc, uint32_t *pfEFlags))
{
    SET_BIT_SEARCH_RESULT_AMD(puDst, pfEFlags, ASMBitLastSetU16(uSrc));
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * XCHG
 */

IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u64_locked,(uint64_t *puMem, uint64_t *puReg))
{
#if ARCH_BITS >= 64
    *puReg = ASMAtomicXchgU64(puMem, *puReg);
#else
    uint64_t uOldMem = *puMem;
    while (!ASMAtomicCmpXchgExU64(puMem, *puReg, uOldMem, &uOldMem))
        ASMNopPause();
    *puReg = uOldMem;
#endif
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u32_locked,(uint32_t *puMem, uint32_t *puReg))
{
    *puReg = ASMAtomicXchgU32(puMem, *puReg);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u16_locked,(uint16_t *puMem, uint16_t *puReg))
{
    *puReg = ASMAtomicXchgU16(puMem, *puReg);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u8_locked,(uint8_t *puMem, uint8_t *puReg))
{
    *puReg = ASMAtomicXchgU8(puMem, *puReg);
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/* Unlocked variants for fDisregardLock mode: */

IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u64_unlocked,(uint64_t *puMem, uint64_t *puReg))
{
    uint64_t const uOld = *puMem;
    *puMem = *puReg;
    *puReg = uOld;
}

# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u32_unlocked,(uint32_t *puMem, uint32_t *puReg))
{
    uint32_t const uOld = *puMem;
    *puMem = *puReg;
    *puReg = uOld;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u16_unlocked,(uint16_t *puMem, uint16_t *puReg))
{
    uint16_t const uOld = *puMem;
    *puMem = *puReg;
    *puReg = uOld;
}


IEM_DECL_IMPL_DEF(void, iemAImpl_xchg_u8_unlocked,(uint8_t *puMem, uint8_t *puReg))
{
    uint8_t const uOld = *puMem;
    *puMem = *puReg;
    *puReg = uOld;
}

# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */


/*
 * XADD and LOCK XADD.
 */
#define EMIT_XADD(a_cBitsWidth, a_Type) \
IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u ## a_cBitsWidth,(a_Type *puDst, a_Type *puReg, uint32_t *pfEFlags)) \
{ \
    a_Type uDst    = *puDst; \
    a_Type uResult = uDst; \
    iemAImpl_add_u ## a_cBitsWidth(&uResult, *puReg, pfEFlags); \
    *puDst = uResult; \
    *puReg = uDst; \
} \
\
IEM_DECL_IMPL_DEF(void, iemAImpl_xadd_u ## a_cBitsWidth ## _locked,(a_Type *puDst, a_Type *puReg, uint32_t *pfEFlags)) \
{ \
    a_Type uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
    a_Type uResult; \
    uint32_t fEflTmp; \
    do \
    { \
        uResult = uOld; \
        fEflTmp = *pfEFlags; \
        iemAImpl_add_u ## a_cBitsWidth(&uResult, *puReg, &fEflTmp); \
    } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uResult, uOld, &uOld)); \
    *puReg    = uOld; \
    *pfEFlags = fEflTmp; \
}
EMIT_XADD(64, uint64_t)
# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
EMIT_XADD(32, uint32_t)
EMIT_XADD(16, uint16_t)
EMIT_XADD(8, uint8_t)
# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
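
/* Illustrative note (not from the original source): XADD both exchanges and
   adds, e.g. with *puDst == 5 and *puReg == 2 the non-locked variant leaves
   *puDst == 7 and *puReg == 5, with EFLAGS reflecting the 5 + 2 addition.
   The locked variant reruns the add in a CAS loop like DO_LOCKED_BIN_OP
   above, handing back the value that was actually replaced in memory. */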

#endif

/*
 * CMPXCHG, CMPXCHG8B, CMPXCHG16B
 *
 * Note! We don't have non-locking/atomic cmpxchg primitives, so all cmpxchg
 *       instructions are emulated as locked.
 */
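
/* Illustrative note (not from the original source) on the CMPXCHG contract
   implemented below: if the accumulator (AL/AX/EAX/RAX) equals *puDst, the
   source register is stored into *puDst and ZF is set; otherwise *puDst is
   loaded into the accumulator and ZF is cleared.  The helpers below get ZF
   (and the other status flags) by doing a CMP of the old values afterwards. */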
#if defined(IEM_WITHOUT_ASSEMBLY)

IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u8_locked, (uint8_t *pu8Dst, uint8_t *puAl, uint8_t uSrcReg, uint32_t *pEFlags))
{
    uint8_t uOld = *puAl;
    if (ASMAtomicCmpXchgExU8(pu8Dst, uSrcReg, uOld, puAl))
        Assert(*puAl == uOld);
    iemAImpl_cmp_u8(&uOld, *puAl, pEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u16_locked,(uint16_t *pu16Dst, uint16_t *puAx, uint16_t uSrcReg, uint32_t *pEFlags))
{
    uint16_t uOld = *puAx;
    if (ASMAtomicCmpXchgExU16(pu16Dst, uSrcReg, uOld, puAx))
        Assert(*puAx == uOld);
    iemAImpl_cmp_u16(&uOld, *puAx, pEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u32_locked,(uint32_t *pu32Dst, uint32_t *puEax, uint32_t uSrcReg, uint32_t *pEFlags))
{
    uint32_t uOld = *puEax;
    if (ASMAtomicCmpXchgExU32(pu32Dst, uSrcReg, uOld, puEax))
        Assert(*puEax == uOld);
    iemAImpl_cmp_u32(&uOld, *puEax, pEFlags);
}


# if ARCH_BITS == 32
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64_locked,(uint64_t *pu64Dst, uint64_t *puRax, uint64_t *puSrcReg, uint32_t *pEFlags))
# else
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64_locked,(uint64_t *pu64Dst, uint64_t *puRax, uint64_t uSrcReg, uint32_t *pEFlags))
# endif
{
# if ARCH_BITS == 32
    uint64_t const uSrcReg = *puSrcReg;
# endif
    uint64_t uOld = *puRax;
    if (ASMAtomicCmpXchgExU64(pu64Dst, uSrcReg, uOld, puRax))
        Assert(*puRax == uOld);
    iemAImpl_cmp_u64(&uOld, *puRax, pEFlags);
}


IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b_locked,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
                                                   uint32_t *pEFlags))
{
    uint64_t const uNew = pu64EbxEcx->u;
    uint64_t const uOld = pu64EaxEdx->u;
    if (ASMAtomicCmpXchgExU64(pu64Dst, uNew, uOld, &pu64EaxEdx->u))
    {
        Assert(pu64EaxEdx->u == uOld);
        *pEFlags |= X86_EFL_ZF;
    }
    else
        *pEFlags &= ~X86_EFL_ZF;
}


# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64)
IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_locked,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
                                                    uint32_t *pEFlags))
{
#  ifdef VBOX_STRICT
    RTUINT128U const uOld = *pu128RaxRdx;
#  endif
#  if defined(RT_ARCH_AMD64)
    if (ASMAtomicCmpXchgU128v2(&pu128Dst->u, pu128RbxRcx->s.Hi, pu128RbxRcx->s.Lo, pu128RaxRdx->s.Hi, pu128RaxRdx->s.Lo,
                               &pu128RaxRdx->u))
#  else
    if (ASMAtomicCmpXchgU128(&pu128Dst->u, pu128RbxRcx->u, pu128RaxRdx->u, &pu128RaxRdx->u))
#  endif
    {
        Assert(pu128RaxRdx->s.Lo == uOld.s.Lo && pu128RaxRdx->s.Hi == uOld.s.Hi);
        *pEFlags |= X86_EFL_ZF;
    }
    else
        *pEFlags &= ~X86_EFL_ZF;
}
# endif

#endif /* defined(IEM_WITHOUT_ASSEMBLY) */
1672
1673#if !defined(RT_ARCH_ARM64) /** @todo may need this for unaligned accesses... */
1674IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b_fallback,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx,
1675 PRTUINT128U pu128RbxRcx, uint32_t *pEFlags))
1676{
1677 RTUINT128U u128Tmp = *pu128Dst;
1678 if ( u128Tmp.s.Lo == pu128RaxRdx->s.Lo
1679 && u128Tmp.s.Hi == pu128RaxRdx->s.Hi)
1680 {
1681 *pu128Dst = *pu128RbxRcx;
1682 *pEFlags |= X86_EFL_ZF;
1683 }
1684 else
1685 {
1686 *pu128RaxRdx = u128Tmp;
1687 *pEFlags &= ~X86_EFL_ZF;
1688 }
1689}
1690#endif /* !RT_ARCH_ARM64 */
1691
1692#if defined(IEM_WITHOUT_ASSEMBLY)
1693
1694/* Unlocked versions mapped to the locked ones: */
1695
1696IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u8, (uint8_t *pu8Dst, uint8_t *puAl, uint8_t uSrcReg, uint32_t *pEFlags))
1697{
1698 iemAImpl_cmpxchg_u8_locked(pu8Dst, puAl, uSrcReg, pEFlags);
1699}
1700
1701
1702IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u16, (uint16_t *pu16Dst, uint16_t *puAx, uint16_t uSrcReg, uint32_t *pEFlags))
1703{
1704 iemAImpl_cmpxchg_u16_locked(pu16Dst, puAx, uSrcReg, pEFlags);
1705}
1706
1707
1708IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u32, (uint32_t *pu32Dst, uint32_t *puEax, uint32_t uSrcReg, uint32_t *pEFlags))
1709{
1710 iemAImpl_cmpxchg_u32_locked(pu32Dst, puEax, uSrcReg, pEFlags);
1711}
1712
1713
1714# if ARCH_BITS == 32
1715IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64, (uint64_t *pu64Dst, uint64_t *puRax, uint64_t *puSrcReg, uint32_t *pEFlags))
1716{
1717 iemAImpl_cmpxchg_u64_locked(pu64Dst, puRax, puSrcReg, pEFlags);
1718}
1719# else
1720IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg_u64, (uint64_t *pu64Dst, uint64_t *puRax, uint64_t uSrcReg, uint32_t *pEFlags))
1721{
1722 iemAImpl_cmpxchg_u64_locked(pu64Dst, puRax, uSrcReg, pEFlags);
1723}
1724# endif
1725
1726
1727IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx, uint32_t *pEFlags))
1728{
1729 iemAImpl_cmpxchg8b_locked(pu64Dst, pu64EaxEdx, pu64EbxEcx, pEFlags);
1730}
1731
1732
1733IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
1734 uint32_t *pEFlags))
1735{
1736 iemAImpl_cmpxchg16b_locked(pu128Dst, pu128RaxRdx, pu128RbxRcx, pEFlags);
1737}
1738
1739#endif /* defined(IEM_WITHOUT_ASSEMBLY) */
1740
1741#if (!defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)) \
1742 && !defined(DOXYGEN_RUNNING) /* Doxygen has some grokking issues here and ends up mixing up input. Not worth tracking down now. */
1743
1744/*
1745 * MUL, IMUL, DIV and IDIV helpers.
1746 *
1747 * - The U64 versions must use 128-bit intermediates, so we need to abstract
1748 * the multiplication and division steps in order to select between using
1749 * C operators and RTUInt128DivRem/RTUInt128MulU64ByU64.
1750 *
1751 * - The U8 versions return output in AL + AH instead of xDX + xAX, with
1752 * IDIV/DIV taking all their input in AX too. This means we have to
1753 * abstract some of the input loads and the result storing.
1754 */
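
/*
 * Illustrative sketch (not built, hypothetical helper name): how the 8-bit
 * DIV helper below packs its operands - the 16-bit dividend lives in AX and
 * the result comes back as AL=quotient, AH=remainder.  Made-up values.
 */
#if 0
static void iemAImplExampleDivU8(void)
{
    uint16_t uAX     = 0x1234;  /* dividend: 4660 */
    uint32_t fEFlags = 0;
    int rc = iemAImpl_div_u8(&uAX, 0x56 /* divisor: 86 */, &fEFlags);
    /* rc == 0 and uAX == 0x1036: AL=0x36 (quotient 54), AH=0x10 (remainder 16). */
}
#endif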
1755
1756DECLINLINE(void) RTUInt128DivRemByU64(PRTUINT128U pQuotient, PRTUINT128U pRemainder, PCRTUINT128U pDividend, uint64_t u64Divisor)
1757{
1758# ifdef __GNUC__ /* GCC may otherwise be really annoying in this function. */
1759 pQuotient->s.Lo = 0;
1760 pQuotient->s.Hi = 0;
1761# endif
1762 RTUINT128U Divisor;
1763 Divisor.s.Lo = u64Divisor;
1764 Divisor.s.Hi = 0;
1765 RTUInt128DivRem(pQuotient, pRemainder, pDividend, &Divisor);
1766}
1767
1768# define DIV_LOAD(a_Dividend) \
1769 a_Dividend.s.Lo = *puA, a_Dividend.s.Hi = *puD
1770# define DIV_LOAD_U8(a_Dividend) \
1771 a_Dividend.u = *puAX
1772
1773# define DIV_STORE(a_Quotient, a_uRemainder) *puA = (a_Quotient), *puD = (a_uRemainder)
1774# define DIV_STORE_U8(a_Quotient, a_uRemainder) *puAX = (uint8_t)(a_Quotient) | ((uint16_t)(a_uRemainder) << 8)
1775
1776# define MUL_LOAD_F1() *puA
1777# define MUL_LOAD_F1_U8() ((uint8_t)*puAX)
1778
1779# define MUL_STORE(a_Result) *puA = (a_Result).s.Lo, *puD = (a_Result).s.Hi
1780# define MUL_STORE_U8(a_Result) *puAX = a_Result.u
1781
1782# define MULDIV_NEG(a_Value, a_cBitsWidth2x) \
1783 (a_Value).u = UINT ## a_cBitsWidth2x ## _C(0) - (a_Value).u
1784# define MULDIV_NEG_U128(a_Value, a_cBitsWidth2x) \
1785 RTUInt128AssignNeg(&(a_Value))
1786
1787# define MULDIV_MUL(a_Result, a_Factor1, a_Factor2, a_cBitsWidth2x) \
1788 (a_Result).u = (uint ## a_cBitsWidth2x ## _t)(a_Factor1) * (a_Factor2)
1789# define MULDIV_MUL_U128(a_Result, a_Factor1, a_Factor2, a_cBitsWidth2x) \
1790 RTUInt128MulU64ByU64(&(a_Result), a_Factor1, a_Factor2);
1791
1792# define MULDIV_MODDIV(a_Quotient, a_Remainder, a_Dividend, a_uDivisor) \
1793 a_Quotient.u = (a_Dividend).u / (a_uDivisor), \
1794 a_Remainder.u = (a_Dividend).u % (a_uDivisor)
1795# define MULDIV_MODDIV_U128(a_Quotient, a_Remainder, a_Dividend, a_uDivisor) \
1796 RTUInt128DivRemByU64(&a_Quotient, &a_Remainder, &a_Dividend, a_uDivisor)
1797
1798
1799/*
1800 * MUL
1801 */
1802# define EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, a_Suffix, a_fIntelFlags) \
1803IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_mul_u,a_cBitsWidth,a_Suffix), a_Args) \
1804{ \
1805 RTUINT ## a_cBitsWidth2x ## U Result; \
1806 a_fnMul(Result, a_fnLoadF1(), uFactor, a_cBitsWidth2x); \
1807 a_fnStore(Result); \
1808 \
1809 /* Calc EFLAGS: */ \
1810 uint32_t fEfl = *pfEFlags; \
1811 if (a_fIntelFlags) \
1812 { /* Intel: 6700K and 10980XE behavior */ \
1813 fEfl &= ~(X86_EFL_SF | X86_EFL_CF | X86_EFL_OF | X86_EFL_AF | X86_EFL_ZF | X86_EFL_PF); \
1814 if (Result.s.Lo & RT_BIT_64(a_cBitsWidth - 1)) \
1815 fEfl |= X86_EFL_SF; \
1816 fEfl |= g_afParity[Result.s.Lo & 0xff]; \
1817 if (Result.s.Hi != 0) \
1818 fEfl |= X86_EFL_CF | X86_EFL_OF; \
1819 } \
1820 else \
1821 { /* AMD: 3990X */ \
1822 if (Result.s.Hi != 0) \
1823 fEfl |= X86_EFL_CF | X86_EFL_OF; \
1824 else \
1825 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
1826 } \
1827 *pfEFlags = fEfl; \
1828 return 0; \
1829} \
1830
1831# define EMIT_MUL(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul) \
1832 EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, RT_NOTHING, 1) \
1833 EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, _intel, 1) \
1834 EMIT_MUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnMul, _amd, 0) \
1835
1836# ifndef DOXYGEN_RUNNING /* this totally confuses doxygen for some reason */
1837EMIT_MUL(64, 128, (uint64_t *puA, uint64_t *puD, uint64_t uFactor, uint32_t *pfEFlags), (puA, puD, uFactor, pfEFlags),
1838 MUL_LOAD_F1, MUL_STORE, MULDIV_MUL_U128)
1839# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1840EMIT_MUL(32, 64, (uint32_t *puA, uint32_t *puD, uint32_t uFactor, uint32_t *pfEFlags), (puA, puD, uFactor, pfEFlags),
1841 MUL_LOAD_F1, MUL_STORE, MULDIV_MUL)
1842EMIT_MUL(16, 32, (uint16_t *puA, uint16_t *puD, uint16_t uFactor, uint32_t *pfEFlags), (puA, puD, uFactor, pfEFlags),
1843 MUL_LOAD_F1, MUL_STORE, MULDIV_MUL)
1844EMIT_MUL(8, 16, (uint16_t *puAX, uint8_t uFactor, uint32_t *pfEFlags), (puAX, uFactor, pfEFlags),
1845 MUL_LOAD_F1_U8, MUL_STORE_U8, MULDIV_MUL)
1846# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1847# endif /* !DOXYGEN_RUNNING */
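
/*
 * Illustrative sketch (not built, hypothetical helper name): MUL widens the
 * product into xDX:xAX; CF and OF are set when the high half is non-zero.
 * Made-up values for illustration.
 */
#if 0
static void iemAImplExampleMulU32(void)
{
    uint32_t uEax    = UINT32_C(0x80000000);
    uint32_t uEdx    = 0;
    uint32_t fEFlags = 0;
    iemAImpl_mul_u32(&uEax, &uEdx, 2, &fEFlags);
    /* Now EDX:EAX = 0x00000001:0x00000000 and CF+OF are set (EDX != 0). */
}
#endif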
1848
1849
1850/*
1851 * IMUL
1852 *
1853 * The SF, ZF, AF and PF flags are "undefined". AMD (3990X) leaves these
1854 * flags as-is, whereas Intel (Skylake 6700K and Cascade Lake 10980XE) always
1855 * clears AF and ZF and calculates SF and PF from the lower half of the result.
1856 */
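
/*
 * Illustrative sketch (not built, hypothetical helper name): the one-operand
 * IMUL sets CF and OF when the high half isn't just a sign extension of the
 * low half.  Made-up values for illustration.
 */
#if 0
static void iemAImplExampleImulU8(void)
{
    uint16_t uAX     = 0x0080;  /* AL = 0x80 = -128 */
    uint32_t fEFlags = 0;
    iemAImpl_imul_u8(&uAX, 2, &fEFlags);
    /* Now AX == 0xff00 (-256); CF+OF set since AH isn't a sign extension of AL. */
}
#endif
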
1857# define EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, \
1858 a_Suffix, a_fIntelFlags) \
1859IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_imul_u,a_cBitsWidth,a_Suffix),a_Args) \
1860{ \
1861 RTUINT ## a_cBitsWidth2x ## U Result; \
1862 uint32_t fEfl = *pfEFlags & ~(X86_EFL_CF | X86_EFL_OF); \
1863 \
1864 uint ## a_cBitsWidth ## _t const uFactor1 = a_fnLoadF1(); \
1865 if (!(uFactor1 & RT_BIT_64(a_cBitsWidth - 1))) \
1866 { \
1867 if (!(uFactor2 & RT_BIT_64(a_cBitsWidth - 1))) \
1868 { \
1869 a_fnMul(Result, uFactor1, uFactor2, a_cBitsWidth2x); \
1870 if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_64(a_cBitsWidth - 1)) \
1871 fEfl |= X86_EFL_CF | X86_EFL_OF; \
1872 } \
1873 else \
1874 { \
1875 uint ## a_cBitsWidth ## _t const uPositiveFactor2 = UINT ## a_cBitsWidth ## _C(0) - uFactor2; \
1876 a_fnMul(Result, uFactor1, uPositiveFactor2, a_cBitsWidth2x); \
1877 if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_64(a_cBitsWidth - 1)) \
1878 fEfl |= X86_EFL_CF | X86_EFL_OF; \
1879 a_fnNeg(Result, a_cBitsWidth2x); \
1880 } \
1881 } \
1882 else \
1883 { \
1884 if (!(uFactor2 & RT_BIT_64(a_cBitsWidth - 1))) \
1885 { \
1886 uint ## a_cBitsWidth ## _t const uPositiveFactor1 = UINT ## a_cBitsWidth ## _C(0) - uFactor1; \
1887 a_fnMul(Result, uPositiveFactor1, uFactor2, a_cBitsWidth2x); \
1888 if (Result.s.Hi != 0 || Result.s.Lo > RT_BIT_64(a_cBitsWidth - 1)) \
1889 fEfl |= X86_EFL_CF | X86_EFL_OF; \
1890 a_fnNeg(Result, a_cBitsWidth2x); \
1891 } \
1892 else \
1893 { \
1894 uint ## a_cBitsWidth ## _t const uPositiveFactor1 = UINT ## a_cBitsWidth ## _C(0) - uFactor1; \
1895 uint ## a_cBitsWidth ## _t const uPositiveFactor2 = UINT ## a_cBitsWidth ## _C(0) - uFactor2; \
1896 a_fnMul(Result, uPositiveFactor1, uPositiveFactor2, a_cBitsWidth2x); \
1897 if (Result.s.Hi != 0 || Result.s.Lo >= RT_BIT_64(a_cBitsWidth - 1)) \
1898 fEfl |= X86_EFL_CF | X86_EFL_OF; \
1899 } \
1900 } \
1901 a_fnStore(Result); \
1902 \
1903 if (a_fIntelFlags) \
1904 { \
1905 fEfl &= ~(X86_EFL_AF | X86_EFL_ZF | X86_EFL_SF | X86_EFL_PF); \
1906 if (Result.s.Lo & RT_BIT_64(a_cBitsWidth - 1)) \
1907 fEfl |= X86_EFL_SF; \
1908 fEfl |= g_afParity[Result.s.Lo & 0xff]; \
1909 } \
1910 *pfEFlags = fEfl; \
1911 return 0; \
1912}
1913# define EMIT_IMUL(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul) \
1914 EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, RT_NOTHING, 1) \
1915 EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, _intel, 1) \
1916 EMIT_IMUL_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoadF1, a_fnStore, a_fnNeg, a_fnMul, _amd, 0)
1917
1918# ifndef DOXYGEN_RUNNING /* this totally confuses doxygen for some reason */
1919EMIT_IMUL(64, 128, (uint64_t *puA, uint64_t *puD, uint64_t uFactor2, uint32_t *pfEFlags), (puA, puD, uFactor2, pfEFlags),
1920 MUL_LOAD_F1, MUL_STORE, MULDIV_NEG_U128, MULDIV_MUL_U128)
1921# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1922EMIT_IMUL(32, 64, (uint32_t *puA, uint32_t *puD, uint32_t uFactor2, uint32_t *pfEFlags), (puA, puD, uFactor2, pfEFlags),
1923 MUL_LOAD_F1, MUL_STORE, MULDIV_NEG, MULDIV_MUL)
1924EMIT_IMUL(16, 32, (uint16_t *puA, uint16_t *puD, uint16_t uFactor2, uint32_t *pfEFlags), (puA, puD, uFactor2, pfEFlags),
1925 MUL_LOAD_F1, MUL_STORE, MULDIV_NEG, MULDIV_MUL)
1926EMIT_IMUL(8, 16, (uint16_t *puAX, uint8_t uFactor2, uint32_t *pfEFlags), (puAX, uFactor2, pfEFlags),
1927 MUL_LOAD_F1_U8, MUL_STORE_U8, MULDIV_NEG, MULDIV_MUL)
1928# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
1929# endif /* !DOXYGEN_RUNNING */
1930
1931
1932/*
1933 * The two-operand IMUL forms are mapped onto the three-operand variant,
1934 * ignoring the high part of the product.
1935 */
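
/*
 * Illustrative sketch (not built, hypothetical helper name): the two-operand
 * form keeps only the low half, but CF and OF still report the lost high
 * part.  Made-up values for illustration.
 */
#if 0
static void iemAImplExampleImulTwoU32(void)
{
    uint32_t uDst    = UINT32_C(0x10000);
    uint32_t fEFlags = 0;
    iemAImpl_imul_two_u32(&uDst, UINT32_C(0x10000), &fEFlags);
    /* Now uDst == 0 (the low 32 bits of 2^32) and CF+OF are set. */
}
#endif
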
1936# define EMIT_IMUL_TWO(a_cBits, a_uType) \
1937IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u ## a_cBits,(a_uType *puDst, a_uType uSrc, uint32_t *pfEFlags)) \
1938{ \
1939 a_uType uIgn; \
1940 iemAImpl_imul_u ## a_cBits(puDst, &uIgn, uSrc, pfEFlags); \
1941} \
1942\
1943IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u ## a_cBits ## _intel,(a_uType *puDst, a_uType uSrc, uint32_t *pfEFlags)) \
1944{ \
1945 a_uType uIgn; \
1946 iemAImpl_imul_u ## a_cBits ## _intel(puDst, &uIgn, uSrc, pfEFlags); \
1947} \
1948\
1949IEM_DECL_IMPL_DEF(void, iemAImpl_imul_two_u ## a_cBits ## _amd,(a_uType *puDst, a_uType uSrc, uint32_t *pfEFlags)) \
1950{ \
1951 a_uType uIgn; \
1952 iemAImpl_imul_u ## a_cBits ## _amd(puDst, &uIgn, uSrc, pfEFlags); \
1953}
1954
1955EMIT_IMUL_TWO(64, uint64_t)
1956# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1957EMIT_IMUL_TWO(32, uint32_t)
1958EMIT_IMUL_TWO(16, uint16_t)
1959# endif
1960
1961
1962/*
1963 * DIV
1964 */
1965# define EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, \
1966 a_Suffix, a_fIntelFlags) \
1967IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_div_u,a_cBitsWidth,a_Suffix),a_Args) \
1968{ \
1969 RTUINT ## a_cBitsWidth2x ## U Dividend; \
1970 a_fnLoad(Dividend); \
1971 if ( uDivisor != 0 \
1972 && Dividend.s.Hi < uDivisor) \
1973 { \
1974 RTUINT ## a_cBitsWidth2x ## U Remainder, Quotient; \
1975 a_fnDivRem(Quotient, Remainder, Dividend, uDivisor); \
1976 a_fnStore(Quotient.s.Lo, Remainder.s.Lo); \
1977 \
1978 /* Calc EFLAGS: Intel 6700K and 10980XE leaves them alone. AMD 3990X sets AF and clears PF, ZF and SF. */ \
1979 if (!a_fIntelFlags) \
1980 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
1981 return 0; \
1982 } \
1983 /* #DE */ \
1984 return -1; \
1985}
1986# define EMIT_DIV(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem) \
1987 EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, RT_NOTHING, 1) \
1988 EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, _intel, 1) \
1989 EMIT_DIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnDivRem, _amd, 0)
1990
1991# ifndef DOXYGEN_RUNNING /* this totally confuses doxygen for some reason */
1992EMIT_DIV(64,128,(uint64_t *puA, uint64_t *puD, uint64_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
1993 DIV_LOAD, DIV_STORE, MULDIV_MODDIV_U128)
1994# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
1995EMIT_DIV(32,64, (uint32_t *puA, uint32_t *puD, uint32_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
1996 DIV_LOAD, DIV_STORE, MULDIV_MODDIV)
1997EMIT_DIV(16,32, (uint16_t *puA, uint16_t *puD, uint16_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
1998 DIV_LOAD, DIV_STORE, MULDIV_MODDIV)
1999EMIT_DIV(8,16, (uint16_t *puAX, uint8_t uDivisor, uint32_t *pfEFlags), (puAX, uDivisor, pfEFlags),
2000 DIV_LOAD_U8, DIV_STORE_U8, MULDIV_MODDIV)
2001# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2002# endif /* !DOXYGEN_RUNNING */
2003
2004
2005/*
2006 * IDIV
2007 *
2008 * EFLAGS are ignored and left as-is by Intel 6700K and 10980XE. AMD 3990X will
2009 * set AF and clear PF, ZF and SF just like it does for DIV.
2010 *
2011 */
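
/*
 * Illustrative sketch (not built, hypothetical helper name): the classic IDIV
 * overflow case - the quotient of -32768 / -1 doesn't fit in AL, so the
 * helper signals #DE.  Made-up values for illustration.
 */
#if 0
static void iemAImplExampleIdivU8Overflow(void)
{
    uint16_t uAX     = 0x8000;  /* AX = -32768 */
    uint32_t fEFlags = 0;
    int rc = iemAImpl_idiv_u8(&uAX, 0xff /* -1 */, &fEFlags);
    /* rc == -1: the quotient +32768 doesn't fit in AL, so the caller raises #DE. */
}
#endif
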
2012# define EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, \
2013 a_Suffix, a_fIntelFlags) \
2014IEM_DECL_IMPL_DEF(int, RT_CONCAT3(iemAImpl_idiv_u,a_cBitsWidth,a_Suffix),a_Args) \
2015{ \
2016 /* Note! Skylake leaves all flags alone. */ \
2017 \
2018 /** @todo overflow checks */ \
2019 if (uDivisor != 0) \
2020 { \
2021 /* \
2022 * Convert to unsigned division. \
2023 */ \
2024 RTUINT ## a_cBitsWidth2x ## U Dividend; \
2025 a_fnLoad(Dividend); \
2026 bool const fSignedDividend = RT_BOOL(Dividend.s.Hi & RT_BIT_64(a_cBitsWidth - 1)); \
2027 if (fSignedDividend) \
2028 a_fnNeg(Dividend, a_cBitsWidth2x); \
2029 \
2030 uint ## a_cBitsWidth ## _t uDivisorPositive; \
2031 if (!(uDivisor & RT_BIT_64(a_cBitsWidth - 1))) \
2032 uDivisorPositive = uDivisor; \
2033 else \
2034 uDivisorPositive = UINT ## a_cBitsWidth ## _C(0) - uDivisor; \
2035 \
2036 RTUINT ## a_cBitsWidth2x ## U Remainder, Quotient; \
2037 a_fnDivRem(Quotient, Remainder, Dividend, uDivisorPositive); \
2038 \
2039 /* \
2040 * Setup the result, checking for overflows. \
2041 */ \
2042 if (!(uDivisor & RT_BIT_64(a_cBitsWidth - 1))) \
2043 { \
2044 if (!fSignedDividend) \
2045 { \
2046 /* Positive divisor, positive dividend => result positive. */ \
2047 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint ## a_cBitsWidth ## _t)INT ## a_cBitsWidth ## _MAX) \
2048 { \
2049 a_fnStore(Quotient.s.Lo, Remainder.s.Lo); \
2050 if (!a_fIntelFlags) \
2051 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
2052 return 0; \
2053 } \
2054 } \
2055 else \
2056 { \
2057 /* Positive divisor, negative dividend => result negative. */ \
2058 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_64(a_cBitsWidth - 1)) \
2059 { \
2060 a_fnStore(UINT ## a_cBitsWidth ## _C(0) - Quotient.s.Lo, UINT ## a_cBitsWidth ## _C(0) - Remainder.s.Lo); \
2061 if (!a_fIntelFlags) \
2062 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
2063 return 0; \
2064 } \
2065 } \
2066 } \
2067 else \
2068 { \
2069 if (!fSignedDividend) \
2070 { \
2071 /* Negative divisor, positive dividend => negative quotient, positive remainder. */ \
2072 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= RT_BIT_64(a_cBitsWidth - 1)) \
2073 { \
2074 a_fnStore(UINT ## a_cBitsWidth ## _C(0) - Quotient.s.Lo, Remainder.s.Lo); \
2075 if (!a_fIntelFlags) \
2076 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
2077 return 0; \
2078 } \
2079 } \
2080 else \
2081 { \
2082 /* Negative divisor, negative dividend => positive quotient, negative remainder. */ \
2083 if (Quotient.s.Hi == 0 && Quotient.s.Lo <= (uint ## a_cBitsWidth ## _t)INT ## a_cBitsWidth ## _MAX) \
2084 { \
2085 a_fnStore(Quotient.s.Lo, UINT ## a_cBitsWidth ## _C(0) - Remainder.s.Lo); \
2086 if (!a_fIntelFlags) \
2087 *pfEFlags = (*pfEFlags & ~(X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF)) | X86_EFL_AF; \
2088 return 0; \
2089 } \
2090 } \
2091 } \
2092 } \
2093 /* #DE */ \
2094 return -1; \
2095}
2096# define EMIT_IDIV(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem) \
2097 EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, RT_NOTHING, 1) \
2098 EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, _intel, 1) \
2099 EMIT_IDIV_INNER(a_cBitsWidth, a_cBitsWidth2x, a_Args, a_CallArgs, a_fnLoad, a_fnStore, a_fnNeg, a_fnDivRem, _amd, 0)
2100
2101# ifndef DOXYGEN_RUNNING /* this totally confuses doxygen for some reason */
2102EMIT_IDIV(64,128,(uint64_t *puA, uint64_t *puD, uint64_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
2103 DIV_LOAD, DIV_STORE, MULDIV_NEG_U128, MULDIV_MODDIV_U128)
2104# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2105EMIT_IDIV(32,64,(uint32_t *puA, uint32_t *puD, uint32_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
2106 DIV_LOAD, DIV_STORE, MULDIV_NEG, MULDIV_MODDIV)
2107EMIT_IDIV(16,32,(uint16_t *puA, uint16_t *puD, uint16_t uDivisor, uint32_t *pfEFlags), (puA, puD, uDivisor, pfEFlags),
2108 DIV_LOAD, DIV_STORE, MULDIV_NEG, MULDIV_MODDIV)
2109EMIT_IDIV(8,16,(uint16_t *puAX, uint8_t uDivisor, uint32_t *pfEFlags), (puAX, uDivisor, pfEFlags),
2110 DIV_LOAD_U8, DIV_STORE_U8, MULDIV_NEG, MULDIV_MODDIV)
2111# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2112# endif /* !DOXYGEN_RUNNING */
2113
2114#endif /* (!defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)) && !defined(DOXYGEN_RUNNING) */
2115
2116
2117/*********************************************************************************************************************************
2118* Unary operations. *
2119*********************************************************************************************************************************/
2120#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2121
2122/** @def IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC
2123 * Updates the status bits (PF, AF, ZF, SF, and OF) for an INC or DEC instruction.
2124 *
2125 * CF is NOT modified for hysterical raisins (allegedly for carrying and
2126 * borrowing in arithmetic loops on intel 8008).
2127 *
2128 * @returns Status bits.
2129 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2130 * @param a_uResult Unsigned result value.
2131 * @param a_uDst The original destination value (for AF calc).
2132 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2133 * @param a_OfMethod 0 for INC-style, 1 for DEC-style.
2134 */
2135#define IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(a_pfEFlags, a_uResult, a_uDst, a_cBitsWidth, a_OfMethod) \
2136 do { \
2137 uint32_t fEflTmp = *(a_pfEFlags); \
2138 fEflTmp &= ~X86_EFL_STATUS_BITS | X86_EFL_CF; \
2139 fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
2140 fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
2141 fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
2142 fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
2143 fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth(a_OfMethod == 0 ? (((a_uDst) ^ RT_BIT_64(a_cBitsWidth - 1)) & (a_uResult)) \
2144 : ((a_uDst) & ((a_uResult) ^ RT_BIT_64(a_cBitsWidth - 1))) ); \
2145 *(a_pfEFlags) = fEflTmp; \
2146 } while (0)
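
/*
 * Illustrative sketch (not built, hypothetical helper name): INC wraps
 * 0xffffffff to 0, setting ZF, PF and AF, but leaves CF untouched.
 * Made-up values for illustration.
 */
#if 0
static void iemAImplExampleIncU32(void)
{
    uint32_t uVal    = UINT32_C(0xffffffff);
    uint32_t fEFlags = X86_EFL_CF;  /* pretend a carry is pending */
    iemAImpl_inc_u32(&uVal, &fEFlags);
    /* Now uVal == 0; ZF, PF and AF are set, OF is clear - and CF is still set. */
}
#endif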
2147
2148/*
2149 * INC
2150 */
2151
2152IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2153{
2154 uint64_t uDst = *puDst;
2155 uint64_t uResult = uDst + 1;
2156 *puDst = uResult;
2157 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 64, 0 /*INC*/);
2158}
2159
2160# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2161
2162IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2163{
2164 uint32_t uDst = *puDst;
2165 uint32_t uResult = uDst + 1;
2166 *puDst = uResult;
2167 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 32, 0 /*INC*/);
2168}
2169
2170
2171IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2172{
2173 uint16_t uDst = *puDst;
2174 uint16_t uResult = uDst + 1;
2175 *puDst = uResult;
2176 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 16, 0 /*INC*/);
2177}
2178
2179IEM_DECL_IMPL_DEF(void, iemAImpl_inc_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2180{
2181 uint8_t uDst = *puDst;
2182 uint8_t uResult = uDst + 1;
2183 *puDst = uResult;
2184 IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 8, 0 /*INC*/);
2185}
2186
2187# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2188
2189
2190/*
2191 * DEC
2192 */
2193
2194IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2195{
2196 uint64_t uDst = *puDst;
2197 uint64_t uResult = uDst - 1;
2198 *puDst = uResult;
2199    IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 64, 1 /*DEC*/);
2200}
2201
2202# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2203
2204IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2205{
2206 uint32_t uDst = *puDst;
2207 uint32_t uResult = uDst - 1;
2208 *puDst = uResult;
2209    IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 32, 1 /*DEC*/);
2210}
2211
2212
2213IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2214{
2215 uint16_t uDst = *puDst;
2216 uint16_t uResult = uDst - 1;
2217 *puDst = uResult;
2218    IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 16, 1 /*DEC*/);
2219}
2220
2221
2222IEM_DECL_IMPL_DEF(void, iemAImpl_dec_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2223{
2224 uint8_t uDst = *puDst;
2225 uint8_t uResult = uDst - 1;
2226 *puDst = uResult;
2227    IEM_EFL_UPDATE_STATUS_BITS_FOR_INC_DEC(pfEFlags, uResult, uDst, 8, 1 /*DEC*/);
2228}
2229
2230# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2231
2232
2233/*
2234 * NOT
2235 */
2236
2237IEM_DECL_IMPL_DEF(void, iemAImpl_not_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2238{
2239 uint64_t uDst = *puDst;
2240 uint64_t uResult = ~uDst;
2241 *puDst = uResult;
2242 /* EFLAGS are not modified. */
2243 RT_NOREF_PV(pfEFlags);
2244}
2245
2246# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2247
2248IEM_DECL_IMPL_DEF(void, iemAImpl_not_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2249{
2250 uint32_t uDst = *puDst;
2251 uint32_t uResult = ~uDst;
2252 *puDst = uResult;
2253 /* EFLAGS are not modified. */
2254 RT_NOREF_PV(pfEFlags);
2255}
2256
2257IEM_DECL_IMPL_DEF(void, iemAImpl_not_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2258{
2259 uint16_t uDst = *puDst;
2260 uint16_t uResult = ~uDst;
2261 *puDst = uResult;
2262 /* EFLAGS are not modified. */
2263 RT_NOREF_PV(pfEFlags);
2264}
2265
2266IEM_DECL_IMPL_DEF(void, iemAImpl_not_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2267{
2268 uint8_t uDst = *puDst;
2269 uint8_t uResult = ~uDst;
2270 *puDst = uResult;
2271 /* EFLAGS are not modified. */
2272 RT_NOREF_PV(pfEFlags);
2273}
2274
2275# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2276
2277
2278/*
2279 * NEG
2280 */
2281
2282/**
2283 * Updates the status bits (CF, PF, AF, ZF, SF, and OF) for a NEG instruction.
2284 *
2285 * @returns Status bits.
2286 * @param a_pfEFlags Pointer to the 32-bit EFLAGS value to update.
2287 * @param a_uResult Unsigned result value.
2288 * @param a_uDst The original destination value (for AF calc).
2289 * @param a_cBitsWidth The width of the result (8, 16, 32, 64).
2290 */
2291#define IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(a_pfEFlags, a_uResult, a_uDst, a_cBitsWidth) \
2292 do { \
2293 uint32_t fEflTmp = *(a_pfEFlags); \
2294 fEflTmp &= ~X86_EFL_STATUS_BITS & ~X86_EFL_CF; \
2295 fEflTmp |= ((a_uDst) != 0) << X86_EFL_CF_BIT; \
2296 fEflTmp |= g_afParity[(a_uResult) & 0xff]; \
2297 fEflTmp |= ((uint32_t)(a_uResult) ^ (uint32_t)(a_uDst)) & X86_EFL_AF; \
2298 fEflTmp |= X86_EFL_CALC_ZF(a_uResult); \
2299 fEflTmp |= X86_EFL_CALC_SF(a_uResult, a_cBitsWidth); \
2300 fEflTmp |= X86_EFL_GET_OF_ ## a_cBitsWidth((a_uDst) & (a_uResult)); \
2301 *(a_pfEFlags) = fEflTmp; \
2302 } while (0)
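
/*
 * Illustrative sketch (not built, hypothetical helper name): NEG sets CF for
 * any non-zero operand; OF is only set when negating the minimum value
 * (e.g. 0x80000000 for 32 bits).  Made-up values for illustration.
 */
#if 0
static void iemAImplExampleNegU32(void)
{
    uint32_t uVal    = 1;
    uint32_t fEFlags = 0;
    iemAImpl_neg_u32(&uVal, &fEFlags);
    /* Now uVal == 0xffffffff; CF and SF are set, OF and ZF are clear. */
}
#endif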
2303
2304IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u64,(uint64_t *puDst, uint32_t *pfEFlags))
2305{
2306 uint64_t uDst = *puDst;
2307 uint64_t uResult = (uint64_t)0 - uDst;
2308 *puDst = uResult;
2309 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 64);
2310}
2311
2312# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2313
2314IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u32,(uint32_t *puDst, uint32_t *pfEFlags))
2315{
2316 uint32_t uDst = *puDst;
2317 uint32_t uResult = (uint32_t)0 - uDst;
2318 *puDst = uResult;
2319 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 32);
2320}
2321
2322
2323IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u16,(uint16_t *puDst, uint32_t *pfEFlags))
2324{
2325 uint16_t uDst = *puDst;
2326 uint16_t uResult = (uint16_t)0 - uDst;
2327 *puDst = uResult;
2328 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 16);
2329}
2330
2331
2332IEM_DECL_IMPL_DEF(void, iemAImpl_neg_u8,(uint8_t *puDst, uint32_t *pfEFlags))
2333{
2334 uint8_t uDst = *puDst;
2335 uint8_t uResult = (uint8_t)0 - uDst;
2336 *puDst = uResult;
2337 IEM_EFL_UPDATE_STATUS_BITS_FOR_NEG(pfEFlags, uResult, uDst, 8);
2338}
2339
2340# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
2341
2342/*
2343 * Locked variants.
2344 */
2345
2346/** Emit a function for doing a locked unary operand operation. */
2347# define EMIT_LOCKED_UNARY_OP(a_Mnemonic, a_cBitsWidth) \
2348 IEM_DECL_IMPL_DEF(void, iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth ## _locked,(uint ## a_cBitsWidth ## _t *puDst, \
2349 uint32_t *pfEFlags)) \
2350 { \
2351 uint ## a_cBitsWidth ## _t uOld = ASMAtomicUoReadU ## a_cBitsWidth(puDst); \
2352 uint ## a_cBitsWidth ## _t uTmp; \
2353 uint32_t fEflTmp; \
2354 do \
2355 { \
2356 uTmp = uOld; \
2357 fEflTmp = *pfEFlags; \
2358 iemAImpl_ ## a_Mnemonic ## _u ## a_cBitsWidth(&uTmp, &fEflTmp); \
2359 } while (!ASMAtomicCmpXchgExU ## a_cBitsWidth(puDst, uTmp, uOld, &uOld)); \
2360 *pfEFlags = fEflTmp; \
2361 }
2362
2363EMIT_LOCKED_UNARY_OP(inc, 64)
2364EMIT_LOCKED_UNARY_OP(dec, 64)
2365EMIT_LOCKED_UNARY_OP(not, 64)
2366EMIT_LOCKED_UNARY_OP(neg, 64)
2367# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
2368EMIT_LOCKED_UNARY_OP(inc, 32)
2369EMIT_LOCKED_UNARY_OP(dec, 32)
2370EMIT_LOCKED_UNARY_OP(not, 32)
2371EMIT_LOCKED_UNARY_OP(neg, 32)
2372
2373EMIT_LOCKED_UNARY_OP(inc, 16)
2374EMIT_LOCKED_UNARY_OP(dec, 16)
2375EMIT_LOCKED_UNARY_OP(not, 16)
2376EMIT_LOCKED_UNARY_OP(neg, 16)
2377
2378EMIT_LOCKED_UNARY_OP(inc, 8)
2379EMIT_LOCKED_UNARY_OP(dec, 8)
2380EMIT_LOCKED_UNARY_OP(not, 8)
2381EMIT_LOCKED_UNARY_OP(neg, 8)
2382# endif
2383
2384#endif /* !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY) */
2385
2386
2387/*********************************************************************************************************************************
2388* Shifting and Rotating *
2389*********************************************************************************************************************************/
2390
2391/*
2392 * ROL
2393 */
2394#define EMIT_ROL(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags, a_fnHlp) \
2395IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_rol_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2396{ \
2397 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2398 if (cShift) \
2399 { \
2400 if (a_cBitsWidth < 32) \
2401 cShift &= a_cBitsWidth - 1; \
2402 a_uType const uDst = *puDst; \
2403 a_uType const uResult = a_fnHlp(uDst, cShift); \
2404 *puDst = uResult; \
2405 \
2406 /* Calc EFLAGS. The OF bit is undefined if cShift > 1; we implement \
2407 it the same way as for 1-bit shifts. */ \
2408 AssertCompile(X86_EFL_CF_BIT == 0); \
2409 uint32_t fEfl = *pfEFlags; \
2410 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2411 uint32_t const fCarry = (uResult & X86_EFL_CF); \
2412 fEfl |= fCarry; \
2413 if (!a_fIntelFlags) /* AMD 3990X: According to the last sub-shift: */ \
2414 fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; \
2415 else /* Intel 10980XE: According to the first sub-shift: */ \
2416 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1)); \
2417 *pfEFlags = fEfl; \
2418 } \
2419}
2420
2421#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2422EMIT_ROL(64, uint64_t, RT_NOTHING, 1, ASMRotateLeftU64)
2423#endif
2424EMIT_ROL(64, uint64_t, _intel, 1, ASMRotateLeftU64)
2425EMIT_ROL(64, uint64_t, _amd, 0, ASMRotateLeftU64)
2426
2427#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2428EMIT_ROL(32, uint32_t, RT_NOTHING, 1, ASMRotateLeftU32)
2429#endif
2430EMIT_ROL(32, uint32_t, _intel, 1, ASMRotateLeftU32)
2431EMIT_ROL(32, uint32_t, _amd, 0, ASMRotateLeftU32)
2432
2433DECL_FORCE_INLINE(uint16_t) iemAImpl_rol_u16_hlp(uint16_t uValue, uint8_t cShift)
2434{
2435 return (uValue << cShift) | (uValue >> (16 - cShift));
2436}
2437#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2438EMIT_ROL(16, uint16_t, RT_NOTHING, 1, iemAImpl_rol_u16_hlp)
2439#endif
2440EMIT_ROL(16, uint16_t, _intel, 1, iemAImpl_rol_u16_hlp)
2441EMIT_ROL(16, uint16_t, _amd, 0, iemAImpl_rol_u16_hlp)
2442
2443DECL_FORCE_INLINE(uint8_t) iemAImpl_rol_u8_hlp(uint8_t uValue, uint8_t cShift)
2444{
2445 return (uValue << cShift) | (uValue >> (8 - cShift));
2446}
2447#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2448EMIT_ROL(8, uint8_t, RT_NOTHING, 1, iemAImpl_rol_u8_hlp)
2449#endif
2450EMIT_ROL(8, uint8_t, _intel, 1, iemAImpl_rol_u8_hlp)
2451EMIT_ROL(8, uint8_t, _amd, 0, iemAImpl_rol_u8_hlp)
2452
2453
2454/*
2455 * ROR
2456 */
2457#define EMIT_ROR(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags, a_fnHlp) \
2458IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_ror_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2459{ \
2460 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2461 if (cShift) \
2462 { \
2463 if (a_cBitsWidth < 32) \
2464 cShift &= a_cBitsWidth - 1; \
2465 a_uType const uDst = *puDst; \
2466 a_uType const uResult = a_fnHlp(uDst, cShift); \
2467 *puDst = uResult; \
2468 \
2469 /* Calc EFLAGS: */ \
2470 AssertCompile(X86_EFL_CF_BIT == 0); \
2471 uint32_t fEfl = *pfEFlags; \
2472 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2473 uint32_t const fCarry = (uResult >> ((a_cBitsWidth) - 1)) & X86_EFL_CF; \
2474 fEfl |= fCarry; \
2475 if (!a_fIntelFlags) /* AMD 3990X: According to the last sub-shift: */ \
2476 fEfl |= (((uResult >> ((a_cBitsWidth) - 2)) ^ fCarry) & 1) << X86_EFL_OF_BIT; \
2477 else /* Intel 10980XE: According to the first sub-shift: */ \
2478 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << (a_cBitsWidth - 1))); \
2479 *pfEFlags = fEfl; \
2480 } \
2481}
2482
2483#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2484EMIT_ROR(64, uint64_t, RT_NOTHING, 1, ASMRotateRightU64)
2485#endif
2486EMIT_ROR(64, uint64_t, _intel, 1, ASMRotateRightU64)
2487EMIT_ROR(64, uint64_t, _amd, 0, ASMRotateRightU64)
2488
2489#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2490EMIT_ROR(32, uint32_t, RT_NOTHING, 1, ASMRotateRightU32)
2491#endif
2492EMIT_ROR(32, uint32_t, _intel, 1, ASMRotateRightU32)
2493EMIT_ROR(32, uint32_t, _amd, 0, ASMRotateRightU32)
2494
2495DECL_FORCE_INLINE(uint16_t) iemAImpl_ror_u16_hlp(uint16_t uValue, uint8_t cShift)
2496{
2497 return (uValue >> cShift) | (uValue << (16 - cShift));
2498}
2499#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2500EMIT_ROR(16, uint16_t, RT_NOTHING, 1, iemAImpl_ror_u16_hlp)
2501#endif
2502EMIT_ROR(16, uint16_t, _intel, 1, iemAImpl_ror_u16_hlp)
2503EMIT_ROR(16, uint16_t, _amd, 0, iemAImpl_ror_u16_hlp)
2504
2505DECL_FORCE_INLINE(uint8_t) iemAImpl_ror_u8_hlp(uint8_t uValue, uint8_t cShift)
2506{
2507 return (uValue >> cShift) | (uValue << (8 - cShift));
2508}
2509#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2510EMIT_ROR(8, uint8_t, RT_NOTHING, 1, iemAImpl_ror_u8_hlp)
2511#endif
2512EMIT_ROR(8, uint8_t, _intel, 1, iemAImpl_ror_u8_hlp)
2513EMIT_ROR(8, uint8_t, _amd, 0, iemAImpl_ror_u8_hlp)
2514
2515
2516/*
2517 * RCL
2518 */
2519#define EMIT_RCL(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2520IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_rcl_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2521{ \
2522 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2523 if (a_cBitsWidth < 32 && a_fIntelFlags) \
2524 cShift %= a_cBitsWidth + 1; \
2525 if (cShift) \
2526 { \
2527 if (a_cBitsWidth < 32 && !a_fIntelFlags) \
2528 cShift %= a_cBitsWidth + 1; \
2529 a_uType const uDst = *puDst; \
2530 a_uType uResult = uDst << cShift; \
2531 if (cShift > 1) \
2532 uResult |= uDst >> (a_cBitsWidth + 1 - cShift); \
2533 \
2534 AssertCompile(X86_EFL_CF_BIT == 0); \
2535 uint32_t fEfl = *pfEFlags; \
2536 uint32_t fInCarry = fEfl & X86_EFL_CF; \
2537 uResult |= (a_uType)fInCarry << (cShift - 1); \
2538 \
2539 *puDst = uResult; \
2540 \
2541 /* Calc EFLAGS. */ \
2542 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2543 uint32_t const fOutCarry = a_cBitsWidth >= 32 || a_fIntelFlags || cShift \
2544 ? (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF : fInCarry; \
2545 fEfl |= fOutCarry; \
2546 if (!a_fIntelFlags) /* AMD 3990X: According to the last sub-shift: */ \
2547 fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fOutCarry) << X86_EFL_OF_BIT; \
2548 else /* Intel 10980XE: According to the first sub-shift: */ \
2549 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1)); \
2550 *pfEFlags = fEfl; \
2551 } \
2552}
2553
2554#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2555EMIT_RCL(64, uint64_t, RT_NOTHING, 1)
2556#endif
2557EMIT_RCL(64, uint64_t, _intel, 1)
2558EMIT_RCL(64, uint64_t, _amd, 0)
2559
2560#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2561EMIT_RCL(32, uint32_t, RT_NOTHING, 1)
2562#endif
2563EMIT_RCL(32, uint32_t, _intel, 1)
2564EMIT_RCL(32, uint32_t, _amd, 0)
2565
2566#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2567EMIT_RCL(16, uint16_t, RT_NOTHING, 1)
2568#endif
2569EMIT_RCL(16, uint16_t, _intel, 1)
2570EMIT_RCL(16, uint16_t, _amd, 0)
2571
2572#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2573EMIT_RCL(8, uint8_t, RT_NOTHING, 1)
2574#endif
2575EMIT_RCL(8, uint8_t, _intel, 1)
2576EMIT_RCL(8, uint8_t, _amd, 0)
2577
2578
2579/*
2580 * RCR
2581 */
2582#define EMIT_RCR(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2583IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_rcr_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2584{ \
2585 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2586 if (a_cBitsWidth < 32 && a_fIntelFlags) \
2587 cShift %= a_cBitsWidth + 1; \
2588 if (cShift) \
2589 { \
2590 if (a_cBitsWidth < 32 && !a_fIntelFlags) \
2591 cShift %= a_cBitsWidth + 1; \
2592 a_uType const uDst = *puDst; \
2593 a_uType uResult = uDst >> cShift; \
2594 if (cShift > 1) \
2595 uResult |= uDst << (a_cBitsWidth + 1 - cShift); \
2596 \
2597 AssertCompile(X86_EFL_CF_BIT == 0); \
2598 uint32_t fEfl = *pfEFlags; \
2599 uint32_t fInCarry = fEfl & X86_EFL_CF; \
2600 uResult |= (a_uType)fInCarry << (a_cBitsWidth - cShift); \
2601 *puDst = uResult; \
2602 \
2603 /* Calc EFLAGS. The OF bit is undefined if cShift > 1; we implement \
2604 it the same way as for 1-bit shifts. */ \
2605 fEfl &= ~(X86_EFL_CF | X86_EFL_OF); \
2606 uint32_t const fOutCarry = a_cBitsWidth >= 32 || a_fIntelFlags || cShift \
2607 ? (uDst >> (cShift - 1)) & X86_EFL_CF : fInCarry; \
2608 fEfl |= fOutCarry; \
2609 if (!a_fIntelFlags) /* AMD 3990X: XOR the two most significant bits of the result: */ \
2610 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uResult ^ (uResult << 1)); \
2611 else /* Intel 10980XE: same as AMD, but only for the first sub-shift: */ \
2612 fEfl |= (fInCarry ^ (uint32_t)(uDst >> (a_cBitsWidth - 1))) << X86_EFL_OF_BIT; \
2613 *pfEFlags = fEfl; \
2614 } \
2615}
2616
2617#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2618EMIT_RCR(64, uint64_t, RT_NOTHING, 1)
2619#endif
2620EMIT_RCR(64, uint64_t, _intel, 1)
2621EMIT_RCR(64, uint64_t, _amd, 0)
2622
2623#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2624EMIT_RCR(32, uint32_t, RT_NOTHING, 1)
2625#endif
2626EMIT_RCR(32, uint32_t, _intel, 1)
2627EMIT_RCR(32, uint32_t, _amd, 0)
2628
2629#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2630EMIT_RCR(16, uint16_t, RT_NOTHING, 1)
2631#endif
2632EMIT_RCR(16, uint16_t, _intel, 1)
2633EMIT_RCR(16, uint16_t, _amd, 0)
2634
2635#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2636EMIT_RCR(8, uint8_t, RT_NOTHING, 1)
2637#endif
2638EMIT_RCR(8, uint8_t, _intel, 1)
2639EMIT_RCR(8, uint8_t, _amd, 0)
2640
2641
2642/*
2643 * SHL
2644 */
2645#define EMIT_SHL(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2646IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_shl_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2647{ \
2648 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2649 if (cShift) \
2650 { \
2651 a_uType const uDst = *puDst; \
2652 a_uType uResult = uDst << cShift; \
2653 *puDst = uResult; \
2654 \
2655 /* Calc EFLAGS. */ \
2656 AssertCompile(X86_EFL_CF_BIT == 0); \
2657 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2658 uint32_t fCarry = (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; \
2659 fEfl |= fCarry; \
2660 if (!a_fIntelFlags) \
2661 fEfl |= ((uResult >> (a_cBitsWidth - 1)) ^ fCarry) << X86_EFL_OF_BIT; /* AMD 3990X: Last shift result. */ \
2662 else \
2663 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1)); /* Intel 10980XE: First shift result. */ \
2664 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2665 fEfl |= X86_EFL_CALC_ZF(uResult); \
2666 fEfl |= g_afParity[uResult & 0xff]; \
2667 if (!a_fIntelFlags) \
2668 fEfl |= X86_EFL_AF; /* AMD 3990x sets it unconditionally, Intel 10980XE does the opposite */ \
2669 *pfEFlags = fEfl; \
2670 } \
2671}
2672
2673#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2674EMIT_SHL(64, uint64_t, RT_NOTHING, 1)
2675#endif
2676EMIT_SHL(64, uint64_t, _intel, 1)
2677EMIT_SHL(64, uint64_t, _amd, 0)
2678
2679#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2680EMIT_SHL(32, uint32_t, RT_NOTHING, 1)
2681#endif
2682EMIT_SHL(32, uint32_t, _intel, 1)
2683EMIT_SHL(32, uint32_t, _amd, 0)
2684
2685#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2686EMIT_SHL(16, uint16_t, RT_NOTHING, 1)
2687#endif
2688EMIT_SHL(16, uint16_t, _intel, 1)
2689EMIT_SHL(16, uint16_t, _amd, 0)
2690
2691#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2692EMIT_SHL(8, uint8_t, RT_NOTHING, 1)
2693#endif
2694EMIT_SHL(8, uint8_t, _intel, 1)
2695EMIT_SHL(8, uint8_t, _amd, 0)
2696
2697
2698/*
2699 * SHR
2700 */
2701#define EMIT_SHR(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2702IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_shr_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2703{ \
2704 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2705 if (cShift) \
2706 { \
2707 a_uType const uDst = *puDst; \
2708 a_uType uResult = uDst >> cShift; \
2709 *puDst = uResult; \
2710 \
2711 /* Calc EFLAGS. */ \
2712 AssertCompile(X86_EFL_CF_BIT == 0); \
2713 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2714 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
2715 if (a_fIntelFlags || cShift == 1) /* AMD 3990x does what intel documents; Intel 10980XE does this for all shift counts. */ \
2716 fEfl |= (uDst >> (a_cBitsWidth - 1)) << X86_EFL_OF_BIT; \
2717 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2718 fEfl |= X86_EFL_CALC_ZF(uResult); \
2719 fEfl |= g_afParity[uResult & 0xff]; \
2720 if (!a_fIntelFlags) \
2721 fEfl |= X86_EFL_AF; /* AMD 3990x sets it unconditionally, Intel 10980XE does the opposite */ \
2722 *pfEFlags = fEfl; \
2723 } \
2724}
2725
2726#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2727EMIT_SHR(64, uint64_t, RT_NOTHING, 1)
2728#endif
2729EMIT_SHR(64, uint64_t, _intel, 1)
2730EMIT_SHR(64, uint64_t, _amd, 0)
2731
2732#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2733EMIT_SHR(32, uint32_t, RT_NOTHING, 1)
2734#endif
2735EMIT_SHR(32, uint32_t, _intel, 1)
2736EMIT_SHR(32, uint32_t, _amd, 0)
2737
2738#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2739EMIT_SHR(16, uint16_t, RT_NOTHING, 1)
2740#endif
2741EMIT_SHR(16, uint16_t, _intel, 1)
2742EMIT_SHR(16, uint16_t, _amd, 0)
2743
2744#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2745EMIT_SHR(8, uint8_t, RT_NOTHING, 1)
2746#endif
2747EMIT_SHR(8, uint8_t, _intel, 1)
2748EMIT_SHR(8, uint8_t, _amd, 0)
2749
2750
2751/*
2752 * SAR
2753 */
2754#define EMIT_SAR(a_cBitsWidth, a_uType, a_iType, a_Suffix, a_fIntelFlags) \
2755IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_sar_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, uint8_t cShift, uint32_t *pfEFlags)) \
2756{ \
2757 cShift &= a_cBitsWidth >= 32 ? a_cBitsWidth - 1 : 31; \
2758 if (cShift) \
2759 { \
2760 a_iType const iDst = (a_iType)*puDst; \
2761 a_uType uResult = iDst >> cShift; \
2762 *puDst = uResult; \
2763 \
2764 /* Calc EFLAGS. \
2765 Note! The OF flag is always zero because the sign bit never changes. */ \
2766 AssertCompile(X86_EFL_CF_BIT == 0); \
2767 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2768 fEfl |= (iDst >> (cShift - 1)) & X86_EFL_CF; \
2769 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2770 fEfl |= X86_EFL_CALC_ZF(uResult); \
2771 fEfl |= g_afParity[uResult & 0xff]; \
2772 if (!a_fIntelFlags) \
2773 fEfl |= X86_EFL_AF; /* AMD 3990x sets it unconditionally, Intel 10980XE does the opposite */ \
2774 *pfEFlags = fEfl; \
2775 } \
2776}
2777
2778#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2779EMIT_SAR(64, uint64_t, int64_t, RT_NOTHING, 1)
2780#endif
2781EMIT_SAR(64, uint64_t, int64_t, _intel, 1)
2782EMIT_SAR(64, uint64_t, int64_t, _amd, 0)
2783
2784#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2785EMIT_SAR(32, uint32_t, int32_t, RT_NOTHING, 1)
2786#endif
2787EMIT_SAR(32, uint32_t, int32_t, _intel, 1)
2788EMIT_SAR(32, uint32_t, int32_t, _amd, 0)
2789
2790#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2791EMIT_SAR(16, uint16_t, int16_t, RT_NOTHING, 1)
2792#endif
2793EMIT_SAR(16, uint16_t, int16_t, _intel, 1)
2794EMIT_SAR(16, uint16_t, int16_t, _amd, 0)
2795
2796#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2797EMIT_SAR(8, uint8_t, int8_t, RT_NOTHING, 1)
2798#endif
2799EMIT_SAR(8, uint8_t, int8_t, _intel, 1)
2800EMIT_SAR(8, uint8_t, int8_t, _amd, 0)
2801
2802
2803/*
2804 * SHLD
2805 *
2806 * - CF is the last bit shifted out of puDst.
2807 * - AF is always cleared by Intel 10980XE.
2808 * - AF is always set by AMD 3990X.
2809 * - OF is set according to the first shift on Intel 10980XE, it seems.
2810 * - OF is set according to the last sub-shift on AMD 3990X.
2811 * - ZF, SF and PF are calculated according to the result by both vendors.
2812 *
2813 * For 16-bit shifts the count mask isn't 15, but 31, and the CPU will
2814 * pick either the source register or the destination register for input bits
2815 * when going beyond 16. According to https://www.sandpile.org/x86/flags.htm
2816 * Intel has changed behaviour here several times. We implement what current
2817 * Skylake-based CPUs do for now; we can extend this later as needed.
2818 */
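
/*
 * Illustrative sketch (not built, hypothetical helper name): SHLD shifts uDst
 * left and fills the vacated low bits from the top of uSrc; CF is the last
 * bit shifted out of uDst.  Made-up values for illustration.
 */
#if 0
static void iemAImplExampleShldU32(void)
{
    uint32_t uDst    = UINT32_C(0x80000001);
    uint32_t fEFlags = 0;
    iemAImpl_shld_u32(&uDst, UINT32_C(0xf0000000) /*uSrc*/, 4, &fEFlags);
    /* Now uDst == 0x0000001f; CF is clear since the last bit out of uDst (bit 28) was 0. */
}
#endif
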
2819#define EMIT_SHLD(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2820IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_shld_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, a_uType uSrc, uint8_t cShift, \
2821 uint32_t *pfEFlags)) \
2822{ \
2823 cShift &= a_cBitsWidth - 1; \
2824 if (cShift) \
2825 { \
2826 a_uType const uDst = *puDst; \
2827 a_uType uResult = uDst << cShift; \
2828 uResult |= uSrc >> (a_cBitsWidth - cShift); \
2829 *puDst = uResult; \
2830 \
2831 /* CALC EFLAGS: */ \
2832 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2833 if (a_fIntelFlags) \
2834 /* Intel 6700K & 10980XE: Set according to the first shift. AF always cleared. */ \
2835 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uDst << 1)); \
2836 else \
2837 { /* AMD 3990X: Set according to last shift. AF always set. */ \
2838 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth((uDst << (cShift - 1)) ^ uResult); \
2839 fEfl |= X86_EFL_AF; \
2840 } \
2841 AssertCompile(X86_EFL_CF_BIT == 0); \
2842 fEfl |= (uDst >> (a_cBitsWidth - cShift)) & X86_EFL_CF; /* CF = last bit shifted out */ \
2843 fEfl |= g_afParity[uResult & 0xff]; \
2844 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2845 fEfl |= X86_EFL_CALC_ZF(uResult); \
2846 *pfEFlags = fEfl; \
2847 } \
2848}
2849
2850#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2851EMIT_SHLD(64, uint64_t, RT_NOTHING, 1)
2852#endif
2853EMIT_SHLD(64, uint64_t, _intel, 1)
2854EMIT_SHLD(64, uint64_t, _amd, 0)
2855
2856#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2857EMIT_SHLD(32, uint32_t, RT_NOTHING, 1)
2858#endif
2859EMIT_SHLD(32, uint32_t, _intel, 1)
2860EMIT_SHLD(32, uint32_t, _amd, 0)
2861
2862#define EMIT_SHLD_16(a_Suffix, a_fIntelFlags) \
2863IEM_DECL_IMPL_DEF(void, RT_CONCAT(iemAImpl_shld_u16,a_Suffix),(uint16_t *puDst, uint16_t uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
2864{ \
2865 cShift &= 31; \
2866 if (cShift) \
2867 { \
2868 uint16_t const uDst = *puDst; \
2869 uint64_t const uTmp = a_fIntelFlags \
2870 ? ((uint64_t)uDst << 32) | ((uint32_t)uSrc << 16) | uDst \
2871 : ((uint64_t)uDst << 32) | ((uint32_t)uSrc << 16) | uSrc; \
2872 uint16_t const uResult = (uint16_t)((uTmp << cShift) >> 32); \
2873 *puDst = uResult; \
2874 \
2875 /* CALC EFLAGS: */ \
2876 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2877 AssertCompile(X86_EFL_CF_BIT == 0); \
2878 if (a_fIntelFlags) \
2879 { \
2880 fEfl |= (uTmp >> (48 - cShift)) & X86_EFL_CF; /* CF = last bit shifted out of the combined operand */ \
2881 /* Intel 6700K & 10980XE: OF is set according to the first shift. AF always cleared. */ \
2882 fEfl |= X86_EFL_GET_OF_16(uDst ^ (uDst << 1)); \
2883 } \
2884 else \
2885 { \
2886 /* AMD 3990X: OF is set according to last shift, with some weirdness. AF always set. CF = last bit shifted out of uDst. */ \
2887 if (cShift < 16) \
2888 { \
2889 fEfl |= (uDst >> (16 - cShift)) & X86_EFL_CF; \
2890 fEfl |= X86_EFL_GET_OF_16((uDst << (cShift - 1)) ^ uResult); \
2891 } \
2892 else \
2893 { \
2894 if (cShift == 16) \
2895 fEfl |= uDst & X86_EFL_CF; \
2896 fEfl |= X86_EFL_GET_OF_16((uDst << (cShift - 1)) ^ 0); \
2897 } \
2898 fEfl |= X86_EFL_AF; \
2899 } \
2900 fEfl |= g_afParity[uResult & 0xff]; \
2901 fEfl |= X86_EFL_CALC_SF(uResult, 16); \
2902 fEfl |= X86_EFL_CALC_ZF(uResult); \
2903 *pfEFlags = fEfl; \
2904 } \
2905}
2906
2907#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2908EMIT_SHLD_16(RT_NOTHING, 1)
2909#endif
2910EMIT_SHLD_16(_intel, 1)
2911EMIT_SHLD_16(_amd, 0)
2912
2913
2914/*
2915 * SHRD
2916 *
2917 * EFLAGS behaviour seems to be the same as with SHLD:
2918 * - CF is the last bit shifted out of puDst.
2919 * - AF is always cleared by Intel 10980XE.
2920 * - AF is always set by AMD 3990X.
2921 * - OF is set according to the first shift on Intel 10980XE, it seems.
2922 * - OF is set according to the last sub-shift on AMD 3990X.
2923 * - ZF, SF and PF are calculated according to the result by both vendors.
2924 *
2925 * For 16-bit shifts the count mask isn't 15, but 31, and the CPU will
2926 * pick either the source register or the destination register for input bits
2927 * when going beyond 16. According to https://www.sandpile.org/x86/flags.htm
2928 * Intel has changed behaviour here several times. We implement what current
2929 * Skylake-based CPUs do for now; we can extend this later as needed.
2930 */
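
/*
 * Illustrative sketch (not built, hypothetical helper name): SHRD shifts uDst
 * right and fills the vacated high bits from the bottom of uSrc; CF is the
 * last bit shifted out of uDst.  Made-up values for illustration.
 */
#if 0
static void iemAImplExampleShrdU32(void)
{
    uint32_t uDst    = UINT32_C(0x00000001);
    uint32_t fEFlags = 0;
    iemAImpl_shrd_u32(&uDst, UINT32_C(0x00000001) /*uSrc*/, 1, &fEFlags);
    /* Now uDst == 0x80000000; CF is set since the last bit out of uDst (bit 0) was 1. */
}
#endif
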
2931#define EMIT_SHRD(a_cBitsWidth, a_uType, a_Suffix, a_fIntelFlags) \
2932IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_shrd_u,a_cBitsWidth,a_Suffix),(a_uType *puDst, a_uType uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
2933{ \
2934 cShift &= a_cBitsWidth - 1; \
2935 if (cShift) \
2936 { \
2937 a_uType const uDst = *puDst; \
2938 a_uType uResult = uDst >> cShift; \
2939 uResult |= uSrc << (a_cBitsWidth - cShift); \
2940 *puDst = uResult; \
2941 \
2942 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2943 AssertCompile(X86_EFL_CF_BIT == 0); \
2944 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
2945 if (a_fIntelFlags) \
2946 /* Intel 6700K & 10980XE: Set according to the first shift. AF always cleared. */ \
2947 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ (uSrc << (a_cBitsWidth - 1))); \
2948 else \
2949 { /* AMD 3990X: Set according to last shift. AF always set. */ \
2950 if (cShift > 1) /* Set according to last shift. */ \
2951 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth((uSrc << (a_cBitsWidth - cShift + 1)) ^ uResult); \
2952 else \
2953 fEfl |= X86_EFL_GET_OF_ ## a_cBitsWidth(uDst ^ uResult); \
2954 fEfl |= X86_EFL_AF; \
2955 } \
2956 fEfl |= X86_EFL_CALC_SF(uResult, a_cBitsWidth); \
2957 fEfl |= X86_EFL_CALC_ZF(uResult); \
2958 fEfl |= g_afParity[uResult & 0xff]; \
2959 *pfEFlags = fEfl; \
2960 } \
2961}
2962
2963#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
2964EMIT_SHRD(64, uint64_t, RT_NOTHING, 1)
2965#endif
2966EMIT_SHRD(64, uint64_t, _intel, 1)
2967EMIT_SHRD(64, uint64_t, _amd, 0)
2968
2969#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
2970EMIT_SHRD(32, uint32_t, RT_NOTHING, 1)
2971#endif
2972EMIT_SHRD(32, uint32_t, _intel, 1)
2973EMIT_SHRD(32, uint32_t, _amd, 0)
2974
2975#define EMIT_SHRD_16(a_Suffix, a_fIntelFlags) \
2976IEM_DECL_IMPL_DEF(void, RT_CONCAT(iemAImpl_shrd_u16,a_Suffix),(uint16_t *puDst, uint16_t uSrc, uint8_t cShift, uint32_t *pfEFlags)) \
2977{ \
2978 cShift &= 31; \
2979 if (cShift) \
2980 { \
2981 uint16_t const uDst = *puDst; \
2982 uint64_t const uTmp = a_fIntelFlags \
2983 ? uDst | ((uint32_t)uSrc << 16) | ((uint64_t)uDst << 32) \
2984 : uDst | ((uint32_t)uSrc << 16) | ((uint64_t)uSrc << 32); \
2985 uint16_t const uResult = (uint16_t)(uTmp >> cShift); \
2986 *puDst = uResult; \
2987 \
2988 uint32_t fEfl = *pfEFlags & ~X86_EFL_STATUS_BITS; \
2989 AssertCompile(X86_EFL_CF_BIT == 0); \
2990 if (a_fIntelFlags) \
2991 { \
2992 /* Intel 10980XE: The CF is the last shifted out of the combined uTmp operand. */ \
2993 fEfl |= (uTmp >> (cShift - 1)) & X86_EFL_CF; \
2994 /* Intel 6700K & 10980XE: Set according to the first shift. AF always cleared. */ \
2995 fEfl |= X86_EFL_GET_OF_16(uDst ^ (uSrc << 15)); \
2996 } \
2997 else \
2998 { \
2999 /* AMD 3990X: CF flag seems to be last bit shifted out of uDst, not the combined uSrc:uSrc:uDst operand. */ \
3000 fEfl |= (uDst >> (cShift - 1)) & X86_EFL_CF; \
3001 /* AMD 3990X: Set according to last shift. AF always set. */ \
3002 if (cShift > 1) /* Set according to last shift. */ \
3003 fEfl |= X86_EFL_GET_OF_16((uint16_t)(uTmp >> (cShift - 1)) ^ uResult); \
3004 else \
3005 fEfl |= X86_EFL_GET_OF_16(uDst ^ uResult); \
3006 fEfl |= X86_EFL_AF; \
3007 } \
3008 fEfl |= X86_EFL_CALC_SF(uResult, 16); \
3009 fEfl |= X86_EFL_CALC_ZF(uResult); \
3010 fEfl |= g_afParity[uResult & 0xff]; \
3011 *pfEFlags = fEfl; \
3012 } \
3013}
3014
3015#if (!defined(RT_ARCH_X86) && !defined(RT_ARCH_AMD64)) || defined(IEM_WITHOUT_ASSEMBLY)
3016EMIT_SHRD_16(RT_NOTHING, 1)
3017#endif
3018EMIT_SHRD_16(_intel, 1)
3019EMIT_SHRD_16(_amd, 0)
3020
3021
3022#if !defined(RT_ARCH_AMD64) || defined(IEM_WITHOUT_ASSEMBLY)
3023
3024# if !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY)
3025/*
3026 * BSWAP
3027 */
3028
3029IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u64,(uint64_t *puDst))
3030{
3031 *puDst = ASMByteSwapU64(*puDst);
3032}
3033
3034
3035IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u32,(uint32_t *puDst))
3036{
3037 *puDst = ASMByteSwapU32(*puDst);
3038}
3039
3040
3041/* Note! Undocumented behaviour, so we take the full 32-bit register as argument. */
3042IEM_DECL_IMPL_DEF(void, iemAImpl_bswap_u16,(uint32_t *puDst))
3043{
3044#if 0
3045 *(uint16_t *)puDst = ASMByteSwapU16(*(uint16_t *)puDst);
3046#else
3047 /* This is the behaviour of the AMD 3990X (64-bit mode): */
3048 *(uint16_t *)puDst = 0;
3049#endif
3050}
3051
3052# endif /* !defined(RT_ARCH_X86) || defined(IEM_WITHOUT_ASSEMBLY) */
3053
3054
3055
3056# if defined(IEM_WITHOUT_ASSEMBLY)
3057
3058/*
3059 * LFENCE, SFENCE & MFENCE.
3060 */
3061
3062IEM_DECL_IMPL_DEF(void, iemAImpl_lfence,(void))
3063{
3064 ASMReadFence();
3065}
3066
3067
3068IEM_DECL_IMPL_DEF(void, iemAImpl_sfence,(void))
3069{
3070 ASMWriteFence();
3071}
3072
3073
3074IEM_DECL_IMPL_DEF(void, iemAImpl_mfence,(void))
3075{
3076 ASMMemoryFence();
3077}
3078
3079
3080# ifndef RT_ARCH_ARM64
3081IEM_DECL_IMPL_DEF(void, iemAImpl_alt_mem_fence,(void))
3082{
3083 ASMMemoryFence();
3084}
3085# endif
3086
3087# endif
3088
3089#endif /* !RT_ARCH_AMD64 || IEM_WITHOUT_ASSEMBLY */
3090
3091
3092IEM_DECL_IMPL_DEF(void, iemAImpl_arpl,(uint16_t *pu16Dst, uint16_t u16Src, uint32_t *pfEFlags))
3093{
3094 if ((*pu16Dst & X86_SEL_RPL) < (u16Src & X86_SEL_RPL))
3095 {
3096 *pu16Dst &= X86_SEL_MASK_OFF_RPL;
3097 *pu16Dst |= u16Src & X86_SEL_RPL;
3098
3099 *pfEFlags |= X86_EFL_ZF;
3100 }
3101 else
3102 *pfEFlags &= ~X86_EFL_ZF;
3103}
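/*
 * Worked example (illustrative only, not part of the emulation): arpl with
 * *pu16Dst=0x0008 (RPL 0) and u16Src=0x0003 (RPL 3) raises the destination RPL,
 * yielding *pu16Dst=0x000b and ZF set; with the operands swapped nothing is
 * adjusted and ZF is cleared.
 */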
3104
3105
3106#if defined(IEM_WITHOUT_ASSEMBLY)
3107
3108/*********************************************************************************************************************************
3109* x87 FPU Loads *
3110*********************************************************************************************************************************/
3111
3112IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r80_from_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT32U pr32Val))
3113{
3114 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3115 if (RTFLOAT32U_IS_NORMAL(pr32Val))
3116 {
3117 pFpuRes->r80Result.sj64.fSign = pr32Val->s.fSign;
3118 pFpuRes->r80Result.sj64.fInteger = 1;
3119 pFpuRes->r80Result.sj64.uFraction = (uint64_t)pr32Val->s.uFraction
3120 << (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS);
3121 pFpuRes->r80Result.sj64.uExponent = pr32Val->s.uExponent - RTFLOAT32U_EXP_BIAS + RTFLOAT80U_EXP_BIAS;
3122 Assert(RTFLOAT80U_IS_NORMAL(&pFpuRes->r80Result));
3123 }
3124 else if (RTFLOAT32U_IS_ZERO(pr32Val))
3125 {
3126 pFpuRes->r80Result.s.fSign = pr32Val->s.fSign;
3127 pFpuRes->r80Result.s.uExponent = 0;
3128 pFpuRes->r80Result.s.uMantissa = 0;
3129 Assert(RTFLOAT80U_IS_ZERO(&pFpuRes->r80Result));
3130 }
3131 else if (RTFLOAT32U_IS_SUBNORMAL(pr32Val))
3132 {
3133 /* Subnormal values get normalized. */
3134 pFpuRes->r80Result.sj64.fSign = pr32Val->s.fSign;
3135 pFpuRes->r80Result.sj64.fInteger = 1;
3136 unsigned const cExtraShift = RTFLOAT32U_FRACTION_BITS - ASMBitLastSetU32(pr32Val->s.uFraction);
3137 pFpuRes->r80Result.sj64.uFraction = (uint64_t)pr32Val->s.uFraction
3138 << (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS + cExtraShift + 1);
3139 pFpuRes->r80Result.sj64.uExponent = pr32Val->s.uExponent - RTFLOAT32U_EXP_BIAS + RTFLOAT80U_EXP_BIAS - cExtraShift;
3140 pFpuRes->FSW |= X86_FSW_DE;
3141 if (!(pFpuState->FCW & X86_FCW_DM))
3142 pFpuRes->FSW |= X86_FSW_ES | X86_FSW_B; /* The value is still pushed. */
3143 }
3144 else if (RTFLOAT32U_IS_INF(pr32Val))
3145 {
3146 pFpuRes->r80Result.s.fSign = pr32Val->s.fSign;
3147 pFpuRes->r80Result.s.uExponent = RTFLOAT80U_EXP_MAX;
3148 pFpuRes->r80Result.s.uMantissa = RT_BIT_64(63);
3149 Assert(RTFLOAT80U_IS_INF(&pFpuRes->r80Result));
3150 }
3151 else
3152 {
3153 /* Signalling and quiet NaNs both turn into quiet ones when loaded (weird). */
3154 Assert(RTFLOAT32U_IS_NAN(pr32Val));
3155 pFpuRes->r80Result.sj64.fSign = pr32Val->s.fSign;
3156 pFpuRes->r80Result.sj64.uExponent = RTFLOAT80U_EXP_MAX;
3157 pFpuRes->r80Result.sj64.fInteger = 1;
3158 pFpuRes->r80Result.sj64.uFraction = (uint64_t)pr32Val->s.uFraction
3159 << (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS);
3160 if (RTFLOAT32U_IS_SIGNALLING_NAN(pr32Val))
3161 {
3162 pFpuRes->r80Result.sj64.uFraction |= RT_BIT_64(62); /* make quiet */
3163 Assert(RTFLOAT80U_IS_QUIET_NAN(&pFpuRes->r80Result));
3164 pFpuRes->FSW |= X86_FSW_IE;
3165
3166 if (!(pFpuState->FCW & X86_FCW_IM))
3167 {
3168 /* The value is not pushed. */
3169 pFpuRes->FSW &= ~X86_FSW_TOP_MASK;
3170 pFpuRes->FSW |= X86_FSW_ES | X86_FSW_B;
3171 pFpuRes->r80Result.au64[0] = 0;
3172 pFpuRes->r80Result.au16[4] = 0;
3173 }
3174 }
3175 else
3176 Assert(RTFLOAT80U_IS_QUIET_NAN(&pFpuRes->r80Result));
3177 }
3178}
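/*
 * Worked example (illustrative only, not part of the emulation): loading 1.5f
 * (sign 0, biased exponent 127, fraction 0x400000) widens to sign 0, biased
 * exponent 16383 and the mantissa 0xc000000000000000, i.e. the explicit
 * integer bit followed by the fraction shifted left by 40 bits.
 */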
3179
3180
3181IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r80_from_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT64U pr64Val))
3182{
3183 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3184 if (RTFLOAT64U_IS_NORMAL(pr64Val))
3185 {
3186 pFpuRes->r80Result.sj64.fSign = pr64Val->s.fSign;
3187 pFpuRes->r80Result.sj64.fInteger = 1;
3188 pFpuRes->r80Result.sj64.uFraction = pr64Val->s64.uFraction << (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS);
3189 pFpuRes->r80Result.sj64.uExponent = pr64Val->s.uExponent - RTFLOAT64U_EXP_BIAS + RTFLOAT80U_EXP_BIAS;
3190 Assert(RTFLOAT80U_IS_NORMAL(&pFpuRes->r80Result));
3191 }
3192 else if (RTFLOAT64U_IS_ZERO(pr64Val))
3193 {
3194 pFpuRes->r80Result.s.fSign = pr64Val->s.fSign;
3195 pFpuRes->r80Result.s.uExponent = 0;
3196 pFpuRes->r80Result.s.uMantissa = 0;
3197 Assert(RTFLOAT80U_IS_ZERO(&pFpuRes->r80Result));
3198 }
3199 else if (RTFLOAT64U_IS_SUBNORMAL(pr64Val))
3200 {
3201 /* Subnormal values get normalized. */
3202 pFpuRes->r80Result.sj64.fSign = pr64Val->s.fSign;
3203 pFpuRes->r80Result.sj64.fInteger = 1;
3204 unsigned const cExtraShift = RTFLOAT64U_FRACTION_BITS - ASMBitLastSetU64(pr64Val->s64.uFraction);
3205 pFpuRes->r80Result.sj64.uFraction = pr64Val->s64.uFraction
3206 << (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS + cExtraShift + 1);
3207 pFpuRes->r80Result.sj64.uExponent = pr64Val->s.uExponent - RTFLOAT64U_EXP_BIAS + RTFLOAT80U_EXP_BIAS - cExtraShift;
3208 pFpuRes->FSW |= X86_FSW_DE;
3209 if (!(pFpuState->FCW & X86_FCW_DM))
3210 pFpuRes->FSW |= X86_FSW_ES | X86_FSW_B; /* The value is still pushed. */
3211 }
3212 else if (RTFLOAT64U_IS_INF(pr64Val))
3213 {
3214 pFpuRes->r80Result.s.fSign = pr64Val->s.fSign;
3215 pFpuRes->r80Result.s.uExponent = RTFLOAT80U_EXP_MAX;
3216 pFpuRes->r80Result.s.uMantissa = RT_BIT_64(63);
3217 Assert(RTFLOAT80U_IS_INF(&pFpuRes->r80Result));
3218 }
3219 else
3220 {
3221 /* Signalling and quiet NaNs both turn into quiet ones when loaded (weird). */
3222 Assert(RTFLOAT64U_IS_NAN(pr64Val));
3223 pFpuRes->r80Result.sj64.fSign = pr64Val->s.fSign;
3224 pFpuRes->r80Result.sj64.uExponent = RTFLOAT80U_EXP_MAX;
3225 pFpuRes->r80Result.sj64.fInteger = 1;
3226 pFpuRes->r80Result.sj64.uFraction = pr64Val->s64.uFraction << (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS);
3227 if (RTFLOAT64U_IS_SIGNALLING_NAN(pr64Val))
3228 {
3229 pFpuRes->r80Result.sj64.uFraction |= RT_BIT_64(62); /* make quiet */
3230 Assert(RTFLOAT80U_IS_QUIET_NAN(&pFpuRes->r80Result));
3231 pFpuRes->FSW |= X86_FSW_IE;
3232
3233 if (!(pFpuState->FCW & X86_FCW_IM))
3234 {
3235 /* The value is not pushed. */
3236 pFpuRes->FSW &= ~X86_FSW_TOP_MASK;
3237 pFpuRes->FSW |= X86_FSW_ES | X86_FSW_B;
3238 pFpuRes->r80Result.au64[0] = 0;
3239 pFpuRes->r80Result.au16[4] = 0;
3240 }
3241 }
3242 else
3243 Assert(RTFLOAT80U_IS_QUIET_NAN(&pFpuRes->r80Result));
3244 }
3245}
3246
3247
3248IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r80_from_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
3249{
3250 pFpuRes->r80Result.au64[0] = pr80Val->au64[0];
3251 pFpuRes->r80Result.au16[4] = pr80Val->au16[4];
3252 /* Raises no exceptions. */
3253 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3254}
3255
3256
3257IEM_DECL_IMPL_DEF(void, iemAImpl_fld1,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3258{
3259 pFpuRes->r80Result.sj64.fSign = 0;
3260 pFpuRes->r80Result.sj64.uExponent = 0 + 16383;
3261 pFpuRes->r80Result.sj64.fInteger = 1;
3262 pFpuRes->r80Result.sj64.uFraction = 0;
3263
3264 /*
3265 * FPU status word:
3266 * - TOP is irrelevant, but we must match x86 assembly version.
3267 * - C1 is always cleared as we don't have any stack overflows.
3268 * - C0, C2, and C3 are undefined and Intel 10980XE does not touch them.
3269 */
3270 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3));
3271}
3272
3273
3274IEM_DECL_IMPL_DEF(void, iemAImpl_fldl2e,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3275{
3276 pFpuRes->r80Result.sj64.fSign = 0;
3277 pFpuRes->r80Result.sj64.uExponent = 0 + 16383;
3278 pFpuRes->r80Result.sj64.fInteger = 1;
3279 pFpuRes->r80Result.sj64.uFraction = (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
3280 || (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_UP
3281 ? UINT64_C(0x38aa3b295c17f0bc) : UINT64_C(0x38aa3b295c17f0bb);
3282 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3283}
3284
3285
3286IEM_DECL_IMPL_DEF(void, iemAImpl_fldl2t,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3287{
3288 pFpuRes->r80Result.sj64.fSign = 0;
3289 pFpuRes->r80Result.sj64.uExponent = 1 + 16383;
3290 pFpuRes->r80Result.sj64.fInteger = 1;
3291 pFpuRes->r80Result.sj64.uFraction = (pFpuState->FCW & X86_FCW_RC_MASK) != X86_FCW_RC_UP
3292 ? UINT64_C(0x549a784bcd1b8afe) : UINT64_C(0x549a784bcd1b8aff);
3293 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3294}
3295
3296
3297IEM_DECL_IMPL_DEF(void, iemAImpl_fldlg2,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3298{
3299 pFpuRes->r80Result.sj64.fSign = 0;
3300 pFpuRes->r80Result.sj64.uExponent = -2 + 16383;
3301 pFpuRes->r80Result.sj64.fInteger = 1;
3302 pFpuRes->r80Result.sj64.uFraction = (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
3303 || (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_UP
3304 ? UINT64_C(0x1a209a84fbcff799) : UINT64_C(0x1a209a84fbcff798);
3305 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3306}
3307
3308
3309IEM_DECL_IMPL_DEF(void, iemAImpl_fldln2,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3310{
3311 pFpuRes->r80Result.sj64.fSign = 0;
3312 pFpuRes->r80Result.sj64.uExponent = -1 + 16383;
3313 pFpuRes->r80Result.sj64.fInteger = 1;
3314 pFpuRes->r80Result.sj64.uFraction = (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
3315 || (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_UP
3316 ? UINT64_C(0x317217f7d1cf79ac) : UINT64_C(0x317217f7d1cf79ab);
3317 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3318}
3319
3320
3321IEM_DECL_IMPL_DEF(void, iemAImpl_fldpi,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3322{
3323 pFpuRes->r80Result.sj64.fSign = 0;
3324 pFpuRes->r80Result.sj64.uExponent = 1 + 16383;
3325 pFpuRes->r80Result.sj64.fInteger = 1;
3326 pFpuRes->r80Result.sj64.uFraction = (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
3327 || (pFpuState->FCW & X86_FCW_RC_MASK) == X86_FCW_RC_UP
3328 ? UINT64_C(0x490fdaa22168c235) : UINT64_C(0x490fdaa22168c234);
3329 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3330}
3331
3332
3333IEM_DECL_IMPL_DEF(void, iemAImpl_fldz,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes))
3334{
3335 pFpuRes->r80Result.s.fSign = 0;
3336 pFpuRes->r80Result.s.uExponent = 0;
3337 pFpuRes->r80Result.s.uMantissa = 0;
3338 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3339}
3340
3341#define EMIT_FILD(a_cBits) \
3342IEM_DECL_IMPL_DEF(void, iemAImpl_fild_r80_from_i ## a_cBits,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, \
3343 int ## a_cBits ## _t const *piVal)) \
3344{ \
3345 int ## a_cBits ## _t iVal = *piVal; \
3346 if (iVal == 0) \
3347 { \
3348 pFpuRes->r80Result.s.fSign = 0; \
3349 pFpuRes->r80Result.s.uExponent = 0; \
3350 pFpuRes->r80Result.s.uMantissa = 0; \
3351 } \
3352 else \
3353 { \
3354 if (iVal > 0) \
3355 pFpuRes->r80Result.s.fSign = 0; \
3356 else \
3357 { \
3358 pFpuRes->r80Result.s.fSign = 1; \
3359 iVal = -iVal; \
3360 } \
3361 unsigned const cBits = ASMBitLastSetU ## a_cBits((uint ## a_cBits ## _t)iVal); \
3362 pFpuRes->r80Result.s.uExponent = cBits - 1 + RTFLOAT80U_EXP_BIAS; \
3363 pFpuRes->r80Result.s.uMantissa = (uint64_t)iVal << (RTFLOAT80U_FRACTION_BITS + 1 - cBits); \
3364 } \
3365 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */ \
3366}
3367EMIT_FILD(16)
3368EMIT_FILD(32)
3369EMIT_FILD(64)
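/*
 * Worked example (illustrative only, not part of the emulation): fild of the
 * integer 100 (7 significant bits) produces sign 0, biased exponent 16389
 * (unbiased 6) and the mantissa 0xc800000000000000, i.e. 100 shifted up into
 * the top seven mantissa bits.
 */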
3370
3371
3372IEM_DECL_IMPL_DEF(void, iemAImpl_fld_r80_from_d80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTPBCD80U pd80Val))
3373{
3374 pFpuRes->FSW = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); /* see iemAImpl_fld1 */
3375 if ( pd80Val->s.abPairs[0] == 0
3376 && pd80Val->s.abPairs[1] == 0
3377 && pd80Val->s.abPairs[2] == 0
3378 && pd80Val->s.abPairs[3] == 0
3379 && pd80Val->s.abPairs[4] == 0
3380 && pd80Val->s.abPairs[5] == 0
3381 && pd80Val->s.abPairs[6] == 0
3382 && pd80Val->s.abPairs[7] == 0
3383 && pd80Val->s.abPairs[8] == 0)
3384 {
3385 pFpuRes->r80Result.s.fSign = pd80Val->s.fSign;
3386 pFpuRes->r80Result.s.uExponent = 0;
3387 pFpuRes->r80Result.s.uMantissa = 0;
3388 }
3389 else
3390 {
3391 pFpuRes->r80Result.s.fSign = pd80Val->s.fSign;
3392
3393 size_t cPairs = RT_ELEMENTS(pd80Val->s.abPairs);
3394 while (cPairs > 0 && pd80Val->s.abPairs[cPairs - 1] == 0)
3395 cPairs--;
3396
3397 uint64_t uVal = 0;
3398 uint64_t uFactor = 1;
3399 for (size_t iPair = 0; iPair < cPairs; iPair++, uFactor *= 100)
3400 uVal += RTPBCD80U_LO_DIGIT(pd80Val->s.abPairs[iPair]) * uFactor
3401 + RTPBCD80U_HI_DIGIT(pd80Val->s.abPairs[iPair]) * uFactor * 10;
3402
3403 unsigned const cBits = ASMBitLastSetU64(uVal);
3404 pFpuRes->r80Result.s.uExponent = cBits - 1 + RTFLOAT80U_EXP_BIAS;
3405 pFpuRes->r80Result.s.uMantissa = uVal << (RTFLOAT80U_FRACTION_BITS + 1 - cBits);
3406 }
3407}
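/*
 * Worked example (illustrative only, not part of the emulation): the packed
 * BCD value 1234 arrives as abPairs[0]=0x34 and abPairs[1]=0x12 (higher pairs
 * zero), so the loop above accumulates 34 * 1 + 12 * 100 = 1234, which is then
 * normalized exactly like an integer load.
 */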
3408
3409
3410/*********************************************************************************************************************************
3411* x87 FPU Stores *
3412*********************************************************************************************************************************/
3413
3414/**
3415 * Helper for storing a deconstructed and normal R80 value as a 32-bit one.
3416 *
3417 * This uses the rounding rules indicated by fFcw and returns updated fFsw.
3418 *
3419 * @returns Updated FPU status word value.
3420 * @param fSignIn Incoming sign indicator.
3421 * @param uMantissaIn Incoming mantissa (dot between bit 63 and 62).
3422 * @param iExponentIn Unbiased exponent.
3423 * @param fFcw The FPU control word.
3424 * @param fFsw Prepped FPU status word, i.e. exceptions and C1 clear.
3425 * @param pr32Dst Where to return the output value, if one should be
3426 * returned.
3427 *
3428 * @note Tailored as a helper for iemAImpl_fst_r80_to_r32 right now.
3429 * @note Exact same logic as iemAImpl_StoreNormalR80AsR64.
3430 */
3431static uint16_t iemAImpl_StoreNormalR80AsR32(bool fSignIn, uint64_t uMantissaIn, int32_t iExponentIn,
3432 uint16_t fFcw, uint16_t fFsw, PRTFLOAT32U pr32Dst)
3433{
3434 uint64_t const fRoundingOffMask = RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS) - 1; /* 0xffffffffff */
3435 uint64_t const uRoundingAdd = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
3436 ? RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS - 1) /* 0x8000000000 */
3437 : (fFcw & X86_FCW_RC_MASK) == (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)
3438 ? fRoundingOffMask
3439 : 0;
3440 uint64_t fRoundedOff = uMantissaIn & fRoundingOffMask;
3441
3442 /*
3443 * Deal with potential overflows/underflows first, optimizing for none.
3444 * 0 and MAX are used for special values; MAX-1 may be rounded up to MAX.
3445 */
3446 int32_t iExponentOut = (int32_t)iExponentIn + RTFLOAT32U_EXP_BIAS;
3447 if ((uint32_t)iExponentOut - 1 < (uint32_t)(RTFLOAT32U_EXP_MAX - 3))
3448 { /* likely? */ }
3449 /*
3450 * Underflow if the exponent is zero or negative. When possible this is
3451 * mapped to a subnormal number, with some additional trickery of course.
3452 */
3453 else if (iExponentOut <= 0)
3454 {
3455 bool const fIsTiny = iExponentOut < 0
3456 || UINT64_MAX - uMantissaIn > uRoundingAdd;
3457 if (!(fFcw & X86_FCW_UM) && fIsTiny)
3458 /* Note! 754-1985 sec 7.4 has something about bias adjust of 192 here, not in 2008 & 2019. Perhaps only 8087 & 287? */
3459 return fFsw | X86_FSW_UE | X86_FSW_ES | X86_FSW_B;
3460
3461 if (iExponentOut <= 0)
3462 {
3463 uMantissaIn = iExponentOut <= -63
3464 ? uMantissaIn != 0
3465 : (uMantissaIn >> (-iExponentOut + 1)) | ((uMantissaIn & (RT_BIT_64(-iExponentOut + 1) - 1)) != 0);
3466 fRoundedOff = uMantissaIn & fRoundingOffMask;
3467 if (fRoundedOff && fIsTiny)
3468 fFsw |= X86_FSW_UE;
3469 iExponentOut = 0;
3470 }
3471 }
3472 /*
3473 * Overflow if at or above max exponent value or if we will reach max
3474 * when rounding. Will return +/-zero or +/-max value depending on
3475 * whether we're rounding or not.
3476 */
3477 else if ( iExponentOut >= RTFLOAT32U_EXP_MAX
3478 || ( iExponentOut == RTFLOAT32U_EXP_MAX - 1
3479 && UINT64_MAX - uMantissaIn <= uRoundingAdd))
3480 {
3481 fFsw |= X86_FSW_OE;
3482 if (!(fFcw & X86_FCW_OM))
3483 return fFsw | X86_FSW_ES | X86_FSW_B;
3484 fFsw |= X86_FSW_PE;
3485 if (uRoundingAdd)
3486 fFsw |= X86_FSW_C1;
3487 if (!(fFcw & X86_FCW_PM))
3488 fFsw |= X86_FSW_ES | X86_FSW_B;
3489
3490 pr32Dst->s.fSign = fSignIn;
3491 if (uRoundingAdd)
3492 { /* Zero */
3493 pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX;
3494 pr32Dst->s.uFraction = 0;
3495 }
3496 else
3497 { /* Max */
3498 pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX - 1;
3499 pr32Dst->s.uFraction = RT_BIT_32(RTFLOAT32U_FRACTION_BITS) - 1;
3500 }
3501 return fFsw;
3502 }
3503
3504 /*
3505 * Normal or subnormal number.
3506 */
3507 /* Do rounding - just truncate in near mode when midway on an even outcome. */
3508 uint64_t uMantissaOut = uMantissaIn;
3509 if ( (fFcw & X86_FCW_RC_MASK) != X86_FCW_RC_NEAREST
3510 || (uMantissaIn & RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS))
3511 || fRoundedOff != uRoundingAdd)
3512 {
3513 uMantissaOut = uMantissaIn + uRoundingAdd;
3514 if (uMantissaOut >= uMantissaIn)
3515 { /* likely */ }
3516 else
3517 {
3518 uMantissaOut >>= 1; /* (We don't need to add bit 63 here (the integer bit), as it will be chopped off below.) */
3519 iExponentOut++;
3520 Assert(iExponentOut < RTFLOAT32U_EXP_MAX); /* checked above */
3521 fFsw |= X86_FSW_C1;
3522 }
3523 }
3524 else
3525 uMantissaOut = uMantissaIn;
3526
3527 /* Truncate the mantissa and set the return value. */
3528 uMantissaOut >>= RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS;
3529
3530 pr32Dst->s.uFraction = (uint32_t)uMantissaOut; /* Note! Includes the integer bit, which the 23-bit bitfield chops off if normal. */
3531 pr32Dst->s.uExponent = iExponentOut;
3532 pr32Dst->s.fSign = fSignIn;
3533
3534 /* Set status flags related to rounding. */
3535 if (fRoundedOff)
3536 {
3537 fFsw |= X86_FSW_PE;
3538 if (uMantissaOut > (uMantissaIn >> (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS)))
3539 fFsw |= X86_FSW_C1;
3540 if (!(fFcw & X86_FCW_PM))
3541 fFsw |= X86_FSW_ES | X86_FSW_B;
3542 }
3543
3544 return fFsw;
3545}
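/*
 * Worked example (illustrative only, not part of the emulation): with
 * round-to-nearest and an input mantissa of 0x8000008000000000 (in-range
 * exponent assumed), the 40 rounded-off bits equal the rounding add of
 * 0x8000000000, and since the last kept fraction bit is zero the helper above
 * truncates, implementing the round-to-even tie break (PE is still set).
 */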
3546
3547
3548/**
3549 * @note Exact same logic as iemAImpl_fst_r80_to_r64.
3550 */
3551IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_r32,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
3552 PRTFLOAT32U pr32Dst, PCRTFLOAT80U pr80Src))
3553{
3554 uint16_t const fFcw = pFpuState->FCW;
3555 uint16_t fFsw = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3));
3556 if (RTFLOAT80U_IS_NORMAL(pr80Src))
3557 fFsw = iemAImpl_StoreNormalR80AsR32(pr80Src->s.fSign, pr80Src->s.uMantissa,
3558 (int32_t)pr80Src->s.uExponent - RTFLOAT80U_EXP_BIAS, fFcw, fFsw, pr32Dst);
3559 else if (RTFLOAT80U_IS_ZERO(pr80Src))
3560 {
3561 pr32Dst->s.fSign = pr80Src->s.fSign;
3562 pr32Dst->s.uExponent = 0;
3563 pr32Dst->s.uFraction = 0;
3564 Assert(RTFLOAT32U_IS_ZERO(pr32Dst));
3565 }
3566 else if (RTFLOAT80U_IS_INF(pr80Src))
3567 {
3568 pr32Dst->s.fSign = pr80Src->s.fSign;
3569 pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX;
3570 pr32Dst->s.uFraction = 0;
3571 Assert(RTFLOAT32U_IS_INF(pr32Dst));
3572 }
3573 else if (RTFLOAT80U_IS_INDEFINITE(pr80Src))
3574 {
3575 /* Mapped to +/-QNaN */
3576 pr32Dst->s.fSign = pr80Src->s.fSign;
3577 pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX;
3578 pr32Dst->s.uFraction = RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1);
3579 }
3580 else if (RTFLOAT80U_IS_PSEUDO_INF(pr80Src) || RTFLOAT80U_IS_UNNORMAL(pr80Src) || RTFLOAT80U_IS_PSEUDO_NAN(pr80Src))
3581 {
3582 /* Pseudo-Inf / Pseudo-Nan / Unnormal -> QNaN (during load, probably) */
3583 if (fFcw & X86_FCW_IM)
3584 {
3585 pr32Dst->s.fSign = 1;
3586 pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX;
3587 pr32Dst->s.uFraction = RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1);
3588 fFsw |= X86_FSW_IE;
3589 }
3590 else
3591 fFsw |= X86_FSW_IE | X86_FSW_ES | X86_FSW_B;
3592 }
3593 else if (RTFLOAT80U_IS_NAN(pr80Src))
3594 {
3595 /* IM applies to signalling NaN input only. Everything is converted to quiet NaN. */
3596 if ((fFcw & X86_FCW_IM) || !RTFLOAT80U_IS_SIGNALLING_NAN(pr80Src))
3597 {
3598 pr32Dst->s.fSign = pr80Src->s.fSign;
3599 pr32Dst->s.uExponent = RTFLOAT32U_EXP_MAX;
3600 pr32Dst->s.uFraction = (uint32_t)(pr80Src->sj64.uFraction >> (RTFLOAT80U_FRACTION_BITS - RTFLOAT32U_FRACTION_BITS));
3601 pr32Dst->s.uFraction |= RT_BIT_32(RTFLOAT32U_FRACTION_BITS - 1);
3602 if (RTFLOAT80U_IS_SIGNALLING_NAN(pr80Src))
3603 fFsw |= X86_FSW_IE;
3604 }
3605 else
3606 fFsw |= X86_FSW_IE | X86_FSW_ES | X86_FSW_B;
3607 }
3608 else
3609 {
3610 /* Denormal values cause both an underflow and a precision exception. */
3611 Assert(RTFLOAT80U_IS_DENORMAL(pr80Src) || RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Src));
3612 if (fFcw & X86_FCW_UM)
3613 {
3614 pr32Dst->s.fSign = pr80Src->s.fSign;
3615 pr32Dst->s.uExponent = 0;
3616 if ((fFcw & X86_FCW_RC_MASK) == (!pr80Src->s.fSign ? X86_FCW_RC_UP : X86_FCW_RC_DOWN))
3617 {
3618 pr32Dst->s.uFraction = 1;
3619 fFsw |= X86_FSW_UE | X86_FSW_PE | X86_FSW_C1;
3620 if (!(fFcw & X86_FCW_PM))
3621 fFsw |= X86_FSW_ES | X86_FSW_B;
3622 }
3623 else
3624 {
3625 pr32Dst->s.uFraction = 0;
3626 fFsw |= X86_FSW_UE | X86_FSW_PE;
3627 if (!(fFcw & X86_FCW_PM))
3628 fFsw |= X86_FSW_ES | X86_FSW_B;
3629 }
3630 }
3631 else
3632 fFsw |= X86_FSW_UE | X86_FSW_ES | X86_FSW_B;
3633 }
3634 *pu16FSW = fFsw;
3635}
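/*
 * Worked example (illustrative only, not part of the emulation): storing the
 * 80-bit value 1.5 (biased exponent 16383, mantissa 0xc000000000000000)
 * produces the single precision pattern 0x3fc00000 with no exception or
 * rounding status raised.
 */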
3636
3637
3638/**
3639 * Helper for storing a deconstructed and normal R80 value as a 64-bit one.
3640 *
3641 * This uses the rounding rules indicated by fFcw and returns updated fFsw.
3642 *
3643 * @returns Updated FPU status word value.
3644 * @param fSignIn Incoming sign indicator.
3645 * @param uMantissaIn Incoming mantissa (dot between bit 63 and 62).
3646 * @param iExponentIn Unbiased exponent.
3647 * @param fFcw The FPU control word.
3648 * @param fFsw Prepped FPU status word, i.e. exceptions and C1 clear.
3649 * @param pr64Dst Where to return the output value, if one should be
3650 * returned.
3651 *
3652 * @note Tailored as a helper for iemAImpl_fst_r80_to_r64 right now.
3653 * @note Exact same logic as iemAImpl_StoreNormalR80AsR32.
3654 */
3655static uint16_t iemAImpl_StoreNormalR80AsR64(bool fSignIn, uint64_t uMantissaIn, int32_t iExponentIn,
3656 uint16_t fFcw, uint16_t fFsw, PRTFLOAT64U pr64Dst)
3657{
3658 uint64_t const fRoundingOffMask = RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS) - 1; /* 0x7ff */
3659 uint32_t const uRoundingAdd = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
3660 ? RT_BIT_64(RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS - 1) /* 0x400 */
3661 : (fFcw & X86_FCW_RC_MASK) == (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)
3662 ? fRoundingOffMask
3663 : 0;
3664 uint32_t fRoundedOff = uMantissaIn & fRoundingOffMask;
3665
3666 /*
3667 * Deal with potential overflows/underflows first, optimizing for none.
3668 * 0 and MAX are used for special values; MAX-1 may be rounded up to MAX.
3669 */
3670 int32_t iExponentOut = (int32_t)iExponentIn + RTFLOAT64U_EXP_BIAS;
3671 if ((uint32_t)iExponentOut - 1 < (uint32_t)(RTFLOAT64U_EXP_MAX - 3))
3672 { /* likely? */ }
3673 /*
3674 * Underflow if the exponent is zero or negative. When possible this is
3675 * mapped to a subnormal number, with some additional trickery of course.
3676 */
3677 else if (iExponentOut <= 0)
3678 {
3679 bool const fIsTiny = iExponentOut < 0
3680 || UINT64_MAX - uMantissaIn > uRoundingAdd;
3681 if (!(fFcw & X86_FCW_UM) && fIsTiny)
3682 /* Note! 754-1985 sec 7.4 has something about bias adjust of 1536 here, not in 2008 & 2019. Perhaps only 8087 & 287? */
3683 return fFsw | X86_FSW_UE | X86_FSW_ES | X86_FSW_B;
3684
3685 if (iExponentOut <= 0)
3686 {
3687 uMantissaIn = iExponentOut <= -63
3688 ? uMantissaIn != 0
3689 : (uMantissaIn >> (-iExponentOut + 1)) | ((uMantissaIn & (RT_BIT_64(-iExponentOut + 1) - 1)) != 0);
3690 fRoundedOff = uMantissaIn & fRoundingOffMask;
3691 if (fRoundedOff && fIsTiny)
3692 fFsw |= X86_FSW_UE;
3693 iExponentOut = 0;
3694 }
3695 }
3696 /*
3697 * Overflow if at or above max exponent value or if we will reach max
3698 * when rounding. Will return +/-zero or +/-max value depending on
3699 * whether we're rounding or not.
3700 */
3701 else if ( iExponentOut >= RTFLOAT64U_EXP_MAX
3702 || ( iExponentOut == RTFLOAT64U_EXP_MAX - 1
3703 && UINT64_MAX - uMantissaIn <= uRoundingAdd))
3704 {
3705 fFsw |= X86_FSW_OE;
3706 if (!(fFcw & X86_FCW_OM))
3707 return fFsw | X86_FSW_ES | X86_FSW_B;
3708 fFsw |= X86_FSW_PE;
3709 if (uRoundingAdd)
3710 fFsw |= X86_FSW_C1;
3711 if (!(fFcw & X86_FCW_PM))
3712 fFsw |= X86_FSW_ES | X86_FSW_B;
3713
3714 pr64Dst->s64.fSign = fSignIn;
3715 if (uRoundingAdd)
3716 { /* Zero */
3717 pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX;
3718 pr64Dst->s64.uFraction = 0;
3719 }
3720 else
3721 { /* Max */
3722 pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX - 1;
3723 pr64Dst->s64.uFraction = RT_BIT_64(RTFLOAT64U_FRACTION_BITS) - 1;
3724 }
3725 return fFsw;
3726 }
3727
3728 /*
3729 * Normal or subnormal number.
3730 */
3731 /* Do rounding - just truncate in near mode when midway on an even outcome. */
3732 uint64_t uMantissaOut = uMantissaIn;
3733 if ( (fFcw & X86_FCW_RC_MASK) != X86_FCW_RC_NEAREST
3734 || (uMantissaIn & RT_BIT_32(RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS))
3735 || fRoundedOff != uRoundingAdd)
3736 {
3737 uMantissaOut = uMantissaIn + uRoundingAdd;
3738 if (uMantissaOut >= uMantissaIn)
3739 { /* likely */ }
3740 else
3741 {
3742 uMantissaOut >>= 1; /* (We don't need to add bit 63 here (the integer bit), as it will be chopped off below.) */
3743 iExponentOut++;
3744 Assert(iExponentOut < RTFLOAT64U_EXP_MAX); /* checked above */
3745 fFsw |= X86_FSW_C1;
3746 }
3747 }
3748 else
3749 uMantissaOut = uMantissaIn;
3750
3751 /* Truncate the mantissa and set the return value. */
3752 uMantissaOut >>= RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS;
3753
3754 pr64Dst->s64.uFraction = uMantissaOut; /* Note! Includes the integer bit, which the 52-bit bitfield chops off if normal. */
3755 pr64Dst->s64.uExponent = iExponentOut;
3756 pr64Dst->s64.fSign = fSignIn;
3757
3758 /* Set status flags related to rounding. */
3759 if (fRoundedOff)
3760 {
3761 fFsw |= X86_FSW_PE;
3762 if (uMantissaOut > (uMantissaIn >> (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS)))
3763 fFsw |= X86_FSW_C1;
3764 if (!(fFcw & X86_FCW_PM))
3765 fFsw |= X86_FSW_ES | X86_FSW_B;
3766 }
3767
3768 return fFsw;
3769}
3770
3771
3772/**
3773 * @note Exact same logic as iemAImpl_fst_r80_to_r32.
3774 */
3775IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_r64,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
3776 PRTFLOAT64U pr64Dst, PCRTFLOAT80U pr80Src))
3777{
3778 uint16_t const fFcw = pFpuState->FCW;
3779 uint16_t fFsw = (7 << X86_FSW_TOP_SHIFT) | (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3));
3780 if (RTFLOAT80U_IS_NORMAL(pr80Src))
3781 fFsw = iemAImpl_StoreNormalR80AsR64(pr80Src->s.fSign, pr80Src->s.uMantissa,
3782 (int32_t)pr80Src->s.uExponent - RTFLOAT80U_EXP_BIAS, fFcw, fFsw, pr64Dst);
3783 else if (RTFLOAT80U_IS_ZERO(pr80Src))
3784 {
3785 pr64Dst->s64.fSign = pr80Src->s.fSign;
3786 pr64Dst->s64.uExponent = 0;
3787 pr64Dst->s64.uFraction = 0;
3788 Assert(RTFLOAT64U_IS_ZERO(pr64Dst));
3789 }
3790 else if (RTFLOAT80U_IS_INF(pr80Src))
3791 {
3792 pr64Dst->s64.fSign = pr80Src->s.fSign;
3793 pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX;
3794 pr64Dst->s64.uFraction = 0;
3795 Assert(RTFLOAT64U_IS_INF(pr64Dst));
3796 }
3797 else if (RTFLOAT80U_IS_INDEFINITE(pr80Src))
3798 {
3799 /* Mapped to +/-QNaN */
3800 pr64Dst->s64.fSign = pr80Src->s.fSign;
3801 pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX;
3802 pr64Dst->s64.uFraction = RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1);
3803 }
3804 else if (RTFLOAT80U_IS_PSEUDO_INF(pr80Src) || RTFLOAT80U_IS_UNNORMAL(pr80Src) || RTFLOAT80U_IS_PSEUDO_NAN(pr80Src))
3805 {
3806 /* Pseudo-Inf / Pseudo-Nan / Unnormal -> QNaN (during load, probably) */
3807 if (fFcw & X86_FCW_IM)
3808 {
3809 pr64Dst->s64.fSign = 1;
3810 pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX;
3811 pr64Dst->s64.uFraction = RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1);
3812 fFsw |= X86_FSW_IE;
3813 }
3814 else
3815 fFsw |= X86_FSW_IE | X86_FSW_ES | X86_FSW_B;
3816 }
3817 else if (RTFLOAT80U_IS_NAN(pr80Src))
3818 {
3819 /* IM applies to signalling NaN input only. Everything is converted to quiet NaN. */
3820 if ((fFcw & X86_FCW_IM) || !RTFLOAT80U_IS_SIGNALLING_NAN(pr80Src))
3821 {
3822 pr64Dst->s64.fSign = pr80Src->s.fSign;
3823 pr64Dst->s64.uExponent = RTFLOAT64U_EXP_MAX;
3824 pr64Dst->s64.uFraction = pr80Src->sj64.uFraction >> (RTFLOAT80U_FRACTION_BITS - RTFLOAT64U_FRACTION_BITS);
3825 pr64Dst->s64.uFraction |= RT_BIT_64(RTFLOAT64U_FRACTION_BITS - 1);
3826 if (RTFLOAT80U_IS_SIGNALLING_NAN(pr80Src))
3827 fFsw |= X86_FSW_IE;
3828 }
3829 else
3830 fFsw |= X86_FSW_IE | X86_FSW_ES | X86_FSW_B;
3831 }
3832 else
3833 {
3834 /* Denormal values cause both an underflow and a precision exception. */
3835 Assert(RTFLOAT80U_IS_DENORMAL(pr80Src) || RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Src));
3836 if (fFcw & X86_FCW_UM)
3837 {
3838 pr64Dst->s64.fSign = pr80Src->s.fSign;
3839 pr64Dst->s64.uExponent = 0;
3840 if ((fFcw & X86_FCW_RC_MASK) == (!pr80Src->s.fSign ? X86_FCW_RC_UP : X86_FCW_RC_DOWN))
3841 {
3842 pr64Dst->s64.uFraction = 1;
3843 fFsw |= X86_FSW_UE | X86_FSW_PE | X86_FSW_C1;
3844 if (!(fFcw & X86_FCW_PM))
3845 fFsw |= X86_FSW_ES | X86_FSW_B;
3846 }
3847 else
3848 {
3849 pr64Dst->s64.uFraction = 0;
3850 fFsw |= X86_FSW_UE | X86_FSW_PE;
3851 if (!(fFcw & X86_FCW_PM))
3852 fFsw |= X86_FSW_ES | X86_FSW_B;
3853 }
3854 }
3855 else
3856 fFsw |= X86_FSW_UE | X86_FSW_ES | X86_FSW_B;
3857 }
3858 *pu16FSW = fFsw;
3859}
3860
3861
3862IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_r80,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
3863 PRTFLOAT80U pr80Dst, PCRTFLOAT80U pr80Src))
3864{
3865 /*
3866 * FPU status word:
3867 * - TOP is irrelevant, but we must match x86 assembly version (0).
3868 * - C1 is always cleared as we don't have any stack overflows.
3869 * - C0, C2, and C3 are undefined and Intel 10980XE does not touch them.
3870 */
3871 *pu16FSW = pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3); /* see iemAImpl_fld1 */
3872 *pr80Dst = *pr80Src;
3873}
3874
3875
3876/*
3877 *
3878 * Mantissa:
3879 * 63 56 48 40 32 24 16 8 0
3880 * v v v v v v v v v
3881 * 1[.]111 0000 1111 0000 1111 0000 1111 0000 1111 0000 1111 0000 1111 0000 1111 0000
3882 * \ \ \ \ \ \ \ \ \ \ \ \ \ \ \ \
3883 * Exp: 0 4 8 12 16 20 24 28 32 36 40 44 48 52 56 60
3884 *
3885 * int64_t has the same width, only bit 63 is the sign bit. So, the max we can map over
3886 * are bits 1 thru 63, dropping off bit 0, with an exponent of 62. The number of bits we
3887 * drop off from the mantissa increases with decreasing exponent, till an exponent of 0
3888 * where we'll drop off all but bit 63.
3889 */
3890#define EMIT_FIST(a_cBits, a_iType, a_iTypeMin, a_iTypeIndefinite) \
3891IEM_DECL_IMPL_DEF(void, iemAImpl_fist_r80_to_i ## a_cBits,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW, \
3892 a_iType *piDst, PCRTFLOAT80U pr80Val)) \
3893{ \
3894 uint16_t const fFcw = pFpuState->FCW; \
3895 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); \
3896 bool const fSignIn = pr80Val->s.fSign; \
3897 \
3898 /* \
3899 * Deal with normal numbers first. \
3900 */ \
3901 if (RTFLOAT80U_IS_NORMAL(pr80Val)) \
3902 { \
3903 uint64_t uMantissa = pr80Val->s.uMantissa; \
3904 int32_t iExponent = (int32_t)pr80Val->s.uExponent - RTFLOAT80U_EXP_BIAS; \
3905 \
3906 if ((uint32_t)iExponent <= a_cBits - 2) \
3907 { \
3908 unsigned const cShiftOff = 63 - iExponent; \
3909 uint64_t const fRoundingOffMask = RT_BIT_64(cShiftOff) - 1; \
3910 uint64_t const uRoundingAdd = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST \
3911 ? RT_BIT_64(cShiftOff - 1) \
3912 : (fFcw & X86_FCW_RC_MASK) == (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP) \
3913 ? fRoundingOffMask \
3914 : 0; \
3915 uint64_t fRoundedOff = uMantissa & fRoundingOffMask; \
3916 \
3917 uMantissa >>= cShiftOff; \
3918 uint64_t const uRounding = (fRoundedOff + uRoundingAdd) >> cShiftOff; \
3919 uMantissa += uRounding; \
3920 if (!(uMantissa & RT_BIT_64(a_cBits - 1))) \
3921 { \
3922 if (fRoundedOff) \
3923 { \
3924 if ((uMantissa & 1) && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST && fRoundedOff == uRoundingAdd) \
3925 uMantissa &= ~(uint64_t)1; /* round to even number if equal distance between up/down. */ \
3926 else if (uRounding) \
3927 fFsw |= X86_FSW_C1; \
3928 fFsw |= X86_FSW_PE; \
3929 if (!(fFcw & X86_FCW_PM)) \
3930 fFsw |= X86_FSW_ES | X86_FSW_B; \
3931 } \
3932 \
3933 if (!fSignIn) \
3934 *piDst = (a_iType)uMantissa; \
3935 else \
3936 *piDst = -(a_iType)uMantissa; \
3937 } \
3938 else \
3939 { \
3940 /* overflowed after rounding. */ \
3941 AssertMsg(iExponent == a_cBits - 2 && uMantissa == RT_BIT_64(a_cBits - 1), \
3942 ("e=%d m=%#RX64 (org %#RX64) s=%d; shift=%d ro=%#RX64 rm=%#RX64 ra=%#RX64\n", iExponent, uMantissa, \
3943 pr80Val->s.uMantissa, fSignIn, cShiftOff, fRoundedOff, fRoundingOffMask, uRoundingAdd)); \
3944 \
3945 /* Special case for the integer minimum value. */ \
3946 if (fSignIn) \
3947 { \
3948 *piDst = a_iTypeMin; \
3949 fFsw |= X86_FSW_PE | X86_FSW_C1; \
3950 if (!(fFcw & X86_FCW_PM)) \
3951 fFsw |= X86_FSW_ES | X86_FSW_B; \
3952 } \
3953 else \
3954 { \
3955 fFsw |= X86_FSW_IE; \
3956 if (fFcw & X86_FCW_IM) \
3957 *piDst = a_iTypeMin; \
3958 else \
3959 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
3960 } \
3961 } \
3962 } \
3963 /* \
3964 * Tiny sub-zero numbers. \
3965 */ \
3966 else if (iExponent < 0) \
3967 { \
3968 if (!fSignIn) \
3969 { \
3970 if ( (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_UP \
3971 || (iExponent == -1 && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST)) \
3972 { \
3973 *piDst = 1; \
3974 fFsw |= X86_FSW_C1; \
3975 } \
3976 else \
3977 *piDst = 0; \
3978 } \
3979 else \
3980 { \
3981 if ( (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_UP \
3982 || (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_ZERO \
3983 || (iExponent < -1 && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST)) \
3984 *piDst = 0; \
3985 else \
3986 { \
3987 *piDst = -1; \
3988 fFsw |= X86_FSW_C1; \
3989 } \
3990 } \
3991 fFsw |= X86_FSW_PE; \
3992 if (!(fFcw & X86_FCW_PM)) \
3993 fFsw |= X86_FSW_ES | X86_FSW_B; \
3994 } \
3995 /* \
3996 * Special MIN case. \
3997 */ \
3998 else if ( fSignIn && iExponent == a_cBits - 1 \
3999 && ( a_cBits < 64 && (fFcw & X86_FCW_RC_MASK) != X86_FCW_RC_DOWN \
4000 ? uMantissa < (RT_BIT_64(63) | RT_BIT_64(65 - a_cBits)) \
4001 : uMantissa == RT_BIT_64(63))) \
4002 { \
4003 *piDst = a_iTypeMin; \
4004 if (uMantissa & (RT_BIT_64(64 - a_cBits + 1) - 1)) \
4005 { \
4006 fFsw |= X86_FSW_PE; \
4007 if (!(fFcw & X86_FCW_PM)) \
4008 fFsw |= X86_FSW_ES | X86_FSW_B; \
4009 } \
4010 } \
4011 /* \
4012 * Too large/small number outside the target integer range. \
4013 */ \
4014 else \
4015 { \
4016 fFsw |= X86_FSW_IE; \
4017 if (fFcw & X86_FCW_IM) \
4018 *piDst = a_iTypeIndefinite; \
4019 else \
4020 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
4021 } \
4022 } \
4023 /* \
4024 * Map both +0 and -0 to integer zero (signless/+). \
4025 */ \
4026 else if (RTFLOAT80U_IS_ZERO(pr80Val)) \
4027 *piDst = 0; \
4028 /* \
4029 * Denormals are just really tiny sub-zero numbers that are either rounded \
4030 * to zero, 1 or -1 depending on sign and rounding control. \
4031 */ \
4032 else if (RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Val) || RTFLOAT80U_IS_DENORMAL(pr80Val)) \
4033 { \
4034 if ((fFcw & X86_FCW_RC_MASK) != (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)) \
4035 *piDst = 0; \
4036 else \
4037 { \
4038 *piDst = fSignIn ? -1 : 1; \
4039 fFsw |= X86_FSW_C1; \
4040 } \
4041 fFsw |= X86_FSW_PE; \
4042 if (!(fFcw & X86_FCW_PM)) \
4043 fFsw |= X86_FSW_ES | X86_FSW_B; \
4044 } \
4045 /* \
4046 * All other special values are considered invalid arguments and result \
4047 * in an IE exception and indefinite value if masked. \
4048 */ \
4049 else \
4050 { \
4051 fFsw |= X86_FSW_IE; \
4052 if (fFcw & X86_FCW_IM) \
4053 *piDst = a_iTypeIndefinite; \
4054 else \
4055 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
4056 } \
4057 *pu16FSW = fFsw; \
4058}
4059EMIT_FIST(64, int64_t, INT64_MIN, X86_FPU_INT64_INDEFINITE)
4060EMIT_FIST(32, int32_t, INT32_MIN, X86_FPU_INT32_INDEFINITE)
4061EMIT_FIST(16, int16_t, INT16_MIN, X86_FPU_INT16_INDEFINITE)
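/*
 * Worked example (illustrative only, not part of the emulation): fist with the
 * default round-to-nearest control converts 2.5 to 2 but 3.5 to 4; the midway
 * case above snaps to the even neighbour, setting PE and setting C1 only when
 * the value was actually rounded up.
 */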
4062
4063#endif /*IEM_WITHOUT_ASSEMBLY */
4064
4065
4066/*
4067 * The FISTT instruction was added with SSE3 and is a lot simpler than FIST.
4068 *
4069 * The 16-bit version is a bit peculiar, though, as it seems to raise IE
4070 * as if it were the 32-bit version (i.e. starting with exp 31 instead of 15),
4071 * thus the @a a_cBitsIn.
4072 */
4073#define EMIT_FISTT(a_cBits, a_cBitsIn, a_iType, a_iTypeMin, a_iTypeMax, a_iTypeIndefinite, a_Suffix, a_fIntelVersion) \
4074IEM_DECL_IMPL_DEF(void, RT_CONCAT3(iemAImpl_fistt_r80_to_i,a_cBits,a_Suffix),(PCX86FXSTATE pFpuState, uint16_t *pu16FSW, \
4075 a_iType *piDst, PCRTFLOAT80U pr80Val)) \
4076{ \
4077 uint16_t const fFcw = pFpuState->FCW; \
4078 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)); \
4079 bool const fSignIn = pr80Val->s.fSign; \
4080 \
4081 /* \
4082 * Deal with normal numbers first. \
4083 */ \
4084 if (RTFLOAT80U_IS_NORMAL(pr80Val)) \
4085 { \
4086 uint64_t uMantissa = pr80Val->s.uMantissa; \
4087 int32_t iExponent = (int32_t)pr80Val->s.uExponent - RTFLOAT80U_EXP_BIAS; \
4088 \
4089 if ((uint32_t)iExponent <= a_cBitsIn - 2) \
4090 { \
4091 unsigned const cShiftOff = 63 - iExponent; \
4092 uint64_t const fRoundingOffMask = RT_BIT_64(cShiftOff) - 1; \
4093 uint64_t const fRoundedOff = uMantissa & fRoundingOffMask; \
4094 uMantissa >>= cShiftOff; \
4095 /*Assert(!(uMantissa & RT_BIT_64(a_cBits - 1)));*/ \
4096 if (!fSignIn) \
4097 *piDst = (a_iType)uMantissa; \
4098 else \
4099 *piDst = -(a_iType)uMantissa; \
4100 \
4101 if (fRoundedOff) \
4102 { \
4103 fFsw |= X86_FSW_PE; \
4104 if (!(fFcw & X86_FCW_PM)) \
4105 fFsw |= X86_FSW_ES | X86_FSW_B; \
4106 } \
4107 } \
4108 /* \
4109 * Tiny sub-zero numbers. \
4110 */ \
4111 else if (iExponent < 0) \
4112 { \
4113 *piDst = 0; \
4114 fFsw |= X86_FSW_PE; \
4115 if (!(fFcw & X86_FCW_PM)) \
4116 fFsw |= X86_FSW_ES | X86_FSW_B; \
4117 } \
4118 /* \
4119 * Special MIN case. \
4120 */ \
4121 else if ( fSignIn && iExponent == a_cBits - 1 \
4122 && (a_cBits < 64 \
4123 ? uMantissa < (RT_BIT_64(63) | RT_BIT_64(65 - a_cBits)) \
4124 : uMantissa == RT_BIT_64(63)) ) \
4125 { \
4126 *piDst = a_iTypeMin; \
4127 if (uMantissa & (RT_BIT_64(64 - a_cBits + 1) - 1)) \
4128 { \
4129 fFsw |= X86_FSW_PE; \
4130 if (!(fFcw & X86_FCW_PM)) \
4131 fFsw |= X86_FSW_ES | X86_FSW_B; \
4132 } \
4133 } \
4134 /* \
4135 * Figure this weirdness. \
4136 */ \
4137 else if (a_cBits == 16 && fSignIn && iExponent == 31 && uMantissa < UINT64_C(0x8000100000000000) ) \
4138 { \
4139 *piDst = 0; \
4140 if (uMantissa & (RT_BIT_64(64 - a_cBits + 1) - 1)) \
4141 { \
4142 fFsw |= X86_FSW_PE; \
4143 if (!(fFcw & X86_FCW_PM)) \
4144 fFsw |= X86_FSW_ES | X86_FSW_B; \
4145 } \
4146 } \
4147 /* \
4148 * Too large/small number outside the target integer range. \
4149 */ \
4150 else \
4151 { \
4152 fFsw |= X86_FSW_IE; \
4153 if (fFcw & X86_FCW_IM) \
4154 *piDst = a_iTypeIndefinite; \
4155 else \
4156 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
4157 } \
4158 } \
4159 /* \
4160 * Map both +0 and -0 to integer zero (signless/+). \
4161 */ \
4162 else if (RTFLOAT80U_IS_ZERO(pr80Val)) \
4163 *piDst = 0; \
4164 /* \
4165 * Denormals are just really tiny sub-zero numbers that are truncated to zero. \
4166 */ \
4167 else if (RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Val) || RTFLOAT80U_IS_DENORMAL(pr80Val)) \
4168 { \
4169 *piDst = 0; \
4170 fFsw |= X86_FSW_PE; \
4171 if (!(fFcw & X86_FCW_PM)) \
4172 fFsw |= X86_FSW_ES | X86_FSW_B; \
4173 } \
4174 /* \
4175 * All other special values are considered invalid arguments and result \
4176 * in an IE exception and indefinite value if masked. \
4177 */ \
4178 else \
4179 { \
4180 fFsw |= X86_FSW_IE; \
4181 if (fFcw & X86_FCW_IM) \
4182 *piDst = a_iTypeIndefinite; \
4183 else \
4184 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT); \
4185 } \
4186 *pu16FSW = fFsw; \
4187}
4188#if defined(IEM_WITHOUT_ASSEMBLY)
4189EMIT_FISTT(64, 64, int64_t, INT64_MIN, INT64_MAX, X86_FPU_INT64_INDEFINITE, RT_NOTHING, 1)
4190EMIT_FISTT(32, 32, int32_t, INT32_MIN, INT32_MAX, X86_FPU_INT32_INDEFINITE, RT_NOTHING, 1)
4191EMIT_FISTT(16, 32, int16_t, INT16_MIN, INT16_MAX, 0 /* X86_FPU_INT16_INDEFINITE - weird weird weird! */, RT_NOTHING, 1)
4192#endif
4193EMIT_FISTT(16, 32, int16_t, INT16_MIN, INT16_MAX, 0 /* X86_FPU_INT16_INDEFINITE - weird weird weird! */, _intel, 1)
4194EMIT_FISTT(16, 32, int16_t, INT16_MIN, INT16_MAX, 0 /* X86_FPU_INT16_INDEFINITE - weird weird weird! */, _amd, 0)
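/*
 * Worked example (illustrative only, not part of the emulation): fistt always
 * chops towards zero, so 2.9 and 2.1 both convert to 2 and -2.9 converts to -2
 * regardless of the FCW rounding control, with PE raised whenever bits are
 * dropped.
 */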
4195
4196
4197#if defined(IEM_WITHOUT_ASSEMBLY)
4198
4199IEM_DECL_IMPL_DEF(void, iemAImpl_fst_r80_to_d80,(PCX86FXSTATE pFpuState, uint16_t *pu16FSW,
4200 PRTPBCD80U pd80Dst, PCRTFLOAT80U pr80Src))
4201{
4202 /*static RTPBCD80U const s_ad80MaxMin[2] = { RTPBCD80U_INIT_MAX(), RTPBCD80U_INIT_MIN() };*/
4203 static RTPBCD80U const s_ad80Zeros[2] = { RTPBCD80U_INIT_ZERO(0), RTPBCD80U_INIT_ZERO(1) };
4204 static RTPBCD80U const s_ad80One[2] = { RTPBCD80U_INIT_C(0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,1),
4205 RTPBCD80U_INIT_C(1, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,0, 0,1) };
4206 static RTPBCD80U const s_d80Indefinite = RTPBCD80U_INIT_INDEFINITE();
4207
4208 uint16_t const fFcw = pFpuState->FCW;
4209 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3));
4210 bool const fSignIn = pr80Src->s.fSign;
4211
4212 /*
4213 * Deal with normal numbers first.
4214 */
4215 if (RTFLOAT80U_IS_NORMAL(pr80Src))
4216 {
4217 uint64_t uMantissa = pr80Src->s.uMantissa;
4218 int32_t iExponent = (int32_t)pr80Src->s.uExponent - RTFLOAT80U_EXP_BIAS;
4219 if ( (uint32_t)iExponent <= 58
4220 || ((uint32_t)iExponent == 59 && uMantissa <= UINT64_C(0xde0b6b3a763fffff)) )
4221 {
4222 unsigned const cShiftOff = 63 - iExponent;
4223 uint64_t const fRoundingOffMask = RT_BIT_64(cShiftOff) - 1;
4224 uint64_t const uRoundingAdd = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
4225 ? RT_BIT_64(cShiftOff - 1)
4226 : (fFcw & X86_FCW_RC_MASK) == (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)
4227 ? fRoundingOffMask
4228 : 0;
4229 uint64_t fRoundedOff = uMantissa & fRoundingOffMask;
4230
4231 uMantissa >>= cShiftOff;
4232 uint64_t const uRounding = (fRoundedOff + uRoundingAdd) >> cShiftOff;
4233 uMantissa += uRounding;
4234 if (uMantissa <= (uint64_t)RTPBCD80U_MAX)
4235 {
4236 if (fRoundedOff)
4237 {
4238 if ((uMantissa & 1) && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST && fRoundedOff == uRoundingAdd)
4239 uMantissa &= ~(uint64_t)1; /* round to even number if equal distance between up/down. */
4240 else if (uRounding)
4241 fFsw |= X86_FSW_C1;
4242 fFsw |= X86_FSW_PE;
4243 if (!(fFcw & X86_FCW_PM))
4244 fFsw |= X86_FSW_ES | X86_FSW_B;
4245 }
4246
4247 pd80Dst->s.fSign = fSignIn;
4248 pd80Dst->s.uPad = 0;
4249 for (size_t iPair = 0; iPair < RT_ELEMENTS(pd80Dst->s.abPairs); iPair++)
4250 {
4251 unsigned const uDigits = uMantissa % 100;
4252 uMantissa /= 100;
4253 uint8_t const bLo = uDigits % 10;
4254 uint8_t const bHi = uDigits / 10;
4255 pd80Dst->s.abPairs[iPair] = RTPBCD80U_MAKE_PAIR(bHi, bLo);
4256 }
4257 }
4258 else
4259 {
4260 /* overflowed after rounding. */
4261 fFsw |= X86_FSW_IE;
4262 if (fFcw & X86_FCW_IM)
4263 *pd80Dst = s_d80Indefinite;
4264 else
4265 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT);
4266 }
4267 }
4268 /*
4269 * Tiny sub-zero numbers.
4270 */
4271 else if (iExponent < 0)
4272 {
4273 if (!fSignIn)
4274 {
4275 if ( (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_UP
4276 || (iExponent == -1 && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST))
4277 {
4278 *pd80Dst = s_ad80One[fSignIn];
4279 fFsw |= X86_FSW_C1;
4280 }
4281 else
4282 *pd80Dst = s_ad80Zeros[fSignIn];
4283 }
4284 else
4285 {
4286 if ( (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_UP
4287 || (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_ZERO
4288 || (iExponent < -1 && (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST))
4289 *pd80Dst = s_ad80Zeros[fSignIn];
4290 else
4291 {
4292 *pd80Dst = s_ad80One[fSignIn];
4293 fFsw |= X86_FSW_C1;
4294 }
4295 }
4296 fFsw |= X86_FSW_PE;
4297 if (!(fFcw & X86_FCW_PM))
4298 fFsw |= X86_FSW_ES | X86_FSW_B;
4299 }
4300 /*
4301 * Too large/small number outside the target BCD range.
4302 */
4303 else
4304 {
4305 fFsw |= X86_FSW_IE;
4306 if (fFcw & X86_FCW_IM)
4307 *pd80Dst = s_d80Indefinite;
4308 else
4309 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT);
4310 }
4311 }
4312 /*
4313 * Map both +0 and -0 to integer zero (signless/+).
4314 */
4315 else if (RTFLOAT80U_IS_ZERO(pr80Src))
4316 *pd80Dst = s_ad80Zeros[fSignIn];
4317 /*
4318 * Denormals are just really tiny sub-zero numbers that are either rounded
4319 * to zero, 1 or -1 depending on sign and rounding control.
4320 */
4321 else if (RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Src) || RTFLOAT80U_IS_DENORMAL(pr80Src))
4322 {
4323 if ((fFcw & X86_FCW_RC_MASK) != (fSignIn ? X86_FCW_RC_DOWN : X86_FCW_RC_UP))
4324 *pd80Dst = s_ad80Zeros[fSignIn];
4325 else
4326 {
4327 *pd80Dst = s_ad80One[fSignIn];
4328 fFsw |= X86_FSW_C1;
4329 }
4330 fFsw |= X86_FSW_PE;
4331 if (!(fFcw & X86_FCW_PM))
4332 fFsw |= X86_FSW_ES | X86_FSW_B;
4333 }
4334 /*
4335 * All other special values are considered invalid arguments and result
4336 * in an IE exception and indefinite value if masked.
4337 */
4338 else
4339 {
4340 fFsw |= X86_FSW_IE;
4341 if (fFcw & X86_FCW_IM)
4342 *pd80Dst = s_d80Indefinite;
4343 else
4344 fFsw |= X86_FSW_ES | X86_FSW_B | (7 << X86_FSW_TOP_SHIFT);
4345 }
4346 *pu16FSW = fFsw;
4347}
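/*
 * Worked example (illustrative only, not part of the emulation): storing
 * 1234.0 as packed BCD produces abPairs[0]=0x34, abPairs[1]=0x12 and zeros in
 * the remaining pairs, the exact inverse of the iemAImpl_fld_r80_from_d80
 * conversion above.
 */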
4348
4349
4350/*********************************************************************************************************************************
4351* FPU Helpers *
4352*********************************************************************************************************************************/
4353AssertCompileSize(RTFLOAT128U, 16);
4354AssertCompileSize(RTFLOAT80U, 10);
4355AssertCompileSize(RTFLOAT64U, 8);
4356AssertCompileSize(RTFLOAT32U, 4);
4357
4358/**
4359 * Normalizes a possible pseudo-denormal value.
4360 *
4361 * Pseudo-denormal values are some oddities from the 8087 & 287 days. They are
4362 * denormals with the J-bit set, so they can simply be rewritten as 2**-16382,
4363 * i.e. changing uExponent from 0 to 1.
4364 *
4365 * This macro will declare a RTFLOAT80U with the name given by
4366 * @a a_r80ValNormalized and update the @a a_pr80Val variable to point to it if
4367 * a normalization was performed.
4368 *
4369 * @note This must be applied before calling SoftFloat with a value that could
4370 * be a pseudo-denormal, as SoftFloat doesn't handle pseudo-denormals
4371 * correctly.
4372 */
4373#define IEM_NORMALIZE_PSEUDO_DENORMAL(a_pr80Val, a_r80ValNormalized) \
4374 RTFLOAT80U a_r80ValNormalized; \
4375 if (RTFLOAT80U_IS_PSEUDO_DENORMAL(a_pr80Val)) \
4376 { \
4377 a_r80ValNormalized = *a_pr80Val; \
4378 a_r80ValNormalized.s.uExponent = 1; \
4379 a_pr80Val = &a_r80ValNormalized; \
4380 } else do {} while (0)
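/*
 * Usage sketch (illustrative only; pr80Src stands in for whatever input
 * pointer the caller holds):
 *
 *     PCRTFLOAT80U pr80Val = pr80Src;
 *     IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val, r80ValNormalized);
 *     // pr80Val now points at r80ValNormalized (uExponent raised to 1) if
 *     // the input was a pseudo-denormal, otherwise it is left untouched.
 */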
4381
4382#ifdef IEM_WITH_FLOAT128_FOR_FPU
4383
4384DECLINLINE(int) iemFpuF128SetRounding(uint16_t fFcw)
4385{
4386 int fNew;
4387 switch (fFcw & X86_FCW_RC_MASK)
4388 {
4389 default:
4390 case X86_FCW_RC_NEAREST: fNew = FE_TONEAREST; break;
4391 case X86_FCW_RC_ZERO: fNew = FE_TOWARDZERO; break;
4392 case X86_FCW_RC_UP: fNew = FE_UPWARD; break;
4393 case X86_FCW_RC_DOWN: fNew = FE_DOWNWARD; break;
4394 }
4395 int fOld = fegetround();
4396 fesetround(fNew);
4397 return fOld;
4398}
4399
4400
4401DECLINLINE(void) iemFpuF128RestoreRounding(int fOld)
4402{
4403 fesetround(fOld);
4404}
4405
4406DECLINLINE(_Float128) iemFpuF128FromFloat80(PCRTFLOAT80U pr80Val, uint16_t fFcw)
4407{
4408 RT_NOREF(fFcw);
4409 RTFLOAT128U Tmp;
4410 Tmp.s2.uSignAndExponent = pr80Val->s2.uSignAndExponent;
4411 Tmp.s2.uFractionHigh = (uint16_t)((pr80Val->s2.uMantissa & (RT_BIT_64(63) - 1)) >> 48);
4412 Tmp.s2.uFractionMid = (uint32_t)(pr80Val->s2.uMantissa >> 16); /* fraction bits 16..47 */
4413 Tmp.s2.uFractionLow = pr80Val->s2.uMantissa << 48;
4414 if (RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Val))
4415 {
4416 Assert(Tmp.s.uExponent == 0);
4417 Tmp.s2.uSignAndExponent++;
4418 }
4419 return *(_Float128 *)&Tmp;
4420}
4421
4422
4423DECLINLINE(uint16_t) iemFpuF128ToFloat80(PRTFLOAT80U pr80Dst, _Float128 rd128ValSrc, uint16_t fFcw, uint16_t fFsw)
4424{
4425 RT_NOREF(fFcw);
4426 RTFLOAT128U Tmp;
4427 *(_Float128 *)&Tmp = rd128ValSrc;
4428 ASMCompilerBarrier();
4429 if (RTFLOAT128U_IS_NORMAL(&Tmp))
4430 {
4431 pr80Dst->s.fSign = Tmp.s64.fSign;
4432 pr80Dst->s.uExponent = Tmp.s64.uExponent;
4433 uint64_t uFraction = Tmp.s64.uFractionHi << (63 - 48)
4434 | Tmp.s64.uFractionLo >> (64 - 15);
4435
4436 /* Do rounding - just truncate in near mode when midway on an even outcome. */
4437 unsigned const cShiftOff = 64 - 15;
4438 uint64_t const fRoundingOffMask = RT_BIT_64(cShiftOff) - 1;
4439 uint64_t const uRoundedOff = Tmp.s64.uFractionLo & fRoundingOffMask;
4440 if (uRoundedOff)
4441 {
4442 uint64_t const uRoundingAdd = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
4443 ? RT_BIT_64(cShiftOff - 1)
4444 : (fFcw & X86_FCW_RC_MASK) == (Tmp.s64.fSign ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)
4445 ? fRoundingOffMask
4446 : 0;
4447 if ( (fFcw & X86_FCW_RC_MASK) != X86_FCW_RC_NEAREST
4448 || (Tmp.s64.uFractionLo & RT_BIT_64(cShiftOff))
4449 || uRoundedOff != uRoundingAdd)
4450 {
4451 if ((uRoundedOff + uRoundingAdd) >> cShiftOff)
4452 {
4453 uFraction += 1;
4454 if (!(uFraction & RT_BIT_64(63)))
4455 { /* likely */ }
4456 else
4457 {
4458 uFraction >>= 1;
4459 pr80Dst->s.uExponent++;
4460 if (pr80Dst->s.uExponent == RTFLOAT64U_EXP_MAX)
4461 return fFsw;
4462 }
4463 fFsw |= X86_FSW_C1;
4464 }
4465 }
4466 fFsw |= X86_FSW_PE;
4467 if (!(fFcw & X86_FCW_PM))
4468 fFsw |= X86_FSW_ES | X86_FSW_B;
4469 }
4470 pr80Dst->s.uMantissa = RT_BIT_64(63) | uFraction;
4471 }
4472 else if (RTFLOAT128U_IS_ZERO(&Tmp))
4473 {
4474 pr80Dst->s.fSign = Tmp.s64.fSign;
4475 pr80Dst->s.uExponent = 0;
4476 pr80Dst->s.uMantissa = 0;
4477 }
4478 else if (RTFLOAT128U_IS_INF(&Tmp))
4479 {
4480 pr80Dst->s.fSign = Tmp.s64.fSign;
4481 pr80Dst->s.uExponent = 0;
4482 pr80Dst->s.uMantissa = 0;
4483 }
4484 return fFsw;
4485}
4486
4487
4488#else /* !IEM_WITH_FLOAT128_FOR_FPU - SoftFloat */
4489
4490/** Initializer for the SoftFloat state structure. */
4491# define IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(a_fFcw) \
4492 { \
4493 softfloat_tininess_afterRounding, \
4494 ((a_fFcw) & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST ? (uint8_t)softfloat_round_near_even \
4495 : ((a_fFcw) & X86_FCW_RC_MASK) == X86_FCW_RC_UP ? (uint8_t)softfloat_round_max \
4496 : ((a_fFcw) & X86_FCW_RC_MASK) == X86_FCW_RC_DOWN ? (uint8_t)softfloat_round_min \
4497 : (uint8_t)softfloat_round_minMag, \
4498 0, \
4499 (uint8_t)((a_fFcw) & X86_FCW_XCPT_MASK), \
4500 ((a_fFcw) & X86_FCW_PC_MASK) == X86_FCW_PC_53 ? (uint8_t)64 \
4501 : ((a_fFcw) & X86_FCW_PC_MASK) == X86_FCW_PC_24 ? (uint8_t)32 : (uint8_t)80 \
4502 }
4503
4504/** Returns updated FSW from a SoftFloat state and exception mask (FCW). */
4505# define IEM_SOFTFLOAT_STATE_TO_FSW(a_fFsw, a_pSoftState, a_fFcw) \
4506 ( (a_fFsw) \
4507 | (uint16_t)(((a_pSoftState)->exceptionFlags & softfloat_flag_c1) << 2) \
4508 | ((a_pSoftState)->exceptionFlags & X86_FSW_XCPT_MASK) \
4509 | ( ((a_pSoftState)->exceptionFlags & X86_FSW_XCPT_MASK) & (~(a_fFcw) & X86_FSW_XCPT_MASK) \
4510 ? X86_FSW_ES | X86_FSW_B : 0) )
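/*
 * Usage sketch (illustrative only; r128A and r128B are hypothetical operands):
 *
 *     softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(pFpuState->FCW);
 *     float128_t r128Result = f128_mul(r128A, r128B, &SoftState);
 *     fFsw = IEM_SOFTFLOAT_STATE_TO_FSW(fFsw, &SoftState, pFpuState->FCW);
 */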
4511
4512
4513DECLINLINE(float128_t) iemFpuSoftF128Precision(float128_t r128, unsigned cBits, uint16_t fFcw = X86_FCW_RC_NEAREST)
4514{
4515 RT_NOREF(fFcw);
4516 Assert(cBits > 64);
4517# if 0 /* rounding does not seem to help */
4518 uint64_t off = r128.v[0] & (RT_BIT_64(1 + 112 - cBits) - 1);
4519 r128.v[0] &= ~(RT_BIT_64(1 + 112 - cBits) - 1);
4520 if (off >= RT_BIT_64(1 + 112 - cBits - 1)
4521 && (r128.v[0] & RT_BIT_64(1 + 112 - cBits)))
4522 {
4523 uint64_t uOld = r128.v[0];
4524 r128.v[0] += RT_BIT_64(1 + 112 - cBits);
4525 if (r128.v[0] < uOld)
4526 r128.v[1] += 1;
4527 }
4528# else
4529 r128.v[0] &= ~(RT_BIT_64(1 + 112 - cBits) - 1);
4530# endif
4531 return r128;
4532}
4533
4534
4535DECLINLINE(float128_t) iemFpuSoftF128PrecisionIprt(PCRTFLOAT128U pr128, unsigned cBits, uint16_t fFcw = X86_FCW_RC_NEAREST)
4536{
4537 RT_NOREF(fFcw);
4538 Assert(cBits > 64);
4539# if 0 /* rounding does not seem to help, not even on constants */
4540 float128_t r128 = { pr128->au64[0], pr128->au64[1] };
4541 uint64_t off = r128.v[0] & (RT_BIT_64(1 + 112 - cBits) - 1);
4542 r128.v[0] &= ~(RT_BIT_64(1 + 112 - cBits) - 1);
4543 if (off >= RT_BIT_64(1 + 112 - cBits - 1)
4544 && (r128.v[0] & RT_BIT_64(1 + 112 - cBits)))
4545 {
4546 uint64_t uOld = r128.v[0];
4547 r128.v[0] += RT_BIT_64(1 + 112 - cBits);
4548 if (r128.v[0] < uOld)
4549 r128.v[1] += 1;
4550 }
4551 return r128;
4552# else
4553 float128_t r128 = { { pr128->au64[0] & ~(RT_BIT_64(1 + 112 - cBits) - 1), pr128->au64[1] } };
4554 return r128;
4555# endif
4556}
4557
4558
4559# if 0 /* unused */
4560DECLINLINE(float128_t) iemFpuSoftF128FromIprt(PCRTFLOAT128U pr128)
4561{
4562 float128_t r128 = { { pr128->au64[0], pr128->au64[1] } };
4563 return r128;
4564}
4565# endif
4566
4567
4568/** Converts a 80-bit floating point value to SoftFloat 128-bit floating point. */
4569DECLINLINE(float128_t) iemFpuSoftF128FromFloat80(PCRTFLOAT80U pr80Val)
4570{
4571 extFloat80_t Tmp;
4572 Tmp.signExp = pr80Val->s2.uSignAndExponent;
4573 Tmp.signif = pr80Val->s2.uMantissa;
4574 softfloat_state_t Ignored = SOFTFLOAT_STATE_INIT_DEFAULTS();
4575 return extF80_to_f128(Tmp, &Ignored);
4576}
4577
4578
4579/**
4580 * Converts from the packed IPRT 80-bit floating point (RTFLOAT80U) format to
4581 * the SoftFloat extended 80-bit floating point format (extFloat80_t).
4582 *
4583 * This is only a structure format conversion, nothing else.
4584 */
4585DECLINLINE(extFloat80_t) iemFpuSoftF80FromIprt(PCRTFLOAT80U pr80Val)
4586{
4587 extFloat80_t Tmp;
4588 Tmp.signExp = pr80Val->s2.uSignAndExponent;
4589 Tmp.signif = pr80Val->s2.uMantissa;
4590 return Tmp;
4591}
4592
4593
4594/**
4595 * Converts from SoftFloat extended 80-bit floating point format (extFloat80_t)
4596 * to the packed IPRT 80-bit floating point (RTFLOAT80U) format.
4597 *
4598 * This is only a structure format conversion, nothing else.
4599 */
4600DECLINLINE(PRTFLOAT80U) iemFpuSoftF80ToIprt(PRTFLOAT80U pr80Dst, extFloat80_t const r80XSrc)
4601{
4602 pr80Dst->s2.uSignAndExponent = r80XSrc.signExp;
4603 pr80Dst->s2.uMantissa = r80XSrc.signif;
4604 return pr80Dst;
4605}
4606
4607
4608DECLINLINE(uint16_t) iemFpuSoftF128ToFloat80(PRTFLOAT80U pr80Dst, float128_t r128Src, uint16_t fFcw, uint16_t fFsw)
4609{
4610 RT_NOREF(fFcw);
4611 RTFLOAT128U Tmp;
4612 *(float128_t *)&Tmp = r128Src;
4613 ASMCompilerBarrier();
4614
4615 if (RTFLOAT128U_IS_NORMAL(&Tmp))
4616 {
4617 pr80Dst->s.fSign = Tmp.s64.fSign;
4618 pr80Dst->s.uExponent = Tmp.s64.uExponent;
4619 uint64_t uFraction = Tmp.s64.uFractionHi << (63 - 48)
4620 | Tmp.s64.uFractionLo >> (64 - 15);
4621
4622 /* Do rounding - just truncate in near mode when midway on an even outcome. */
4623 unsigned const cShiftOff = 64 - 15;
4624 uint64_t const fRoundingOffMask = RT_BIT_64(cShiftOff) - 1;
4625 uint64_t const uRoundedOff = Tmp.s64.uFractionLo & fRoundingOffMask;
4626 if (uRoundedOff)
4627 {
4628 uint64_t const uRoundingAdd = (fFcw & X86_FCW_RC_MASK) == X86_FCW_RC_NEAREST
4629 ? RT_BIT_64(cShiftOff - 1)
4630 : (fFcw & X86_FCW_RC_MASK) == (Tmp.s64.fSign ? X86_FCW_RC_DOWN : X86_FCW_RC_UP)
4631 ? fRoundingOffMask
4632 : 0;
4633 if ( (fFcw & X86_FCW_RC_MASK) != X86_FCW_RC_NEAREST
4634 || (Tmp.s64.uFractionLo & RT_BIT_64(cShiftOff))
4635 || uRoundedOff != uRoundingAdd)
4636 {
4637 if ((uRoundedOff + uRoundingAdd) >> cShiftOff)
4638 {
4639 uFraction += 1;
4640 if (!(uFraction & RT_BIT_64(63)))
4641 { /* likely */ }
4642 else
4643 {
4644 uFraction >>= 1;
4645 pr80Dst->s.uExponent++;
4646 if (pr80Dst->s.uExponent == RTFLOAT64U_EXP_MAX)
4647 return fFsw;
4648 }
4649 fFsw |= X86_FSW_C1;
4650 }
4651 }
4652 fFsw |= X86_FSW_PE;
4653 if (!(fFcw & X86_FCW_PM))
4654 fFsw |= X86_FSW_ES | X86_FSW_B;
4655 }
4656
4657 pr80Dst->s.uMantissa = RT_BIT_64(63) | uFraction;
4658 }
4659 else if (RTFLOAT128U_IS_ZERO(&Tmp))
4660 {
4661 pr80Dst->s.fSign = Tmp.s64.fSign;
4662 pr80Dst->s.uExponent = 0;
4663 pr80Dst->s.uMantissa = 0;
4664 }
4665 else if (RTFLOAT128U_IS_INF(&Tmp))
4666 {
4667 pr80Dst->s.fSign = Tmp.s64.fSign;
4668 pr80Dst->s.uExponent = RTFLOAT80U_EXP_MAX;
4669 pr80Dst->s.uMantissa = RT_BIT_64(63);
4670 }
4671 return fFsw;
4672}
4673
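/*
 * Worked example for the rounding above (explanatory note, not from the
 * original source): cShiftOff is 49, so the low 49 bits of uFractionLo are
 * dropped when squeezing the 112-bit f128 fraction into the 63 fraction bits
 * of the 80-bit format.  RC_NEAREST adds half an ULP (RT_BIT_64(48)) unless
 * the dropped bits sit exactly midway and the kept LSB is already even, in
 * which case it truncates (round-to-nearest-even).  RC_UP/RC_DOWN add the
 * full 49-bit mask when the mode points away from zero for the given sign,
 * so any non-zero remainder carries; RC_ZERO always truncates.  A non-zero
 * remainder also sets X86_FSW_PE (plus ES and B if #P is unmasked).
 */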
4674
4675/**
4676 * Helper for transferring exception flags and C1 to FSW and setting the result value
4677 * accordingly.
4678 *
4679 * @returns Updated FSW.
4680 * @param pSoftState The SoftFloat state following the operation.
4681 * @param r80XResult The result of the SoftFloat operation.
4682 * @param pr80Result Where to store the result for IEM.
4683 * @param fFcw The FPU control word.
4684 * @param fFsw The FSW before the operation, with necessary bits
4685 * cleared and such.
4686 * @param pr80XcptResult Alternative return value for use when an unmasked \#IE is
4687 * raised.
4688 */
4689DECLINLINE(uint16_t) iemFpuSoftStateAndF80ToFswAndIprtResult(softfloat_state_t const *pSoftState, extFloat80_t r80XResult,
4690 PRTFLOAT80U pr80Result, uint16_t fFcw, uint16_t fFsw,
4691 PCRTFLOAT80U pr80XcptResult)
4692{
4693 fFsw |= (pSoftState->exceptionFlags & X86_FSW_XCPT_MASK)
4694 | (uint16_t)((pSoftState->exceptionFlags & softfloat_flag_c1) << 2);
4695 if (fFsw & ~fFcw & X86_FSW_XCPT_MASK)
4696 fFsw |= X86_FSW_ES | X86_FSW_B;
4697
4698 if (!(fFsw & ~fFcw & X86_FSW_IE))
4699 iemFpuSoftF80ToIprt(pr80Result, r80XResult);
4700 else
4701 *pr80Result = *pr80XcptResult;
4702 return fFsw;
4703}
4704
4705
4706/**
4707 * Helper doing polynomial evaluation using Horner's method.
4708 *
4709 * See https://en.wikipedia.org/wiki/Horner%27s_method for details.
4710 */
4711float128_t iemFpuSoftF128HornerPoly(float128_t z, PCRTFLOAT128U g_par128HornerConsts, size_t cHornerConsts,
4712 unsigned cPrecision, softfloat_state_t *pSoftState)
4713{
4714 Assert(cHornerConsts > 1);
4715 size_t i = cHornerConsts - 1;
4716 float128_t r128Result = iemFpuSoftF128PrecisionIprt(&g_par128HornerConsts[i], cPrecision);
4717 while (i-- > 0)
4718 {
4719 r128Result = iemFpuSoftF128Precision(f128_mul(r128Result, z, pSoftState), cPrecision);
4720 r128Result = f128_add(r128Result, iemFpuSoftF128PrecisionIprt(&g_par128HornerConsts[i], cPrecision), pSoftState);
4721 r128Result = iemFpuSoftF128Precision(r128Result, cPrecision);
4722 }
4723 return r128Result;
4724}
4725
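/*
 * Illustration of the scheme (hedged sketch, not part of the build): the same
 * Horner recurrence as iemFpuSoftF128HornerPoly above, but in plain double
 * arithmetic so the structure is easy to see.  The function name and the
 * plain-C types are made up for this example.
 */
# if 0 /* example */
static double iemFpuExampleHornerPoly(double z, const double *paConsts, size_t cConsts)
{
    /* Evaluates c[0] + z*(c[1] + z*(c[2] + ... + z*c[cConsts-1])) from the top down. */
    size_t i       = cConsts - 1;
    double rResult = paConsts[i];
    while (i-- > 0)
        rResult = rResult * z + paConsts[i];
    return rResult;
}
# endif
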
4726#endif /* !IEM_WITH_FLOAT128_FOR_FPU - SoftFloat */
4727
4728
4729/**
4730 * Composes a normalized and rounded RTFLOAT80U result from a 192-bit wide
4731 * mantissa, exponent and sign.
4732 *
4733 * @returns Updated FSW.
4734 * @param pr80Dst Where to return the composed value.
4735 * @param fSign The sign.
4736 * @param puMantissa The mantissa, 256-bit type but the top 64 bits are
4737 * ignored and should be zero. This will probably be
4738 * modified during normalization and rounding.
4739 * @param iExponent Unbiased exponent.
4740 * @param fFcw The FPU control word.
4741 * @param fFsw The FPU status word.
4742 */
4743static uint16_t iemFpuFloat80RoundAndComposeFrom192(PRTFLOAT80U pr80Dst, bool fSign, PRTUINT256U puMantissa,
4744 int32_t iExponent, uint16_t fFcw, uint16_t fFsw)
4745{
4746 AssertStmt(puMantissa->QWords.qw3 == 0, puMantissa->QWords.qw3 = 0);
4747
4748 iExponent += RTFLOAT80U_EXP_BIAS;
4749
4750 /* Do normalization if necessary and possible. */
4751 unsigned cShifted = 0;
4752 if (!(puMantissa->QWords.qw2 & RT_BIT_64(63)))
4753 {
4754 int cShift = 192 - RTUInt256BitCount(puMantissa);
4755 if (iExponent > cShift)
4756 iExponent -= cShift;
4757 else
4758 {
4759 if (fFcw & X86_FCW_UM)
4760 {
4761 if (iExponent > 0)
4762 cShift = --iExponent;
4763 else
4764 cShift = 0;
4765 }
4766 iExponent -= cShift;
4767 }
4768 cShifted = cShift;
4769 RTUInt256AssignShiftLeft(puMantissa, cShift);
4770 }
4771
4772 /* Do rounding. */
4773 uint64_t uMantissa = puMantissa->QWords.qw2;
4774 if (puMantissa->QWords.qw1 || puMantissa->QWords.qw0)
4775 {
4776 bool fAdd;
4777 switch (fFcw & X86_FCW_RC_MASK)
4778 {
4779 default: /* (for the simple-minded MSC which otherwise thinks fAdd would be used uninitialized) */
4780 case X86_FCW_RC_NEAREST:
4781 if (puMantissa->QWords.qw1 & RT_BIT_64(63))
4782 {
4783 if ( (uMantissa & 1)
4784 || puMantissa->QWords.qw0 != 0
4785 || puMantissa->QWords.qw1 != RT_BIT_64(63))
4786 {
4787 fAdd = true;
4788 break;
4789 }
4790 uMantissa &= ~(uint64_t)1;
4791 }
4792 fAdd = false;
4793 break;
4794 case X86_FCW_RC_ZERO:
4795 fAdd = false;
4796 break;
4797 case X86_FCW_RC_UP:
4798 fAdd = !fSign;
4799 break;
4800 case X86_FCW_RC_DOWN:
4801 fAdd = fSign;
4802 break;
4803 }
4804 if (fAdd)
4805 {
4806 uint64_t const uTmp = uMantissa;
4807 uMantissa = uTmp + 1;
4808 if (uMantissa < uTmp)
4809 {
4810 uMantissa >>= 1;
4811 uMantissa |= RT_BIT_64(63);
4812 iExponent++;
4813 }
4814 fFsw |= X86_FSW_C1;
4815 }
4816 fFsw |= X86_FSW_PE;
4817 if (!(fFcw & X86_FCW_PM))
4818 fFsw |= X86_FSW_ES | X86_FSW_B;
4819 }
4820
4821 /* Check for underflow (denormals). */
4822 if (iExponent <= 0)
4823 {
4824 if (fFcw & X86_FCW_UM)
4825 {
4826 if (uMantissa & RT_BIT_64(63))
4827 uMantissa >>= 1;
4828 iExponent = 0;
4829 }
4830 else
4831 {
4832 iExponent += RTFLOAT80U_EXP_BIAS_ADJUST;
4833 fFsw |= X86_FSW_ES | X86_FSW_B;
4834 }
4835 fFsw |= X86_FSW_UE;
4836 }
4837 /* Check for overflow; not expected to be reachable from the present callers, so just assert. */
4838 else if (iExponent >= RTFLOAT80U_EXP_MAX)
4839 {
4840 Assert(iExponent < RTFLOAT80U_EXP_MAX);
4841 }
4842
4843 /* Compose the result. */
4844 pr80Dst->s.uMantissa = uMantissa;
4845 pr80Dst->s.uExponent = iExponent;
4846 pr80Dst->s.fSign = fSign;
4847 return fFsw;
4848}
4849
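/*
 * Explanatory note: on this path X86_FSW_C1 reports the rounding direction
 * (set when the mantissa was rounded up), and X86_FSW_UE follows the usual
 * masked-response rules: a masked underflow denormalizes the result, while an
 * unmasked one leaves the exponent biased by RTFLOAT80U_EXP_BIAS_ADJUST for
 * the trap handler and sets ES+B.
 */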
4850
4851
4852
4853/*********************************************************************************************************************************
4854* x87 FPU Division Operations *
4855*********************************************************************************************************************************/
4856
4857IEM_DECL_IMPL_DEF(void, iemAImpl_fdiv_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4858 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
4859{
4860 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
4861 AssertReleaseFailed();
4862}
4863
4864
4865IEM_DECL_IMPL_DEF(void, iemAImpl_fdiv_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4866 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
4867{
4868 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
4869 AssertReleaseFailed();
4870}
4871
4872
4873/** Worker for iemAImpl_fdiv_r80_by_r80 & iemAImpl_fdivr_r80_by_r80. */
4874static uint16_t iemAImpl_fdiv_f80_r80_worker(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, PRTFLOAT80U pr80Result,
4875 uint16_t fFcw, uint16_t fFsw, PCRTFLOAT80U pr80Val1Org)
4876{
4877 if (!RTFLOAT80U_IS_ZERO(pr80Val2) || RTFLOAT80U_IS_NAN(pr80Val1) || RTFLOAT80U_IS_INF(pr80Val1))
4878 {
4879 softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(fFcw);
4880 extFloat80_t r80XResult = extF80_div(iemFpuSoftF80FromIprt(pr80Val1), iemFpuSoftF80FromIprt(pr80Val2), &SoftState);
4881 return iemFpuSoftStateAndF80ToFswAndIprtResult(&SoftState, r80XResult, pr80Result, fFcw, fFsw, pr80Val1Org);
4882 }
4883 if (!RTFLOAT80U_IS_ZERO(pr80Val1))
4884 { /* Div by zero. */
4885 if (fFcw & X86_FCW_ZM)
4886 *pr80Result = g_ar80Infinity[pr80Val1->s.fSign != pr80Val2->s.fSign];
4887 else
4888 {
4889 *pr80Result = *pr80Val1Org;
4890 fFsw |= X86_FSW_ES | X86_FSW_B;
4891 }
4892 fFsw |= X86_FSW_ZE;
4893 }
4894 else
4895 { /* Invalid operand */
4896 if (fFcw & X86_FCW_IM)
4897 *pr80Result = g_r80Indefinite;
4898 else
4899 {
4900 *pr80Result = *pr80Val1Org;
4901 fFsw |= X86_FSW_ES | X86_FSW_B;
4902 }
4903 fFsw |= X86_FSW_IE;
4904 }
4905 return fFsw;
4906}
4907
4908
4909IEM_DECL_IMPL_DEF(void, iemAImpl_fdiv_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4910 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
4911{
4912 uint16_t const fFcw = pFpuState->FCW;
4913 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (6 << X86_FSW_TOP_SHIFT);
4914
4915 /* SoftFloat does not check for Pseudo-Infinity, Pseudo-NaN and Unnormals. */
4916 if (RTFLOAT80U_IS_387_INVALID(pr80Val1) || RTFLOAT80U_IS_387_INVALID(pr80Val2))
4917 {
4918 if (fFcw & X86_FCW_IM)
4919 pFpuRes->r80Result = g_r80Indefinite;
4920 else
4921 {
4922 pFpuRes->r80Result = *pr80Val1;
4923 fFsw |= X86_FSW_ES | X86_FSW_B;
4924 }
4925 fFsw |= X86_FSW_IE;
4926 }
4927 /* SoftFloat does not check for denormals and certainly does not report them to us. NaNs & /0 trump denormals. */
4928 else if ( (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val1) && !RTFLOAT80U_IS_NAN(pr80Val2) && !RTFLOAT80U_IS_ZERO(pr80Val2))
4929 || (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val2) && !RTFLOAT80U_IS_NAN(pr80Val1)) )
4930 {
4931 if (fFcw & X86_FCW_DM)
4932 {
4933 PCRTFLOAT80U const pr80Val1Org = pr80Val1;
4934 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val1, r80Val1Normalized);
4935 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val2, r80Val2Normalized);
4936 fFsw = iemAImpl_fdiv_f80_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1Org);
4937 }
4938 else
4939 {
4940 pFpuRes->r80Result = *pr80Val1;
4941 fFsw |= X86_FSW_ES | X86_FSW_B;
4942 }
4943 fFsw |= X86_FSW_DE;
4944 }
4945 /* SoftFloat can handle the rest: */
4946 else
4947 fFsw = iemAImpl_fdiv_f80_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1);
4948
4949 pFpuRes->FSW = fFsw;
4950}
4951
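/*
 * Minimal usage sketch for the helper above (illustration only, never
 * compiled; the values, the FCW setup and the example function are made up):
 */
# if 0 /* example */
static void iemFpuExampleFdiv(void)
{
    X86FXSTATE   FpuState;
    IEMFPURESULT Res;
    RTFLOAT80U   r80Num, r80Den;

    RT_ZERO(FpuState);
    RT_ZERO(Res);
    FpuState.FCW = X86_FCW_RC_NEAREST | X86_FCW_PC_64 | X86_FCW_MASK_ALL; /* round-to-nearest, all exceptions masked */

    /* 3.0 = 1.5 * 2^1 and 2.0 = 1.0 * 2^1, both with the explicit integer bit set. */
    r80Num.s.fSign = 0; r80Num.s.uExponent = RTFLOAT80U_EXP_BIAS + 1; r80Num.s.uMantissa = UINT64_C(0xc000000000000000);
    r80Den.s.fSign = 0; r80Den.s.uExponent = RTFLOAT80U_EXP_BIAS + 1; r80Den.s.uMantissa = RT_BIT_64(63);

    iemAImpl_fdiv_r80_by_r80(&FpuState, &Res, &r80Num, &r80Den);
    /* Res.r80Result now holds 1.5 and Res.FSW the updated TOP/exception bits. */
}
# endif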
4952
4953IEM_DECL_IMPL_DEF(void, iemAImpl_fdivr_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4954 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
4955{
4956 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
4957 AssertReleaseFailed();
4958}
4959
4960
4961IEM_DECL_IMPL_DEF(void, iemAImpl_fdivr_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4962 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
4963{
4964 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
4965 AssertReleaseFailed();
4966}
4967
4968
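/* Same as iemAImpl_fdiv_r80_by_r80, but with input operands switched. */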
4969IEM_DECL_IMPL_DEF(void, iemAImpl_fdivr_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
4970 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
4971{
4972 uint16_t const fFcw = pFpuState->FCW;
4973 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (6 << X86_FSW_TOP_SHIFT);
4974
4975 /* SoftFloat does not check for Pseudo-Infinity, Pseudo-NaN and Unnormals. */
4976 if (RTFLOAT80U_IS_387_INVALID(pr80Val1) || RTFLOAT80U_IS_387_INVALID(pr80Val2))
4977 {
4978 if (fFcw & X86_FCW_IM)
4979 pFpuRes->r80Result = g_r80Indefinite;
4980 else
4981 {
4982 pFpuRes->r80Result = *pr80Val1;
4983 fFsw |= X86_FSW_ES | X86_FSW_B;
4984 }
4985 fFsw |= X86_FSW_IE;
4986 }
4987 /* SoftFloat does not check for denormals and certainly does not report them to us. NaNs & /0 trump denormals. */
4988 else if ( (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val1) && !RTFLOAT80U_IS_NAN(pr80Val2))
4989 || (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val2) && !RTFLOAT80U_IS_NAN(pr80Val1) && !RTFLOAT80U_IS_ZERO(pr80Val1)) )
4990 {
4991 if (fFcw & X86_FCW_DM)
4992 {
4993 PCRTFLOAT80U const pr80Val1Org = pr80Val1;
4994 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val1, r80Val1Normalized);
4995 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val2, r80Val2Normalized);
4996 fFsw = iemAImpl_fdiv_f80_r80_worker(pr80Val2, pr80Val1, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1Org);
4997 }
4998 else
4999 {
5000 pFpuRes->r80Result = *pr80Val1;
5001 fFsw |= X86_FSW_ES | X86_FSW_B;
5002 }
5003 fFsw |= X86_FSW_DE;
5004 }
5005 /* SoftFloat can handle the rest: */
5006 else
5007 fFsw = iemAImpl_fdiv_f80_r80_worker(pr80Val2, pr80Val1, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1);
5008
5009 pFpuRes->FSW = fFsw;
5010}
5011
5012
5013IEM_DECL_IMPL_DEF(void, iemAImpl_fidiv_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5014 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
5015{
5016 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
5017 AssertReleaseFailed();
5018}
5019
5020
5021IEM_DECL_IMPL_DEF(void, iemAImpl_fidiv_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5022 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
5023{
5024 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
5025 AssertReleaseFailed();
5026}
5027
5028
5029IEM_DECL_IMPL_DEF(void, iemAImpl_fidivr_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5030 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
5031{
5032 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
5033 AssertReleaseFailed();
5034}
5035
5036
5037IEM_DECL_IMPL_DEF(void, iemAImpl_fidivr_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5038 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
5039{
5040 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
5041 AssertReleaseFailed();
5042}
5043
5044
5045IEM_DECL_IMPL_DEF(void, iemAImpl_fprem_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5046 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5047{
5048 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
5049 AssertReleaseFailed();
5050}
5051
5052
5053IEM_DECL_IMPL_DEF(void, iemAImpl_fprem1_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5054 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5055{
5056 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
5057 AssertReleaseFailed();
5058}
5059
5060
5061/*********************************************************************************************************************************
5062* x87 FPU Multiplication Operations *
5063*********************************************************************************************************************************/
5064
5065IEM_DECL_IMPL_DEF(void, iemAImpl_fmul_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5066 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
5067{
5068 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
5069 AssertReleaseFailed();
5070}
5071
5072
5073IEM_DECL_IMPL_DEF(void, iemAImpl_fmul_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5074 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
5075{
5076 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
5077 AssertReleaseFailed();
5078}
5079
5080
5081/** Worker for iemAImpl_fmul_r80_by_r80. */
5082static uint16_t iemAImpl_fmul_f80_r80_worker(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, PRTFLOAT80U pr80Result,
5083 uint16_t fFcw, uint16_t fFsw, PCRTFLOAT80U pr80Val1Org)
5084{
5085 softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(fFcw);
5086 extFloat80_t r80XResult = extF80_mul(iemFpuSoftF80FromIprt(pr80Val1), iemFpuSoftF80FromIprt(pr80Val2), &SoftState);
5087 return iemFpuSoftStateAndF80ToFswAndIprtResult(&SoftState, r80XResult, pr80Result, fFcw, fFsw, pr80Val1Org);
5088}
5089
5090
5091IEM_DECL_IMPL_DEF(void, iemAImpl_fmul_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5092 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5093{
5094 uint16_t const fFcw = pFpuState->FCW;
5095 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (6 << X86_FSW_TOP_SHIFT);
5096
5097 /* SoftFloat does not check for Pseudo-Infinity, Pseudo-NaN and Unnormals. */
5098 if (RTFLOAT80U_IS_387_INVALID(pr80Val1) || RTFLOAT80U_IS_387_INVALID(pr80Val2))
5099 {
5100 if (fFcw & X86_FCW_IM)
5101 pFpuRes->r80Result = g_r80Indefinite;
5102 else
5103 {
5104 pFpuRes->r80Result = *pr80Val1;
5105 fFsw |= X86_FSW_ES | X86_FSW_B;
5106 }
5107 fFsw |= X86_FSW_IE;
5108 }
5109 /* SoftFloat does not check for denormals and certainly does not report them to us. NaNs trump denormals. */
5110 else if ( (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val1) && !RTFLOAT80U_IS_NAN(pr80Val2))
5111 || (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val2) && !RTFLOAT80U_IS_NAN(pr80Val1)) )
5112 {
5113 if (fFcw & X86_FCW_DM)
5114 {
5115 PCRTFLOAT80U const pr80Val1Org = pr80Val1;
5116 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val1, r80Val1Normalized);
5117 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val2, r80Val2Normalized);
5118 fFsw = iemAImpl_fmul_f80_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1Org);
5119 }
5120 else
5121 {
5122 pFpuRes->r80Result = *pr80Val1;
5123 fFsw |= X86_FSW_ES | X86_FSW_B;
5124 }
5125 fFsw |= X86_FSW_DE;
5126 }
5127 /* SoftFloat can handle the rest: */
5128 else
5129 fFsw = iemAImpl_fmul_f80_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1);
5130
5131 pFpuRes->FSW = fFsw;
5132}
5133
5134
5135IEM_DECL_IMPL_DEF(void, iemAImpl_fimul_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5136 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
5137{
5138 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
5139 AssertReleaseFailed();
5140}
5141
5142
5143IEM_DECL_IMPL_DEF(void, iemAImpl_fimul_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5144 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
5145{
5146 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
5147 AssertReleaseFailed();
5148}
5149
5150
5151/*********************************************************************************************************************************
5152* x87 FPU Addition and Subtraction *
5153*********************************************************************************************************************************/
5154
5155IEM_DECL_IMPL_DEF(void, iemAImpl_fadd_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5156 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
5157{
5158 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
5159 AssertReleaseFailed();
5160}
5161
5162
5163IEM_DECL_IMPL_DEF(void, iemAImpl_fadd_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5164 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
5165{
5166 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
5167 AssertReleaseFailed();
5168}
5169
5170
5171/** Worker for iemAImpl_fadd_r80_by_r80. */
5172static uint16_t iemAImpl_fadd_f80_r80_worker(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, PRTFLOAT80U pr80Result,
5173 uint16_t fFcw, uint16_t fFsw, PCRTFLOAT80U pr80Val1Org)
5174{
5175 softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(fFcw);
5176 extFloat80_t r80XResult = extF80_add(iemFpuSoftF80FromIprt(pr80Val1), iemFpuSoftF80FromIprt(pr80Val2), &SoftState);
5177 return iemFpuSoftStateAndF80ToFswAndIprtResult(&SoftState, r80XResult, pr80Result, fFcw, fFsw, pr80Val1Org);
5178}
5179
5180
5181IEM_DECL_IMPL_DEF(void, iemAImpl_fadd_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5182 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5183{
5184 uint16_t const fFcw = pFpuState->FCW;
5185 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (6 << X86_FSW_TOP_SHIFT);
5186
5187 /* SoftFloat does not check for Pseudo-Infinity, Pseudo-NaN and Unnormals. */
5188 if (RTFLOAT80U_IS_387_INVALID(pr80Val1) || RTFLOAT80U_IS_387_INVALID(pr80Val2))
5189 {
5190 if (fFcw & X86_FCW_IM)
5191 pFpuRes->r80Result = g_r80Indefinite;
5192 else
5193 {
5194 pFpuRes->r80Result = *pr80Val1;
5195 fFsw |= X86_FSW_ES | X86_FSW_B;
5196 }
5197 fFsw |= X86_FSW_IE;
5198 }
5199 /* SoftFloat does not check for denormals and certainly does not report them to us. NaNs trump denormals. */
5200 else if ( (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val1) && !RTFLOAT80U_IS_NAN(pr80Val2))
5201 || (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val2) && !RTFLOAT80U_IS_NAN(pr80Val1)) )
5202 {
5203 if (fFcw & X86_FCW_DM)
5204 {
5205 PCRTFLOAT80U const pr80Val1Org = pr80Val1;
5206 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val1, r80Val1Normalized);
5207 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val2, r80Val2Normalized);
5208 fFsw = iemAImpl_fadd_f80_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1Org);
5209 }
5210 else
5211 {
5212 pFpuRes->r80Result = *pr80Val1;
5213 fFsw |= X86_FSW_ES | X86_FSW_B;
5214 }
5215 fFsw |= X86_FSW_DE;
5216 }
5217 /* SoftFloat can handle the rest: */
5218 else
5219 fFsw = iemAImpl_fadd_f80_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1);
5220
5221 pFpuRes->FSW = fFsw;
5222}
5223
5224
5225IEM_DECL_IMPL_DEF(void, iemAImpl_fiadd_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5226 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
5227{
5228 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
5229 AssertReleaseFailed();
5230}
5231
5232
5233IEM_DECL_IMPL_DEF(void, iemAImpl_fiadd_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5234 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
5235{
5236 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
5237 AssertReleaseFailed();
5238}
5239
5240
5241IEM_DECL_IMPL_DEF(void, iemAImpl_fisub_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5242 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
5243{
5244 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
5245 AssertReleaseFailed();
5246}
5247
5248
5249IEM_DECL_IMPL_DEF(void, iemAImpl_fisub_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5250 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
5251{
5252 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
5253 AssertReleaseFailed();
5254}
5255
5256
5257IEM_DECL_IMPL_DEF(void, iemAImpl_fisubr_r80_by_i16,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5258 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
5259{
5260 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi16Val2);
5261 AssertReleaseFailed();
5262}
5263
5264
5265IEM_DECL_IMPL_DEF(void, iemAImpl_fisubr_r80_by_i32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5266 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
5267{
5268 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pi32Val2);
5269 AssertReleaseFailed();
5270}
5271
5272
5273IEM_DECL_IMPL_DEF(void, iemAImpl_fsub_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5274 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
5275{
5276 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
5277 AssertReleaseFailed();
5278}
5279
5280
5281IEM_DECL_IMPL_DEF(void, iemAImpl_fsub_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5282 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
5283{
5284 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
5285 AssertReleaseFailed();
5286}
5287
5288
5289/** Worker for iemAImpl_fsub_r80_by_r80 and iemAImpl_fsubr_r80_by_r80. */
5290static uint16_t iemAImpl_fsub_f80_r80_worker(PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2, PRTFLOAT80U pr80Result,
5291 uint16_t fFcw, uint16_t fFsw, PCRTFLOAT80U pr80Val1Org)
5292{
5293 softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(fFcw);
5294 extFloat80_t r80XResult = extF80_sub(iemFpuSoftF80FromIprt(pr80Val1), iemFpuSoftF80FromIprt(pr80Val2), &SoftState);
5295 return iemFpuSoftStateAndF80ToFswAndIprtResult(&SoftState, r80XResult, pr80Result, fFcw, fFsw, pr80Val1Org);
5296}
5297
5298
5299IEM_DECL_IMPL_DEF(void, iemAImpl_fsub_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5300 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5301{
5302 uint16_t const fFcw = pFpuState->FCW;
5303 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (6 << X86_FSW_TOP_SHIFT);
5304
5305 /* SoftFloat does not check for Pseudo-Infinity, Pseudo-NaN and Unnormals. */
5306 if (RTFLOAT80U_IS_387_INVALID(pr80Val1) || RTFLOAT80U_IS_387_INVALID(pr80Val2))
5307 {
5308 if (fFcw & X86_FCW_IM)
5309 pFpuRes->r80Result = g_r80Indefinite;
5310 else
5311 {
5312 pFpuRes->r80Result = *pr80Val1;
5313 fFsw |= X86_FSW_ES | X86_FSW_B;
5314 }
5315 fFsw |= X86_FSW_IE;
5316 }
5317 /* SoftFloat does not check for denormals and certainly does not report them to us. NaNs trump denormals. */
5318 else if ( (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val1) && !RTFLOAT80U_IS_NAN(pr80Val2))
5319 || (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val2) && !RTFLOAT80U_IS_NAN(pr80Val1)) )
5320 {
5321 if (fFcw & X86_FCW_DM)
5322 {
5323 PCRTFLOAT80U const pr80Val1Org = pr80Val1;
5324 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val1, r80Val1Normalized);
5325 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val2, r80Val2Normalized);
5326 fFsw = iemAImpl_fsub_f80_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1Org);
5327 }
5328 else
5329 {
5330 pFpuRes->r80Result = *pr80Val1;
5331 fFsw |= X86_FSW_ES | X86_FSW_B;
5332 }
5333 fFsw |= X86_FSW_DE;
5334 }
5335 /* SoftFloat can handle the rest: */
5336 else
5337 fFsw = iemAImpl_fsub_f80_r80_worker(pr80Val1, pr80Val2, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1);
5338
5339 pFpuRes->FSW = fFsw;
5340}
5341
5342
5343IEM_DECL_IMPL_DEF(void, iemAImpl_fsubr_r80_by_r32,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5344 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
5345{
5346 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr32Val2);
5347 AssertReleaseFailed();
5348}
5349
5350
5351IEM_DECL_IMPL_DEF(void, iemAImpl_fsubr_r80_by_r64,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5352 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
5353{
5354 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr64Val2);
5355 AssertReleaseFailed();
5356}
5357
5358
5359/* Same as iemAImpl_fsub_r80_by_r80, but with input operands switched. */
5360IEM_DECL_IMPL_DEF(void, iemAImpl_fsubr_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5361 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5362{
5363 uint16_t const fFcw = pFpuState->FCW;
5364 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (6 << X86_FSW_TOP_SHIFT);
5365
5366 /* SoftFloat does not check for Pseudo-Infinity, Pseudo-NaN and Unnormals. */
5367 if (RTFLOAT80U_IS_387_INVALID(pr80Val1) || RTFLOAT80U_IS_387_INVALID(pr80Val2))
5368 {
5369 if (fFcw & X86_FCW_IM)
5370 pFpuRes->r80Result = g_r80Indefinite;
5371 else
5372 {
5373 pFpuRes->r80Result = *pr80Val1;
5374 fFsw |= X86_FSW_ES | X86_FSW_B;
5375 }
5376 fFsw |= X86_FSW_IE;
5377 }
5378 /* SoftFloat does not check for denormals and certainly does not report them to us. NaNs trump denormals. */
5379 else if ( (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val1) && !RTFLOAT80U_IS_NAN(pr80Val2))
5380 || (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val2) && !RTFLOAT80U_IS_NAN(pr80Val1)) )
5381 {
5382 if (fFcw & X86_FCW_DM)
5383 {
5384 PCRTFLOAT80U const pr80Val1Org = pr80Val1;
5385 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val1, r80Val1Normalized);
5386 IEM_NORMALIZE_PSEUDO_DENORMAL(pr80Val2, r80Val2Normalized);
5387 fFsw = iemAImpl_fsub_f80_r80_worker(pr80Val2, pr80Val1, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1Org);
5388 }
5389 else
5390 {
5391 pFpuRes->r80Result = *pr80Val1;
5392 fFsw |= X86_FSW_ES | X86_FSW_B;
5393 }
5394 fFsw |= X86_FSW_DE;
5395 }
5396 /* SoftFloat can handle the rest: */
5397 else
5398 fFsw = iemAImpl_fsub_f80_r80_worker(pr80Val2, pr80Val1, &pFpuRes->r80Result, fFcw, fFsw, pr80Val1);
5399
5400 pFpuRes->FSW = fFsw;
5401}
5402
5403
5404/*********************************************************************************************************************************
5405* x87 FPU Trigonometric Operations *
5406*********************************************************************************************************************************/
5407
5408
5409IEM_DECL_IMPL_DEF(void, iemAImpl_fpatan_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5410 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5411{
5412 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
5413 AssertReleaseFailed();
5414}
5415
5416#endif /* IEM_WITHOUT_ASSEMBLY */
5417
5418IEM_DECL_IMPL_DEF(void, iemAImpl_fpatan_r80_by_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5419 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5420{
5421 iemAImpl_fpatan_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
5422}
5423
5424IEM_DECL_IMPL_DEF(void, iemAImpl_fpatan_r80_by_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5425 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5426{
5427 iemAImpl_fpatan_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
5428}
5429
5430
5431#if defined(IEM_WITHOUT_ASSEMBLY)
5432IEM_DECL_IMPL_DEF(void, iemAImpl_fptan_r80_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
5433{
5434 RT_NOREF(pFpuState, pFpuResTwo, pr80Val);
5435 AssertReleaseFailed();
5436}
5437#endif /* IEM_WITHOUT_ASSEMBLY */
5438
5439IEM_DECL_IMPL_DEF(void, iemAImpl_fptan_r80_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
5440{
5441 iemAImpl_fptan_r80_r80(pFpuState, pFpuResTwo, pr80Val);
5442}
5443
5444IEM_DECL_IMPL_DEF(void, iemAImpl_fptan_r80_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
5445{
5446 iemAImpl_fptan_r80_r80(pFpuState, pFpuResTwo, pr80Val);
5447}
5448
5449
5450#ifdef IEM_WITHOUT_ASSEMBLY
5451IEM_DECL_IMPL_DEF(void, iemAImpl_fsin_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5452{
5453 RT_NOREF(pFpuState, pFpuRes, pr80Val);
5454 AssertReleaseFailed();
5455}
5456#endif /* IEM_WITHOUT_ASSEMBLY */
5457
5458IEM_DECL_IMPL_DEF(void, iemAImpl_fsin_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5459{
5460 iemAImpl_fsin_r80(pFpuState, pFpuRes, pr80Val);
5461}
5462
5463IEM_DECL_IMPL_DEF(void, iemAImpl_fsin_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5464{
5465 iemAImpl_fsin_r80(pFpuState, pFpuRes, pr80Val);
5466}
5467
5468#ifdef IEM_WITHOUT_ASSEMBLY
5469IEM_DECL_IMPL_DEF(void, iemAImpl_fsincos_r80_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
5470{
5471 RT_NOREF(pFpuState, pFpuResTwo, pr80Val);
5472 AssertReleaseFailed();
5473}
5474#endif /* IEM_WITHOUT_ASSEMBLY */
5475
5476IEM_DECL_IMPL_DEF(void, iemAImpl_fsincos_r80_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
5477{
5478 iemAImpl_fsincos_r80_r80(pFpuState, pFpuResTwo, pr80Val);
5479}
5480
5481IEM_DECL_IMPL_DEF(void, iemAImpl_fsincos_r80_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
5482{
5483 iemAImpl_fsincos_r80_r80(pFpuState, pFpuResTwo, pr80Val);
5484}
5485
5486
5487#ifdef IEM_WITHOUT_ASSEMBLY
5488IEM_DECL_IMPL_DEF(void, iemAImpl_fcos_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5489{
5490 RT_NOREF(pFpuState, pFpuRes, pr80Val);
5491 AssertReleaseFailed();
5492}
5493#endif /* IEM_WITHOUT_ASSEMBLY */
5494
5495IEM_DECL_IMPL_DEF(void, iemAImpl_fcos_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5496{
5497 iemAImpl_fcos_r80(pFpuState, pFpuRes, pr80Val);
5498}
5499
5500IEM_DECL_IMPL_DEF(void, iemAImpl_fcos_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5501{
5502 iemAImpl_fcos_r80(pFpuState, pFpuRes, pr80Val);
5503}
5504
5505#ifdef IEM_WITHOUT_ASSEMBLY
5506
5507
5508/*********************************************************************************************************************************
5509* x87 FPU Compare and Testing Operations *
5510*********************************************************************************************************************************/
5511
5512IEM_DECL_IMPL_DEF(void, iemAImpl_ftst_r80,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw, PCRTFLOAT80U pr80Val))
5513{
5514 uint16_t fFsw = (7 << X86_FSW_TOP_SHIFT);
5515
5516 if (RTFLOAT80U_IS_ZERO(pr80Val))
5517 fFsw |= X86_FSW_C3;
5518 else if (RTFLOAT80U_IS_NORMAL(pr80Val) || RTFLOAT80U_IS_INF(pr80Val))
5519 fFsw |= pr80Val->s.fSign ? X86_FSW_C0 : 0;
5520 else if (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val))
5521 {
5522 fFsw |= pr80Val->s.fSign ? X86_FSW_C0 | X86_FSW_DE : X86_FSW_DE;
5523 if (!(pFpuState->FCW & X86_FCW_DM))
5524 fFsw |= X86_FSW_ES | X86_FSW_B;
5525 }
5526 else
5527 {
5528 fFsw |= X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3 | X86_FSW_IE;
5529 if (!(pFpuState->FCW & X86_FCW_IM))
5530 fFsw |= X86_FSW_ES | X86_FSW_B;
5531 }
5532
5533 *pu16Fsw = fFsw;
5534}
5535
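/*
 * FTST condition code summary (restating the logic above; matches the Intel
 * SDM table for FTST):
 *      C3 C2 C0
 *       0  0  0   ST(0) > 0
 *       0  0  1   ST(0) < 0
 *       1  0  0   ST(0) = 0
 *       1  1  1   unordered (NaN or unsupported encoding)
 */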
5536
5537IEM_DECL_IMPL_DEF(void, iemAImpl_fxam_r80,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw, PCRTFLOAT80U pr80Val))
5538{
5540 uint16_t fFsw = (7 << X86_FSW_TOP_SHIFT);
5541
5542 /* C1 = sign bit (always set this way, even for empty registers, says Intel). */
5543 if (pr80Val->s.fSign)
5544 fFsw |= X86_FSW_C1;
5545
5546 /* Classify the value in C0, C2, C3. */
5547 if (!(pFpuState->FTW & RT_BIT_32(X86_FSW_TOP_GET(pFpuState->FSW))))
5548 fFsw |= X86_FSW_C0 | X86_FSW_C3; /* empty */
5549 else if (RTFLOAT80U_IS_NORMAL(pr80Val))
5550 fFsw |= X86_FSW_C2;
5551 else if (RTFLOAT80U_IS_ZERO(pr80Val))
5552 fFsw |= X86_FSW_C3;
5553 else if (RTFLOAT80U_IS_QUIET_OR_SIGNALLING_NAN(pr80Val))
5554 fFsw |= X86_FSW_C0;
5555 else if (RTFLOAT80U_IS_INF(pr80Val))
5556 fFsw |= X86_FSW_C0 | X86_FSW_C2;
5557 else if (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val))
5558 fFsw |= X86_FSW_C2 | X86_FSW_C3;
5559 /* whatever else: 0 */
5560
5561 *pu16Fsw = fFsw;
5562}
5563
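/*
 * FXAM classification summary (restating the logic above; C1 is the sign bit
 * in all cases):
 *      C3 C2 C0
 *       0  0  0   unsupported encoding
 *       0  0  1   NaN
 *       0  1  0   normal finite number
 *       0  1  1   infinity
 *       1  0  0   zero
 *       1  0  1   empty register
 *       1  1  0   denormal (incl. pseudo-denormal)
 */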
5564
5565IEM_DECL_IMPL_DEF(void, iemAImpl_fcom_r80_by_r32,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
5566 PCRTFLOAT80U pr80Val1, PCRTFLOAT32U pr32Val2))
5567{
5568 RT_NOREF(pFpuState, pFSW, pr80Val1, pr32Val2);
5569 AssertReleaseFailed();
5570}
5571
5572
5573IEM_DECL_IMPL_DEF(void, iemAImpl_fcom_r80_by_r64,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
5574 PCRTFLOAT80U pr80Val1, PCRTFLOAT64U pr64Val2))
5575{
5576 RT_NOREF(pFpuState, pFSW, pr80Val1, pr64Val2);
5577 AssertReleaseFailed();
5578}
5579
5580
5581IEM_DECL_IMPL_DEF(void, iemAImpl_fcom_r80_by_r80,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
5582 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5583{
5584 RT_NOREF(pFpuState, pFSW, pr80Val1, pr80Val2);
5585 AssertReleaseFailed();
5586}
5587
5588
5589IEM_DECL_IMPL_DEF(uint32_t, iemAImpl_fcomi_r80_by_r80,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
5590 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5591{
5592 RT_NOREF(pFpuState, pFSW, pr80Val1, pr80Val2);
5593 AssertReleaseFailed();
5594 return 0;
5595}
5596
5597
5598IEM_DECL_IMPL_DEF(void, iemAImpl_fucom_r80_by_r80,(PCX86FXSTATE pFpuState, uint16_t *pFSW,
5599 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5600{
5601 RT_NOREF(pFpuState, pFSW, pr80Val1, pr80Val2);
5602 AssertReleaseFailed();
5603}
5604
5605
5606IEM_DECL_IMPL_DEF(uint32_t, iemAImpl_fucomi_r80_by_r80,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw,
5607 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5608{
5609 RT_NOREF(pFpuState, pu16Fsw, pr80Val1, pr80Val2);
5610 AssertReleaseFailed();
5611 return 0;
5612}
5613
5614
5615IEM_DECL_IMPL_DEF(void, iemAImpl_ficom_r80_by_i16,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw,
5616 PCRTFLOAT80U pr80Val1, int16_t const *pi16Val2))
5617{
5618 RT_NOREF(pFpuState, pu16Fsw, pr80Val1, pi16Val2);
5619 AssertReleaseFailed();
5620}
5621
5622
5623IEM_DECL_IMPL_DEF(void, iemAImpl_ficom_r80_by_i32,(PCX86FXSTATE pFpuState, uint16_t *pu16Fsw,
5624 PCRTFLOAT80U pr80Val1, int32_t const *pi32Val2))
5625{
5626 RT_NOREF(pFpuState, pu16Fsw, pr80Val1, pi32Val2);
5627 AssertReleaseFailed();
5628}
5629
5630
5631/*********************************************************************************************************************************
5632* x87 FPU Other Operations *
5633*********************************************************************************************************************************/
5634
5635/**
5636 * Helper for iemAImpl_frndint_r80, called on both normal and denormal numbers.
5637 */
5638static uint16_t iemAImpl_frndint_r80_normal(PCRTFLOAT80U pr80Val, PRTFLOAT80U pr80Result, uint16_t fFcw, uint16_t fFsw)
5639{
5640 softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(fFcw);
5641 iemFpuSoftF80ToIprt(pr80Result, extF80_roundToInt(iemFpuSoftF80FromIprt(pr80Val), SoftState.roundingMode,
5642 true /*exact / generate #PE */, &SoftState));
5643 return IEM_SOFTFLOAT_STATE_TO_FSW(fFsw, &SoftState, fFcw);
5644}
5645
5646
5647IEM_DECL_IMPL_DEF(void, iemAImpl_frndint_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5648{
5649 uint16_t const fFcw = pFpuState->FCW;
5650 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (7 << X86_FSW_TOP_SHIFT);
5651
5652 if (RTFLOAT80U_IS_NORMAL(pr80Val))
5653 fFsw = iemAImpl_frndint_r80_normal(pr80Val, &pFpuRes->r80Result, fFcw, fFsw);
5654 else if ( RTFLOAT80U_IS_ZERO(pr80Val)
5655 || RTFLOAT80U_IS_QUIET_NAN(pr80Val)
5656 || RTFLOAT80U_IS_INDEFINITE(pr80Val)
5657 || RTFLOAT80U_IS_INF(pr80Val))
5658 pFpuRes->r80Result = *pr80Val;
5659 else if (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val))
5660 {
5661 fFsw |= X86_FSW_DE;
5662 if (fFcw & X86_FCW_DM)
5663 fFsw = iemAImpl_frndint_r80_normal(pr80Val, &pFpuRes->r80Result, fFcw, fFsw);
5664 else
5665 {
5666 pFpuRes->r80Result = *pr80Val;
5667 fFsw |= X86_FSW_ES | X86_FSW_B;
5668 }
5669 }
5670 else
5671 {
5672 if (fFcw & X86_FCW_IM)
5673 {
5674 if (!RTFLOAT80U_IS_SIGNALLING_NAN(pr80Val))
5675 pFpuRes->r80Result = g_r80Indefinite;
5676 else
5677 {
5678 pFpuRes->r80Result = *pr80Val;
5679 pFpuRes->r80Result.s.uMantissa |= RT_BIT_64(62); /* make it quiet */
5680 }
5681 }
5682 else
5683 {
5684 pFpuRes->r80Result = *pr80Val;
5685 fFsw |= X86_FSW_ES | X86_FSW_B;
5686 }
5687 fFsw |= X86_FSW_IE;
5688 }
5689 pFpuRes->FSW = fFsw;
5690}
5691
5692
5693IEM_DECL_IMPL_DEF(void, iemAImpl_fscale_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
5694 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
5695{
5696 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
5697 AssertReleaseFailed();
5698}
5699
5700
5701/**
5702 * Helper for iemAImpl_fsqrt_r80, called on both normal and denormal numbers.
5703 */
5704static uint16_t iemAImpl_fsqrt_r80_normal(PCRTFLOAT80U pr80Val, PRTFLOAT80U pr80Result, uint16_t fFcw, uint16_t fFsw)
5705{
5706 Assert(!pr80Val->s.fSign);
5707 softfloat_state_t SoftState = IEM_SOFTFLOAT_STATE_INITIALIZER_FROM_FCW(fFcw);
5708 iemFpuSoftF80ToIprt(pr80Result, extF80_sqrt(iemFpuSoftF80FromIprt(pr80Val), &SoftState));
5709 return IEM_SOFTFLOAT_STATE_TO_FSW(fFsw, &SoftState, fFcw);
5710}
5711
5712
5713IEM_DECL_IMPL_DEF(void, iemAImpl_fsqrt_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5714{
5715 uint16_t const fFcw = pFpuState->FCW;
5716 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (7 << X86_FSW_TOP_SHIFT);
5717
5718 if (RTFLOAT80U_IS_NORMAL(pr80Val) && !pr80Val->s.fSign)
5719 fFsw = iemAImpl_fsqrt_r80_normal(pr80Val, &pFpuRes->r80Result, fFcw, fFsw);
5720 else if ( RTFLOAT80U_IS_ZERO(pr80Val)
5721 || RTFLOAT80U_IS_QUIET_NAN(pr80Val)
5722 || RTFLOAT80U_IS_INDEFINITE(pr80Val)
5723 || (RTFLOAT80U_IS_INF(pr80Val) && !pr80Val->s.fSign))
5724 pFpuRes->r80Result = *pr80Val;
5725 else if (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val) && !pr80Val->s.fSign) /* Negative denormals only generate #IE! */
5726 {
5727 fFsw |= X86_FSW_DE;
5728 if (fFcw & X86_FCW_DM)
5729 fFsw = iemAImpl_fsqrt_r80_normal(pr80Val, &pFpuRes->r80Result, fFcw, fFsw);
5730 else
5731 {
5732 pFpuRes->r80Result = *pr80Val;
5733 fFsw |= X86_FSW_ES | X86_FSW_B;
5734 }
5735 }
5736 else
5737 {
5738 if (fFcw & X86_FCW_IM)
5739 {
5740 if (!RTFLOAT80U_IS_SIGNALLING_NAN(pr80Val))
5741 pFpuRes->r80Result = g_r80Indefinite;
5742 else
5743 {
5744 pFpuRes->r80Result = *pr80Val;
5745 pFpuRes->r80Result.s.uMantissa |= RT_BIT_64(62); /* make it quiet */
5746 }
5747 }
5748 else
5749 {
5750 pFpuRes->r80Result = *pr80Val;
5751 fFsw |= X86_FSW_ES | X86_FSW_B;
5752 }
5753 fFsw |= X86_FSW_IE;
5754 }
5755 pFpuRes->FSW = fFsw;
5756}
5757
5758
5759/**
5760 * @code{.unparsed}
5761 *          x              x * ln2
5762 *  f(x) = 2    - 1   =   e         - 1
5763 *
5764 * @endcode
5765 *
5766 * We can approximate e^x by a Taylor/Maclaurin series (see
5767 * https://en.wikipedia.org/wiki/Taylor_series#Exponential_function):
5768 * @code{.unparsed}
5769 *        n       0     1     2     3     4
5770 *  inf  x       x     x     x     x     x
5771 *  SUM ----- = --- + --- + --- + --- + --- + ...
5772 *  n=0  n!     0!    1!    2!    3!    4!
5773 *
5774 *                 2     3     4
5775 *                x     x     x
5776 *   = 1 + x  +  --- + --- + --- + ...
5777 *               2!    3!    4!
5778 * @endcode
5779 *
5780 * Given z = x * ln2, we get:
5781 * @code{.unparsed}
5782 *   z            2     3     4           n
5783 *                z     z     z           z
5784 *  e  - 1 = z + --- + --- + --- + ... + ---
5785 *               2!    3!    4!          n!
5786 * @endcode
5787 *
5788 * Wanting to use Horner's method, we move one z outside and get:
5789 * @code{.unparsed}
5790 *                       2     3           (n-1)
5791 *                z     z     z           z
5792 *   = z ( 1  +  --- + --- + --- + ... + ------- )
5793 *               2!    3!    4!             n!
5794 * @endcode
5795 *
5796 * The constants we need for using Horner's method are 1 and 1 / n!.
5797 *
5798 * For very tiny x values, we can get away with f(x) = x * ln 2, because
5799 * we don't have the necessary precision to represent 1.0 + z/2! + ...
5800 * and can approximate it to be 1.0. For a visual demonstration of this,
5801 * check out https://www.desmos.com/calculator/vidcdxizd9 (for as long
5802 * as it is valid), plotting f(x) = 2^x - 1 and f(x) = x * ln2.
5803 *
5804 *
5805 * As far as constant accuracy goes, figure 0.1 "80387 Block Diagram" in the
5806 * "80387 Data Sheet" (order 231920-002; Appendix E in 80387 PRM 231917-001;
5807 * Military i387SX 271166-002) indicates that constants are 67-bit (constant
5808 * ROM block) and the internal mantissa size is 68-bit (mantissa adder & barrel
5809 * shifter blocks). (The one bit difference is probably an implicit one missing
5810 * from the constant ROM.) A paper on division and sqrt on the AMD-K7 by Stuart
5811 * F. Oberman states that it internally used a 68-bit mantissa with an 18-bit
5812 * exponent.
5813 *
5814 * However, even when sticking to 67-bit constants / 68-bit mantissas, I have
5815 * not yet successfully reproduced the exact results from an Intel 10980XE;
5816 * there are always some rounding differences. Not going to spend too much time
5817 * on getting this 100% the same, at least not now.
5818 *
5819 * P.S. If someone is really curious about the 8087 and its constants:
5820 * http://www.righto.com/2020/05/extracting-rom-constants-from-8087-math.html
5821 *
5822 *
5823 * @param pr80Val The exponent value (x), less than 1.0, greater than
5824 * -1.0 and not zero. This can be a normal, denormal
5825 * or pseudo-denormal value.
5826 * @param pr80Result Where to return the result.
5827 * @param fFcw FPU control word.
5828 * @param fFsw FPU status word.
5829 */
5830static uint16_t iemAImpl_f2xm1_r80_normal(PCRTFLOAT80U pr80Val, PRTFLOAT80U pr80Result, uint16_t fFcw, uint16_t fFsw)
5831{
5832 /* As mentioned above, we can skip the expensive polynomial calculation
5833 as it will be close enough to 1.0 that it makes no difference.
5834
5835 The cutoff point for the Intel 10980XE is exponents >= -69. Intel
5836 also seems to be using a 67-bit or 68-bit constant value, and we get
5837 a smattering of rounding differences if we go for higher precision. */
5838 if (pr80Val->s.uExponent <= RTFLOAT80U_EXP_BIAS - 69)
5839 {
5840 RTUINT256U u256;
5841 RTUInt128MulByU64Ex(&u256, &g_u128Ln2MantissaIntel, pr80Val->s.uMantissa);
5842 u256.QWords.qw0 |= 1; /* force #PE */
5843 fFsw = iemFpuFloat80RoundAndComposeFrom192(pr80Result, pr80Val->s.fSign, &u256,
5844 !RTFLOAT80U_IS_PSEUDO_DENORMAL(pr80Val) && !RTFLOAT80U_IS_DENORMAL(pr80Val)
5845 ? (int32_t)pr80Val->s.uExponent - RTFLOAT80U_EXP_BIAS
5846 : 1 - RTFLOAT80U_EXP_BIAS,
5847 fFcw, fFsw);
5848 }
5849 else
5850 {
5851#ifdef IEM_WITH_FLOAT128_FOR_FPU
5852 /* This approach is not good enough for small values, we end up with zero. */
5853 int const fOldRounding = iemFpuF128SetRounding(fFcw);
5854 _Float128 rd128Val = iemFpuF128FromFloat80(pr80Val, fFcw);
5855 _Float128 rd128Result = powf128(2.0L, rd128Val);
5856 rd128Result -= 1.0L;
5857 fFsw = iemFpuF128ToFloat80(pr80Result, rd128Result, fFcw, fFsw);
5858 iemFpuF128RestoreRounding(fOldRounding);
5859
5860# else
5861 softfloat_state_t SoftState = SOFTFLOAT_STATE_INIT_DEFAULTS();
5862 float128_t const x = iemFpuSoftF128FromFloat80(pr80Val);
5863
5864 /* As mentioned above, enforce 68-bit internal mantissa width to better
5865 match the Intel 10980XE results. */
5866 unsigned const cPrecision = 68;
5867
5868 /* first calculate z = x * ln2 */
5869 float128_t z = iemFpuSoftF128Precision(f128_mul(x, iemFpuSoftF128PrecisionIprt(&g_r128Ln2, cPrecision), &SoftState),
5870 cPrecision);
5871
5872 /* Then do the polynomial evaluation. */
5873 float128_t r = iemFpuSoftF128HornerPoly(z, g_ar128F2xm1HornerConsts, RT_ELEMENTS(g_ar128F2xm1HornerConsts),
5874 cPrecision, &SoftState);
5875 r = f128_mul(z, r, &SoftState);
5876
5877 /* Output the result. */
5878 fFsw = iemFpuSoftF128ToFloat80(pr80Result, r, fFcw, fFsw);
5879# endif
5880 }
5881 return fFsw;
5882}
5883
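/*
 * Back-of-the-envelope illustration of the SoftFloat path above (hedged
 * sketch, never compiled): the same z = x * ln2, e^z - 1 = z * p(z) evaluation
 * in plain double precision.  Good enough to see the structure, but it lacks
 * the extended 68-bit precision needed to match real hardware.  M_LN2 is the
 * POSIX <math.h> constant; the function name is made up for this example.
 */
# if 0 /* example */
#  include <math.h>
static double iemFpuExampleF2xm1(double x) /* assumes |x| < 1 */
{
    /* c[k] = 1/(k+1)!, i.e. the '1 + z/2! + z^2/3! + ...' constants from the notes above. */
    static double const s_aConsts[] = { 1.0, 1.0/2, 1.0/6, 1.0/24, 1.0/120, 1.0/720, 1.0/5040, 1.0/40320 };
    double const z = x * M_LN2;             /* 2^x - 1 == e^(x*ln2) - 1 */
    size_t       i = RT_ELEMENTS(s_aConsts) - 1;
    double       p = s_aConsts[i];
    while (i-- > 0)
        p = p * z + s_aConsts[i];           /* Horner evaluation of p(z) */
    return z * p;                           /* e^z - 1 = z * p(z) */
}
# endif
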
5884
5885IEM_DECL_IMPL_DEF(void, iemAImpl_f2xm1_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5886{
5887 uint16_t const fFcw = pFpuState->FCW;
5888 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (7 << X86_FSW_TOP_SHIFT);
5889
5890 if (RTFLOAT80U_IS_NORMAL(pr80Val))
5891 {
5892 if (pr80Val->s.uExponent < RTFLOAT80U_EXP_BIAS)
5893 fFsw = iemAImpl_f2xm1_r80_normal(pr80Val, &pFpuRes->r80Result, fFcw, fFsw);
5894 else
5895 {
5896 /* Special case:
5897 2^+1.0 - 1.0 = 1.0
5898 2^-1.0 - 1.0 = -0.5 */
5899 if ( pr80Val->s.uExponent == RTFLOAT80U_EXP_BIAS
5900 && pr80Val->s.uMantissa == RT_BIT_64(63))
5901 {
5902 pFpuRes->r80Result.s.uMantissa = RT_BIT_64(63);
5903 pFpuRes->r80Result.s.uExponent = RTFLOAT80U_EXP_BIAS - pr80Val->s.fSign;
5904 pFpuRes->r80Result.s.fSign = pr80Val->s.fSign;
5905 }
5906 /* ST(0) > 1.0 || ST(0) < -1.0: undefined behavior */
5907 /** @todo 287 is documented to only accept values 0 <= ST(0) <= 0.5. */
5908 else
5909 pFpuRes->r80Result = *pr80Val;
5910 fFsw |= X86_FSW_PE;
5911 if (!(fFcw & X86_FCW_PM))
5912 fFsw |= X86_FSW_ES | X86_FSW_B;
5913 }
5914 }
5915 else if ( RTFLOAT80U_IS_ZERO(pr80Val)
5916 || RTFLOAT80U_IS_QUIET_NAN(pr80Val)
5917 || RTFLOAT80U_IS_INDEFINITE(pr80Val))
5918 pFpuRes->r80Result = *pr80Val;
5919 else if (RTFLOAT80U_IS_INF(pr80Val))
5920 pFpuRes->r80Result = pr80Val->s.fSign ? g_ar80One[1] : *pr80Val;
5921 else if (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val))
5922 {
5923 fFsw |= X86_FSW_DE;
5924 if (fFcw & X86_FCW_DM)
5925 fFsw = iemAImpl_f2xm1_r80_normal(pr80Val, &pFpuRes->r80Result, fFcw, fFsw);
5926 else
5927 {
5928 pFpuRes->r80Result = *pr80Val;
5929 fFsw |= X86_FSW_ES | X86_FSW_B;
5930 }
5931 }
5932 else
5933 {
5934 if ( ( RTFLOAT80U_IS_UNNORMAL(pr80Val)
5935 || RTFLOAT80U_IS_PSEUDO_NAN(pr80Val))
5936 && (fFcw & X86_FCW_IM))
5937 pFpuRes->r80Result = g_r80Indefinite;
5938 else
5939 {
5940 pFpuRes->r80Result = *pr80Val;
5941 if (RTFLOAT80U_IS_SIGNALLING_NAN(pr80Val) && (fFcw & X86_FCW_IM))
5942 pFpuRes->r80Result.s.uMantissa |= RT_BIT_64(62); /* make it quiet */
5943 }
5944 fFsw |= X86_FSW_IE;
5945 if (!(fFcw & X86_FCW_IM))
5946 fFsw |= X86_FSW_ES | X86_FSW_B;
5947 }
5948 pFpuRes->FSW = fFsw;
5949}
5950
5951#endif /* IEM_WITHOUT_ASSEMBLY */
5952
5953IEM_DECL_IMPL_DEF(void, iemAImpl_f2xm1_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5954{
5955 iemAImpl_f2xm1_r80(pFpuState, pFpuRes, pr80Val);
5956}
5957
5958IEM_DECL_IMPL_DEF(void, iemAImpl_f2xm1_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5959{
5960 iemAImpl_f2xm1_r80(pFpuState, pFpuRes, pr80Val);
5961}
5962
5963#ifdef IEM_WITHOUT_ASSEMBLY
5964
5965IEM_DECL_IMPL_DEF(void, iemAImpl_fabs_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5966{
5967 pFpuRes->FSW = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (7 << X86_FSW_TOP_SHIFT);
5968 pFpuRes->r80Result = *pr80Val;
5969 pFpuRes->r80Result.s.fSign = 0;
5970}
5971
5972
5973IEM_DECL_IMPL_DEF(void, iemAImpl_fchs_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes, PCRTFLOAT80U pr80Val))
5974{
5975 pFpuRes->FSW = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (7 << X86_FSW_TOP_SHIFT);
5976 pFpuRes->r80Result = *pr80Val;
5977 pFpuRes->r80Result.s.fSign = !pr80Val->s.fSign;
5978}
5979
5980
5981IEM_DECL_IMPL_DEF(void, iemAImpl_fxtract_r80_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULTTWO pFpuResTwo, PCRTFLOAT80U pr80Val))
5982{
5983 uint16_t const fFcw = pFpuState->FCW;
5984 uint16_t fFsw = (pFpuState->FSW & (X86_FSW_C0 | X86_FSW_C2 | X86_FSW_C3)) | (6 << X86_FSW_TOP_SHIFT);
5985
5986 if (RTFLOAT80U_IS_NORMAL(pr80Val))
5987 {
5988 softfloat_state_t Ignored = SOFTFLOAT_STATE_INIT_DEFAULTS();
5989 iemFpuSoftF80ToIprt(&pFpuResTwo->r80Result1, i32_to_extF80((int32_t)pr80Val->s.uExponent - RTFLOAT80U_EXP_BIAS, &Ignored));
5990
5991 pFpuResTwo->r80Result2.s.fSign = pr80Val->s.fSign;
5992 pFpuResTwo->r80Result2.s.uExponent = RTFLOAT80U_EXP_BIAS;
5993 pFpuResTwo->r80Result2.s.uMantissa = pr80Val->s.uMantissa;
5994 }
5995 else if (RTFLOAT80U_IS_ZERO(pr80Val))
5996 {
5997 fFsw |= X86_FSW_ZE;
5998 if (fFcw & X86_FCW_ZM)
5999 {
6000 pFpuResTwo->r80Result1 = g_ar80Infinity[1];
6001 pFpuResTwo->r80Result2 = *pr80Val;
6002 }
6003 else
6004 {
6005 pFpuResTwo->r80Result2 = *pr80Val;
6006 fFsw = X86_FSW_ES | X86_FSW_B | (fFsw & ~X86_FSW_TOP_MASK) | (7 << X86_FSW_TOP_SHIFT);
6007 }
6008 }
6009 else if (RTFLOAT80U_IS_DENORMAL_OR_PSEUDO_DENORMAL(pr80Val))
6010 {
6011 fFsw |= X86_FSW_DE;
6012 if (fFcw & X86_FCW_DM)
6013 {
6014 pFpuResTwo->r80Result2.s.fSign = pr80Val->s.fSign;
6015 pFpuResTwo->r80Result2.s.uExponent = RTFLOAT80U_EXP_BIAS;
6016 pFpuResTwo->r80Result2.s.uMantissa = pr80Val->s.uMantissa;
6017 int32_t iExponent = -16382;
6018 while (!(pFpuResTwo->r80Result2.s.uMantissa & RT_BIT_64(63)))
6019 {
6020 pFpuResTwo->r80Result2.s.uMantissa <<= 1;
6021 iExponent--;
6022 }
6023
6024 softfloat_state_t Ignored = SOFTFLOAT_STATE_INIT_DEFAULTS();
6025 iemFpuSoftF80ToIprt(&pFpuResTwo->r80Result1, i32_to_extF80(iExponent, &Ignored));
6026 }
6027 else
6028 {
6029 pFpuResTwo->r80Result2 = *pr80Val;
6030 fFsw = X86_FSW_ES | X86_FSW_B | (fFsw & ~X86_FSW_TOP_MASK) | (7 << X86_FSW_TOP_SHIFT);
6031 }
6032 }
6033 else if ( RTFLOAT80U_IS_QUIET_NAN(pr80Val)
6034 || RTFLOAT80U_IS_INDEFINITE(pr80Val))
6035 {
6036 pFpuResTwo->r80Result1 = *pr80Val;
6037 pFpuResTwo->r80Result2 = *pr80Val;
6038 }
6039 else if (RTFLOAT80U_IS_INF(pr80Val))
6040 {
6041 pFpuResTwo->r80Result1 = g_ar80Infinity[0];
6042 pFpuResTwo->r80Result2 = *pr80Val;
6043 }
6044 else
6045 {
6046 if (fFcw & X86_FCW_IM)
6047 {
6048 if (!RTFLOAT80U_IS_SIGNALLING_NAN(pr80Val))
6049 pFpuResTwo->r80Result1 = g_r80Indefinite;
6050 else
6051 {
6052 pFpuResTwo->r80Result1 = *pr80Val;
6053 pFpuResTwo->r80Result1.s.uMantissa |= RT_BIT_64(62); /* make it quiet */
6054 }
6055 pFpuResTwo->r80Result2 = pFpuResTwo->r80Result1;
6056 }
6057 else
6058 {
6059 pFpuResTwo->r80Result2 = *pr80Val;
6060 fFsw = X86_FSW_ES | X86_FSW_B | (fFsw & ~X86_FSW_TOP_MASK) | (7 << X86_FSW_TOP_SHIFT);
6061 }
6062 fFsw |= X86_FSW_IE;
6063 }
6064 pFpuResTwo->FSW = fFsw;
6065}
6066
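/*
 * Worked FXTRACT example (explanatory note): ST(0) = 6.5 = 1.625 * 2^2, i.e.
 * exponent RTFLOAT80U_EXP_BIAS + 2 and mantissa 0xd000000000000000.  The code
 * above returns r80Result1 = 2.0 (the unbiased exponent converted to a float)
 * and r80Result2 = 1.625 (the significand with its exponent rebiased into
 * [1.0, 2.0)), leaving the stack as ST(0)=1.625, ST(1)=2.0.
 */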
6067
6068IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2x_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
6069 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
6070{
6071 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
6072 AssertReleaseFailed();
6073}
6074
6075#endif /* IEM_WITHOUT_ASSEMBLY */
6076
6077IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2x_r80_by_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
6078 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
6079{
6080 iemAImpl_fyl2x_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
6081}
6082
6083IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2x_r80_by_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
6084 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
6085{
6086 iemAImpl_fyl2x_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
6087}
6088
6089#if defined(IEM_WITHOUT_ASSEMBLY)
6090
6091IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2xp1_r80_by_r80,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
6092 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
6093{
6094 RT_NOREF(pFpuState, pFpuRes, pr80Val1, pr80Val2);
6095 AssertReleaseFailed();
6096}
6097
6098#endif /* IEM_WITHOUT_ASSEMBLY */
6099
6100IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2xp1_r80_by_r80_intel,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
6101 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
6102{
6103 iemAImpl_fyl2xp1_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
6104}
6105
6106IEM_DECL_IMPL_DEF(void, iemAImpl_fyl2xp1_r80_by_r80_amd,(PCX86FXSTATE pFpuState, PIEMFPURESULT pFpuRes,
6107 PCRTFLOAT80U pr80Val1, PCRTFLOAT80U pr80Val2))
6108{
6109 iemAImpl_fyl2xp1_r80_by_r80(pFpuState, pFpuRes, pr80Val1, pr80Val2);
6110}
6111
6112
6113/*********************************************************************************************************************************
6114* MMX, SSE & AVX *
6115*********************************************************************************************************************************/
6116
6117IEM_DECL_IMPL_DEF(void, iemAImpl_movsldup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
6118{
6119 RT_NOREF(pFpuState);
6120 puDst->au32[0] = puSrc->au32[0];
6121 puDst->au32[1] = puSrc->au32[0];
6122 puDst->au32[2] = puSrc->au32[2];
6123 puDst->au32[3] = puSrc->au32[2];
6124}
6125
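/* In lane terms: { s0, s1, s2, s3 } -> { s0, s0, s2, s2 }, i.e. MOVSLDUP
   duplicates the low (even) single of each 64-bit pair. */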
6126#ifdef IEM_WITH_VEX
6127
6128IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
6129{
6130 pXState->x87.aXMM[iYRegDst].au32[0] = pXState->x87.aXMM[iYRegSrc].au32[0];
6131 pXState->x87.aXMM[iYRegDst].au32[1] = pXState->x87.aXMM[iYRegSrc].au32[0];
6132 pXState->x87.aXMM[iYRegDst].au32[2] = pXState->x87.aXMM[iYRegSrc].au32[2];
6133 pXState->x87.aXMM[iYRegDst].au32[3] = pXState->x87.aXMM[iYRegSrc].au32[2];
6134 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
6135 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[0];
6136 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
6137 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au32[2];
6138}
6139
6140
6141IEM_DECL_IMPL_DEF(void, iemAImpl_vmovsldup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
6142{
6143 pXState->x87.aXMM[iYRegDst].au32[0] = pSrc->au32[0];
6144 pXState->x87.aXMM[iYRegDst].au32[1] = pSrc->au32[0];
6145 pXState->x87.aXMM[iYRegDst].au32[2] = pSrc->au32[2];
6146 pXState->x87.aXMM[iYRegDst].au32[3] = pSrc->au32[2];
6147 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[0] = pSrc->au32[4];
6148 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[1] = pSrc->au32[4];
6149 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[2] = pSrc->au32[6];
6150 pXState->u.YmmHi.aYmmHi[iYRegDst].au32[3] = pSrc->au32[6];
6151}
6152
6153#endif /* IEM_WITH_VEX */
6154
6155
6156IEM_DECL_IMPL_DEF(void, iemAImpl_movshdup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, PCRTUINT128U puSrc))
6157{
6158 RT_NOREF(pFpuState);
6159 puDst->au32[0] = puSrc->au32[1];
6160 puDst->au32[1] = puSrc->au32[1];
6161 puDst->au32[2] = puSrc->au32[3];
6162 puDst->au32[3] = puSrc->au32[3];
6163}
6164
6165
6166IEM_DECL_IMPL_DEF(void, iemAImpl_movddup,(PCX86FXSTATE pFpuState, PRTUINT128U puDst, uint64_t uSrc))
6167{
6168 RT_NOREF(pFpuState);
6169 puDst->au64[0] = uSrc;
6170 puDst->au64[1] = uSrc;
6171}
6172
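/* In lane terms: { q0, q1 } -> { q0, q0 }; MOVDDUP only reads the low qword
   of the source. */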
6173#ifdef IEM_WITH_VEX
6174
6175IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rr,(PX86XSAVEAREA pXState, uint8_t iYRegDst, uint8_t iYRegSrc))
6176{
6177 pXState->x87.aXMM[iYRegDst].au64[0] = pXState->x87.aXMM[iYRegSrc].au64[0];
6178 pXState->x87.aXMM[iYRegDst].au64[1] = pXState->x87.aXMM[iYRegSrc].au64[0];
6179 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
6180 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pXState->u.YmmHi.aYmmHi[iYRegSrc].au64[0];
6181}
6182
6183IEM_DECL_IMPL_DEF(void, iemAImpl_vmovddup_256_rm,(PX86XSAVEAREA pXState, uint8_t iYRegDst, PCRTUINT256U pSrc))
6184{
6185 pXState->x87.aXMM[iYRegDst].au64[0] = pSrc->au64[0];
6186 pXState->x87.aXMM[iYRegDst].au64[1] = pSrc->au64[0];
6187 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[0] = pSrc->au64[2];
6188 pXState->u.YmmHi.aYmmHi[iYRegDst].au64[1] = pSrc->au64[2];
6189}
6190
6191#endif /* IEM_WITH_VEX */
6192
6193#ifdef IEM_WITHOUT_ASSEMBLY
6194
6195IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqb_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
6196{
6197 RT_NOREF(pFpuState, pu64Dst, pu64Src);
6198 AssertReleaseFailed();
6199}
6200
6201
6202IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqb_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
6203{
6204 RT_NOREF(pFpuState, pu128Dst, pu128Src);
6205 AssertReleaseFailed();
6206}
6207
6208
6209IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqw_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
6210{
6211 RT_NOREF(pFpuState, pu64Dst, pu64Src);
6212 AssertReleaseFailed();
6213}
6214
6215
6216IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqw_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
6217{
6218 RT_NOREF(pFpuState, pu128Dst, pu128Src);
6219 AssertReleaseFailed();
6220}
6221
6222
6223IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqd_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
6224{
6225 RT_NOREF(pFpuState, pu64Dst, pu64Src);
6226 AssertReleaseFailed();
6227}
6228
6229
6230IEM_DECL_IMPL_DEF(void, iemAImpl_pcmpeqd_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
6231{
6232 RT_NOREF(pFpuState, pu128Dst, pu128Src);
6233 AssertReleaseFailed();
6234}


IEM_DECL_IMPL_DEF(void, iemAImpl_pxor_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pxor_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}
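
/*
 * Illustrative sketch, not part of the original file: PXOR is a plain bitwise
 * exclusive-or of the two operands, conveniently done in two 64-bit chunks.
 * Hypothetical helper name.
 */
static void iemAImplSketch_pxor_u128(PRTUINT128U puDst, PCRTUINT128U puSrc)
{
    puDst->au64[0] ^= puSrc->au64[0];
    puDst->au64[1] ^= puSrc->au64[1];
}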


IEM_DECL_IMPL_DEF(void, iemAImpl_pmovmskb_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pmovmskb_u128,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu128Src);
    AssertReleaseFailed();
}
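
/*
 * Illustrative sketch, not part of the original file: PMOVMSKB gathers the
 * most significant bit of each source byte into the low bits of the
 * destination register and zeroes the rest; the 64-bit MMX variant is the
 * same over eight bytes.  Hypothetical helper name.
 */
static void iemAImplSketch_pmovmskb_u128(uint64_t *pu64Dst, PCRTUINT128U puSrc)
{
    uint64_t fMask = 0;
    for (unsigned i = 0; i < RT_ELEMENTS(puSrc->au8); i++)
        fMask |= (uint64_t)(puSrc->au8[i] >> 7) << i; /* sign bit of byte i -> mask bit i */
    *pu64Dst = fMask;
}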


IEM_DECL_IMPL_DEF(void, iemAImpl_pshufw,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src, uint8_t bEvil))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src, bEvil);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pshufhw,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src, uint8_t bEvil))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src, bEvil);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pshuflw,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src, uint8_t bEvil))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src, bEvil);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_pshufd,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src, uint8_t bEvil))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src, bEvil);
    AssertReleaseFailed();
}
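
/*
 * Illustrative sketch, not part of the original file: PSHUFD selects each of
 * the four destination dwords via the corresponding 2-bit field of the
 * immediate (bEvil); PSHUFLW/PSHUFHW apply the same selection to only the
 * low/high four words.  The source is copied first so the sketch also works
 * when destination and source overlap.  Hypothetical helper name.
 */
static void iemAImplSketch_pshufd(PRTUINT128U puDst, PCRTUINT128U puSrc, uint8_t bEvil)
{
    RTUINT128U const uSrc = *puSrc; /* local copy in case puDst == puSrc */
    puDst->au32[0] = uSrc.au32[ bEvil       & 3];
    puDst->au32[1] = uSrc.au32[(bEvil >> 2) & 3];
    puDst->au32[2] = uSrc.au32[(bEvil >> 4) & 3];
    puDst->au32[3] = uSrc.au32[(bEvil >> 6) & 3];
}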

/* PUNPCKHxxx */

IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhbw_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhbw_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhwd_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhwd_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhdq_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhdq_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckhqdq_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, PCRTUINT128U pu128Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu128Src);
    AssertReleaseFailed();
}
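
/*
 * Illustrative sketch, not part of the original file: PUNPCKHBW interleaves
 * the high eight bytes of destination and source, destination bytes landing
 * in the even result positions and source bytes in the odd ones; the wider
 * variants follow the same pattern with larger elements.  Hypothetical
 * helper name.
 */
static void iemAImplSketch_punpckhbw_u128(PRTUINT128U puDst, PCRTUINT128U puSrc)
{
    RTUINT128U const uSrc1 = *puDst; /* copies, since the interleave overwrites puDst */
    RTUINT128U const uSrc2 = *puSrc;
    for (unsigned i = 0; i < 8; i++)
    {
        puDst->au8[i * 2]     = uSrc1.au8[i + 8];
        puDst->au8[i * 2 + 1] = uSrc2.au8[i + 8];
    }
}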

/* PUNPCKLxxx */

IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklbw_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint32_t const *pu32Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu32Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklbw_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklwd_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint32_t const *pu32Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu32Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklwd_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckldq_u64,(PCX86FXSTATE pFpuState, uint64_t *pu64Dst, uint32_t const *pu32Src))
{
    RT_NOREF(pFpuState, pu64Dst, pu32Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpckldq_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu64Src);
    AssertReleaseFailed();
}


IEM_DECL_IMPL_DEF(void, iemAImpl_punpcklqdq_u128,(PCX86FXSTATE pFpuState, PRTUINT128U pu128Dst, uint64_t const *pu64Src))
{
    RT_NOREF(pFpuState, pu128Dst, pu64Src);
    AssertReleaseFailed();
}
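
/*
 * Illustrative sketch, not part of the original file: PUNPCKLBW interleaves
 * the low eight bytes of destination and source, which is presumably why the
 * 128-bit stubs above only receive the low source qword.  Hypothetical
 * helper name.
 */
static void iemAImplSketch_punpcklbw_u128(PRTUINT128U puDst, uint64_t const *pu64Src)
{
    RTUINT128U const uDstIn = *puDst;   /* copy, since the interleave overwrites puDst */
    uint64_t   const uSrc   = *pu64Src;
    for (unsigned i = 0; i < 8; i++)
    {
        puDst->au8[i * 2]     = uDstIn.au8[i];
        puDst->au8[i * 2 + 1] = (uint8_t)(uSrc >> (i * 8));
    }
}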

#endif /* IEM_WITHOUT_ASSEMBLY */