VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm@ 94620

Last change on this file since 94620 was 94440, checked in by vboxsync, 3 years ago

VMM/IEM: fxam does not raise any exceptions and has special classification result for an empty ST(0) register. bugref:9898

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 97.1 KB
Line 
1; $Id: IEMAllAImpl.asm 94440 2022-04-01 14:32:23Z vboxsync $
2;; @file
3; IEM - Instruction Implementation in Assembly.
4;
5
6;
7; Copyright (C) 2011-2022 Oracle Corporation
8;
9; This file is part of VirtualBox Open Source Edition (OSE), as
10; available from http://www.virtualbox.org. This file is free software;
11; you can redistribute it and/or modify it under the terms of the GNU
12; General Public License (GPL) as published by the Free Software
13; Foundation, in version 2 as it comes in the "COPYING" file of the
14; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16;
17
18
19;*********************************************************************************************************************************
20;* Header Files *
21;*********************************************************************************************************************************
22%include "VBox/asmdefs.mac"
23%include "VBox/err.mac"
24%include "iprt/x86.mac"
25
26
27;*********************************************************************************************************************************
28;* Defined Constants And Macros *
29;*********************************************************************************************************************************
30
;;
; Return from a fastcall function.
;
; Emits 'ret %1' only when both RT_ARCH_X86 and RT_OS_WINDOWS are defined;
; every other configuration gets a plain 'ret'.
;
; @param 1 The byte count handed to 'ret' in the x86/Windows case.
;
%macro RET_FASTCALL 1
 %ifndef RT_ARCH_X86
        ret
 %else
  %ifndef RT_OS_WINDOWS
        ret
  %else
        ret     %1
  %endif
 %endif
%endmacro
45
;;
; Produces the symbol name of a fastcall function.
;
; On x86 Windows the name is decorated as <prefix><name>@<cbArgs>; in every
; other configuration it is simply NAME(a_Name).
;
;; @todo 'global @fastcall@12' is still broken in yasm and requires dollar
; escaping (or whatever the dollar is good for here). Thus the ugly
; prefix argument.
;
%ifdef RT_ARCH_X86
 %ifdef RT_OS_WINDOWS
  %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) a_Prefix %+ a_Name %+ @ %+ a_cbArgs
 %else
  %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) NAME(a_Name)
 %endif
%else
 %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) NAME(a_Name)
%endif
60
;;
; Opens a fastcall procedure: publishes the symbol and emits its label.
;
; An 'export' directive is emitted for ASM_FORMAT_PE, and another one for
; ASM_FORMAT_OMF when assembling with NASM.  Unless ASM_FORMAT_BIN is defined
; the symbol is also declared 'global'.  The export/global directives pass
; '$@' as the NAME_FASTCALL prefix, whereas the label itself uses a plain '@'.
;
; @param 1 The function name (C).
; @param 2 The argument size on x86.
;
%macro BEGINPROC_FASTCALL 2
 %ifdef ASM_FORMAT_PE
        export  %1=NAME_FASTCALL(%1,%2,$@)
 %endif
 %ifdef ASM_FORMAT_OMF
  %ifdef __NASM__
        export  NAME(%1) NAME_FASTCALL(%1,%2,$@)
  %endif
 %endif
 %ifndef ASM_FORMAT_BIN
        global  NAME_FASTCALL(%1,%2,$@)
 %endif
NAME_FASTCALL(%1,%2,@):
%endmacro
81
82
83;
; We employ some macro assembly here to hide the calling convention differences.
85;
86%ifdef RT_ARCH_AMD64
; One-argument functions (AMD64): the argument arrives in a register, so
; there is nothing to set up and nothing to pop on return.
 %macro PROLOGUE_1_ARGS 0
 %endmacro
 %macro EPILOGUE_1_ARGS 0
        ret
 %endmacro
 ; Accepts an optional (ignored) byte count so that code written for the
 ; x86 variant, which requires one parameter, also assembles on AMD64.
 ; Invocations without a parameter keep working.
 %macro EPILOGUE_1_ARGS_EX 0-1
        ret
 %endmacro
95
; Two, three and four argument functions (AMD64).
; The prologues expand to nothing and every epilogue is a bare 'ret'; the
; parameter taken by the _EX variants is ignored.

 ; -- two arguments --
 %macro PROLOGUE_2_ARGS 0
 %endmacro
 %macro EPILOGUE_2_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_2_ARGS_EX 1
        ret
 %endmacro

 ; -- three arguments --
 %macro PROLOGUE_3_ARGS 0
 %endmacro
 %macro EPILOGUE_3_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_3_ARGS_EX 1
        ret
 %endmacro

 ; -- four arguments --
 %macro PROLOGUE_4_ARGS 0
 %endmacro
 %macro EPILOGUE_4_ARGS 0
        ret
 %endmacro
 %macro EPILOGUE_4_ARGS_EX 1
        ret
 %endmacro
122
; Argument register aliases (AMD64).  A<n> is the full register carrying
; argument n, A<n>_32/_16/_8 are its narrower views.  No A3_8 is defined.
 %ifdef ASM_CALL64_GCC
  ; ASM_CALL64_GCC: rdi, rsi, rdx, rcx.
  %define A0        rdi
  %define A0_32     edi
  %define A0_16     di
  %define A0_8      dil

  %define A1        rsi
  %define A1_32     esi
  %define A1_16     si
  %define A1_8      sil

  %define A2        rdx
  %define A2_32     edx
  %define A2_16     dx
  %define A2_8      dl

  %define A3        rcx
  %define A3_32     ecx
  %define A3_16     cx
 %endif

 %ifdef ASM_CALL64_MSC
  ; ASM_CALL64_MSC: rcx, rdx, r8, r9.
  %define A0        rcx
  %define A0_32     ecx
  %define A0_16     cx
  %define A0_8      cl

  %define A1        rdx
  %define A1_32     edx
  %define A1_16     dx
  %define A1_8      dl

  %define A2        r8
  %define A2_32     r8d
  %define A2_16     r8w
  %define A2_8      r8b

  %define A3        r9
  %define A3_32     r9d
  %define A3_16     r9w
 %endif
164
; Temporary register aliases (AMD64): T0 = rax, T1 = r11, T2 = r10.
 %define T0         rax
 %define T0_32      eax
 %define T0_16      ax
 %define T0_8       al

 %define T1         r11
 %define T1_32      r11d
 %define T1_16      r11w
 %define T1_8       r11b

 ; T2 only exists on AMD64.
 %define T2         r10
 %define T2_32      r10d
 %define T2_16      r10w
 %define T2_8       r10b
179
180%else
181 ; x86
; One and two argument functions (x86).
; The prologue saves edi (which T1 is mapped to) and the epilogue restores it.
; The plain epilogues return with 'ret 0', the _EX ones with 'ret %1'.

 ; -- one argument --
 %macro PROLOGUE_1_ARGS 0
        push    edi
 %endmacro
 %macro EPILOGUE_1_ARGS 0
        pop     edi
        ret     0
 %endmacro
 %macro EPILOGUE_1_ARGS_EX 1
        pop     edi
        ret     %1
 %endmacro

 ; -- two arguments --
 %macro PROLOGUE_2_ARGS 0
        push    edi
 %endmacro
 %macro EPILOGUE_2_ARGS 0
        pop     edi
        ret     0
 %endmacro
 %macro EPILOGUE_2_ARGS_EX 1
        pop     edi
        ret     %1
 %endmacro
205
; Three argument functions (x86).
; ebx (A2) and edi (T1) are saved, and the third argument is fetched from the
; stack into ebx.
 %macro PROLOGUE_3_ARGS 0
        push    ebx
        push    edi
        mov     ebx, [esp + 8 + 4]      ; two saved registers, return address, then the argument.
 %endmacro
 ; @param 1 Number of bytes 'ret' removes from the stack; must be 4 or more.
 %macro EPILOGUE_3_ARGS_EX 1
  %if (%1) < 4
   %error "With three args, at least 4 bytes must be remove from the stack upon return (32-bit)."
  %endif
        pop     edi
        pop     ebx
        ret     %1
 %endmacro
 %macro EPILOGUE_3_ARGS 0
        EPILOGUE_3_ARGS_EX 4
 %endmacro
222
; Four argument functions (x86).
; ebx (A2), edi (T1) and esi (A3) are saved, and the third and fourth
; arguments are fetched from the stack into ebx and esi.
 %macro PROLOGUE_4_ARGS 0
        push    ebx
        push    edi
        push    esi
        mov     esi, [esp + 12 + 4 + 4] ; fourth argument.
        mov     ebx, [esp + 12 + 4 + 0] ; third argument.
 %endmacro
 ; @param 1 Number of bytes 'ret' removes from the stack; must be 8 or more.
 %macro EPILOGUE_4_ARGS_EX 1
  %if (%1) < 8
   %error "With four args, at least 8 bytes must be remove from the stack upon return (32-bit)."
  %endif
        pop     esi
        pop     edi
        pop     ebx
        ret     %1
 %endmacro
 %macro EPILOGUE_4_ARGS 0
        EPILOGUE_4_ARGS_EX 8
 %endmacro
242
; Register aliases (x86).
; A0/A1 are ecx/edx; A2/A3 are ebx/esi, which the PROLOGUE_3_ARGS and
; PROLOGUE_4_ARGS macros load from the stack.  No A3_8, T1_8 or T2 alias is
; defined here.
 %define A0         ecx
 %define A0_32      ecx
 %define A0_16      cx
 %define A0_8       cl

 %define A1         edx
 %define A1_32      edx
 %define A1_16      dx
 %define A1_8       dl

 %define A2         ebx
 %define A2_32      ebx
 %define A2_16      bx
 %define A2_8       bl

 %define A3         esi
 %define A3_32      esi
 %define A3_16      si

 %define T0         eax
 %define T0_32      eax
 %define T0_16      ax
 %define T0_8       al

 %define T1         edi
 %define T1_32      edi
 %define T1_16      di
270%endif
271
272
;;
; Loads the guest's modified and undefined flags from [%1] into the CPU EFLAGS.
;
; The current CPU flags are pushed, the bits in (%2 | %3) are replaced with
; the corresponding bits read from [%1], and the result is popped back into
; EFLAGS.  The '%if (%3) != 0' guard is commented out, so this is done
; unconditionally.
;
; @remarks Clobbers T0, stack. Changes EFLAGS.
; @param 1 The register (A0..A3) pointing to the eflags.
; @param 2 The set of modified flags.
; @param 3 The set of undefined flags.
;
%macro IEM_MAYBE_LOAD_FLAGS 3
 ;%if (%3) != 0
        mov     T0_32, [%1]                     ; fetch the guest flags (mov leaves EFLAGS alone).
        pushf                                   ; park the host flags on the stack.
        and     dword [xSP], ~(%2 | %3)         ; drop the modified and undefined bits from the host copy.
        and     T0_32, (%2 | %3)                ; keep only those bits of the guest copy.
        or      [xSP], T0                       ; splice the guest bits into the host copy.
        popf                                    ; activate the mixture.
 ;%endif
%endmacro
292
;;
; Copies the modified and undefined flags from the CPU EFLAGS into [%1].
;
; Expands to nothing when (%2 | %3) is zero.
;
; @remarks Clobbers T0, T1, stack.
; @param 1 The register pointing to the EFLAGS.
; @param 2 The mask of modified flags to save.
; @param 3 The mask of undefined flags to (maybe) save.
;
%macro IEM_SAVE_FLAGS 3
 %if (%2 | %3) != 0
        pushf
        pop     T1                              ; T1 = CPU flags.
        and     T1_32, (%2 | %3)                ; keep just the flags being saved.
        mov     T0_32, [%1]                     ; T0 = stored flags.
        and     T0_32, ~(%2 | %3)               ; make room for the new bits.
        or      T0_32, T1_32                    ; merge.
        mov     [%1], T0_32                     ; write back.
 %endif
%endmacro
312
;;
; Updates [%1] from the CPU EFLAGS and applies fixed clear and set masks.
;
; The bits in %2 are taken from the CPU, the bits in %3 are cleared and the
; bits in %4 are set.  Expands to nothing when all three masks are zero.
;
; @remarks Clobbers T0, T1, stack.
; @param 1 The register pointing to the EFLAGS.
; @param 2 The mask of modified flags to save.
; @param 3 Mask of additional flags to always clear
; @param 4 Mask of additional flags to always set.
;
%macro IEM_SAVE_AND_ADJUST_FLAGS 4
 %if (%2 | %3 | %4) != 0
        pushf
        pop     T1                              ; T1 = CPU flags.
        and     T1_32, (%2)                     ; keep the modified flags only.
        mov     T0_32, [%1]                     ; T0 = stored flags.
        and     T0_32, ~(%2 | %3)               ; strip the modified and always-cleared bits.
        or      T0_32, T1_32                    ; merge in the CPU's modified flags.
  %if (%4) != 0
        or      T0_32, %4                       ; force the always-set bits.
  %endif
        mov     [%1], T0_32                     ; write back.
 %endif
%endmacro
336
;;
; Updates [%1] from the CPU EFLAGS (%2), a clear mask (%3), the sign bit of
; the result (%4, %5 bits wide) and the parity table entry selected by the
; low byte of %6.
;
; This is used by MUL and IMUL, where we got result (%4 & %6) in xAX which is
; also T0.  The new flag value is therefore assembled in T1; the CPU flags
; are fetched into T2 on AMD64, and into T0 (saved and restored around the
; fetch) on x86.
;
; @remarks Clobbers T1, stack, %6 and EFLAGS; on AMD64 also T2.
; @param 1 The register pointing to the EFLAGS.
; @param 2 The mask of modified flags to save.
; @param 3 Mask of additional flags to always clear
; @param 4 The result register to set SF by.
; @param 5 The width of the %4 register in bits (8, 16, 32, or 64).
;          NOTE(review): %4 is tested with 'bt', which has no 8-bit register
;          form - confirm before using a width of 8.
; @param 6 The (full) register containing the parity table index. Will be modified!
;
%macro IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF 6
 %ifdef RT_ARCH_AMD64
        pushf
        pop     T2                              ; T2 = CPU flags.
        mov     T1_32, [%1]                     ; T1 = stored flags.
        and     T1_32, ~(%2 | %3 | X86_EFL_PF | X86_EFL_SF) ; strip modified, always-cleared and the two calculated flags.
        and     T2_32, (%2)                     ; keep the modified flags only.
        or      T1_32, T2_32                    ; merge.
 %else
        push    T0                              ; T0 may hold the result, keep it safe.
        pushf
        pop     T0                              ; T0 = CPU flags.
        mov     T1_32, [%1]                     ; T1 = stored flags.
        and     T1_32, ~(%2 | %3 | X86_EFL_PF | X86_EFL_SF) ; strip modified, always-cleared and the two calculated flags.
        and     T0_32, (%2)                     ; keep the modified flags only.
        or      T1_32, T0_32                    ; merge.
        pop     T0
 %endif

        ; SF goes first since %4 is likely to be the same register as %6, which is trashed below.
        bt      %4, %5 - 1
        jnc     %%no_sign
        or      T1_32, X86_EFL_SF
 %%no_sign:

        ; PF is looked up in g_afParity using the low byte of %6.
        and     %6, 0xff
 %ifdef RT_ARCH_AMD64
        lea     T2, [NAME(g_afParity) xWrtRIP]
        or      T1_8, [T2 + %6]
 %else
        or      T1_8, [NAME(g_afParity) + %6]
 %endif

        mov     [%1], T1_32                     ; write back.
%endmacro
390
;;
; Applies fixed clear and set masks to the flags stored at [%1].
;
; The CPU EFLAGS are not consulted.  Expands to nothing when both masks are
; zero.
;
; @remarks Clobbers T0.
; @param 1 The register pointing to the EFLAGS.
; @param 2 Mask of additional flags to always clear
; @param 3 Mask of additional flags to always set.
;
%macro IEM_ADJUST_FLAGS 3
 %if (%2 | %3) != 0
        mov     T0_32, [%1]                     ; T0 = stored flags.
  %if (%2) != 0
        and     T0_32, ~(%2)                    ; clear.
  %endif
  %if (%3) != 0
        or      T0_32, %3                       ; set.
  %endif
        mov     [%1], T0_32                     ; write back.
 %endif
%endmacro
411
;;
; Applies fixed clear and set masks to the flags stored at [%1] and sets PF
; from the g_afParity entry selected by the low byte of %4.
;
; @remarks Clobbers T0, %4 and EFLAGS; on AMD64 also T2.
; @param 1 The register pointing to the EFLAGS.
; @param 2 Mask of additional flags to always clear
; @param 3 Mask of additional flags to always set.
; @param 4 The (full) register containing the parity table index. Will be modified!
;
%macro IEM_ADJUST_FLAGS_WITH_PARITY 4
        mov     T0_32, [%1]                     ; T0 = stored flags.
        and     T0_32, ~(%2 | X86_EFL_PF)       ; drop PF along with the always-cleared flags.
 %if (%3) != 0
        or      T0_32, %3                       ; force the always-set flags.
 %endif
        and     %4, 0xff                        ; reduce the index to one byte.
 %ifdef RT_ARCH_AMD64
        lea     T2, [NAME(g_afParity) xWrtRIP]
        or      T0_8, [T2 + %4]
 %else
        or      T0_8, [NAME(g_afParity) + %4]
 %endif
        mov     [%1], T0_32                     ; write back.
%endmacro
436
437
438;*********************************************************************************************************************************
439;* External Symbols *
440;*********************************************************************************************************************************
441extern NAME(g_afParity)
442
443
;;
; Generates the workers for a binary operator instruction.
;
; Workers are emitted for 8, 16 and 32-bit operands, plus a 64-bit one on
; AMD64 (on 32-bit hosts the 64-bit access requires hand coding).  When %2 is
; non-zero a second set with a 'lock' prefix and a '_locked' name suffix is
; emitted as well.
;
; Every worker takes a pointer to the destination memory operand in A0, the
; source register operand in A1 and a pointer to the eflags in A2.
;
; @param 1 The instruction mnemonic.
; @param 2 Non-zero if there should be a locked version.
; @param 3 The modified flags.
; @param 4 The undefined flags.
;
%macro IEMIMPL_BIN_OP 4
BEGINCODE

; ---- plain variants ----

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS    A2, %3, %4
        %1      byte [A0], A1_8
        IEM_SAVE_FLAGS          A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS    A2, %3, %4
        %1      word [A0], A1_16
        IEM_SAVE_FLAGS          A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS    A2, %3, %4
        %1      dword [A0], A1_32
        IEM_SAVE_FLAGS          A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS    A2, %3, %4
        %1      qword [A0], A1
        IEM_SAVE_FLAGS          A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

; ---- lock prefixed variants ----

 %if %2 != 0

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS    A2, %3, %4
        lock %1 byte [A0], A1_8
        IEM_SAVE_FLAGS          A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS    A2, %3, %4
        lock %1 word [A0], A1_16
        IEM_SAVE_FLAGS          A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS    A2, %3, %4
        lock %1 dword [A0], A1_32
        IEM_SAVE_FLAGS          A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

  %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS    A2, %3, %4
        lock %1 qword [A0], A1
        IEM_SAVE_FLAGS          A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %endif ; RT_ARCH_AMD64
 %endif ; locked
%endmacro

;              instr, lock, modified flags,                                                                        undefined flags
IEMIMPL_BIN_OP add,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP adc,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP sub,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP sbb,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP or,   1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP xor,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP and,  1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
IEMIMPL_BIN_OP cmp,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_BIN_OP test, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
543
544
;;
; Generates the workers for a bit test (and modify) instruction.
;
; Workers are emitted for 16 and 32-bit operands, plus a 64-bit one on AMD64
; (on 32-bit hosts the 64-bit access requires hand coding).  When %2 is
; non-zero a second set with a 'lock' prefix and a '_locked' name suffix is
; emitted as well.
;
; Every worker takes a pointer to the destination memory operand in A0, the
; source register operand in A1 and a pointer to the eflags in A2.
;
; @param 1 The instruction mnemonic.
; @param 2 Non-zero if there should be a locked version.
; @param 3 The modified flags.
; @param 4 The undefined flags.
;
%macro IEMIMPL_BIT_OP 4
BEGINCODE

; ---- plain variants ----

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS    A2, %3, %4
        %1      word [A0], A1_16
        IEM_SAVE_FLAGS          A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS    A2, %3, %4
        %1      dword [A0], A1_32
        IEM_SAVE_FLAGS          A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS    A2, %3, %4
        %1      qword [A0], A1
        IEM_SAVE_FLAGS          A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

; ---- lock prefixed variants ----

 %if %2 != 0

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS    A2, %3, %4
        lock %1 word [A0], A1_16
        IEM_SAVE_FLAGS          A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_locked

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS    A2, %3, %4
        lock %1 dword [A0], A1_32
        IEM_SAVE_FLAGS          A2, %3, %4
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_locked

  %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS    A2, %3, %4
        lock %1 qword [A0], A1
        IEM_SAVE_FLAGS          A2, %3, %4
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
  %endif ; RT_ARCH_AMD64
 %endif ; locked
%endmacro

;              instr, lock, modified flags, undefined flags
IEMIMPL_BIT_OP bt,  0, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP btc, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP bts, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_BIT_OP btr, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
621
;;
; Macro for implementing a bit search operator.
;
; This is the three parameter overload of IEMIMPL_BIT_OP; the assembler
; selects it by parameter count.
;
; This will generate code for the 16, 32 and 64 bit accesses, except on 32-bit
; system where the 64-bit accesses requires hand coding.  For each width
; three workers are produced:
;   - the unsuffixed one, which runs the instruction with the guest flags
;     loaded and saves the CPU's modified and undefined flags back;
;   - '_intel', which derives the flags itself: when the source is zero it
;     clears OF, SF, AF and CF and sets ZF and PF, otherwise it clears OF, SF,
;     AF, CF and ZF and looks PF up from the low byte of the result;
;   - '_amd', which only transfers the flags in %2 from the CPU.
;
; All the functions takes a pointer to the destination memory operand in A0,
; the source register operand in A1 and a pointer to eflags in A2.
;
; In the ZF case the destination register is 'undefined', however it seems that
; both AMD and Intel just leaves it as is.  The undefined EFLAGS differs between
; AMD and Intel and according to https://www.sandpile.org/x86/flags.htm between
; Intel microarchitectures.  We only implement 'intel' and 'amd' variation with
; the behaviour of more recent CPUs (Intel 10980X and AMD 3990X).
;
; @param 1 The instruction mnemonic.
; @param 2 The modified flags.
; @param 3 The undefined flags.
;
%macro IEMIMPL_BIT_OP 3
BEGINCODE

; ---- 16-bit ----

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS    A2, %2, %3
        %1      T0_16, A1_16
        jz      .unchanged_dst                  ; source is zero: leave the destination alone.
        mov     [A0], T0_16
.unchanged_dst:
        IEM_SAVE_FLAGS          A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16 %+ _intel, 12
        PROLOGUE_3_ARGS
        %1      T1_16, A1_16
        jz      .unchanged_dst
        mov     [A0], T1_16
        IEM_ADJUST_FLAGS_WITH_PARITY A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF | X86_EFL_ZF, 0, T1
        EPILOGUE_3_ARGS
.unchanged_dst:
        IEM_ADJUST_FLAGS A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF, X86_EFL_ZF | X86_EFL_PF
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_intel

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16 %+ _amd, 12
        PROLOGUE_3_ARGS
        %1      T0_16, A1_16
        jz      .unchanged_dst
        mov     [A0], T0_16
.unchanged_dst:
        IEM_SAVE_AND_ADJUST_FLAGS A2, %2, 0, 0  ; Only the ZF flag is modified on AMD Zen 2.
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16_amd

; ---- 32-bit ----

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS    A2, %2, %3
        %1      T0_32, A1_32
        jz      .unchanged_dst
        mov     [A0], T0_32
.unchanged_dst:
        IEM_SAVE_FLAGS          A2, %2, %3
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32 %+ _intel, 12
        PROLOGUE_3_ARGS
        %1      T1_32, A1_32
        jz      .unchanged_dst
        mov     [A0], T1_32
        IEM_ADJUST_FLAGS_WITH_PARITY A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF | X86_EFL_ZF, 0, T1
        EPILOGUE_3_ARGS
.unchanged_dst:
        IEM_ADJUST_FLAGS A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF, X86_EFL_ZF | X86_EFL_PF
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_intel

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32 %+ _amd, 12
        PROLOGUE_3_ARGS
        %1      T0_32, A1_32
        jz      .unchanged_dst
        mov     [A0], T0_32
.unchanged_dst:
        IEM_SAVE_AND_ADJUST_FLAGS A2, %2, 0, 0  ; Only the ZF flag is modified on AMD Zen 2.
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32_amd

; ---- 64-bit ----

 %ifdef RT_ARCH_AMD64

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS    A2, %2, %3
        %1      T0, A1
        jz      .unchanged_dst
        mov     [A0], T0
.unchanged_dst:
        IEM_SAVE_FLAGS          A2, %2, %3
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64 %+ _intel, 16
        PROLOGUE_3_ARGS
        ; No IEM_MAYBE_LOAD_FLAGS here, same as the 16 and 32-bit '_intel'
        ; workers: the flags written to [A2] are computed from the stored
        ; value and the result, never read back from the CPU.
        %1      T1, A1
        jz      .unchanged_dst
        mov     [A0], T1
        IEM_ADJUST_FLAGS_WITH_PARITY A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF | X86_EFL_ZF, 0, T1
        EPILOGUE_3_ARGS_EX 8
.unchanged_dst:
        IEM_ADJUST_FLAGS A2, X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_CF, X86_EFL_ZF | X86_EFL_PF
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_intel

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64 %+ _amd, 16
        PROLOGUE_3_ARGS
        %1      T0, A1
        jz      .unchanged_dst
        mov     [A0], T0
.unchanged_dst:
        IEM_SAVE_AND_ADJUST_FLAGS A2, %2, 0, 0  ; Only the ZF flag is modified on AMD Zen 2.
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_ %+ %1 %+ _u64_amd

 %endif ; RT_ARCH_AMD64
%endmacro

IEMIMPL_BIT_OP bsf, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
IEMIMPL_BIT_OP bsr, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
752
753
;
; IMUL is also a similar but yet different case (no lock, no mem dst).
; The rDX:rAX variant of imul is handled together with mul further down.
;
; Each worker multiplies the value at [A0] by A1, stores the product back to
; [A0] and updates the eflags A2 points to.
;
BEGINCODE
; @param 1 EFLAGS that are modified.
; @param 2 Undefined EFLAGS.
; @param 3 Function suffix.
; @param 4 EFLAGS variation.  When 1 (used for the '_intel' suffix below) the
;          flags in %1 are taken from the CPU, AF and ZF are cleared and SF
;          and PF are calculated from the result.  Any other value (0 for
;          native and 2 for '_amd' below) just saves the %1 and %2 flags
;          from the CPU.
%macro IEMIMPL_IMUL_TWO 4

BEGINPROC_FASTCALL iemAImpl_imul_two_u16 %+ %3, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS                 A2, %1, %2
        imul    A1_16, word [A0]
        mov     [A0], A1_16
 %if %4 != 1
        IEM_SAVE_FLAGS                       A2, %1, %2
 %else
        IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A2, %1, X86_EFL_AF | X86_EFL_ZF, A1_16, 16, A1
 %endif
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_imul_two_u16 %+ %3

BEGINPROC_FASTCALL iemAImpl_imul_two_u32 %+ %3, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS                 A2, %1, %2
        imul    A1_32, dword [A0]
        mov     [A0], A1_32
 %if %4 != 1
        IEM_SAVE_FLAGS                       A2, %1, %2
 %else
        IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A2, %1, X86_EFL_AF | X86_EFL_ZF, A1_32, 32, A1
 %endif
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_imul_two_u32 %+ %3

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_imul_two_u64 %+ %3, 16
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS                 A2, %1, %2
        imul    A1, qword [A0]
        mov     [A0], A1
  %if %4 != 1
        IEM_SAVE_FLAGS                       A2, %1, %2
  %else
        IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A2, %1, X86_EFL_AF | X86_EFL_ZF, A1, 64, A1
  %endif
        EPILOGUE_3_ARGS_EX 8
ENDPROC iemAImpl_imul_two_u64 %+ %3
 %endif ; RT_ARCH_AMD64
%endmacro

;                modified flags,            undefined flags,                                          suffix, variation
IEMIMPL_IMUL_TWO X86_EFL_OF | X86_EFL_CF, X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF, , 0
IEMIMPL_IMUL_TWO X86_EFL_OF | X86_EFL_CF, 0, _intel, 1
IEMIMPL_IMUL_TWO X86_EFL_OF | X86_EFL_CF, 0, _amd, 2
809
810
811;
812; XCHG for memory operands. This implies locking. No flag changes.
813;
814; Each function takes two arguments, first the pointer to the memory,
815; then the pointer to the register. They all return void.
816;
817BEGINCODE
; Swaps the byte at [A0] with the byte at [A1] using an xchg with memory.
BEGINPROC_FASTCALL iemAImpl_xchg_u8_locked, 8
        PROLOGUE_2_ARGS
        mov     T0_8, [A1]              ; fetch the register value.
        xchg    T0_8, [A0]              ; trade it for the memory value.
        mov     [A1], T0_8              ; hand the old memory value back.
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u8_locked
825
; Swaps the word at [A0] with the word at [A1] using an xchg with memory.
BEGINPROC_FASTCALL iemAImpl_xchg_u16_locked, 8
        PROLOGUE_2_ARGS
        mov     T0_16, [A1]             ; fetch the register value.
        xchg    T0_16, [A0]             ; trade it for the memory value.
        mov     [A1], T0_16             ; hand the old memory value back.
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u16_locked
833
; Swaps the dword at [A0] with the dword at [A1] using an xchg with memory.
BEGINPROC_FASTCALL iemAImpl_xchg_u32_locked, 8
        PROLOGUE_2_ARGS
        mov     T0_32, [A1]             ; fetch the register value.
        xchg    T0_32, [A0]             ; trade it for the memory value.
        mov     [A1], T0_32             ; hand the old memory value back.
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u32_locked
841
%ifdef RT_ARCH_AMD64
; Swaps the qword at [A0] with the qword at [A1] using an xchg with memory.
BEGINPROC_FASTCALL iemAImpl_xchg_u64_locked, 8
        PROLOGUE_2_ARGS
        mov     T0, [A1]                ; fetch the register value.
        xchg    T0, [A0]                ; trade it for the memory value.
        mov     [A1], T0                ; hand the old memory value back.
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u64_locked
%endif
851
852; Unlocked variants for fDisregardLock mode.
853
; Swaps the byte at [A0] with the byte at [A1] using plain loads and stores
; (no xchg with memory).
BEGINPROC_FASTCALL iemAImpl_xchg_u8_unlocked, 8
        PROLOGUE_2_ARGS
%ifdef RT_ARCH_AMD64
        mov     T0_8, [A1]
        mov     T1_8, [A0]
        mov     [A0], T0_8
        mov     [A1], T1_8
%else
        ; T1 is edi on x86, which has no byte sized alias (there is no T1_8),
        ; so the second byte is parked in ah, the high byte of T0.
        mov     al, [A1]
        mov     ah, [A0]
        mov     [A0], al
        mov     [A1], ah
%endif
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u8_unlocked
862
; Swaps the word at [A0] with the word at [A1] using plain loads and stores.
BEGINPROC_FASTCALL iemAImpl_xchg_u16_unlocked, 8
        PROLOGUE_2_ARGS
        mov     T1_16, [A0]             ; memory value.
        mov     T0_16, [A1]             ; register value.
        mov     [A0], T0_16
        mov     [A1], T1_16
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u16_unlocked
871
; Swaps the dword at [A0] with the dword at [A1] using plain loads and stores.
BEGINPROC_FASTCALL iemAImpl_xchg_u32_unlocked, 8
        PROLOGUE_2_ARGS
        mov     T1_32, [A0]             ; memory value.
        mov     T0_32, [A1]             ; register value.
        mov     [A0], T0_32
        mov     [A1], T1_32
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u32_unlocked
880
%ifdef RT_ARCH_AMD64
; Swaps the qword at [A0] with the qword at [A1] using plain loads and stores.
BEGINPROC_FASTCALL iemAImpl_xchg_u64_unlocked, 8
        PROLOGUE_2_ARGS
        mov     T1, [A0]                ; memory value.
        mov     T0, [A1]                ; register value.
        mov     [A0], T0
        mov     [A1], T1
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_xchg_u64_unlocked
%endif
891
892
893;
894; XADD for memory operands.
895;
896; Each function takes three arguments, first the pointer to the
897; memory/register, then the pointer to the register, and finally a pointer to
898; eflags. They all return void.
899;
900BEGINCODE
; 8-bit XADD: [A0] receives the sum, [A1] receives the previous [A0] value,
; and the six arithmetic flags are saved to [A2].
BEGINPROC_FASTCALL iemAImpl_xadd_u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_8, [A1]              ; must follow the flag load, which clobbers T0.
        xadd    [A0], T0_8
        mov     [A1], T0_8
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u8
910
; 16-bit XADD: [A0] receives the sum, [A1] receives the previous [A0] value,
; and the six arithmetic flags are saved to [A2].
BEGINPROC_FASTCALL iemAImpl_xadd_u16, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_16, [A1]             ; must follow the flag load, which clobbers T0.
        xadd    [A0], T0_16
        mov     [A1], T0_16
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u16
920
; 32-bit XADD: [A0] receives the sum, [A1] receives the previous [A0] value,
; and the six arithmetic flags are saved to [A2].
BEGINPROC_FASTCALL iemAImpl_xadd_u32, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_32, [A1]             ; must follow the flag load, which clobbers T0.
        xadd    [A0], T0_32
        mov     [A1], T0_32
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u32
930
%ifdef RT_ARCH_AMD64
; 64-bit XADD: [A0] receives the sum, [A1] receives the previous [A0] value,
; and the six arithmetic flags are saved to [A2].
BEGINPROC_FASTCALL iemAImpl_xadd_u64, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0, [A1]                ; must follow the flag load, which clobbers T0.
        xadd    [A0], T0
        mov     [A1], T0
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u64
%endif ; RT_ARCH_AMD64
942
; 8-bit XADD with a lock prefix: [A0] receives the sum, [A1] receives the
; previous [A0] value, and the six arithmetic flags are saved to [A2].
BEGINPROC_FASTCALL iemAImpl_xadd_u8_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_8, [A1]              ; must follow the flag load, which clobbers T0.
        lock xadd [A0], T0_8
        mov     [A1], T0_8
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u8_locked
952
; 16-bit XADD with a lock prefix: [A0] receives the sum, [A1] receives the
; previous [A0] value, and the six arithmetic flags are saved to [A2].
BEGINPROC_FASTCALL iemAImpl_xadd_u16_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_16, [A1]             ; must follow the flag load, which clobbers T0.
        lock xadd [A0], T0_16
        mov     [A1], T0_16
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u16_locked
962
; 32-bit XADD with a lock prefix: [A0] receives the sum, [A1] receives the
; previous [A0] value, and the six arithmetic flags are saved to [A2].
BEGINPROC_FASTCALL iemAImpl_xadd_u32_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0_32, [A1]             ; must follow the flag load, which clobbers T0.
        lock xadd [A0], T0_32
        mov     [A1], T0_32
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u32_locked
972
%ifdef RT_ARCH_AMD64
; 64-bit XADD with a lock prefix: [A0] receives the sum, [A1] receives the
; previous [A0] value, and the six arithmetic flags are saved to [A2].
BEGINPROC_FASTCALL iemAImpl_xadd_u64_locked, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        mov     T0, [A1]                ; must follow the flag load, which clobbers T0.
        lock xadd [A0], T0
        mov     [A1], T0
        IEM_SAVE_FLAGS       A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_xadd_u64_locked
%endif ; RT_ARCH_AMD64
984
985
986;
987; CMPXCHG8B.
988;
989; These are tricky register wise, so the code is duplicated for each calling
990; convention.
991;
992; WARNING! This code make ASSUMPTIONS about which registers T1 and T0 are mapped to!
993;
994; C-proto:
995; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
996; uint32_t *pEFlags));
997;
998; Note! Identical to iemAImpl_cmpxchg16b.
999;
1000BEGINCODE
; The instruction dictates edx:eax (comparand / old value out) and ecx:ebx
; (replacement), so the incoming argument registers are shuffled out of the
; way first.  Each calling convention gets its own copy of the sequence.
; The cmpxchg8b is always issued with a lock prefix.
BEGINPROC_FASTCALL iemAImpl_cmpxchg8b, 16
%ifdef RT_ARCH_AMD64
 %ifdef ASM_CALL64_MSC
        ; In: rcx = pu64Dst, rdx = pu64EaxEdx, r8 = pu64EbxEcx, r9 = pEFlags.
        push    rbx

        mov     r11, rdx                ; pu64EaxEdx (is also T1)
        mov     r10, rcx                ; pu64Dst

        mov     ebx, [r8]               ; replacement, low half.
        mov     ecx, [r8 + 4]           ; replacement, high half.
        IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     eax, [r11]              ; comparand, low half.
        mov     edx, [r11 + 4]          ; comparand, high half.

        lock cmpxchg8b [r10]

        mov     [r11], eax              ; write edx:eax back to *pu64EaxEdx.
        mov     [r11 + 4], edx
        IEM_SAVE_FLAGS       r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)

        pop     rbx
        ret
 %else
        ; In: rdi = pu64Dst, rsi = pu64EaxEdx, rdx = pu64EbxEcx, rcx = pEFlags.
        push    rbx

        mov     r10, rcx                ; pEFlags
        mov     r11, rdx                ; pu64EbxEcx (is also T1)

        mov     ebx, [r11]              ; replacement, low half.
        mov     ecx, [r11 + 4]          ; replacement, high half.
        IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     eax, [rsi]              ; comparand, low half.
        mov     edx, [rsi + 4]          ; comparand, high half.

        lock cmpxchg8b [rdi]

        mov     [rsi], eax              ; write edx:eax back to *pu64EaxEdx.
        mov     [rsi + 4], edx
        IEM_SAVE_FLAGS       r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)

        pop     rbx
        ret

 %endif
%else
        ; In: ecx = pu64Dst, edx = pu64EaxEdx, stack = pu64EbxEcx, pEFlags.
        push    esi
        push    edi
        push    ebx
        push    ebp

        mov     edi, ecx                ; pu64Dst
        mov     esi, edx                ; pu64EaxEdx
        mov     ecx, [esp + 16 + 4 + 0] ; pu64EbxEcx
        mov     ebp, [esp + 16 + 4 + 4] ; pEFlags

        mov     ebx, [ecx]              ; replacement, low half.
        mov     ecx, [ecx + 4]          ; replacement, high half (the pointer in ecx is done with).
        IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
        mov     eax, [esi]              ; comparand, low half.
        mov     edx, [esi + 4]          ; comparand, high half.

        lock cmpxchg8b [edi]

        mov     [esi], eax              ; write edx:eax back to *pu64EaxEdx.
        mov     [esi + 4], edx
        IEM_SAVE_FLAGS       ebp, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, edi)

        pop     ebp
        pop     ebx
        pop     edi
        pop     esi
        ret     8
%endif
ENDPROC iemAImpl_cmpxchg8b
1075
BEGINPROC_FASTCALL iemAImpl_cmpxchg8b_locked, 16
        ; iemAImpl_cmpxchg8b always uses a lock prefix, so simply forward to it.
        jmp     NAME_FASTCALL(iemAImpl_cmpxchg8b,16,$@)
ENDPROC iemAImpl_cmpxchg8b_locked
1080
1081%ifdef RT_ARCH_AMD64
1082
1083;
1084; CMPXCHG16B.
1085;
1086; These are tricky register wise, so the code is duplicated for each calling
1087; convention.
1088;
1089; WARNING! This code makes ASSUMPTIONS about which registers T1 and T0 are mapped to!
1090;
1091; C-proto:
1092; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b,(PRTUINT128U pu128Dst, PRTUINT128U pu128RaxRdx, PRTUINT128U pu128RbxRcx,
1093; uint32_t *pEFlags));
1094;
1095; Note! Same structure as iemAImpl_cmpxchg8b, using 64-bit halves and cmpxchg16b.
1096;
1097BEGINCODE
1098BEGINPROC_FASTCALL iemAImpl_cmpxchg16b, 16
1099 %ifdef ASM_CALL64_MSC
; Register arguments as used on this path: rcx=pu128Dst, rdx=pu128RaxRdx,
; r8=pu128RbxRcx, r9=pEFlags.
1100 push rbx ; rbx is overwritten below with the low half of the replacement value.
1101
1102 mov r11, rdx ; pu128RaxRdx (is also T1) - moved out of rdx, which cmpxchg16b uses for the comparand.
1103 mov r10, rcx ; pu128Dst - moved out of rcx, which cmpxchg16b uses for the replacement value.
1104
1105 mov rbx, [r8] ; rcx:rbx = *pu128RbxRcx (replacement value).
1106 mov rcx, [r8 + 8]
1107 IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
1108 mov rax, [r11] ; rdx:rax = *pu128RaxRdx (comparand); loaded after the flag macro since that clobbers T0.
1109 mov rdx, [r11 + 8]
1110
1111 lock cmpxchg16b [r10]
1112
1113 mov [r11], rax ; Write rdx:rax back before IEM_SAVE_FLAGS clobbers r11.
1114 mov [r11 + 8], rdx
1115 IEM_SAVE_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
1116
1117 pop rbx
1118 ret
1119 %else
; Register arguments as used on this path: rdi=pu128Dst, rsi=pu128RaxRdx,
; rdx=pu128RbxRcx, rcx=pEFlags.
1120 push rbx ; rbx is overwritten below with the low half of the replacement value.
1121
1122 mov r10, rcx ; pEFlags
1123 mov r11, rdx ; pu128RbxRcx (is also T1)
1124
1125 mov rbx, [r11] ; rcx:rbx = *pu128RbxRcx (replacement value).
1126 mov rcx, [r11 + 8]
1127 IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
1128 mov rax, [rsi] ; rdx:rax = *pu128RaxRdx (comparand).
1129 mov rdx, [rsi + 8]
1130
1131 lock cmpxchg16b [rdi]
1132
1133 mov [rsi], rax ; Always write rdx:rax back to *pu128RaxRdx.
1134 mov [rsi + 8], rdx
1135 IEM_SAVE_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
1136
1137 pop rbx
1138 ret
1139
1140 %endif
1141ENDPROC iemAImpl_cmpxchg16b
1142
1143BEGINPROC_FASTCALL iemAImpl_cmpxchg16b_locked, 16
1144 ; Lazy bird always lock prefixes cmpxchg16b, so the locked variant simply
; tail-jumps to the plain one with the arguments left untouched.
1145 jmp NAME_FASTCALL(iemAImpl_cmpxchg16b,16,$@)
1146ENDPROC iemAImpl_cmpxchg16b_locked
1147
1148%endif ; RT_ARCH_AMD64
1149
1150
;
; CMPXCHG.
;
; WARNING! This code makes ASSUMPTIONS about which registers T1 and T0 are mapped to!
;
; C-proto:
;   IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg,(uintX_t *puXDst, uintX_t *puEax, uintX_t uReg, uint32_t *pEFlags));
;
; A0 points to the destination operand, A1 points to the guest accumulator
; (loaded as the comparand and always written back afterwards), A2 holds the
; replacement value and A3 points to the guest EFLAGS.  On 32-bit hosts the
; 64-bit variant receives a pointer to the replacement value instead and is
; implemented with cmpxchg8b.
;
; @param        1       Instruction prefix: empty or 'lock'.
; @param        2       Function name suffix: empty or '_locked'.
;
BEGINCODE
%macro IEMIMPL_CMPXCHG 2
BEGINPROC_FASTCALL iemAImpl_cmpxchg_u8 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     al, [A1]                ; Load the comparand only after the flag macro, which clobbers eax.
        %1 cmpxchg [A0], A2_8
        mov     [A1], al                ; Always write the accumulator back.
        IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u8 %+ %2

BEGINPROC_FASTCALL iemAImpl_cmpxchg_u16 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     ax, [A1]
        %1 cmpxchg [A0], A2_16
        mov     [A1], ax
        IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u16 %+ %2

BEGINPROC_FASTCALL iemAImpl_cmpxchg_u32 %+ %2, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     eax, [A1]
        %1 cmpxchg [A0], A2_32
        mov     [A1], eax
        IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_cmpxchg_u32 %+ %2

BEGINPROC_FASTCALL iemAImpl_cmpxchg_u64 %+ %2, 16
%ifdef RT_ARCH_AMD64
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     rax, [A1]
        %1 cmpxchg [A0], A2
        mov     [A1], rax
        IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
        EPILOGUE_4_ARGS
%else
        ;
        ; Must use cmpxchg8b here.  See also iemAImpl_cmpxchg8b.
        ; Note that the lock prefix is always applied on this path.
        ;
        push    esi
        push    edi
        push    ebx
        push    ebp

        mov     edi, ecx                ; pu64Dst
        mov     esi, edx                ; pu64Rax
        mov     ecx, [esp + 16 + 4 + 0] ; pu64Reg - Note! Pointer on 32-bit hosts!
        mov     ebp, [esp + 16 + 4 + 4] ; pEFlags

        mov     ebx, [ecx]              ; ecx:ebx = replacement value.
        mov     ecx, [ecx + 4]
        IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
        mov     eax, [esi]              ; edx:eax = comparand (guest rax).
        mov     edx, [esi + 4]

        lock cmpxchg8b [edi]

        ; cmpxchg8b only updates ZF, whereas CMPXCHG also sets CF, PF, AF, SF
        ; and OF from comparing the accumulator with the destination, so the
        ; remaining flags are produced by an explicit compare here.
        ; ZF=0 means the compare failed: only then is the real compare needed.
        jnz     .cmpxchg8b_not_equal
        cmp     eax, eax                ; Equal: produce the flags of an equal compare (ZF=1).
.store:
        mov     [esi], eax              ; mov does not touch the flags being saved below.
        mov     [esi + 4], edx
        IEM_SAVE_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, edi)

        pop     ebp
        pop     ebx
        pop     edi
        pop     esi
        ret     8

.cmpxchg8b_not_equal:
        ; edx:eax now holds the destination value and [esi] still holds the
        ; old accumulator, so compare accumulator against destination.
        cmp     [esi + 4], edx          ;; @todo FIXME - verify 64-bit compare implementation
        jne     .store
        cmp     [esi], eax
        jmp     .store

%endif
ENDPROC iemAImpl_cmpxchg_u64 %+ %2
%endmacro ; IEMIMPL_CMPXCHG

IEMIMPL_CMPXCHG , ,
IEMIMPL_CMPXCHG lock, _locked
1248
1249;;
1250; Macro for implementing a unary operator.
1251;
1252; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
1253; variants, except on 32-bit system where the 64-bit accesses requires hand
1254; coding.
1255;
1256; All the functions take a pointer to the destination memory operand in A0
1257; and a pointer to eflags in A1.
1258;
1259; @param 1 The instruction mnemonic.
1260; @param 2 The modified flags.
1261; @param 3 The undefined flags.
1262;
1263%macro IEMIMPL_UNARY_OP 3
1264BEGINCODE
1265BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 8
1266 PROLOGUE_2_ARGS
1267 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1268 %1 byte [A0] ; Apply the instruction directly to the memory operand.
1269 IEM_SAVE_FLAGS A1, %2, %3
1270 EPILOGUE_2_ARGS
1271ENDPROC iemAImpl_ %+ %1 %+ _u8
1272
1273BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 8
1274 PROLOGUE_2_ARGS
1275 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1276 lock %1 byte [A0] ; Same as the plain variant, with a lock prefix.
1277 IEM_SAVE_FLAGS A1, %2, %3
1278 EPILOGUE_2_ARGS
1279ENDPROC iemAImpl_ %+ %1 %+ _u8_locked
1280
1281BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 8
1282 PROLOGUE_2_ARGS
1283 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1284 %1 word [A0]
1285 IEM_SAVE_FLAGS A1, %2, %3
1286 EPILOGUE_2_ARGS
1287ENDPROC iemAImpl_ %+ %1 %+ _u16
1288
1289BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 8
1290 PROLOGUE_2_ARGS
1291 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1292 lock %1 word [A0]
1293 IEM_SAVE_FLAGS A1, %2, %3
1294 EPILOGUE_2_ARGS
1295ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
1296
1297BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 8
1298 PROLOGUE_2_ARGS
1299 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1300 %1 dword [A0]
1301 IEM_SAVE_FLAGS A1, %2, %3
1302 EPILOGUE_2_ARGS
1303ENDPROC iemAImpl_ %+ %1 %+ _u32
1304
1305BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 8
1306 PROLOGUE_2_ARGS
1307 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1308 lock %1 dword [A0]
1309 IEM_SAVE_FLAGS A1, %2, %3
1310 EPILOGUE_2_ARGS
1311ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
1312
1313 %ifdef RT_ARCH_AMD64
; The 64-bit variants are only generated for 64-bit hosts.
1314BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
1315 PROLOGUE_2_ARGS
1316 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1317 %1 qword [A0]
1318 IEM_SAVE_FLAGS A1, %2, %3
1319 EPILOGUE_2_ARGS
1320ENDPROC iemAImpl_ %+ %1 %+ _u64
1321
1322BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
1323 PROLOGUE_2_ARGS
1324 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1325 lock %1 qword [A0]
1326 IEM_SAVE_FLAGS A1, %2, %3
1327 EPILOGUE_2_ARGS
1328ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
1329 %endif ; RT_ARCH_AMD64
1330
1331%endmacro
1332
; Instantiations: mnemonic, modified flags, undefined flags.
1333IEMIMPL_UNARY_OP inc, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
1334IEMIMPL_UNARY_OP dec, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
1335IEMIMPL_UNARY_OP neg, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
1336IEMIMPL_UNARY_OP not, 0, 0
1337
1338
1339;
1340; BSWAP. No flag changes.
1341;
1342; Each function takes one argument, pointer to the value to bswap
1343; (input/output). They all return void.
1344;
1345BEGINPROC_FASTCALL iemAImpl_bswap_u16, 4
1346 PROLOGUE_1_ARGS
1347 mov T0_32, [A0] ; just in case any of the upper bits are used.
1348 db 66h ; Hand-emitted operand-size prefix for the bswap below.
; NOTE(review): presumably this runs the 16-bit form of BSWAP on the host so
; that the guest sees whatever the host CPU produces - confirm.
1349 bswap T0_32
1350 mov [A0], T0_32 ; All 32 bits are written back, not just the low word.
1351 EPILOGUE_1_ARGS
1352ENDPROC iemAImpl_bswap_u16
1353
; Byte-swaps the 32-bit value that A0 points to, in place.
1354BEGINPROC_FASTCALL iemAImpl_bswap_u32, 4
1355 PROLOGUE_1_ARGS
1356 mov T0_32, [A0]
1357 bswap T0_32
1358 mov [A0], T0_32
1359 EPILOGUE_1_ARGS
1360ENDPROC iemAImpl_bswap_u32
1361
; Byte-swaps the 64-bit value that A0 points to, in place.
1362BEGINPROC_FASTCALL iemAImpl_bswap_u64, 4
1363%ifdef RT_ARCH_AMD64
1364 PROLOGUE_1_ARGS
1365 mov T0, [A0]
1366 bswap T0
1367 mov [A0], T0
1368 EPILOGUE_1_ARGS
1369%else
; 32-bit host: byte-swap each half and store the halves in exchanged positions.
1370 PROLOGUE_1_ARGS
1371 mov T0, [A0] ; T0 = low dword
1372 mov T1, [A0 + 4] ; T1 = high dword
1373 bswap T0
1374 bswap T1
1375 mov [A0 + 4], T0 ; swapped low dword becomes the new high dword
1376 mov [A0], T1 ; swapped high dword becomes the new low dword
1377 EPILOGUE_1_ARGS
1378%endif
1379ENDPROC iemAImpl_bswap_u64
1380
1381
1382;;
1383; Macro for implementing a shift operation.
1384;
1385; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1386; 32-bit system where the 64-bit accesses requires hand coding.
1387;
1388; All the functions takes a pointer to the destination memory operand in A0,
1389; the shift count in A1 and a pointer to eflags in A2.
1390;
1391; @param 1 The instruction mnemonic.
1392; @param 2 The modified flags.
1393; @param 3 The undefined flags.
1394;
1395; Makes ASSUMPTIONS about A0, A1 and A2 assignments.
1396;
1397; @note the _intel and _amd variants are implemented in C.
1398;
1399%macro IEMIMPL_SHIFT_OP 3
1400BEGINCODE
1401BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
1402 PROLOGUE_3_ARGS
1403 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1404 %ifdef ASM_CALL64_GCC
1405 mov cl, A1_8 ; The shift count must be in cl.
1406 %1 byte [A0], cl
1407 %else
1408 xchg A1, A0 ; Swap so that A1 holds the pointer; the count is then taken from cl (assumes A0 is xCX here).
1409 %1 byte [A1], cl
1410 %endif
1411 IEM_SAVE_FLAGS A2, %2, %3
1412 EPILOGUE_3_ARGS
1413ENDPROC iemAImpl_ %+ %1 %+ _u8
1414
1415BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
1416 PROLOGUE_3_ARGS
1417 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1418 %ifdef ASM_CALL64_GCC
1419 mov cl, A1_8 ; The shift count must be in cl.
1420 %1 word [A0], cl
1421 %else
1422 xchg A1, A0 ; Swap so that A1 holds the pointer; the count is then taken from cl (assumes A0 is xCX here).
1423 %1 word [A1], cl
1424 %endif
1425 IEM_SAVE_FLAGS A2, %2, %3
1426 EPILOGUE_3_ARGS
1427ENDPROC iemAImpl_ %+ %1 %+ _u16
1428
1429BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
1430 PROLOGUE_3_ARGS
1431 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1432 %ifdef ASM_CALL64_GCC
1433 mov cl, A1_8 ; The shift count must be in cl.
1434 %1 dword [A0], cl
1435 %else
1436 xchg A1, A0 ; Swap so that A1 holds the pointer; the count is then taken from cl (assumes A0 is xCX here).
1437 %1 dword [A1], cl
1438 %endif
1439 IEM_SAVE_FLAGS A2, %2, %3
1440 EPILOGUE_3_ARGS
1441ENDPROC iemAImpl_ %+ %1 %+ _u32
1442
1443 %ifdef RT_ARCH_AMD64
; The 64-bit variant is only generated for 64-bit hosts.
1444BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
1445 PROLOGUE_3_ARGS
1446 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1447 %ifdef ASM_CALL64_GCC
1448 mov cl, A1_8 ; The shift count must be in cl.
1449 %1 qword [A0], cl
1450 %else
1451 xchg A1, A0 ; Swap so that A1 holds the pointer; the count is then taken from cl (assumes A0 is xCX here).
1452 %1 qword [A1], cl
1453 %endif
1454 IEM_SAVE_FLAGS A2, %2, %3
1455 EPILOGUE_3_ARGS
1456ENDPROC iemAImpl_ %+ %1 %+ _u64
1457 %endif ; RT_ARCH_AMD64
1458
1459%endmacro
1460
; Instantiations: mnemonic, modified flags, undefined flags.
1461IEMIMPL_SHIFT_OP rol, (X86_EFL_OF | X86_EFL_CF), 0
1462IEMIMPL_SHIFT_OP ror, (X86_EFL_OF | X86_EFL_CF), 0
1463IEMIMPL_SHIFT_OP rcl, (X86_EFL_OF | X86_EFL_CF), 0
1464IEMIMPL_SHIFT_OP rcr, (X86_EFL_OF | X86_EFL_CF), 0
1465IEMIMPL_SHIFT_OP shl, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1466IEMIMPL_SHIFT_OP shr, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1467IEMIMPL_SHIFT_OP sar, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1468
1469
1470;;
1471; Macro for implementing a double precision shift operation.
1472;
1473; This will generate code for the 16, 32 and 64 bit accesses, except on
1474; 32-bit system where the 64-bit accesses requires hand coding.
1475;
1476; The functions take a pointer to the destination operand (r/m) in A0, the
1477; source (reg) in A1, the shift count in A2 and a pointer to the eflags
; variable/register in A3.
1478;
1479; @param 1 The instruction mnemonic.
1480; @param 2 The modified flags.
1481; @param 3 The undefined flags.
1482;
1483; Makes ASSUMPTIONS about A0, A1, A2 and A3 assignments.
1484;
1485; @note the _intel and _amd variants are implemented in C.
1486;
1487%macro IEMIMPL_SHIFT_DBL_OP 3
1488BEGINCODE
1489BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
1490 PROLOGUE_4_ARGS
1491 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1492 %ifdef ASM_CALL64_GCC
1493 xchg A3, A2 ; Bring the count into cl (assumes A3 is rcx here); A2 temporarily holds pEFlags.
1494 %1 [A0], A1_16, cl
1495 xchg A3, A2 ; Restore A3 = pEFlags for IEM_SAVE_FLAGS; xchg leaves the flags alone.
1496 %else
1497 xchg A0, A2 ; Bring the count into cl (assumes A0 is xCX here); A2 now holds the destination pointer.
1498 %1 [A2], A1_16, cl
1499 %endif
1500 IEM_SAVE_FLAGS A3, %2, %3
1501 EPILOGUE_4_ARGS
1502ENDPROC iemAImpl_ %+ %1 %+ _u16
1503
1504BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
1505 PROLOGUE_4_ARGS
1506 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1507 %ifdef ASM_CALL64_GCC
1508 xchg A3, A2 ; Bring the count into cl (assumes A3 is rcx here); A2 temporarily holds pEFlags.
1509 %1 [A0], A1_32, cl
1510 xchg A3, A2 ; Restore A3 = pEFlags for IEM_SAVE_FLAGS; xchg leaves the flags alone.
1511 %else
1512 xchg A0, A2 ; Bring the count into cl (assumes A0 is xCX here); A2 now holds the destination pointer.
1513 %1 [A2], A1_32, cl
1514 %endif
1515 IEM_SAVE_FLAGS A3, %2, %3
1516 EPILOGUE_4_ARGS
1517ENDPROC iemAImpl_ %+ %1 %+ _u32
1518
1519 %ifdef RT_ARCH_AMD64
; The 64-bit variant is only generated for 64-bit hosts.
1520BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
1521 PROLOGUE_4_ARGS
1522 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1523 %ifdef ASM_CALL64_GCC
1524 xchg A3, A2 ; Bring the count into cl (assumes A3 is rcx here); A2 temporarily holds pEFlags.
1525 %1 [A0], A1, cl
1526 xchg A3, A2 ; Restore A3 = pEFlags for IEM_SAVE_FLAGS; xchg leaves the flags alone.
1527 %else
1528 xchg A0, A2 ; Bring the count into cl (assumes A0 is xCX here); A2 now holds the destination pointer.
1529 %1 [A2], A1, cl
1530 %endif
1531 IEM_SAVE_FLAGS A3, %2, %3
1532 EPILOGUE_4_ARGS_EX 12
1533ENDPROC iemAImpl_ %+ %1 %+ _u64
1534 %endif ; RT_ARCH_AMD64
1535
1536%endmacro
1537
; Instantiations: mnemonic, modified flags, undefined flags.
1538IEMIMPL_SHIFT_DBL_OP shld, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1539IEMIMPL_SHIFT_DBL_OP shrd, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1540
1541
1542;;
1543; Macro for implementing multiplication operations.
1544;
1545; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1546; 32-bit system where the 64-bit accesses requires hand coding.
1547;
1548; The 8-bit function only operates on AX, so it takes no DX pointer: it gets
; a pointer to AX in A0, the operand in A1 and a pointer to eflags in A2. The other
1549; functions takes a pointer to rAX in A0, rDX in A1, the operand in A2 and a
1550; pointer to eflags in A3.
1551;
1552; The functions all return 0 so the caller can be used for div/idiv as well as
1553; for the mul/imul implementation.
1554;
1555; @param 1 The instruction mnemonic.
1556; @param 2 The modified flags.
1557; @param 3 The undefined flags.
1558; @param 4 Name suffix.
1559; @param 5 EFLAGS behaviour: 0 for native, 1 for intel and 2 for AMD.
1560;
1561; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
1562;
1563%macro IEMIMPL_MUL_OP 5
1564BEGINCODE
1565BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8 %+ %4, 12
1566 PROLOGUE_3_ARGS
1567 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1568 mov al, [A0]
1569 %1 A1_8
1570 mov [A0], ax ; The 16-bit product goes back into the guest AX.
1571 %if %5 != 1
1572 IEM_SAVE_FLAGS A2, %2, %3
1573 %else
; Intel flavour: uses the flag macro that also calculates SF and PF from the result.
1574 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A2, %2, X86_EFL_AF | X86_EFL_ZF, ax, 8, xAX
1575 %endif
1576 xor eax, eax ; Return 0.
1577 EPILOGUE_3_ARGS
1578ENDPROC iemAImpl_ %+ %1 %+ _u8 %+ %4
1579
1580BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16 %+ %4, 16
1581 PROLOGUE_4_ARGS
1582 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1583 mov ax, [A0]
1584 %ifdef ASM_CALL64_GCC
1585 %1 A2_16
1586 mov [A0], ax
1587 mov [A1], dx
1588 %else
1589 mov T1, A1 ; Keep the rDX pointer in T1; A1 is assumed to be xDX here, which the multiply overwrites.
1590 %1 A2_16
1591 mov [A0], ax
1592 mov [T1], dx
1593 %endif
1594 %if %5 != 1
1595 IEM_SAVE_FLAGS A3, %2, %3
1596 %else
1597 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A3, %2, X86_EFL_AF | X86_EFL_ZF, ax, 16, xAX
1598 %endif
1599 xor eax, eax ; Return 0.
1600 EPILOGUE_4_ARGS
1601ENDPROC iemAImpl_ %+ %1 %+ _u16 %+ %4
1602
1603BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32 %+ %4, 16
1604 PROLOGUE_4_ARGS
1605 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1606 mov eax, [A0]
1607 %ifdef ASM_CALL64_GCC
1608 %1 A2_32
1609 mov [A0], eax
1610 mov [A1], edx
1611 %else
1612 mov T1, A1 ; Keep the rDX pointer in T1; A1 is assumed to be xDX here, which the multiply overwrites.
1613 %1 A2_32
1614 mov [A0], eax
1615 mov [T1], edx
1616 %endif
1617 %if %5 != 1
1618 IEM_SAVE_FLAGS A3, %2, %3
1619 %else
1620 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A3, %2, X86_EFL_AF | X86_EFL_ZF, eax, 32, xAX
1621 %endif
1622 xor eax, eax ; Return 0.
1623 EPILOGUE_4_ARGS
1624ENDPROC iemAImpl_ %+ %1 %+ _u32 %+ %4
1625
1626 %ifdef RT_ARCH_AMD64 ; The 32-bit host version lives in IEMAllAImplC.cpp.
1627BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64 %+ %4, 20
1628 PROLOGUE_4_ARGS
1629 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1630 mov rax, [A0]
1631 %ifdef ASM_CALL64_GCC
1632 %1 A2
1633 mov [A0], rax
1634 mov [A1], rdx
1635 %else
1636 mov T1, A1 ; Keep the rDX pointer in T1; A1 is assumed to be rdx here, which the multiply overwrites.
1637 %1 A2
1638 mov [A0], rax
1639 mov [T1], rdx
1640 %endif
1641 %if %5 != 1
1642 IEM_SAVE_FLAGS A3, %2, %3
1643 %else
1644 IEM_SAVE_FLAGS_ADJUST_AND_CALC_SF_PF A3, %2, X86_EFL_AF | X86_EFL_ZF, rax, 64, xAX
1645 %endif
1646 xor eax, eax ; Return 0.
1647 EPILOGUE_4_ARGS_EX 12
1648ENDPROC iemAImpl_ %+ %1 %+ _u64 %+ %4
1649 %endif ; RT_ARCH_AMD64
1650
1651%endmacro
1652
; Instantiations: mnemonic, modified flags, undefined flags, name suffix, EFLAGS behaviour.
1653IEMIMPL_MUL_OP mul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), , 0
1654IEMIMPL_MUL_OP mul, (X86_EFL_OF | X86_EFL_CF), 0, _intel, 1
1655IEMIMPL_MUL_OP mul, (X86_EFL_OF | X86_EFL_CF), 0, _amd, 2
1656IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), , 0
1657IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), 0, _intel, 1
1658IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), 0, _amd, 2
1659
1660
1661BEGINCODE
1662;;
1663; Worker function for negating the 64-bit number held in T1_32:T0_32
; (T1_32 = high dword, T0_32 = low dword).
;
; Works by computing 0 - value with a sub/sbb pair, using two stack slots as
; temporaries so that no other register is needed.
1664; @uses None (T0,T1)
1665BEGINPROC iemAImpl_negate_T0_T1_u32
1666 push 0
1667 push 0
1668 xchg T0_32, [xSP] ; T0 = 0, stack slot = original low half
1669 xchg T1_32, [xSP + xCB] ; T1 = 0, stack slot = original high half
1670 sub T0_32, [xSP] ; low = 0 - low
1671 sbb T1_32, [xSP + xCB] ; high = 0 - high - borrow
1672 add xSP, xCB*2 ; Drop the two temporaries; this also overwrites the flags from the sub/sbb.
1673 ret
1674ENDPROC iemAImpl_negate_T0_T1_u32
1675
1676%ifdef RT_ARCH_AMD64
1677;;
1678; Worker function for negating the 128-bit number held in T1:T0
; (T1 = high qword, T0 = low qword).
;
; Works by computing 0 - value with a sub/sbb pair, using two stack slots as
; temporaries so that no other register is needed.
1679; @uses None (T0,T1)
1680BEGINPROC iemAImpl_negate_T0_T1_u64
1681 push 0
1682 push 0
1683 xchg T0, [xSP] ; T0 = 0, stack slot = original low half
1684 xchg T1, [xSP + xCB] ; T1 = 0, stack slot = original high half
1685 sub T0, [xSP] ; low = 0 - low
1686 sbb T1, [xSP + xCB] ; high = 0 - high - borrow
1687 add xSP, xCB*2 ; Drop the two temporaries; this also overwrites the flags from the sub/sbb.
1688 ret
1689ENDPROC iemAImpl_negate_T0_T1_u64
1690%endif
1691
1692
;;
; Macro for implementing division operations.
;
; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
; 32-bit system where the 64-bit accesses requires hand coding.
;
; The 8-bit function only operates on AX, so it takes no DX pointer: it gets a
; pointer to AX in A0, the divisor in A1 and a pointer to eflags in A2.  The
; other functions take a pointer to rAX in A0, rDX in A1, the operand in A2
; and a pointer to eflags in A3.
;
; The functions all return 0 on success and -1 if a divide error should be
; raised by the caller.  To be able to return -1 instead of faulting on the
; host, divide-by-zero and quotient overflow are detected before the host
; instruction is executed.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
; @param        4       1 if signed, 0 if unsigned.
; @param        5       Function suffix.
; @param        6       EFLAGS variation: 0 for native, 1 for intel (ignored),
;                       2 for AMD (set AF, clear PF, ZF and SF).
;
; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
;
; Signed overflow check, with n = operand width and the dividend 2n bits wide:
;   - Equal signs:     OK if (|dividend| >> (n-1)) < |divisor|.
;   - Different signs: OK if (|dividend| >> (n-1)) < |divisor|, or if the two
;                      are equal and the low n-1 bits of |dividend| are below
;                      |divisor| (the quotient may reach -2^(n-1)).
;   - The most negative dividend (-2^(2n-1)) always overflows, since the
;     quotient magnitude is at least 2^n.  Its negation does not fit in the
;     magnitude compare, so it is rejected right after the negation.
;
%macro IEMIMPL_DIV_OP 6
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8 %+ %5, 12
        PROLOGUE_3_ARGS

        ; div by chainsaw check.
        test    A1_8, A1_8
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A0 + 1], A1_8          ; Overflow if the high byte of AX >= divisor.
        jae     .div_overflow
 %else
        mov     T0_16, [A0]             ; T0 = dividend
        mov     T1, A1                  ; T1 = saved divisor (because of missing T1_8 in 32-bit)
        test    A1_8, A1_8
        js      .divisor_negative
        test    T0_16, T0_16
        jns     .both_positive
        neg     T0_16
        js      .div_overflow           ; Dividend was 0x8000: always overflows.
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shr     T0_16, 7
        cmp     T0_8, A1_8
        pop     T0                      ; pop leaves the flags from the cmp intact.
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_8, 0x7f              ; Special case for covering (divisor - 1).
        cmp     T0_8, A1_8
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A1_8
        test    T0_16, T0_16
        jns     .one_of_each
        neg     T0_16
        js      .div_overflow           ; Dividend was 0x8000: always overflows.
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shr     T0_16, 7
        cmp     T0_8, A1_8
        jae     .div_overflow
.div_no_overflow:
        mov     A1, T1                  ; restore divisor
 %endif

        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     ax, [A0]
        %1      A1_8
        mov     [A0], ax
 %if %6 == 2 ; AMD64 3990X: Set AF and clear PF, ZF and SF.
        IEM_ADJUST_FLAGS A2, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF, X86_EFL_AF
 %else
        IEM_SAVE_FLAGS A2, %2, %3
 %endif
        xor     eax, eax                ; Return 0 (success).

.return:
        EPILOGUE_3_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1                 ; Tell the caller to raise a divide error.
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u8 %+ %5

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16 %+ %5, 16
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2_16, A2_16
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A1], A2_16             ; Overflow if DX >= divisor.
        jae     .div_overflow
 %else
        mov     T0_16, [A1]
        shl     T0_32, 16
        mov     T0_16, [A0]             ; T0 = dividend (DX:AX)
        mov     T1, A2                  ; T1 = divisor
        test    T1_16, T1_16
        js      .divisor_negative
        test    T0_32, T0_32
        jns     .both_positive
        neg     T0_32
        js      .div_overflow           ; Dividend was 0x80000000: always overflows.
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shr     T0_32, 15
        cmp     T0_16, T1_16
        pop     T0                      ; pop leaves the flags from the cmp intact.
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_16, 0x7fff           ; Special case for covering (divisor - 1).
        cmp     T0_16, T1_16
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     T1_16
        test    T0_32, T0_32
        jns     .one_of_each
        neg     T0_32
        js      .div_overflow           ; Dividend was 0x80000000: always overflows.
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shr     T0_32, 15
        cmp     T0_16, T1_16
        jae     .div_overflow
.div_no_overflow:
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; Divisor into T1; A2 is overwritten by the dx load below.
        mov     ax, [A0]
        mov     dx, [A1]
        %1      T1_16
        mov     [A0], ax
        mov     [A1], dx
 %else
        mov     T1, A1                  ; rDX pointer into T1; A1 is overwritten by the dx load below.
        mov     ax, [A0]
        mov     dx, [T1]
        %1      A2_16
        mov     [A0], ax
        mov     [T1], dx
 %endif
 %if %6 == 2 ; AMD64 3990X: Set AF and clear PF, ZF and SF.
        IEM_ADJUST_FLAGS A3, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF, X86_EFL_AF
 %else
        IEM_SAVE_FLAGS A3, %2, %3
 %endif
        xor     eax, eax                ; Return 0 (success).

.return:
        EPILOGUE_4_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1                 ; Tell the caller to raise a divide error.
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u16 %+ %5

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32 %+ %5, 16
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2_32, A2_32
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A1], A2_32             ; Overflow if EDX >= divisor.
        jae     .div_overflow
 %else
        push    A2                      ; save A2 so we can modify it (we're out of regs on x86).
        mov     T0_32, [A0]             ; T0 = dividend low
        mov     T1_32, [A1]             ; T1 = dividend high
        test    A2_32, A2_32
        js      .divisor_negative
        test    T1_32, T1_32
        jns     .both_positive
        call    NAME(iemAImpl_negate_T0_T1_u32)
        test    T1_32, T1_32
        js      .div_overflow           ; Dividend was 0x80000000:00000000: always overflows.
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shl     T1_32, 1
        shr     T0_32, 31
        or      T1_32, T0_32
        cmp     T1_32, A2_32
        pop     T0                      ; pop leaves the flags from the cmp intact.
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_32, 0x7fffffff       ; Special case for covering (divisor - 1).
        cmp     T0_32, A2_32
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A2_32
        test    T1_32, T1_32
        jns     .one_of_each
        call    NAME(iemAImpl_negate_T0_T1_u32)
        test    T1_32, T1_32
        js      .div_overflow           ; Dividend was 0x80000000:00000000: always overflows.
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shl     T1_32, 1
        shr     T0_32, 31
        or      T1_32, T0_32
        cmp     T1_32, A2_32
        jae     .div_overflow
.div_no_overflow:
        pop     A2                      ; Restore the original (signed) divisor.
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; Divisor into T1; A2 is overwritten by the edx load below.
        mov     eax, [A0]
        mov     edx, [A1]
        %1      T1_32
        mov     [A0], eax
        mov     [A1], edx
 %else
        mov     T1, A1                  ; rDX pointer into T1; A1 is overwritten by the edx load below.
        mov     eax, [A0]
        mov     edx, [T1]
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
 %endif
 %if %6 == 2 ; AMD64 3990X: Set AF and clear PF, ZF and SF.
        IEM_ADJUST_FLAGS A3, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF, X86_EFL_AF
 %else
        IEM_SAVE_FLAGS A3, %2, %3
 %endif
        xor     eax, eax                ; Return 0 (success).

.return:
        EPILOGUE_4_ARGS

.div_overflow:
 %if %4 != 0
        pop     A2                      ; Balance the 'push A2' done by the signed overflow check.
 %endif
.div_zero:
        mov     eax, -1                 ; Tell the caller to raise a divide error.
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u32 %+ %5

 %ifdef RT_ARCH_AMD64 ; The 32-bit host version lives in IEMAllAImplC.cpp.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64 %+ %5, 20
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2, A2
        jz      .div_zero
 %if %4 == 0
        cmp     [A1], A2                ; Overflow if RDX >= divisor.
        jae     .div_overflow
 %else
        push    A2                      ; save A2 so we can modify it (we're out of regs on x86).
        mov     T0, [A0]                ; T0 = dividend low
        mov     T1, [A1]                ; T1 = dividend high
        test    A2, A2
        js      .divisor_negative
        test    T1, T1
        jns     .both_positive
        call    NAME(iemAImpl_negate_T0_T1_u64)
        test    T1, T1
        js      .div_overflow           ; Dividend was the most negative 128-bit value: always overflows.
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shl     T1, 1
        shr     T0, 63
        or      T1, T0
        cmp     T1, A2
        pop     T0                      ; pop leaves the flags from the cmp intact.
        jb      .div_no_overflow
        ja      .div_overflow
        mov     T1, 0x7fffffffffffffff
        and     T0, T1                  ; Special case for covering (divisor - 1).
        cmp     T0, A2
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A2
        test    T1, T1
        jns     .one_of_each
        call    NAME(iemAImpl_negate_T0_T1_u64)
        test    T1, T1
        js      .div_overflow           ; Dividend was the most negative 128-bit value: always overflows.
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shl     T1, 1
        shr     T0, 63
        or      T1, T0
        cmp     T1, A2
        jae     .div_overflow
.div_no_overflow:
        pop     A2                      ; Restore the original (signed) divisor.
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; Divisor into T1; A2 is overwritten by the rdx load below.
        mov     rax, [A0]
        mov     rdx, [A1]
        %1      T1
        mov     [A0], rax
        mov     [A1], rdx
 %else
        mov     T1, A1                  ; rDX pointer into T1; A1 is overwritten by the rdx load below.
        mov     rax, [A0]
        mov     rdx, [T1]
        %1      A2
        mov     [A0], rax
        mov     [T1], rdx
 %endif
 %if %6 == 2 ; AMD64 3990X: Set AF and clear PF, ZF and SF.
        IEM_ADJUST_FLAGS A3, X86_EFL_PF | X86_EFL_ZF | X86_EFL_SF, X86_EFL_AF
 %else
        IEM_SAVE_FLAGS A3, %2, %3
 %endif
        xor     eax, eax                ; Return 0 (success).

.return:
        EPILOGUE_4_ARGS_EX 12

.div_overflow:
 %if %4 != 0
        pop     A2                      ; Balance the 'push A2' done by the signed overflow check.
 %endif
.div_zero:
        mov     eax, -1                 ; Tell the caller to raise a divide error.
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u64 %+ %5
 %endif ; RT_ARCH_AMD64

%endmacro

IEMIMPL_DIV_OP div, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0, , 0
IEMIMPL_DIV_OP div, 0, 0, 0, _intel, 1
IEMIMPL_DIV_OP div, 0, 0, 0, _amd, 2
IEMIMPL_DIV_OP idiv, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 1, , 0
IEMIMPL_DIV_OP idiv, 0, 0, 1, _intel, 1
IEMIMPL_DIV_OP idiv, 0, 0, 1, _amd, 2
2040
2041
2042;;
2043; Macro for implementing memory fence operation.
2044;
2045; No return value, no operands or anything. The generated function just
; executes the given instruction and returns.
2046;
2047; @param 1 The instruction.
2048;
2049%macro IEMIMPL_MEM_FENCE 1
2050BEGINCODE
2051BEGINPROC_FASTCALL iemAImpl_ %+ %1, 0
2052 %1
2053 ret
2054ENDPROC iemAImpl_ %+ %1
2055%endmacro
2056
2057IEMIMPL_MEM_FENCE lfence
2058IEMIMPL_MEM_FENCE sfence
2059IEMIMPL_MEM_FENCE mfence
2060
2061;;
2062; Alternative for non-SSE2 host.
2063;
; Pushes xAX, exchanges it with the stack slot just written (so xAX keeps its
; value) and drops the slot again.
; NOTE(review): presumably relies on xchg with a memory operand being
; implicitly locked and thus acting as a fence - confirm.
2064BEGINPROC_FASTCALL iemAImpl_alt_mem_fence, 0
2065 push xAX
2066 xchg xAX, [xSP]
2067 add xSP, xCB
2068 ret
2069ENDPROC iemAImpl_alt_mem_fence
2070
2071
2072;;
2073; Initialize the FPU for the actual instruction being emulated, this means
2074; loading parts of the guest's control word and status word.
2075;
; The host FPU environment is stored at [xSP], patched with the guest values
; and loaded back, so the caller must have reserved stack space at [xSP].
;
2076; @uses 24 bytes of stack. T0, T1
; NOTE(review): the image is accessed through X86FSTENV32P; the 32-bit
; protected-mode FNSTENV image is 28 bytes, so 24 looks too small. The
; callers in this file reserve 20h bytes - confirm and correct the figure.
2077; @param 1 Expression giving the address of the FXSTATE of the guest.
2078;
2079%macro FPU_LD_FXSTATE_FCW_AND_SAFE_FSW 1
2080 fnstenv [xSP]
2081
2082 ; FCW - for exception, precision and rounding control.
2083 movzx T0, word [%1 + X86FXSTATE.FCW]
2084 and T0, X86_FCW_MASK_ALL | X86_FCW_PC_MASK | X86_FCW_RC_MASK
2085 mov [xSP + X86FSTENV32P.FCW], T0_16
2086
2087 ; FSW - for undefined C0, C1, C2, and C3.
; Takes the condition code bits from the guest and keeps TOP from the host image.
2088 movzx T1, word [%1 + X86FXSTATE.FSW]
2089 and T1, X86_FSW_C_MASK
2090 movzx T0, word [xSP + X86FSTENV32P.FSW]
2091 and T0, X86_FSW_TOP_MASK
2092 or T0, T1
2093 mov [xSP + X86FSTENV32P.FSW], T0_16
2094
2095 fldenv [xSP]
2096%endmacro
2097
2098
2099;;
2100; Initialize the FPU for the actual instruction being emulated, this means
2101; loading parts of the guest's control word, status word, and update the
2102; tag word for the top register if it's empty.
2103;
2104; ASSUMES actual TOP=7
2105;
; The host FPU environment is stored at [xSP], patched with the guest values
; and loaded back, so the caller must have reserved stack space at [xSP].
;
2106; @uses 24 bytes of stack. T0, T1
; NOTE(review): the image is accessed through X86FSTENV32P; the 32-bit
; protected-mode FNSTENV image is 28 bytes, so 24 looks too small - confirm
; and correct the figure.
2107; @param 1 Expression giving the address of the FXSTATE of the guest.
2108;
2109%macro FPU_LD_FXSTATE_FCW_AND_SAFE_FSW_AND_FTW_0 1
2110 fnstenv [xSP]
2111
2112 ; FCW - for exception, precision and rounding control.
2113 movzx T0_32, word [%1 + X86FXSTATE.FCW]
2114 and T0_32, X86_FCW_MASK_ALL | X86_FCW_PC_MASK | X86_FCW_RC_MASK
2115 mov [xSP + X86FSTENV32P.FCW], T0_16
2116
2117 ; FSW - for undefined C0, C1, C2, and C3.
; Takes the condition code bits from the guest and keeps TOP from the host image.
2118 movzx T1_32, word [%1 + X86FXSTATE.FSW]
2119 and T1_32, X86_FSW_C_MASK
2120 movzx T0_32, word [xSP + X86FSTENV32P.FSW]
2121 and T0_32, X86_FSW_TOP_MASK
2122 or T0_32, T1_32
2123 mov [xSP + X86FSTENV32P.FSW], T0_16
2124
2125 ; FTW - Only for ST0 (in/out).
; T1 = the guest's TOP value, used as the bit index into the guest FTW.
2126 movzx T1_32, word [%1 + X86FXSTATE.FSW]
2127 shr T1_32, X86_FSW_TOP_SHIFT
2128 and T1_32, X86_FSW_TOP_SMASK
2129 bt [%1 + X86FXSTATE.FTW], T1_16 ; Empty if FTW bit is clear. Fixed register order.
2130 jc %%st0_not_empty
2131 or word [xSP + X86FSTENV32P.FTW], 0c000h ; TOP=7, so set TAG(7)=3
2132%%st0_not_empty:
2133
2134 fldenv [xSP]
2135%endmacro
2136
2137
2138;;
2139; Need to move this as well somewhere better?
2140;
; Result of an FPU operation: an 80-bit value (5 words) followed by the
; 16-bit FPU status word.
2141struc IEMFPURESULT
2142 .r80Result resw 5
2143 .FSW resw 1
2144endstruc
2145
2146
2147;;
2148; Need to move this as well somewhere better?
2149;
; Result of an FPU operation with two outputs: the first 80-bit value
; (5 words), the 16-bit FPU status word, then the second 80-bit value.
2150struc IEMFPURESULTTWO
2151 .r80Result1 resw 5
2152 .FSW resw 1
2153 .r80Result2 resw 5
2154endstruc
2155
2156
2157;
2158;---------------------- 16-bit signed integer operations ----------------------
2159;
2160
2161
2162;;
2163; Converts a 16-bit signed integer value to a 80-bit one (fpu register).
2164;
2165; @param A0 FPU context (fxsave).
2166; @param A1 Pointer to a IEMFPURESULT for the output.
2167; @param A2 Pointer to the 16-bit signed integer value to convert.
2168;
2169BEGINPROC_FASTCALL iemAImpl_fild_r80_from_i16, 12
2170 PROLOGUE_3_ARGS
2171 sub xSP, 20h ; Scratch space at [xSP] for the fnstenv/fldenv image used by the macro below.
2172
2173 fninit
2174 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2175 fild word [A2]
2176
2177 fnstsw word [A1 + IEMFPURESULT.FSW] ; Capture the status word produced by the load.
2178 fnclex ; NOTE(review): presumably so the fstp below cannot trip over a pending exception - confirm.
2179 fstp tword [A1 + IEMFPURESULT.r80Result]
2180
2181 fninit ; Leave the host FPU in its initialized state.
2182 add xSP, 20h
2183 EPILOGUE_3_ARGS
2184ENDPROC iemAImpl_fild_r80_from_i16
2185
2186
2187;;
2188; Store a 80-bit floating point value (register) as a 16-bit signed integer (memory).
2189;
2190; @param A0 FPU context (fxsave).
2191; @param A1 Where to return the output FSW.
2192; @param A2 Where to store the 16-bit signed integer value.
2193; @param A3 Pointer to the 80-bit value.
2194;
2195BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i16, 16
2196 PROLOGUE_4_ARGS
2197 sub xSP, 20h ; Scratch space at [xSP] for the fnstenv/fldenv image used by the macro below.
2198
2199 fninit
2200 fld tword [A3] ; Load the value before the guest control word is applied.
2201 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2202 fistp word [A2]
2203
2204 fnstsw word [A1] ; Return the status word produced by the store.
2205
2206 fninit ; Leave the host FPU in its initialized state.
2207 add xSP, 20h
2208 EPILOGUE_4_ARGS
2209ENDPROC iemAImpl_fist_r80_to_i16
2210
2211
2212;;
2213; Store a 80-bit floating point value (register) as a 16-bit signed integer
2214; (memory) with truncation.
2215;
2216; @param A0 FPU context (fxsave).
2217; @param A1 Where to return the output FSW.
2218; @param A2 Where to store the 16-bit signed integer value.
2219; @param A3 Pointer to the 80-bit value.
2220;
2221BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i16, 16
2222 PROLOGUE_4_ARGS
2223 sub xSP, 20h ; Scratch space at [xSP] for the fnstenv/fldenv image used by the macro below.
2224
2225 fninit
2226 fld tword [A3] ; Load the value before the guest control word is applied.
2227 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2228 fisttp word [A2]
2229
2230 fnstsw word [A1] ; Return the status word produced by the store.
2231
2232 fninit ; Leave the host FPU in its initialized state.
2233 add xSP, 20h
2234 EPILOGUE_4_ARGS
2235ENDPROC iemAImpl_fistt_r80_to_i16
2236
2237
;;
; Worker template: FPU arithmetic on an 80-bit value (ST0) and a 16-bit signed
; integer memory operand, producing an 80-bit result plus FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        fld     tword [A2]              ; ST0 = the 80-bit operand.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]

        fnstsw  word [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear exception flags before the waiting fstp below.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Don't leave anything behind in the FPU.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16 fiadd
IEMIMPL_FPU_R80_BY_I16 fimul
IEMIMPL_FPU_R80_BY_I16 fisub
IEMIMPL_FPU_R80_BY_I16 fisubr
IEMIMPL_FPU_R80_BY_I16 fidiv
IEMIMPL_FPU_R80_BY_I16 fidivr
2274
2275
;;
; Worker template: FPU instruction on an 80-bit value (ST0) and a 16-bit signed
; integer memory operand where only the resulting FSW is of interest.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        fld     tword [A2]              ; ST0 = the 80-bit operand.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]

        fnstsw  word [A1]

        fninit                          ; Also discards whatever is still on the FPU stack.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16_FSW ficom
2306
2307
2308
2309;
2310;---------------------- 32-bit signed integer operations ----------------------
2311;
2312
2313
;;
; FILD: converts a 32-bit signed integer (memory) to an 80-bit value (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT receiving the FSW and the 80-bit result.
; @param    A2      Pointer to the 32-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_r80_from_i32, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    dword [A2]

        fnstsw  word [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear exception flags before the waiting fstp below.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Don't leave anything behind in the FPU.
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_r80_from_i32
2337
2338
;;
; FIST(P): stores an 80-bit floating point value (register) as a 32-bit signed
; integer (memory), rounding as selected by the loaded control word.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        fld     tword [A3]              ; The input goes in before the context's FCW/FSW.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   dword [A2]

        fnstsw  word [A1]

        fninit                          ; Don't leave anything behind in the FPU.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i32
2362
2363
;;
; FISTTP: stores an 80-bit floating point value (register) as a 32-bit signed
; integer (memory) using truncation.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        fld     tword [A3]              ; The input goes in before the context's FCW/FSW.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  dword [A2]

        fnstsw  word [A1]

        fninit                          ; Don't leave anything behind in the FPU.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i32
2388
2389
;;
; Worker template: FPU arithmetic on an 80-bit value (ST0) and a 32-bit signed
; integer memory operand, producing an 80-bit result plus FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        fld     tword [A2]              ; ST0 = the 80-bit operand.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear exception flags before the waiting fstp below.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Don't leave anything behind in the FPU.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32 fiadd
IEMIMPL_FPU_R80_BY_I32 fimul
IEMIMPL_FPU_R80_BY_I32 fisub
IEMIMPL_FPU_R80_BY_I32 fisubr
IEMIMPL_FPU_R80_BY_I32 fidiv
IEMIMPL_FPU_R80_BY_I32 fidivr
2426
2427
;;
; Worker template: FPU instruction on an 80-bit value (ST0) and a 32-bit signed
; integer memory operand where only the resulting FSW is of interest.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        fld     tword [A2]              ; ST0 = the 80-bit operand.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word [A1]

        fninit                          ; Also discards whatever is still on the FPU stack.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32_FSW ficom
2458
2459
2460
2461;
2462;---------------------- 64-bit signed integer operations ----------------------
2463;
2464
2465
;;
; FILD: converts a 64-bit signed integer (memory) to an 80-bit value (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT receiving the FSW and the 80-bit result.
; @param    A2      Pointer to the 64-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_r80_from_i64, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    qword [A2]

        fnstsw  word [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear exception flags before the waiting fstp below.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Don't leave anything behind in the FPU.
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_r80_from_i64
2489
2490
;;
; FIST(P): stores an 80-bit floating point value (register) as a 64-bit signed
; integer (memory), rounding as selected by the loaded control word.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        fld     tword [A3]              ; The input goes in before the context's FCW/FSW.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   qword [A2]

        fnstsw  word [A1]

        fninit                          ; Don't leave anything behind in the FPU.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i64
2514
2515
;;
; FISTTP: stores an 80-bit floating point value (register) as a 64-bit signed
; integer (memory) using truncation.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        fld     tword [A3]              ; The input goes in before the context's FCW/FSW.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  qword [A2]

        fnstsw  word [A1]

        fninit                          ; Don't leave anything behind in the FPU.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i64
2540
2541
2542
2543;
2544;---------------------- 32-bit floating point operations ----------------------
2545;
2546
;;
; FLD m32fp: widens a 32-bit floating point value (memory) to an 80-bit one
; (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT receiving the FSW and the 80-bit result.
; @param    A2      Pointer to the 32-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r32, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     dword [A2]

        fnstsw  word [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear exception flags before the waiting fstp below.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Don't leave anything behind in the FPU.
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r80_from_r32
2570
2571
;;
; FST m32fp: stores an 80-bit floating point value (register) as a 32-bit one
; (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        fld     tword [A3]              ; The input goes in before the context's FCW/FSW.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fst     dword [A2]              ; Non-popping store; the fninit below empties the stack.

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r32
2595
2596
;;
; Worker template: FPU arithmetic on an 80-bit value (ST0) and a 32-bit
; floating point memory operand, producing an 80-bit result plus FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        fld     tword [A2]              ; ST0 = the 80-bit operand.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear exception flags before the waiting fstp below.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Don't leave anything behind in the FPU.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32 fadd
IEMIMPL_FPU_R80_BY_R32 fmul
IEMIMPL_FPU_R80_BY_R32 fsub
IEMIMPL_FPU_R80_BY_R32 fsubr
IEMIMPL_FPU_R80_BY_R32 fdiv
IEMIMPL_FPU_R80_BY_R32 fdivr
2633
2634
;;
; Worker template: FPU instruction on an 80-bit value (ST0) and a 32-bit
; floating point memory operand where only the resulting FSW is of interest.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        fld     tword [A2]              ; ST0 = the 80-bit operand.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word [A1]

        fninit                          ; Also discards whatever is still on the FPU stack.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32_FSW fcom
2665
2666
2667
2668;
2669;---------------------- 64-bit floating point operations ----------------------
2670;
2671
;;
; FLD m64fp: widens a 64-bit floating point value (memory) to an 80-bit one
; (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT receiving the FSW and the 80-bit result.
; @param    A2      Pointer to the 64-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r64, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Reset the FPU first, like all the sibling workers do, so the
                                        ; load below does not run on leftover host stack/tag state.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     qword [A2]

        fnstsw  word [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear exception flags before the waiting fstp below.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Don't leave anything behind in the FPU.
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r80_from_r64
2694
2695
;;
; FST m64fp: stores an 80-bit floating point value (register) as a 64-bit one
; (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        fld     tword [A3]              ; The input goes in before the context's FCW/FSW.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fst     qword [A2]              ; Non-popping store; the fninit below empties the stack.

        fnstsw  word [A1]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r64
2719
2720
;;
; Worker template: FPU arithmetic on an 80-bit value (ST0) and a 64-bit
; floating point memory operand, producing an 80-bit result plus FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        fld     tword [A2]              ; ST0 = the 80-bit operand.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      qword [A3]

        fnstsw  word [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear exception flags before the waiting fstp below.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Don't leave anything behind in the FPU.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64 fadd
IEMIMPL_FPU_R80_BY_R64 fmul
IEMIMPL_FPU_R80_BY_R64 fsub
IEMIMPL_FPU_R80_BY_R64 fsubr
IEMIMPL_FPU_R80_BY_R64 fdiv
IEMIMPL_FPU_R80_BY_R64 fdivr
2757
;;
; Worker template: FPU instruction on an 80-bit value (ST0) and a 64-bit
; floating point memory operand where only the resulting FSW is of interest.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        fld     tword [A2]              ; ST0 = the 80-bit operand.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      qword [A3]

        fnstsw  word [A1]

        fninit                          ; Also discards whatever is still on the FPU stack.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64_FSW fcom
2788
2789
2790
2791;
2792;---------------------- 80-bit floating point operations ----------------------
2793;
2794
;;
; FLD m80fp: loads an 80-bit floating point value from memory into an FPU
; register and hands back the register image together with the FSW.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit floating point value to load.
;
BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     tword [A2]

        fnstsw  word [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear exception flags before the waiting fstp below.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Don't leave anything behind in the FPU.
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r80_from_r80
2818
2819
;;
; FSTP m80fp: stores an 80-bit floating point register value to memory.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 80-bit value.
; @param    A3      Pointer to the 80-bit register value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        fld     tword [A3]              ; The input goes in before the context's FCW/FSW.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fstp    tword [A2]

        fnstsw  word [A1]

        fninit                          ; Don't leave anything behind in the FPU.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r80
2843
2844
;;
; FBLD: loads an 80-bit packed BCD value from memory, converting it to an
; 80-bit floating point register value.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit BCD value to load.
;
BEGINPROC_FASTCALL iemAImpl_fld_r80_from_d80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fbld    tword [A2]

        fnstsw  word [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear exception flags before the waiting fstp below.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Don't leave anything behind in the FPU.
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r80_from_d80
2868
2869
;;
; FBSTP: stores an 80-bit floating point register value to memory as 80-bit
; packed BCD.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 80-bit BCD value.
; @param    A3      Pointer to the 80-bit register value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_d80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        fld     tword [A3]              ; The input goes in before the context's FCW/FSW.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fbstp   tword [A2]

        fnstsw  word [A1]

        fninit                          ; Don't leave anything behind in the FPU.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_d80
2893
2894
;;
; Worker template: FPU instruction working on two 80-bit floating point values,
; leaving the result in ST0.
;
; @param    1       The instruction
; @param    2       The operand list to give the instruction; empty ({}) for
;                   instructions with implicit operands.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the first 80-bit value (ST0)
; @param    A3      Pointer to the second 80-bit value (STn).
;
%macro IEMIMPL_FPU_R80_BY_R80 2
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        fld     tword [A3]              ; Pushed first, so it ends up as ST1 ...
        fld     tword [A2]              ; ... with this one on top as ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      %2

        fnstsw  word [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear exception flags before the waiting fstp below.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Don't leave anything behind in the FPU.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80 fadd,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fmul,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fsub,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fsubr,  {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fdiv,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fdivr,  {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fprem,  {}
IEMIMPL_FPU_R80_BY_R80 fprem1, {}
IEMIMPL_FPU_R80_BY_R80 fscale, {}
2935
2936
;;
; Worker template: FPU instruction taking ST1 and ST0 as its two 80-bit
; inputs, putting the result in ST1 and popping the stack.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the first 80-bit value (ST1).
; @param    A3      Pointer to the second 80-bit value (ST0).
;
%macro IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        fld     tword [A2]              ; Pushed first, so it ends up as ST1 ...
        fld     tword [A3]              ; ... with this one on top as ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear exception flags before the waiting fstp below.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Don't leave anything behind in the FPU.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fpatan
IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2x
IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2xp1
2972
2973
;;
; Worker template: FPU instruction on two 80-bit floating point values where
; only the resulting FSW is returned.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the first 80-bit value.
; @param    A3      Pointer to the second 80-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R80_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        fld     tword [A3]              ; Pushed first, so it ends up as ST1 ...
        fld     tword [A2]              ; ... with this one on top as ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      st0, st1

        fnstsw  word [A1]

        fninit                          ; Also discards whatever is still on the FPU stack.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_FSW fcom
IEMIMPL_FPU_R80_BY_R80_FSW fucom
3006
3007
;;
; Worker template: FPU instruction on two 80-bit floating point values which
; reports its outcome both in FSW and in EFLAGS (returned in eax).
;
; @param    1       The instruction
;
; @returns  EFLAGS in EAX.
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the first 80-bit value.
; @param    A3      Pointer to the second 80-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R80_EFL 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        fld     tword [A3]              ; Pushed first, so it ends up as ST1 ...
        fld     tword [A2]              ; ... with this one on top as ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      st1

        fnstsw  word [A1]
        pushf                           ; Grab the flags right away; the 'add' further down
        pop     xAX                     ; modifies EFLAGS, so it has to come after this.

        fninit                          ; Also discards whatever is still on the FPU stack.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_EFL fcomi
IEMIMPL_FPU_R80_BY_R80_EFL fucomi
3043
3044
;;
; Worker template: unary FPU instruction operating on a single 80-bit floating
; point value in ST0, producing an 80-bit result plus FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        fld     tword [A2]              ; ST0 = the operand.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear exception flags before the waiting fstp below.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Don't leave anything behind in the FPU.
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80
%endmacro

IEMIMPL_FPU_R80 fchs
IEMIMPL_FPU_R80 fabs
IEMIMPL_FPU_R80 f2xm1
IEMIMPL_FPU_R80 fsqrt
IEMIMPL_FPU_R80 frndint
IEMIMPL_FPU_R80 fsin
IEMIMPL_FPU_R80 fcos
3081
3082
;;
; Worker template: FPU instruction examining a single 80-bit floating point
; value in ST0 and returning nothing but the FSW.
;
; @param    1       The instruction
; @param    2       Non-zero to also restore FTW (selects the
;                   FPU_LD_FXSTATE_FCW_AND_SAFE_FSW_AND_FTW_0 loader).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80_FSW 2
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        fld     tword [A2]              ; ST0 = the operand.
%if %2 != 0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW_AND_FTW_0 A0
%else
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
%endif
        %1

        fnstsw  word [A1]

        fninit                          ; Also discards whatever is still on the FPU stack.
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80
%endmacro

IEMIMPL_FPU_R80_FSW ftst, 0
IEMIMPL_FPU_R80_FSW fxam, 1 ; No #IS or any other FP exceptions.
3118
3119
3120
;;
; Worker template: FPU instruction pushing an 80-bit floating point constant.
; The constant is produced under the context's control word and returned
; together with the FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
;
%macro IEMIMPL_FPU_R80_CONST 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1, 8
        PROLOGUE_2_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Clear exception flags before the waiting fstp below.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Don't leave anything behind in the FPU.
        add     xSP, 20h
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1                 ; Same name as on the BEGINPROC_FASTCALL line (no dangling '%+').
%endmacro

IEMIMPL_FPU_R80_CONST fld1
IEMIMPL_FPU_R80_CONST fldl2t
IEMIMPL_FPU_R80_CONST fldl2e
IEMIMPL_FPU_R80_CONST fldpi
IEMIMPL_FPU_R80_CONST fldlg2
IEMIMPL_FPU_R80_CONST fldln2
IEMIMPL_FPU_R80_CONST fldz
3155
3156
;;
; Worker template: FPU instruction taking one 80-bit floating point value and
; outputting two.  The value found on top of the stack afterwards is returned
; as r80Result2, the one below it as r80Result1.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULTTWO for the output.
; @param    A2      Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80_R80 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; 20h bytes of stack, given back before the epilogue.

        fninit                          ; Start out with a freshly initialized FPU.
        fld     tword [A2]              ; ST0 = the operand.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word [A1 + IEMFPURESULTTWO.FSW]
        fnclex                          ; Clear exception flags before each waiting fstp.
        fstp    tword [A1 + IEMFPURESULTTWO.r80Result2]
        fnclex
        fstp    tword [A1 + IEMFPURESULTTWO.r80Result1]

        fninit                          ; Don't leave anything behind in the FPU.
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_r80
%endmacro

IEMIMPL_FPU_R80_R80 fptan
IEMIMPL_FPU_R80_R80 fxtract
IEMIMPL_FPU_R80_R80 fsincos
3191
3192
3193
3194
3195;---------------------- SSE and MMX Operations ----------------------
3196
;;
; Hooks expanded at the start and end of every MMX worker below.
; Currently empty placeholders.
; @todo what do we need to do for MMX?
;
%macro IEMIMPL_MMX_PROLOGUE 0
%endmacro
%macro IEMIMPL_MMX_EPILOGUE 0
%endmacro

;;
; Hooks expanded at the start and end of every SSE worker below.
; Currently empty placeholders.
; @todo what do we need to do for SSE?
;
%macro IEMIMPL_SSE_PROLOGUE 0
%endmacro
%macro IEMIMPL_SSE_EPILOGUE 0
%endmacro
3208
3209
;;
; Worker template: media instruction working on two full sized registers.
; Emits an MMX (_u64) and an SSE (_u128) variant.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to the first media register size operand (input/output).
; @param    A2      Pointer to the second media register size operand (input).
;
%macro IEMIMPL_MEDIA_F2 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm0, [A1]               ; mm0 = destination operand.
        movq    mm1, [A2]               ; mm1 = source operand.
        %1      mm0, mm1
        movq    [A1], mm0               ; Write the result back.

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm0, [A1]              ; xmm0 = destination operand (unaligned access).
        movdqu  xmm1, [A2]              ; xmm1 = source operand.
        %1      xmm0, xmm1
        movdqu  [A1], xmm0              ; Write the result back.

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u128
%endmacro

IEMIMPL_MEDIA_F2 pxor
IEMIMPL_MEDIA_F2 pcmpeqb
IEMIMPL_MEDIA_F2 pcmpeqw
IEMIMPL_MEDIA_F2 pcmpeqd
3251
3252
;;
; Worker template: media instruction working on one full sized register and
; the lower half of another.  Only half a register's worth of source data is
; read (32 bits for MMX, 64 bits for SSE).
;
; @param    1       The instruction
; @param    2       1 if MMX is included, 0 if not.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to the first full sized media register operand (input/output).
; @param    A2      Pointer to the second half sized media register operand (input).
;
%macro IEMIMPL_MEDIA_F1L1 2
 %if %2 != 0
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm0, [A1]               ; mm0 = full destination operand.
        movd    mm1, [A2]               ; mm1 = 32-bit source, upper half zeroed.
        %1      mm0, mm1
        movq    [A1], mm0               ; Write the result back.

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm0, [A1]              ; xmm0 = full destination operand.
        movq    xmm1, [A2]              ; xmm1 = 64-bit source, upper half zeroed.
        %1      xmm0, xmm1
        movdqu  [A1], xmm0              ; Write the result back.

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u128
%endmacro

IEMIMPL_MEDIA_F1L1 punpcklbw,  1
IEMIMPL_MEDIA_F1L1 punpcklwd,  1
IEMIMPL_MEDIA_F1L1 punpckldq,  1
IEMIMPL_MEDIA_F1L1 punpcklqdq, 0
3297
3298
;;
; Worker template: media instruction working on one full sized register and
; the high half of another.  The whole source register is loaded so that its
; upper half is actually there for the instruction to consume.
;
; @param    1       The instruction
; @param    2       1 if MMX is included, 0 if not.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to the first full sized media register operand (input/output).
; @param    A2      Pointer to the second full sized media register operand, where we
;                   will only use the upper half (input).
;
%macro IEMIMPL_MEDIA_F1H1 2
 %if %2 != 0
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm0, [A1]               ; mm0 = full destination operand.
        movq    mm1, [A2]               ; mm1 = full source operand (high half is what gets used).
        %1      mm0, mm1
        movq    [A1], mm0               ; Write the result back.

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm0, [A1]              ; xmm0 = full destination operand.
        movdqu  xmm1, [A2]              ; xmm1 = full source operand (high half is what gets used).
        %1      xmm0, xmm1
        movdqu  [A1], xmm0              ; Write the result back.

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u128
%endmacro

; The high-half unpackers must use the F1H1 template: the low-half template
; (F1L1) only loads the lower half of the source and leaves the upper half,
; which is the part these instructions read, zero.
IEMIMPL_MEDIA_F1H1 punpckhbw,  1
IEMIMPL_MEDIA_F1H1 punpckhwd,  1
IEMIMPL_MEDIA_F1H1 punpckhdq,  1
IEMIMPL_MEDIA_F1H1 punpckhqdq, 0
3344
3345
3346;
3347; Shufflers with evil 8-bit immediates.
3348;
3349
;;
; PSHUFW mm, mm, imm8 worker.  The immediate cannot be given at run time, so
; one 'pshufw + ret' stub is generated for each of the 256 possible values and
; the stub matching the requested immediate is called.
;
; @param    A0      Not referenced by this worker.
; @param    A1      Pointer to the 64-bit destination operand (input/output).
; @param    A2      Pointer to the 64-bit source operand (input).
; @param    A3      The 8-bit immediate; only the low 8 bits are used.
;
BEGINPROC_FASTCALL iemAImpl_pshufw, 16
        PROLOGUE_4_ARGS
        IEMIMPL_MMX_PROLOGUE

        and     A3, 0ffh                ; Keep the stub index within 0..255 even if the
                                        ; upper register bits were not cleared by the caller.
        movq    mm0, [A1]
        movq    mm1, [A2]
        lea     T0, [A3 + A3*4]         ; sizeof(pshufw+ret) == 5
        lea     T1, [.imm0 xWrtRIP]
        lea     T1, [T1 + T0]           ; T1 = address of stub number A3.
        call    T1
        movq    [A1], mm0

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_4_ARGS
%assign bImm 0
%rep 256
.imm %+ bImm:
        pshufw  mm0, mm1, bImm
        ret
 %assign bImm bImm + 1
%endrep
.immEnd:                                ; 256*5 == 0x500
dw 0xfaff  + (.immEnd - .imm0)          ; will cause warning if entries are too big.
dw 0x104ff - (.immEnd - .imm0)          ; will cause warning if entries are too small.
ENDPROC iemAImpl_pshufw
3375
3376
;;
; Worker template for the SSE shuffle instructions taking an 8-bit immediate.
; As the immediate cannot be given at run time, one '<instruction> + ret' stub
; is generated for each of the 256 possible values and the stub matching the
; requested immediate is called.
;
; @param    1       The instruction
;
; @param    A0      Not referenced by this worker.
; @param    A1      Pointer to the 128-bit destination operand (input/output).
; @param    A2      Pointer to the 128-bit source operand (input).
; @param    A3      The 8-bit immediate; only the low 8 bits are used.
;
%macro IEMIMPL_MEDIA_SSE_PSHUFXX 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1, 16
        PROLOGUE_4_ARGS
        IEMIMPL_SSE_PROLOGUE

        and     A3, 0ffh                ; Keep the stub index within 0..255 even if the
                                        ; upper register bits were not cleared by the caller.
        movdqu  xmm0, [A1]
        movdqu  xmm1, [A2]
        lea     T1, [.imm0 xWrtRIP]
        lea     T0, [A3 + A3*2]         ; sizeof(pshufXX+ret) == 6: (A3 * 3) *2
        lea     T1, [T1 + T0*2]         ; T1 = address of stub number A3.
        call    T1
        movdqu  [A1], xmm0

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_4_ARGS
 %assign bImm 0
 %rep 256
.imm %+ bImm:
        %1      xmm0, xmm1, bImm
        ret
  %assign bImm bImm + 1
 %endrep
.immEnd:                                ; 256*6 == 0x600
dw 0xf9ff  + (.immEnd - .imm0)          ; will cause warning if entries are too big.
dw 0x105ff - (.immEnd - .imm0)          ; will cause warning if entries are too small.
ENDPROC iemAImpl_ %+ %1
%endmacro

IEMIMPL_MEDIA_SSE_PSHUFXX pshufhw
IEMIMPL_MEDIA_SSE_PSHUFXX pshuflw
IEMIMPL_MEDIA_SSE_PSHUFXX pshufd
3408
3409
3410;
3411; Move byte mask.
3412;
3413
;;
; PMOVMSKB r, mm worker: collects the most significant bit of each source
; byte into a mask and stores it as a 64-bit value.
;
; @param    A0      Not referenced by this worker.
; @param    A1      Pointer to the 64-bit destination receiving the mask (output).
; @param    A2      Pointer to the 64-bit source operand (input).
;
BEGINPROC_FASTCALL iemAImpl_pmovmskb_u64, 12
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm1, [A2]
        pmovmskb T0, mm1                ; Writes all of T0, so the old destination value is not needed.
        mov     [A1], T0
%ifdef RT_ARCH_X86
        mov     dword [A1 + 4], 0       ; T0 is only 32 bits wide here; zero the upper half explicitly.
%endif
        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_pmovmskb_u64
3428
;;
; PMOVMSKB r, xmm worker: collects the most significant bit of each source
; byte into a mask and stores it as a 64-bit value.
;
; @param    A0      Not referenced by this worker.
; @param    A1      Pointer to the 64-bit destination receiving the mask (output).
; @param    A2      Pointer to the 128-bit source operand (input).
;
BEGINPROC_FASTCALL iemAImpl_pmovmskb_u128, 12
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm1, [A2]
        pmovmskb T0, xmm1               ; Writes all of T0, so the old destination value is not needed.
        mov     [A1], T0
%ifdef RT_ARCH_X86
        mov     dword [A1 + 4], 0       ; T0 is only 32 bits wide here; zero the upper half explicitly.
%endif
        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_pmovmskb_u128
3443
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette