VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm@ 66250

Last change on this file since 66250 was 65506, checked in by vboxsync, 8 years ago

IEM: cmpxchg16b implementation (disabled).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 81.6 KB
Line 
1; $Id: IEMAllAImpl.asm 65506 2017-01-29 14:25:45Z vboxsync $
2;; @file
3; IEM - Instruction Implementation in Assembly.
4;
5
6; Copyright (C) 2011-2016 Oracle Corporation
7;
8; This file is part of VirtualBox Open Source Edition (OSE), as
9; available from http://www.virtualbox.org. This file is free software;
10; you can redistribute it and/or modify it under the terms of the GNU
11; General Public License (GPL) as published by the Free Software
12; Foundation, in version 2 as it comes in the "COPYING" file of the
13; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15;
16
17
18;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19; Header Files ;
20;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
21%include "VBox/asmdefs.mac"
22%include "VBox/err.mac"
23%include "iprt/x86.mac"
24
25
26;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27; Defined Constants And Macros ;
28;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30;;
31; RET XX / RET wrapper for fastcall.
32;
; Emits 'ret <bytes>' (callee pops its stack arguments) only when assembling
; for 32-bit x86 on Windows; every other configuration gets a plain 'ret'.
;
; @param 1 The number of stack argument bytes to pop (only used on x86 Windows).
;
33%macro RET_FASTCALL 1
34%ifdef RT_ARCH_X86
35 %ifdef RT_OS_WINDOWS
36 ret %1
37 %else
38 ret
39 %endif
40%else
41 ret
42%endif
43%endmacro
44
45;;
46; NAME for fastcall functions.
47;
; By default this simply expands to NAME(a_Name).  When assembling for 32-bit
; x86 on Windows it is redefined to the decorated form
; <a_Prefix><a_Name>@<a_cbArgs>.
;
; @param a_Name   The undecorated (C) function name.
; @param a_cbArgs The argument size in bytes (only used for the x86 Windows decoration).
; @param a_Prefix The symbol prefix to paste in front of the name (x86 Windows only).
;
48;; @todo 'global @fastcall@12' is still broken in yasm and requires dollar
49; escaping (or whatever the dollar is good for here). Thus the ugly
50; prefix argument.
51;
52%define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) NAME(a_Name)
53%ifdef RT_ARCH_X86
54 %ifdef RT_OS_WINDOWS
55 %undef NAME_FASTCALL
56 %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) a_Prefix %+ a_Name %+ @ %+ a_cbArgs
57 %endif
58%endif
59
60;;
61; BEGINPROC for fastcall functions.
62;
; Emits the export/global directives appropriate for the output format and
; then defines the function label itself via NAME_FASTCALL.
;
63; @param 1 The function name (C).
64; @param 2 The argument size on x86.
65;
66%macro BEGINPROC_FASTCALL 2
; PE output: export the symbol under its undecorated name.
67 %ifdef ASM_FORMAT_PE
68 export %1=NAME_FASTCALL(%1,%2,$@)
69 %endif
; OMF output is only handled when assembling with NASM.
70 %ifdef __NASM__
71 %ifdef ASM_FORMAT_OMF
72 export NAME(%1) NAME_FASTCALL(%1,%2,$@)
73 %endif
74 %endif
; Flat binary output has no symbol table, so no 'global' there.
75 %ifndef ASM_FORMAT_BIN
76 global NAME_FASTCALL(%1,%2,$@)
77 %endif
; The label itself; note the plain '@' prefix here versus '$@' in the directives above.
78NAME_FASTCALL(%1,%2,@):
79%endmacro
80
81
82;
83; We employ some macro assembly here to hide the calling convention differences.
84;
; On AMD64 all arguments arrive in registers, so the prologues are empty and
; every epilogue is a plain 'ret'.  The *_EX variants accept the number of
; stack bytes to pop for source compatibility with the 32-bit versions, but
; the value is ignored here.
;
85%ifdef RT_ARCH_AMD64
86 %macro PROLOGUE_1_ARGS 0
87 %endmacro
88 %macro EPILOGUE_1_ARGS 0
89 ret
90 %endmacro
; Accepts an optional (ignored) byte count so that the same invocation
; assembles on both AMD64 and x86, where the count is mandatory.  Invoking it
; without a parameter keeps working as before.
91 %macro EPILOGUE_1_ARGS_EX 0-1
92 ret
93 %endmacro
94
95 %macro PROLOGUE_2_ARGS 0
96 %endmacro
97 %macro EPILOGUE_2_ARGS 0
98 ret
99 %endmacro
100 %macro EPILOGUE_2_ARGS_EX 1
101 ret
102 %endmacro
103
104 %macro PROLOGUE_3_ARGS 0
105 %endmacro
106 %macro EPILOGUE_3_ARGS 0
107 ret
108 %endmacro
109 %macro EPILOGUE_3_ARGS_EX 1
110 ret
111 %endmacro
112
113 %macro PROLOGUE_4_ARGS 0
114 %endmacro
115 %macro EPILOGUE_4_ARGS 0
116 ret
117 %endmacro
118 %macro EPILOGUE_4_ARGS_EX 1
119 ret
120 %endmacro
121
; Argument register aliases (A0..A3) in their 64, 32, 16 and 8-bit widths for
; the GCC (System V) 64-bit calling convention.  Note that no A3_8 alias is
; defined.
122 %ifdef ASM_CALL64_GCC
123 %define A0 rdi
124 %define A0_32 edi
125 %define A0_16 di
126 %define A0_8 dil
127
128 %define A1 rsi
129 %define A1_32 esi
130 %define A1_16 si
131 %define A1_8 sil
132
133 %define A2 rdx
134 %define A2_32 edx
135 %define A2_16 dx
136 %define A2_8 dl
137
138 %define A3 rcx
139 %define A3_32 ecx
140 %define A3_16 cx
141 %endif
142
; The same aliases for the Microsoft 64-bit calling convention.  Again there
; is no A3_8 alias.
143 %ifdef ASM_CALL64_MSC
144 %define A0 rcx
145 %define A0_32 ecx
146 %define A0_16 cx
147 %define A0_8 cl
148
149 %define A1 rdx
150 %define A1_32 edx
151 %define A1_16 dx
152 %define A1_8 dl
153
154 %define A2 r8
155 %define A2_32 r8d
156 %define A2_16 r8w
157 %define A2_8 r8b
158
159 %define A3 r9
160 %define A3_32 r9d
161 %define A3_16 r9w
162 %endif
163
; Temporary registers shared by both 64-bit conventions: T0 is rax, T1 is r11.
164 %define T0 rax
165 %define T0_32 eax
166 %define T0_16 ax
167 %define T0_8 al
168
169 %define T1 r11
170 %define T1_32 r11d
171 %define T1_16 r11w
172 %define T1_8 r11b
173
174%else
175 ; x86
;
; 32-bit fastcall: the first two arguments arrive in ecx (A0) and edx (A1),
; the rest on the stack.  The prologues save the registers used for A2 (ebx),
; A3 (esi) and T1 (edi) and load the stack arguments; the epilogues restore
; them and pop the stack arguments with 'ret <bytes>'.
;
; One and two argument functions: only edi (T1) needs saving.
176 %macro PROLOGUE_1_ARGS 0
177 push edi
178 %endmacro
179 %macro EPILOGUE_1_ARGS 0
180 pop edi
181 ret 0
182 %endmacro
183 %macro EPILOGUE_1_ARGS_EX 1
184 pop edi
185 ret %1
186 %endmacro
187
188 %macro PROLOGUE_2_ARGS 0
189 push edi
190 %endmacro
191 %macro EPILOGUE_2_ARGS 0
192 pop edi
193 ret 0
194 %endmacro
195 %macro EPILOGUE_2_ARGS_EX 1
196 pop edi
197 ret %1
198 %endmacro
199
; Three arguments: the third one is fetched from the stack into ebx (A2).
; After 'push ebx' the return address is at [esp + 4] and the first stack
; argument at [esp + 4 + 4].
200 %macro PROLOGUE_3_ARGS 0
201 push ebx
202 mov ebx, [esp + 4 + 4]
203 push edi
204 %endmacro
; The byte count must cover at least the one stack argument loaded above.
205 %macro EPILOGUE_3_ARGS_EX 1
206 %if (%1) < 4
207 %error "With three args, at least 4 bytes must be remove from the stack upon return (32-bit)."
208 %endif
209 pop edi
210 pop ebx
211 ret %1
212 %endmacro
213 %macro EPILOGUE_3_ARGS 0
214 EPILOGUE_3_ARGS_EX 4
215 %endmacro
216
; Four arguments: three registers are pushed (12 bytes), then comes the
; return address (4 bytes), then the two stack arguments which are loaded
; into ebx (A2) and esi (A3).
217 %macro PROLOGUE_4_ARGS 0
218 push ebx
219 push edi
220 push esi
221 mov ebx, [esp + 12 + 4 + 0]
222 mov esi, [esp + 12 + 4 + 4]
223 %endmacro
; The byte count must cover at least the two stack arguments loaded above.
224 %macro EPILOGUE_4_ARGS_EX 1
225 %if (%1) < 8
226 %error "With four args, at least 8 bytes must be remove from the stack upon return (32-bit)."
227 %endif
228 pop esi
229 pop edi
230 pop ebx
231 ret %1
232 %endmacro
233 %macro EPILOGUE_4_ARGS 0
234 EPILOGUE_4_ARGS_EX 8
235 %endmacro
236
; Argument register aliases for 32-bit hosts.  The 'full width' and the _32
; aliases are the same register here.
237 %define A0 ecx
238 %define A0_32 ecx
239 %define A0_16 cx
240 %define A0_8 cl
241
242 %define A1 edx
243 %define A1_32 edx
244 %define A1_16 dx
245 %define A1_8 dl
246
247 %define A2 ebx
248 %define A2_32 ebx
249 %define A2_16 bx
250 %define A2_8 bl
251
; No A3_8 alias is defined.
252 %define A3 esi
253 %define A3_32 esi
254 %define A3_16 si
255
; Temporary registers: T0 is eax, T1 is edi (no T1_8 alias is defined).
256 %define T0 eax
257 %define T0_32 eax
258 %define T0_16 ax
259 %define T0_8 al
260
261 %define T1 edi
262 %define T1_32 edi
263 %define T1_16 di
264%endif
265
266
267;;
268; Load the modified (%2) and undefined (%3) guest flags from [%1] into the host EFLAGS.
269;
; The host flags outside the (%2 | %3) mask are left as they are.  Despite the
; name, the load is currently unconditional: the '%if (%3) != 0' guard in the
; body is commented out.
;
270; @remarks Clobbers T0, stack. Changes EFLAGS.
271; @remarks The flags pointer is whatever register is passed as parameter 1; it is not tied to A2.
272; @param 1 The parameter (A0..A3) pointing to the eflags.
273; @param 2 The set of modified flags.
274; @param 3 The set of undefined flags.
275;
276%macro IEM_MAYBE_LOAD_FLAGS 3
277 ;%if (%3) != 0
278 pushf ; store current flags
279 mov T0_32, [%1] ; load the guest flags
280 and dword [xSP], ~(%2 | %3) ; mask out the modified and undefined flags
281 and T0_32, (%2 | %3) ; select the modified and undefined flags.
282 or [xSP], T0 ; merge guest flags with host flags.
283 popf ; load the mixed flags.
284 ;%endif
285%endmacro
286
287;;
288; Update the guest flags at [%1] with the modified (%2) and undefined (%3) host flags.
289;
; Only the bits in (%2 | %3) are taken from the host EFLAGS; all other bits of
; the stored guest flags are preserved.  Expands to nothing when both masks
; are zero.
;
290; @remarks Clobbers T0, T1, stack.
291; @param 1 The register pointing to the EFLAGS.
292; @param 2 The mask of modified flags to save.
293; @param 3 The mask of undefined flags to (maybe) save.
294;
295%macro IEM_SAVE_FLAGS 3
296 %if (%2 | %3) != 0
; Fetch the host flags into T1 via the stack.
297 pushf
298 pop T1
299 mov T0_32, [%1] ; flags
300 and T0_32, ~(%2 | %3) ; clear the modified & undefined flags.
301 and T1_32, (%2 | %3) ; select the modified and undefined flags.
302 or T0_32, T1_32 ; combine the flags.
303 mov [%1], T0_32 ; save the flags.
304 %endif
305%endmacro
306
307
308;;
309; Macro for implementing a binary operator.
310;
311; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
312; variants, except on 32-bit systems where the 64-bit accesses require hand
313; coding.
314;
315; All the functions take a pointer to the destination memory operand in A0,
316; the source register operand in A1 and a pointer to eflags in A2.
317;
; Each generated function is named iemAImpl_<mnemonic>_u<bits>[_locked] and
; follows the same pattern: load the guest flags, execute the instruction on
; the memory operand, write the resulting flags back.
;
318; @param 1 The instruction mnemonic.
319; @param 2 Non-zero if there should be a locked version.
320; @param 3 The modified flags.
321; @param 4 The undefined flags.
322;
323%macro IEMIMPL_BIN_OP 4
324BEGINCODE
325BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
326 PROLOGUE_3_ARGS
327 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
328 %1 byte [A0], A1_8
329 IEM_SAVE_FLAGS A2, %3, %4
330 EPILOGUE_3_ARGS
331ENDPROC iemAImpl_ %+ %1 %+ _u8
332
333BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
334 PROLOGUE_3_ARGS
335 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
336 %1 word [A0], A1_16
337 IEM_SAVE_FLAGS A2, %3, %4
338 EPILOGUE_3_ARGS
339ENDPROC iemAImpl_ %+ %1 %+ _u16
340
341BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
342 PROLOGUE_3_ARGS
343 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
344 %1 dword [A0], A1_32
345 IEM_SAVE_FLAGS A2, %3, %4
346 EPILOGUE_3_ARGS
347ENDPROC iemAImpl_ %+ %1 %+ _u32
348
; The 64-bit variant is only generated for 64-bit hosts.
349 %ifdef RT_ARCH_AMD64
350BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
351 PROLOGUE_3_ARGS
352 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
353 %1 qword [A0], A1
354 IEM_SAVE_FLAGS A2, %3, %4
355 EPILOGUE_3_ARGS_EX 8
356ENDPROC iemAImpl_ %+ %1 %+ _u64
357 %endif ; RT_ARCH_AMD64
358
359 %if %2 != 0 ; locked versions requested?
360
; Same as above, but with a lock prefix on the instruction.
361BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 12
362 PROLOGUE_3_ARGS
363 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
364 lock %1 byte [A0], A1_8
365 IEM_SAVE_FLAGS A2, %3, %4
366 EPILOGUE_3_ARGS
367ENDPROC iemAImpl_ %+ %1 %+ _u8_locked
368
369BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
370 PROLOGUE_3_ARGS
371 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
372 lock %1 word [A0], A1_16
373 IEM_SAVE_FLAGS A2, %3, %4
374 EPILOGUE_3_ARGS
375ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
376
377BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
378 PROLOGUE_3_ARGS
379 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
380 lock %1 dword [A0], A1_32
381 IEM_SAVE_FLAGS A2, %3, %4
382 EPILOGUE_3_ARGS
383ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
384
385 %ifdef RT_ARCH_AMD64
386BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
387 PROLOGUE_3_ARGS
388 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
389 lock %1 qword [A0], A1
390 IEM_SAVE_FLAGS A2, %3, %4
391 EPILOGUE_3_ARGS_EX 8
392ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
393 %endif ; RT_ARCH_AMD64
394 %endif ; locked
395%endmacro
396
397; instr,lock,modified-flags,undefined-flags.
; cmp and test do not write their destination, so no locked variants are requested for them.
398IEMIMPL_BIN_OP add, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
399IEMIMPL_BIN_OP adc, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
400IEMIMPL_BIN_OP sub, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
401IEMIMPL_BIN_OP sbb, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
402IEMIMPL_BIN_OP or, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
403IEMIMPL_BIN_OP xor, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
404IEMIMPL_BIN_OP and, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
405IEMIMPL_BIN_OP cmp, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
406IEMIMPL_BIN_OP test, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
407
408
409;;
410; Macro for implementing a bit operator.
411;
412; This will generate code for the 16, 32 and 64 bit accesses with locked
413; variants, except on 32-bit systems where the 64-bit accesses require hand
414; coding.
415;
416; All the functions take a pointer to the destination memory operand in A0,
417; the source register operand in A1 and a pointer to eflags in A2.
418;
; There is no 8-bit variant.  A second macro with the same name but three
; parameters is defined further down for the bit search instructions; the
; assembler tells the two apart by parameter count.
;
419; @param 1 The instruction mnemonic.
420; @param 2 Non-zero if there should be a locked version.
421; @param 3 The modified flags.
422; @param 4 The undefined flags.
423;
424%macro IEMIMPL_BIT_OP 4
425BEGINCODE
426BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
427 PROLOGUE_3_ARGS
428 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
429 %1 word [A0], A1_16
430 IEM_SAVE_FLAGS A2, %3, %4
431 EPILOGUE_3_ARGS
432ENDPROC iemAImpl_ %+ %1 %+ _u16
433
434BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
435 PROLOGUE_3_ARGS
436 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
437 %1 dword [A0], A1_32
438 IEM_SAVE_FLAGS A2, %3, %4
439 EPILOGUE_3_ARGS
440ENDPROC iemAImpl_ %+ %1 %+ _u32
441
; The 64-bit variant is only generated for 64-bit hosts.
442 %ifdef RT_ARCH_AMD64
443BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
444 PROLOGUE_3_ARGS
445 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
446 %1 qword [A0], A1
447 IEM_SAVE_FLAGS A2, %3, %4
448 EPILOGUE_3_ARGS_EX 8
449ENDPROC iemAImpl_ %+ %1 %+ _u64
450 %endif ; RT_ARCH_AMD64
451
452 %if %2 != 0 ; locked versions requested?
453
; Same as above, but with a lock prefix on the instruction.
454BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
455 PROLOGUE_3_ARGS
456 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
457 lock %1 word [A0], A1_16
458 IEM_SAVE_FLAGS A2, %3, %4
459 EPILOGUE_3_ARGS
460ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
461
462BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
463 PROLOGUE_3_ARGS
464 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
465 lock %1 dword [A0], A1_32
466 IEM_SAVE_FLAGS A2, %3, %4
467 EPILOGUE_3_ARGS
468ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
469
470 %ifdef RT_ARCH_AMD64
471BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
472 PROLOGUE_3_ARGS
473 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
474 lock %1 qword [A0], A1
475 IEM_SAVE_FLAGS A2, %3, %4
476 EPILOGUE_3_ARGS_EX 8
477ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
478 %endif ; RT_ARCH_AMD64
479 %endif ; locked
480%endmacro
; instr,lock,modified-flags,undefined-flags.  bt only reads its operand, so no locked variant.
481IEMIMPL_BIT_OP bt, 0, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
482IEMIMPL_BIT_OP btc, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
483IEMIMPL_BIT_OP bts, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
484IEMIMPL_BIT_OP btr, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
485
486;;
487; Macro for implementing a bit search operator.
488;
489; This will generate code for the 16, 32 and 64 bit accesses, except on 32-bit
490; systems where the 64-bit accesses require hand coding.
491;
492; All the functions take a pointer to the destination memory operand in A0,
493; the source register operand in A1 and a pointer to eflags in A2.
494;
; The instruction is executed into T0 and the result is only stored to [A0]
; when the instruction left ZF clear; otherwise the destination is untouched.
;
; Note that this shares its name with the four parameter IEMIMPL_BIT_OP macro;
; the two are distinguished by parameter count.
;
495; @param 1 The instruction mnemonic.
496; @param 2 The modified flags.
497; @param 3 The undefined flags.
498;
499%macro IEMIMPL_BIT_OP 3
500BEGINCODE
501BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
502 PROLOGUE_3_ARGS
503 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
504 %1 T0_16, A1_16
; ZF set: skip the store and leave the destination as it was.
505 jz .unchanged_dst
506 mov [A0], T0_16
507.unchanged_dst:
508 IEM_SAVE_FLAGS A2, %2, %3
509 EPILOGUE_3_ARGS
510ENDPROC iemAImpl_ %+ %1 %+ _u16
511
512BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
513 PROLOGUE_3_ARGS
514 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
515 %1 T0_32, A1_32
516 jz .unchanged_dst
517 mov [A0], T0_32
518.unchanged_dst:
519 IEM_SAVE_FLAGS A2, %2, %3
520 EPILOGUE_3_ARGS
521ENDPROC iemAImpl_ %+ %1 %+ _u32
522
; The 64-bit variant is only generated for 64-bit hosts.
523 %ifdef RT_ARCH_AMD64
524BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
525 PROLOGUE_3_ARGS
526 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
527 %1 T0, A1
528 jz .unchanged_dst
529 mov [A0], T0
530.unchanged_dst:
531 IEM_SAVE_FLAGS A2, %2, %3
532 EPILOGUE_3_ARGS_EX 8
533ENDPROC iemAImpl_ %+ %1 %+ _u64
534 %endif ; RT_ARCH_AMD64
535%endmacro
; instr,modified-flags,undefined-flags.
536IEMIMPL_BIT_OP bsf, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
537IEMIMPL_BIT_OP bsr, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
538
539
540;
541; IMUL is also a similar but yet different case (no lock, no mem dst).
542; The rDX:rAX variant of imul is handled together with mul further down.
543;
; Each function takes a pointer to the first operand / result in A0, the
; second operand by value in A1 and a pointer to eflags in A2.  The product is
; computed in the A1 register and then stored back to [A0].
;
544BEGINCODE
545BEGINPROC_FASTCALL iemAImpl_imul_two_u16, 12
546 PROLOGUE_3_ARGS
547 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
548 imul A1_16, word [A0]
; The mov below does not touch the flags produced by imul.
549 mov [A0], A1_16
550 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
551 EPILOGUE_3_ARGS
552ENDPROC iemAImpl_imul_two_u16
553
554BEGINPROC_FASTCALL iemAImpl_imul_two_u32, 12
555 PROLOGUE_3_ARGS
556 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
557 imul A1_32, dword [A0]
558 mov [A0], A1_32
559 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
560 EPILOGUE_3_ARGS
561ENDPROC iemAImpl_imul_two_u32
562
; The 64-bit variant is only built for 64-bit hosts.
563%ifdef RT_ARCH_AMD64
564BEGINPROC_FASTCALL iemAImpl_imul_two_u64, 16
565 PROLOGUE_3_ARGS
566 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
567 imul A1, qword [A0]
568 mov [A0], A1
569 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
570 EPILOGUE_3_ARGS_EX 8
571ENDPROC iemAImpl_imul_two_u64
572%endif ; RT_ARCH_AMD64
573
574
575;
576; XCHG for memory operands. This implies locking. No flag changes.
577;
578; Each function takes two arguments, first the pointer to the memory,
579; then the pointer to the register. They all return void.
580;
; The register value is loaded into T0, exchanged with the memory operand and
; the old memory value is then written back to the register location.
;
581BEGINCODE
582BEGINPROC_FASTCALL iemAImpl_xchg_u8, 8
583 PROLOGUE_2_ARGS
584 mov T0_8, [A1]
585 xchg [A0], T0_8
586 mov [A1], T0_8
587 EPILOGUE_2_ARGS
588ENDPROC iemAImpl_xchg_u8
589
590BEGINPROC_FASTCALL iemAImpl_xchg_u16, 8
591 PROLOGUE_2_ARGS
592 mov T0_16, [A1]
593 xchg [A0], T0_16
594 mov [A1], T0_16
595 EPILOGUE_2_ARGS
596ENDPROC iemAImpl_xchg_u16
597
598BEGINPROC_FASTCALL iemAImpl_xchg_u32, 8
599 PROLOGUE_2_ARGS
600 mov T0_32, [A1]
601 xchg [A0], T0_32
602 mov [A1], T0_32
603 EPILOGUE_2_ARGS
604ENDPROC iemAImpl_xchg_u32
605
; The 64-bit variant is only built for 64-bit hosts.
606%ifdef RT_ARCH_AMD64
607BEGINPROC_FASTCALL iemAImpl_xchg_u64, 8
608 PROLOGUE_2_ARGS
609 mov T0, [A1]
610 xchg [A0], T0
611 mov [A1], T0
612 EPILOGUE_2_ARGS
613ENDPROC iemAImpl_xchg_u64
614%endif
615
616
617;
618; XADD for memory operands.
619;
620; Each function takes three arguments, first the pointer to the
621; memory/register, then the pointer to the register, and finally a pointer to
622; eflags. They all return void.
623;
; The register value is loaded into T0 after the guest flags have been loaded
; (the flag load clobbers T0), xadd is executed against the memory operand and
; the value xadd leaves in T0 is written back to the register location.
;
624BEGINCODE
625BEGINPROC_FASTCALL iemAImpl_xadd_u8, 12
626 PROLOGUE_3_ARGS
627 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
628 mov T0_8, [A1]
629 xadd [A0], T0_8
630 mov [A1], T0_8
631 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
632 EPILOGUE_3_ARGS
633ENDPROC iemAImpl_xadd_u8
634
635BEGINPROC_FASTCALL iemAImpl_xadd_u16, 12
636 PROLOGUE_3_ARGS
637 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
638 mov T0_16, [A1]
639 xadd [A0], T0_16
640 mov [A1], T0_16
641 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
642 EPILOGUE_3_ARGS
643ENDPROC iemAImpl_xadd_u16
644
645BEGINPROC_FASTCALL iemAImpl_xadd_u32, 12
646 PROLOGUE_3_ARGS
647 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
648 mov T0_32, [A1]
649 xadd [A0], T0_32
650 mov [A1], T0_32
651 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
652 EPILOGUE_3_ARGS
653ENDPROC iemAImpl_xadd_u32
654
; The 64-bit variant is only built for 64-bit hosts.
655%ifdef RT_ARCH_AMD64
656BEGINPROC_FASTCALL iemAImpl_xadd_u64, 12
657 PROLOGUE_3_ARGS
658 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
659 mov T0, [A1]
660 xadd [A0], T0
661 mov [A1], T0
662 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
663 EPILOGUE_3_ARGS
664ENDPROC iemAImpl_xadd_u64
665%endif ; RT_ARCH_AMD64
666
; Locked variants: identical to the above except for the lock prefix on xadd.
667BEGINPROC_FASTCALL iemAImpl_xadd_u8_locked, 12
668 PROLOGUE_3_ARGS
669 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
670 mov T0_8, [A1]
671 lock xadd [A0], T0_8
672 mov [A1], T0_8
673 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
674 EPILOGUE_3_ARGS
675ENDPROC iemAImpl_xadd_u8_locked
676
677BEGINPROC_FASTCALL iemAImpl_xadd_u16_locked, 12
678 PROLOGUE_3_ARGS
679 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
680 mov T0_16, [A1]
681 lock xadd [A0], T0_16
682 mov [A1], T0_16
683 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
684 EPILOGUE_3_ARGS
685ENDPROC iemAImpl_xadd_u16_locked
686
687BEGINPROC_FASTCALL iemAImpl_xadd_u32_locked, 12
688 PROLOGUE_3_ARGS
689 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
690 mov T0_32, [A1]
691 lock xadd [A0], T0_32
692 mov [A1], T0_32
693 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
694 EPILOGUE_3_ARGS
695ENDPROC iemAImpl_xadd_u32_locked
696
697%ifdef RT_ARCH_AMD64
698BEGINPROC_FASTCALL iemAImpl_xadd_u64_locked, 12
699 PROLOGUE_3_ARGS
700 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
701 mov T0, [A1]
702 lock xadd [A0], T0
703 mov [A1], T0
704 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
705 EPILOGUE_3_ARGS
706ENDPROC iemAImpl_xadd_u64_locked
707%endif ; RT_ARCH_AMD64
708
709
710;
711; CMPXCHG8B.
712;
713; These are tricky register wise, so the code is duplicated for each calling
714; convention.
715;
716; WARNING! This code makes ASSUMPTIONS about which registers T1 and T0 are mapped to!
717;
718; C-proto:
719; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
720; uint32_t *pEFlags));
721;
722; Note! Identical to iemAImpl_cmpxchg16b.
723;
; All three variants follow the same sequence: load ebx:ecx from pu64EbxEcx,
; load the guest ZF, load eax:edx from pu64EaxEdx, execute a locked
; cmpxchg8b on pu64Dst, write eax:edx back to pu64EaxEdx and save ZF.  The
; guest flags are loaded before eax because the flag load clobbers T0 (eax).
;
724BEGINCODE
725BEGINPROC_FASTCALL iemAImpl_cmpxchg8b, 16
726%ifdef RT_ARCH_AMD64
727 %ifdef ASM_CALL64_MSC
; Microsoft x64: rcx=pu64Dst, rdx=pu64EaxEdx, r8=pu64EbxEcx, r9=pEFlags.
; rcx and rdx are needed by the instruction, so the pointers move to r10/r11.
728 push rbx
729
730 mov r11, rdx ; pu64EaxEdx (is also T1)
731 mov r10, rcx ; pu64Dst
732
733 mov ebx, [r8]
734 mov ecx, [r8 + 4]
735 IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
736 mov eax, [r11]
737 mov edx, [r11 + 4]
738
739 lock cmpxchg8b [r10]
740
; r11 must be used before IEM_SAVE_FLAGS, which clobbers it as T1.
741 mov [r11], eax
742 mov [r11 + 4], edx
743 IEM_SAVE_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
744
745 pop rbx
746 ret
747 %else
; GCC/System V: rdi=pu64Dst, rsi=pu64EaxEdx, rdx=pu64EbxEcx, rcx=pEFlags.
; rcx and rdx are needed by the instruction, so those pointers move to r10/r11.
748 push rbx
749
750 mov r10, rcx ; pEFlags
751 mov r11, rdx ; pu64EbxEcx (is also T1)
752
753 mov ebx, [r11]
754 mov ecx, [r11 + 4]
755 IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
756 mov eax, [rsi]
757 mov edx, [rsi + 4]
758
759 lock cmpxchg8b [rdi]
760
761 mov [rsi], eax
762 mov [rsi + 4], edx
763 IEM_SAVE_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
764
765 pop rbx
766 ret
767
768 %endif
769%else
; 32-bit: ecx=pu64Dst, edx=pu64EaxEdx, the other two arguments are on the
; stack above the four saved registers (16 bytes) and the return address.
770 push esi
771 push edi
772 push ebx
773 push ebp
774
775 mov edi, ecx ; pu64Dst
776 mov esi, edx ; pu64EaxEdx
777 mov ecx, [esp + 16 + 4 + 0] ; pu64EbxEcx
778 mov ebp, [esp + 16 + 4 + 4] ; pEFlags
779
780 mov ebx, [ecx]
781 mov ecx, [ecx + 4]
782 IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
783 mov eax, [esi]
784 mov edx, [esi + 4]
785
786 lock cmpxchg8b [edi]
787
788 mov [esi], eax
789 mov [esi + 4], edx
790 IEM_SAVE_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, edi)
791
792 pop ebp
793 pop ebx
794 pop edi
795 pop esi
; Pop the two stack arguments.
796 ret 8
797%endif
798ENDPROC iemAImpl_cmpxchg8b
799
; The locked variant is just a tail jump to the function above.
800BEGINPROC_FASTCALL iemAImpl_cmpxchg8b_locked, 16
801 ; Lazy bird always lock prefixes cmpxchg8b.
802 jmp NAME_FASTCALL(iemAImpl_cmpxchg8b,16,$@)
803ENDPROC iemAImpl_cmpxchg8b_locked
804
805%ifdef RT_ARCH_AMD64
806
807;
808; CMPXCHG16B.
809;
810; These are tricky register wise, so the code is duplicated for each calling
811; convention.
812;
813; WARNING! This code makes ASSUMPTIONS about which registers T1 and T0 are mapped to!
814;
815; C-proto:
816; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg16b,(PRTUINT128U pu128Dst, PRTUINT128U pu1284RaxRdx, PRTUINT128U pu128RbxRcx,
817; uint32_t *pEFlags));
818;
819; Note! Same structure as iemAImpl_cmpxchg8b, but operating on full 64-bit
; register halves (rbx:rcx and rax:rdx) at offsets 0 and 8.
820;
; Both variants: load rbx:rcx from the RbxRcx operand, load the guest ZF, load
; rax:rdx from the RaxRdx operand, execute a locked cmpxchg16b on the
; destination, write rax:rdx back to the RaxRdx operand and save ZF.  The
; guest flags are loaded before rax because the flag load clobbers T0 (rax).
;
821BEGINCODE
822BEGINPROC_FASTCALL iemAImpl_cmpxchg16b, 16
823 %ifdef ASM_CALL64_MSC
; Microsoft x64: rcx=dst, rdx=RaxRdx, r8=RbxRcx, r9=pEFlags.
; rcx and rdx are needed by the instruction, so the pointers move to r10/r11.
824 push rbx
825
826 mov r11, rdx ; pu64RaxRdx (is also T1)
827 mov r10, rcx ; pu64Dst
828
829 mov rbx, [r8]
830 mov rcx, [r8 + 8]
831 IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
832 mov rax, [r11]
833 mov rdx, [r11 + 8]
834
835 lock cmpxchg16b [r10]
836
; r11 must be used before IEM_SAVE_FLAGS, which clobbers it as T1.
837 mov [r11], rax
838 mov [r11 + 8], rdx
839 IEM_SAVE_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
840
841 pop rbx
842 ret
843 %else
; GCC/System V: rdi=dst, rsi=RaxRdx, rdx=RbxRcx, rcx=pEFlags.
; rcx and rdx are needed by the instruction, so those pointers move to r10/r11.
844 push rbx
845
846 mov r10, rcx ; pEFlags
847 mov r11, rdx ; pu64RbxRcx (is also T1)
848
849 mov rbx, [r11]
850 mov rcx, [r11 + 8]
851 IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
852 mov rax, [rsi]
853 mov rdx, [rsi + 8]
854
855 lock cmpxchg16b [rdi]
856
; Write back the full 64-bit halves.  Storing eax/edx here would only update
; the low 32 bits of each half and leave stale data in the upper 32 bits.
857 mov [rsi], rax
858 mov [rsi + 8], rdx
859 IEM_SAVE_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
860
861 pop rbx
862 ret
863
864 %endif
865ENDPROC iemAImpl_cmpxchg16b
866
; The locked variant is just a tail jump to the function above.
867BEGINPROC_FASTCALL iemAImpl_cmpxchg16b_locked, 16
868 ; Lazy bird always lock prefixes cmpxchg16b.
869 jmp NAME_FASTCALL(iemAImpl_cmpxchg16b,16,$@)
870ENDPROC iemAImpl_cmpxchg16b_locked
871
872%endif ; RT_ARCH_AMD64
873
874
875;
876; CMPXCHG.
877;
878; WARNING! This code makes ASSUMPTIONS about which registers T1 and T0 are mapped to!
879;
880; C-proto:
881; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg,(uintX_t *puXDst, uintX_t puEax, uintX_t uReg, uint32_t *pEFlags));
882;
; Generates iemAImpl_cmpxchg_u8/u16/u32/u64 with the given name suffix.
; A0 points to the destination, A1 points to the accumulator value (it is
; both read and written back), A2 holds the new value and A3 points to eflags.
; On 32-bit hosts the u64 variant instead receives a pointer to the new value
; and is implemented with cmpxchg8b.
;
; @param 1 The instruction prefix, either empty or 'lock'.
; @param 2 The function name suffix, either empty or '_locked'.
;
883BEGINCODE
884%macro IEMIMPL_CMPXCHG 2
885BEGINPROC_FASTCALL iemAImpl_cmpxchg_u8 %+ %2, 16
886 PROLOGUE_4_ARGS
887 IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
888 mov al, [A1]
889 %1 cmpxchg [A0], A2_8
890 mov [A1], al
891 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
892 EPILOGUE_4_ARGS
893ENDPROC iemAImpl_cmpxchg_u8 %+ %2
894
895BEGINPROC_FASTCALL iemAImpl_cmpxchg_u16 %+ %2, 16
896 PROLOGUE_4_ARGS
897 IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
898 mov ax, [A1]
899 %1 cmpxchg [A0], A2_16
900 mov [A1], ax
901 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
902 EPILOGUE_4_ARGS
903ENDPROC iemAImpl_cmpxchg_u16 %+ %2
904
905BEGINPROC_FASTCALL iemAImpl_cmpxchg_u32 %+ %2, 16
906 PROLOGUE_4_ARGS
907 IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
908 mov eax, [A1]
909 %1 cmpxchg [A0], A2_32
910 mov [A1], eax
911 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
912 EPILOGUE_4_ARGS
913ENDPROC iemAImpl_cmpxchg_u32 %+ %2
914
915BEGINPROC_FASTCALL iemAImpl_cmpxchg_u64 %+ %2, 16
916%ifdef RT_ARCH_AMD64
917 PROLOGUE_4_ARGS
918 IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
919 mov rax, [A1]
920 %1 cmpxchg [A0], A2
921 mov [A1], rax
922 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
923 EPILOGUE_4_ARGS
924%else
925 ;
926 ; Must use cmpxchg8b here. See also iemAImpl_cmpxchg8b.
927 ;
; Four registers are saved (16 bytes), so the stack arguments start at
; [esp + 16 + 4].
928 push esi
929 push edi
930 push ebx
931 push ebp
932
933 mov edi, ecx ; pu64Dst
934 mov esi, edx ; pu64Rax
935 mov ecx, [esp + 16 + 4 + 0] ; pu64Reg - Note! Pointer on 32-bit hosts!
936 mov ebp, [esp + 16 + 4 + 4] ; pEFlags
937
938 mov ebx, [ecx]
939 mov ecx, [ecx + 4]
940 IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
941 mov eax, [esi]
942 mov edx, [esi + 4]
943
944 lock cmpxchg8b [edi]
945
946 ; cmpxchg8b doesn't set CF, PF, AF, SF and OF, so we have to do that.
; ZF clear means the compare failed and edx:eax now holds the destination
; value, so only then must the flags come from a real comparison.  When the
; compare succeeded (ZF set) the operands were equal and 'cmp eax, eax'
; produces the matching flags.  Branching the other way round would force
; ZF=1 on a failed compare and report success to the guest.
947 jnz .cmpxchg8b_not_equal
948 cmp eax, eax ; just set the other flags.
949.store:
950 mov [esi], eax
951 mov [esi + 4], edx
952 IEM_SAVE_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, edi)
953
954 pop ebp
955 pop ebx
956 pop edi
957 pop esi
958 ret 8
959
; Compare the old accumulator value (still at [esi]) against the value loaded
; from the destination: the high dwords decide unless they are equal, in which
; case the low dwords do.  This yields the right ZF and CF; the remaining
; flags are only an approximation of a true 64-bit compare (see the todo).
960.cmpxchg8b_not_equal:
961 cmp [esi + 4], edx ;; @todo FIXME - verify 64-bit compare implementation
962 jne .store
963 cmp [esi], eax
964 jmp .store
965
966%endif
967ENDPROC iemAImpl_cmpxchg_u64 %+ %2
968%endmacro ; IEMIMPL_CMPXCHG
969
; Unlocked variants (empty prefix and suffix), then the locked ones.
970IEMIMPL_CMPXCHG , ,
971IEMIMPL_CMPXCHG lock, _locked
972
973;;
974; Macro for implementing a unary operator.
975;
976; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
977; variants, except on 32-bit systems where the 64-bit accesses require hand
978; coding.
979;
980; All the functions take a pointer to the destination memory operand in A0
981; and a pointer to eflags in A1.
982;
; Locked variants are always generated; there is no switch for them.
;
983; @param 1 The instruction mnemonic.
984; @param 2 The modified flags.
985; @param 3 The undefined flags.
986;
987%macro IEMIMPL_UNARY_OP 3
988BEGINCODE
989BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 8
990 PROLOGUE_2_ARGS
991 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
992 %1 byte [A0]
993 IEM_SAVE_FLAGS A1, %2, %3
994 EPILOGUE_2_ARGS
995ENDPROC iemAImpl_ %+ %1 %+ _u8
996
997BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 8
998 PROLOGUE_2_ARGS
999 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1000 lock %1 byte [A0]
1001 IEM_SAVE_FLAGS A1, %2, %3
1002 EPILOGUE_2_ARGS
1003ENDPROC iemAImpl_ %+ %1 %+ _u8_locked
1004
1005BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 8
1006 PROLOGUE_2_ARGS
1007 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1008 %1 word [A0]
1009 IEM_SAVE_FLAGS A1, %2, %3
1010 EPILOGUE_2_ARGS
1011ENDPROC iemAImpl_ %+ %1 %+ _u16
1012
1013BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 8
1014 PROLOGUE_2_ARGS
1015 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1016 lock %1 word [A0]
1017 IEM_SAVE_FLAGS A1, %2, %3
1018 EPILOGUE_2_ARGS
1019ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
1020
1021BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 8
1022 PROLOGUE_2_ARGS
1023 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1024 %1 dword [A0]
1025 IEM_SAVE_FLAGS A1, %2, %3
1026 EPILOGUE_2_ARGS
1027ENDPROC iemAImpl_ %+ %1 %+ _u32
1028
1029BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 8
1030 PROLOGUE_2_ARGS
1031 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1032 lock %1 dword [A0]
1033 IEM_SAVE_FLAGS A1, %2, %3
1034 EPILOGUE_2_ARGS
1035ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
1036
; The 64-bit variants are only generated for 64-bit hosts.
1037 %ifdef RT_ARCH_AMD64
1038BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
1039 PROLOGUE_2_ARGS
1040 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1041 %1 qword [A0]
1042 IEM_SAVE_FLAGS A1, %2, %3
1043 EPILOGUE_2_ARGS
1044ENDPROC iemAImpl_ %+ %1 %+ _u64
1045
1046BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
1047 PROLOGUE_2_ARGS
1048 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1049 lock %1 qword [A0]
1050 IEM_SAVE_FLAGS A1, %2, %3
1051 EPILOGUE_2_ARGS
1052ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
1053 %endif ; RT_ARCH_AMD64
1054
1055%endmacro
1056
; instr,modified-flags,undefined-flags.  inc and dec leave CF out of the
; modified set; 'not' is instantiated with no flags at all, in which case
; IEM_SAVE_FLAGS expands to nothing.
1057IEMIMPL_UNARY_OP inc, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
1058IEMIMPL_UNARY_OP dec, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
1059IEMIMPL_UNARY_OP neg, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
1060IEMIMPL_UNARY_OP not, 0, 0
1061
1062
1063;;
1064; Macro for implementing memory fence operation.
1065;
1066; No return value, no operands or anything.
1067;
; Generates a function named iemAImpl_<instruction> that executes the
; instruction and returns.
;
1068; @param 1 The instruction.
1069;
1070%macro IEMIMPL_MEM_FENCE 1
1071BEGINCODE
1072BEGINPROC_FASTCALL iemAImpl_ %+ %1, 0
1073 %1
1074 ret
1075ENDPROC iemAImpl_ %+ %1
1076%endmacro
1077
1078IEMIMPL_MEM_FENCE lfence
1079IEMIMPL_MEM_FENCE sfence
1080IEMIMPL_MEM_FENCE mfence
1081
1082;;
1083; Alternative for non-SSE2 host.
1084;
; Pushes xAX, exchanges it with the stack slot just written (so xAX keeps its
; value) and drops the slot again.  Takes no arguments and returns nothing.
; NOTE(review): presumably this relies on xchg with a memory operand being
; implicitly locked to get the fencing effect - confirm against the CPU manual.
;
1085BEGINPROC_FASTCALL iemAImpl_alt_mem_fence, 0
1086 push xAX
1087 xchg xAX, [xSP]
1088 add xSP, xCB
1089 ret
1090ENDPROC iemAImpl_alt_mem_fence
1091
1092
1093
1094;;
1095; Macro for implementing a shift operation.
1096;
1097; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1098; 32-bit systems where the 64-bit accesses require hand coding.
1099;
1100; All the functions take a pointer to the destination memory operand in A0,
1101; the shift count in A1 and a pointer to eflags in A2.
1102;
1103; @param 1 The instruction mnemonic.
1104; @param 2 The modified flags.
1105; @param 3 The undefined flags.
1106;
1107; Makes ASSUMPTIONS about A0, A1 and A2 assignments.
1108;
; The shift count has to end up in cl.  With the GCC 64-bit convention cl is
; not one of A0..A2, so the count is simply copied there.  In the other
; configurations A0 is rcx/ecx, so A0 and A1 are swapped instead: the count
; lands in cl and the destination pointer in A1.  Neither mov nor xchg alters
; the flags loaded just before.
;
1109%macro IEMIMPL_SHIFT_OP 3
1110BEGINCODE
1111BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
1112 PROLOGUE_3_ARGS
1113 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1114 %ifdef ASM_CALL64_GCC
1115 mov cl, A1_8
1116 %1 byte [A0], cl
1117 %else
1118 xchg A1, A0
1119 %1 byte [A1], cl
1120 %endif
1121 IEM_SAVE_FLAGS A2, %2, %3
1122 EPILOGUE_3_ARGS
1123ENDPROC iemAImpl_ %+ %1 %+ _u8
1124
1125BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
1126 PROLOGUE_3_ARGS
1127 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1128 %ifdef ASM_CALL64_GCC
1129 mov cl, A1_8
1130 %1 word [A0], cl
1131 %else
1132 xchg A1, A0
1133 %1 word [A1], cl
1134 %endif
1135 IEM_SAVE_FLAGS A2, %2, %3
1136 EPILOGUE_3_ARGS
1137ENDPROC iemAImpl_ %+ %1 %+ _u16
1138
1139BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
1140 PROLOGUE_3_ARGS
1141 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1142 %ifdef ASM_CALL64_GCC
1143 mov cl, A1_8
1144 %1 dword [A0], cl
1145 %else
1146 xchg A1, A0
1147 %1 dword [A1], cl
1148 %endif
1149 IEM_SAVE_FLAGS A2, %2, %3
1150 EPILOGUE_3_ARGS
1151ENDPROC iemAImpl_ %+ %1 %+ _u32
1152
; The 64-bit variant is only generated for 64-bit hosts.
1153 %ifdef RT_ARCH_AMD64
1154BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
1155 PROLOGUE_3_ARGS
1156 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1157 %ifdef ASM_CALL64_GCC
1158 mov cl, A1_8
1159 %1 qword [A0], cl
1160 %else
1161 xchg A1, A0
1162 %1 qword [A1], cl
1163 %endif
1164 IEM_SAVE_FLAGS A2, %2, %3
1165 EPILOGUE_3_ARGS
1166ENDPROC iemAImpl_ %+ %1 %+ _u64
1167 %endif ; RT_ARCH_AMD64
1168
1169%endmacro
1170
; instr,modified-flags,undefined-flags.
1171IEMIMPL_SHIFT_OP rol, (X86_EFL_OF | X86_EFL_CF), 0
1172IEMIMPL_SHIFT_OP ror, (X86_EFL_OF | X86_EFL_CF), 0
1173IEMIMPL_SHIFT_OP rcl, (X86_EFL_OF | X86_EFL_CF), 0
1174IEMIMPL_SHIFT_OP rcr, (X86_EFL_OF | X86_EFL_CF), 0
1175IEMIMPL_SHIFT_OP shl, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1176IEMIMPL_SHIFT_OP shr, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1177IEMIMPL_SHIFT_OP sar, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1178
1179
;;
; Template for the double precision shift instructions (count in CL).
;
; Produces iemAImpl_<mnemonic>_u16 and _u32, plus _u64 when building for an
; AMD64 host (on 32-bit hosts the 64-bit access requires hand coding and is
; not generated here).
;
; Arguments of the generated functions:
;       A0 - Pointer to the destination operand (r/m).
;       A1 - The source operand (reg).
;       A2 - The shift count.
;       A3 - Pointer to the eflags variable/register.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1, A2 and A3 assignments:  With ASM_CALL64_GCC
; the count is swapped into A3 (expected to be xCX) around the instruction and
; swapped back so A3 is valid for saving the flags.  Otherwise the count is
; swapped into A0 (expected to be xCX) and the destination is addressed via A2.
;
%macro IEMIMPL_SHIFT_DBL_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        xchg    A3, A2                  ; count -> A3 (cl)
        %1      [A0], A1_16, cl
        xchg    A3, A2                  ; eflags pointer back in A3
 %else
        xchg    A0, A2                  ; count -> A0 (cl), destination pointer -> A2
        %1      [A2], A1_16, cl
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        xchg    A3, A2                  ; count -> A3 (cl)
        %1      [A0], A1_32, cl
        xchg    A3, A2                  ; eflags pointer back in A3
 %else
        xchg    A0, A2                  ; count -> A0 (cl), destination pointer -> A2
        %1      [A2], A1_32, cl
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
  %ifdef ASM_CALL64_GCC
        xchg    A3, A2                  ; count -> A3 (cl)
        %1      [A0], A1, cl
        xchg    A3, A2                  ; eflags pointer back in A3
  %else
        xchg    A0, A2                  ; count -> A0 (cl), destination pointer -> A2
        %1      [A2], A1, cl
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

%endmacro

IEMIMPL_SHIFT_DBL_OP shld, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_DBL_OP shrd, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1248
1249
;;
; Template for the one-operand multiplication instructions.
;
; Produces iemAImpl_<mnemonic>_u8, _u16 and _u32, plus _u64 when building for
; an AMD64 host (the 32-bit host version lives in IEMAllAImplC.cpp).
;
; Arguments of the 8-bit function, which only operates on AX and therefore
; takes no DX pointer:
;       A0 - Pointer to AX (AL is read, the whole of AX is written).
;       A1 - The operand.
;       A2 - Pointer to the eflags.
;
; Arguments of the other functions:
;       A0 - Pointer to rAX.
;       A1 - Pointer to rDX.
;       A2 - The operand.
;       A3 - Pointer to the eflags.
;
; All the functions return 0 in eax, giving them the same return convention
; as the div/idiv workers so callers can treat both families alike.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments:  Outside the
; ASM_CALL64_GCC case the rDX pointer in A1 is parked in T1 across the
; multiplication.
;
%macro IEMIMPL_MUL_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     al, [A0]
        %1      A1_8
        mov     [A0], ax                ; the product occupies all of AX
        IEM_SAVE_FLAGS       A2, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     ax, [A0]
 %ifdef ASM_CALL64_GCC
        %1      A2_16
        mov     [A0], ax
        mov     [A1], dx
 %else
        mov     T1, A1                  ; keep the rDX pointer in T1
        %1      A2_16
        mov     [A0], ax
        mov     [T1], dx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     eax, [A0]
 %ifdef ASM_CALL64_GCC
        %1      A2_32
        mov     [A0], eax
        mov     [A1], edx
 %else
        mov     T1, A1                  ; keep the rDX pointer in T1
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64 ; The 32-bit host version lives in IEMAllAImplC.cpp.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     rax, [A0]
  %ifdef ASM_CALL64_GCC
        %1      A2
        mov     [A0], rax
        mov     [A1], rdx
  %else
        mov     T1, A1                  ; keep the rDX pointer in T1
        %1      A2
        mov     [A0], rax
        mov     [T1], rdx
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

%endmacro

IEMIMPL_MUL_OP mul,  (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
1345
1346
BEGINCODE
;;
; Worker function for negating the 64-bit number held in T1_32:T0_32
; (T0_32 = low half, T1_32 = high half).
;
; Works by swapping both halves with zeroed stack slots and then subtracting
; the original value from the zero now held in the registers.
;
; @uses     Only T0 and T1 (the result) and eflags are modified.
;
iemAImpl_negate_T0_T1_u32:
        push    0                       ; zero that will be swapped into T1_32
        push    0                       ; zero that will be swapped into T0_32
        xchg    T0_32, [xSP]            ; T0_32 = 0, slot = original low half
        xchg    T1_32, [xSP + xCB]      ; T1_32 = 0, slot = original high half
        sub     T0_32, [xSP]            ; 0 - low half
        sbb     T1_32, [xSP + xCB]      ; 0 - high half - borrow
        add     xSP, xCB*2
        ret
1360
%ifdef RT_ARCH_AMD64
;;
; Worker function for negating the 128-bit number held in T1:T0
; (T0 = low half, T1 = high half).
;
; Works by swapping both halves with zeroed stack slots and then subtracting
; the original value from the zero now held in the registers.
;
; @uses     Only T0 and T1 (the result) and eflags are modified.
;
iemAImpl_negate_T0_T1_u64:
        push    0                       ; zero that will be swapped into T1
        push    0                       ; zero that will be swapped into T0
        xchg    T0, [xSP]               ; T0 = 0, slot = original low half
        xchg    T1, [xSP + xCB]         ; T1 = 0, slot = original high half
        sub     T0, [xSP]               ; 0 - low half
        sbb     T1, [xSP + xCB]         ; 0 - high half - borrow
        add     xSP, xCB*2
        ret
%endif
1375
1376
;;
; Template for the division instructions.
;
; Produces iemAImpl_<mnemonic>_u8, _u16 and _u32, plus _u64 when building for
; an AMD64 host (the 32-bit host version lives in IEMAllAImplC.cpp).
;
; Arguments of the 8-bit function, which only operates on AX and therefore
; takes no DX pointer:
;       A0 - Pointer to AX.
;       A1 - The divisor.
;       A2 - Pointer to the eflags.
;
; Arguments of the other functions:
;       A0 - Pointer to rAX.
;       A1 - Pointer to rDX.
;       A2 - The divisor.
;       A3 - Pointer to the eflags.
;
; The functions all return 0 on success and -1 if a divide error should be
; raised by the caller.  In the latter case (zero divisor, or the pre-check
; concluding that the quotient will not fit) the division is not executed and
; the output operands are left untouched.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
; @param        4       1 if signed, 0 if unsigned.
;
; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
;
%macro IEMIMPL_DIV_OP 4
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS

        ; div by chainsaw check.
        test    A1_8, A1_8
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A0 + 1], A1_8          ; high byte of the dividend vs. the divisor
        jae     .div_overflow
 %else
        mov     T0_16, [A0]             ; T0 = dividend
        mov     T1, A1                  ; T1 = saved divisor (because of missing T1_8 in 32-bit)
        test    A1_8, A1_8
        js      .divisor_negative
        test    T0_16, T0_16
        jns     .both_positive
        neg     T0_16
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shr     T0_16, 7
        cmp     T0_8, A1_8
        pop     T0                      ; (pop leaves the flags of the cmp alone)
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_8, 0x7f              ; Special case for covering (divisor - 1).
        cmp     T0_8, A1_8
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A1_8
        test    T0_16, T0_16
        jns     .one_of_each
        neg     T0_16
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shr     T0_16, 7
        cmp     T0_8, A1_8
        jae     .div_overflow
.div_no_overflow:
        mov     A1, T1                  ; restore divisor
 %endif

        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     ax, [A0]
        %1      A1_8
        mov     [A0], ax
        IEM_SAVE_FLAGS       A2, %2, %3
        xor     eax, eax                ; return 0 (success)

.return:
        EPILOGUE_3_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1                 ; return -1 (caller raises the divide error)
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2_16, A2_16
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A1], A2_16             ; high word of the dividend vs. the divisor
        jae     .div_overflow
 %else
        mov     T0_16, [A1]
        shl     T0_32, 16
        mov     T0_16, [A0]             ; T0 = dividend
        mov     T1, A2                  ; T1 = divisor
        test    T1_16, T1_16
        js      .divisor_negative
        test    T0_32, T0_32
        jns     .both_positive
        neg     T0_32
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shr     T0_32, 15
        cmp     T0_16, T1_16
        pop     T0                      ; (pop leaves the flags of the cmp alone)
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_16, 0x7fff           ; Special case for covering (divisor - 1).
        cmp     T0_16, T1_16
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     T1_16
        test    T0_32, T0_32
        jns     .one_of_each
        neg     T0_32
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shr     T0_32, 15
        cmp     T0_16, T1_16
        jae     .div_overflow
.div_no_overflow:
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; divisor -> T1 before dx is loaded
        mov     ax, [A0]
        mov     dx, [A1]
        %1      T1_16
        mov     [A0], ax
        mov     [A1], dx
 %else
        mov     T1, A1                  ; rDX pointer -> T1 before dx is loaded
        mov     ax, [A0]
        mov     dx, [T1]
        %1      A2_16
        mov     [A0], ax
        mov     [T1], dx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0 (success)

.return:
        EPILOGUE_4_ARGS

.div_zero:
.div_overflow:
        mov     eax, -1                 ; return -1 (caller raises the divide error)
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2_32, A2_32
        jz      .div_zero

        ; Overflow check - unsigned division is simple to verify, haven't
        ; found a simple way to check signed division yet unfortunately.
 %if %4 == 0
        cmp     [A1], A2_32             ; high dword of the dividend vs. the divisor
        jae     .div_overflow
 %else
        push    A2                      ; save A2 so we can modify it (we are out of regs on x86).
        mov     T0_32, [A0]             ; T0 = dividend low
        mov     T1_32, [A1]             ; T1 = dividend high
        test    A2_32, A2_32
        js      .divisor_negative
        test    T1_32, T1_32
        jns     .both_positive
        call    iemAImpl_negate_T0_T1_u32
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shl     T1_32, 1
        shr     T0_32, 31
        or      T1_32, T0_32
        cmp     T1_32, A2_32
        pop     T0                      ; (pop leaves the flags of the cmp alone)
        jb      .div_no_overflow
        ja      .div_overflow
        and     T0_32, 0x7fffffff       ; Special case for covering (divisor - 1).
        cmp     T0_32, A2_32
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A2_32
        test    T1_32, T1_32
        jns     .one_of_each
        call    iemAImpl_negate_T0_T1_u32
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shl     T1_32, 1
        shr     T0_32, 31
        or      T1_32, T0_32
        cmp     T1_32, A2_32
        jae     .div_overflow
.div_no_overflow:
        pop     A2                      ; restore the (possibly negated) divisor
 %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; divisor -> T1 before edx is loaded
        mov     eax, [A0]
        mov     edx, [A1]
        %1      T1_32
        mov     [A0], eax
        mov     [A1], edx
 %else
        mov     T1, A1                  ; rDX pointer -> T1 before edx is loaded
        mov     eax, [A0]
        mov     edx, [T1]
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0 (success)

.return:
        EPILOGUE_4_ARGS

.div_overflow:
 %if %4 != 0
        pop     A2                      ; balance the push done by the signed overflow check
 %endif
.div_zero:
        mov     eax, -1                 ; return -1 (caller raises the divide error)
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64 ; The 32-bit host version lives in IEMAllAImplC.cpp.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS

        ; div by chainsaw check.
        test    A2, A2
        jz      .div_zero

        ; Overflow check, same approach as for the narrower variants.
  %if %4 == 0
        cmp     [A1], A2                ; high qword of the dividend vs. the divisor
        jae     .div_overflow
  %else
        push    A2                      ; save A2 so we can modify it (we are out of regs on x86).
        mov     T0, [A0]                ; T0 = dividend low
        mov     T1, [A1]                ; T1 = dividend high
        test    A2, A2
        js      .divisor_negative
        test    T1, T1
        jns     .both_positive
        call    iemAImpl_negate_T0_T1_u64
.one_of_each:                           ; OK range is 2^(result-width - 1) + (divisor - 1).
        push    T0                      ; Start off like unsigned below.
        shl     T1, 1
        shr     T0, 63
        or      T1, T0
        cmp     T1, A2
        pop     T0                      ; (pop leaves the flags of the cmp alone)
        jb      .div_no_overflow
        ja      .div_overflow
        mov     T1, 0x7fffffffffffffff
        and     T0, T1                  ; Special case for covering (divisor - 1).
        cmp     T0, A2
        jae     .div_overflow
        jmp     .div_no_overflow

.divisor_negative:
        neg     A2
        test    T1, T1
        jns     .one_of_each
        call    iemAImpl_negate_T0_T1_u64
.both_positive:                         ; Same as unsigned shifted by sign indicator bit.
        shl     T1, 1
        shr     T0, 63
        or      T1, T0
        cmp     T1, A2
        jae     .div_overflow
.div_no_overflow:
        pop     A2                      ; restore the (possibly negated) divisor
  %endif

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
  %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; divisor -> T1 before rdx is loaded
        mov     rax, [A0]
        mov     rdx, [A1]
        %1      T1
        mov     [A0], rax
        mov     [A1], rdx
  %else
        mov     T1, A1                  ; rDX pointer -> T1 before rdx is loaded
        mov     rax, [A0]
        mov     rdx, [T1]
        %1      A2
        mov     [A0], rax
        mov     [T1], rdx
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0 (success)

.return:
        EPILOGUE_4_ARGS_EX 12

.div_overflow:
  %if %4 != 0
        pop     A2                      ; balance the push done by the signed overflow check
  %endif
.div_zero:
        mov     eax, -1                 ; return -1 (caller raises the divide error)
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; RT_ARCH_AMD64

%endmacro

IEMIMPL_DIV_OP div,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
IEMIMPL_DIV_OP idiv, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 1
1701
1702
;
; BSWAP. No flag changes.
;
; Each function takes one argument, a pointer to the value to byte swap
; (input/output).  They all return void.
;

;;
; BSWAP with a 16-bit operand: the 32-bit instruction preceded by a manually
; emitted operand size prefix.
;
; Note that a full dword is read from and written back to the location.
;
; @param        A0      Pointer to the value (input/output).
;
BEGINPROC_FASTCALL iemAImpl_bswap_u16, 4
        PROLOGUE_1_ARGS
        mov     T0_32, [A0]             ; just in case any of the upper bits are used.
        db      66h                     ; operand size prefix applying to the bswap below
        bswap   T0_32
        mov     [A0], T0_32
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u16
1717
;;
; BSWAP with a 32-bit operand.  No flag changes, returns void.
;
; @param        A0      Pointer to the 32-bit value (input/output).
;
BEGINPROC_FASTCALL iemAImpl_bswap_u32, 4
        PROLOGUE_1_ARGS
        mov     T0_32, [A0]             ; fetch
        bswap   T0_32                   ; reverse the byte order
        mov     [A0], T0_32             ; store
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u32
1725
;;
; BSWAP with a 64-bit operand.  No flag changes, returns void.
;
; AMD64 hosts use a single 64-bit bswap.  Other hosts byte swap the two dwords
; individually and store them in exchanged positions.
;
; @param        A0      Pointer to the 64-bit value (input/output).
;
BEGINPROC_FASTCALL iemAImpl_bswap_u64, 4
%ifdef RT_ARCH_AMD64
        PROLOGUE_1_ARGS
        mov     T0, [A0]
        bswap   T0
        mov     [A0], T0
        EPILOGUE_1_ARGS
%else
        PROLOGUE_1_ARGS
        mov     T0, [A0]                ; T0 = low dword
        mov     T1, [A0 + 4]            ; T1 = high dword
        bswap   T0
        bswap   T1
        mov     [A0 + 4], T0            ; swapped low dword becomes the high dword
        mov     [A0], T1                ; swapped high dword becomes the low dword
        EPILOGUE_1_ARGS
%endif
ENDPROC iemAImpl_bswap_u64
1744
1745
;;
; Initialize the FPU for the actual instruction being emulated, this means
; loading parts of the guest's control word and status word.
;
; The current FPU environment is stored at [xSP], patched and loaded back:
;   - FCW is replaced by the guest FCW restricted to the exception mask,
;     precision control and rounding control bits.
;   - FSW keeps only its current TOP field, combined with the guest's
;     condition code bits (C0 thru C3).
;
; @uses     The FNSTENV image at [xSP] (28 bytes for the 32-bit protected mode
;           format according to the Intel SDM; the callers in this file
;           reserve 20h bytes), T0, T1 and eflags.
; @param    1       Expression giving the address of the FXSTATE of the guest.
;
%macro FPU_LD_FXSTATE_FCW_AND_SAFE_FSW 1
        fnstenv [xSP]

        ; FCW - for exception, precision and rounding control.
        movzx   T0, word [%1 + X86FXSTATE.FCW]
        and     T0, X86_FCW_MASK_ALL | X86_FCW_PC_MASK | X86_FCW_RC_MASK
        mov     [xSP + X86FSTENV32P.FCW], T0_16

        ; FSW - for undefined C0, C1, C2, and C3.
        movzx   T1, word [%1 + X86FXSTATE.FSW]
        and     T1, X86_FSW_C_MASK                      ; guest condition codes
        movzx   T0, word [xSP + X86FSTENV32P.FSW]
        and     T0, X86_FSW_TOP_MASK                    ; current top-of-stack
        or      T0, T1
        mov     [xSP + X86FSTENV32P.FSW], T0_16

        fldenv  [xSP]
%endmacro
1771
1772
;;
; FPU result layout used by the workers in this file: an 80-bit floating
; point value immediately followed by the 16-bit FPU status word.
;
; @todo Need to move this as well somewhere better?
;
struc IEMFPURESULT
    .r80Result      resb 10             ; the 80-bit result value
    .FSW            resw 1              ; the output status word
endstruc
1780
1781
;;
; FPU result layout with two 80-bit floating point values; the 16-bit FPU
; status word sits between them.
;
; @todo Need to move this as well somewhere better?
;
struc IEMFPURESULTTWO
    .r80Result1     resb 10             ; the first 80-bit result value
    .FSW            resw 1              ; the output status word
    .r80Result2     resb 10             ; the second 80-bit result value
endstruc
1790
1791
1792;
1793;---------------------- 16-bit signed integer operations ----------------------
1794;
1795
1796
;;
; Converts a 16-bit signed integer value to a 80-bit floating point one (fpu
; register) by means of FILD.
;
; @param        A0      FPU context (fxsave).
; @param        A1      Pointer to a IEMFPURESULT for the output.
; @param        A2      Pointer to the 16-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i16_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; scratch space for the FPU environment image

        fninit                          ; start from a freshly initialized FPU
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    word [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; clear the exception flags before storing the result
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU in the initialized state
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i16_to_r80
1820
1821
;;
; Stores a 80-bit floating point value (register) as a 16-bit signed integer
; (memory), converting according to the guest control word loaded from A0.
;
; @param        A0      FPU context (fxsave).
; @param        A1      Where to return the output FSW.
; @param        A2      Where to store the 16-bit signed integer value.
; @param        A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch space for the FPU environment image

        fninit                          ; start from a freshly initialized FPU
        fld     tword [A3]              ; value to convert -> st0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   word [A2]

        fnstsw  word [A1]

        fninit                          ; leave the FPU in the initialized state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i16
1845
1846
;;
; Store a 80-bit floating point value (register) as a 16-bit signed integer
; (memory) with truncation.
;
; The store must use the word form of FISTTP: the destination is only 16 bits
; wide, and the operand size also determines the range the value is checked
; against when reporting an invalid operation in the FSW.
;
; @param        A0      FPU context (fxsave).
; @param        A1      Where to return the output FSW.
; @param        A2      Where to store the 16-bit signed integer value.
; @param        A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch space for the FPU environment image

        fninit                          ; start from a freshly initialized FPU
        fld     tword [A3]              ; value to convert -> st0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  word [A2]               ; 16-bit store, never touches more than the 2 output bytes

        fnstsw  word [A1]

        fninit                          ; leave the FPU in the initialized state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i16
1871
1872
;;
; Template for FPU instructions working on one 80-bit floating point value
; and one 16-bit signed integer value, producing a 80-bit result and FSW.
;
; @param        1       The instruction
;
; @param        A0      FPU context (fxsave).
; @param        A1      Pointer to a IEMFPURESULT for the output.
; @param        A2      Pointer to the 80-bit value.
; @param        A3      Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch space for the FPU environment image

        fninit                          ; start from a freshly initialized FPU
        fld     tword [A2]              ; 80-bit operand -> st0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; clear the exception flags before storing the result
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU in the initialized state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16 fiadd
IEMIMPL_FPU_R80_BY_I16 fimul
IEMIMPL_FPU_R80_BY_I16 fisub
IEMIMPL_FPU_R80_BY_I16 fisubr
IEMIMPL_FPU_R80_BY_I16 fidiv
IEMIMPL_FPU_R80_BY_I16 fidivr
1909
1910
;;
; Template for FPU instructions working on one 80-bit floating point value
; and one 16-bit signed integer value, only returning FSW.
;
; @param        1       The instruction
;
; @param        A0      FPU context (fxsave).
; @param        A1      Where to store the output FSW.
; @param        A2      Pointer to the 80-bit value.
; @param        A3      Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch space for the FPU environment image

        fninit                          ; start from a freshly initialized FPU
        fld     tword [A2]              ; 80-bit operand -> st0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]

        fnstsw  word [A1]

        fninit                          ; leave the FPU in the initialized state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16_FSW ficom
1941
1942
1943
1944;
1945;---------------------- 32-bit signed integer operations ----------------------
1946;
1947
1948
;;
; Converts a 32-bit signed integer value to a 80-bit floating point one (fpu
; register) by means of FILD.
;
; @param        A0      FPU context (fxsave).
; @param        A1      Pointer to a IEMFPURESULT for the output.
; @param        A2      Pointer to the 32-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i32_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; scratch space for the FPU environment image

        fninit                          ; start from a freshly initialized FPU
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    dword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; clear the exception flags before storing the result
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU in the initialized state
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i32_to_r80
1972
1973
;;
; Stores a 80-bit floating point value (register) as a 32-bit signed integer
; (memory), converting according to the guest control word loaded from A0.
;
; @param        A0      FPU context (fxsave).
; @param        A1      Where to return the output FSW.
; @param        A2      Where to store the 32-bit signed integer value.
; @param        A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch space for the FPU environment image

        fninit                          ; start from a freshly initialized FPU
        fld     tword [A3]              ; value to convert -> st0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   dword [A2]

        fnstsw  word [A1]

        fninit                          ; leave the FPU in the initialized state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i32
1997
1998
;;
; Stores a 80-bit floating point value (register) as a 32-bit signed integer
; (memory) with truncation (FISTTP).
;
; @param        A0      FPU context (fxsave).
; @param        A1      Where to return the output FSW.
; @param        A2      Where to store the 32-bit signed integer value.
; @param        A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch space for the FPU environment image

        fninit                          ; start from a freshly initialized FPU
        fld     tword [A3]              ; value to convert -> st0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  dword [A2]

        fnstsw  word [A1]

        fninit                          ; leave the FPU in the initialized state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i32
2023
2024
;;
; Template for FPU instructions working on one 80-bit floating point value
; and one 32-bit signed integer value, producing a 80-bit result and FSW.
;
; @param        1       The instruction
;
; @param        A0      FPU context (fxsave).
; @param        A1      Pointer to a IEMFPURESULT for the output.
; @param        A2      Pointer to the 80-bit value.
; @param        A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch space for the FPU environment image

        fninit                          ; start from a freshly initialized FPU
        fld     tword [A2]              ; 80-bit operand -> st0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; clear the exception flags before storing the result
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU in the initialized state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32 fiadd
IEMIMPL_FPU_R80_BY_I32 fimul
IEMIMPL_FPU_R80_BY_I32 fisub
IEMIMPL_FPU_R80_BY_I32 fisubr
IEMIMPL_FPU_R80_BY_I32 fidiv
IEMIMPL_FPU_R80_BY_I32 fidivr
2061
2062
;;
; Template for FPU instructions working on one 80-bit floating point value
; and one 32-bit signed integer value, only returning FSW.
;
; @param        1       The instruction
;
; @param        A0      FPU context (fxsave).
; @param        A1      Where to store the output FSW.
; @param        A2      Pointer to the 80-bit value.
; @param        A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch space for the FPU environment image

        fninit                          ; start from a freshly initialized FPU
        fld     tword [A2]              ; 80-bit operand -> st0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word [A1]

        fninit                          ; leave the FPU in the initialized state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32_FSW ficom
2093
2094
2095
2096;
2097;---------------------- 64-bit signed integer operations ----------------------
2098;
2099
2100
;;
; Converts a 64-bit signed integer value to a 80-bit floating point one (fpu
; register) by means of FILD.
;
; @param        A0      FPU context (fxsave).
; @param        A1      Pointer to a IEMFPURESULT for the output.
; @param        A2      Pointer to the 64-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; scratch space for the FPU environment image

        fninit                          ; start from a freshly initialized FPU
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    qword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; clear the exception flags before storing the result
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU in the initialized state
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i64_to_r80
2124
2125
;;
; Stores a 80-bit floating point value (register) as a 64-bit signed integer
; (memory), converting according to the guest control word loaded from A0.
;
; @param        A0      FPU context (fxsave).
; @param        A1      Where to return the output FSW.
; @param        A2      Where to store the 64-bit signed integer value.
; @param        A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch space for the FPU environment image

        fninit                          ; start from a freshly initialized FPU
        fld     tword [A3]              ; value to convert -> st0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   qword [A2]

        fnstsw  word [A1]

        fninit                          ; leave the FPU in the initialized state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i64
2149
2150
;;
; Stores a 80-bit floating point value (register) as a 64-bit signed integer
; (memory) with truncation (FISTTP).
;
; @param        A0      FPU context (fxsave).
; @param        A1      Where to return the output FSW.
; @param        A2      Where to store the 64-bit signed integer value.
; @param        A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch space for the FPU environment image

        fninit                          ; start from a freshly initialized FPU
        fld     tword [A3]              ; value to convert -> st0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  qword [A2]

        fnstsw  word [A1]

        fninit                          ; leave the FPU in the initialized state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i64
2175
2176
2177
2178;
2179;---------------------- 32-bit floating point operations ----------------------
2180;
2181
;;
; Converts a 32-bit floating point value to a 80-bit one (fpu register) by
; loading it with FLD.
;
; @param        A0      FPU context (fxsave).
; @param        A1      Pointer to a IEMFPURESULT for the output.
; @param        A2      Pointer to the 32-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r32_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; scratch space for the FPU environment image

        fninit                          ; start from a freshly initialized FPU
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     dword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; clear the exception flags before storing the result
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU in the initialized state
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r32_to_r80
2205
2206
;;
; Stores a 80-bit floating point value (register) as a 32-bit one (memory),
; converting according to the guest control word loaded from A0.
;
; @param        A0      FPU context (fxsave).
; @param        A1      Where to return the output FSW.
; @param        A2      Where to store the 32-bit value.
; @param        A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch space for the FPU environment image

        fninit                          ; start from a freshly initialized FPU
        fld     tword [A3]              ; value to convert -> st0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fst     dword [A2]              ; non-popping store; the fninit below resets the FPU anyway

        fnstsw  word [A1]

        fninit                          ; leave the FPU in the initialized state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r32
2230
2231
;;
; Template for FPU instructions working on one 80-bit and one 32-bit floating
; point value, producing a 80-bit result and FSW.
;
; @param        1       The instruction
;
; @param        A0      FPU context (fxsave).
; @param        A1      Pointer to a IEMFPURESULT for the output.
; @param        A2      Pointer to the 80-bit value.
; @param        A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch space for the FPU environment image

        fninit                          ; start from a freshly initialized FPU
        fld     tword [A2]              ; 80-bit operand -> st0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; clear the exception flags before storing the result
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU in the initialized state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32 fadd
IEMIMPL_FPU_R80_BY_R32 fmul
IEMIMPL_FPU_R80_BY_R32 fsub
IEMIMPL_FPU_R80_BY_R32 fsubr
IEMIMPL_FPU_R80_BY_R32 fdiv
IEMIMPL_FPU_R80_BY_R32 fdivr
2268
2269
;;
; Template for FPU instructions working on one 80-bit and one 32-bit floating
; point value, only returning FSW.
;
; @param        1       The instruction
;
; @param        A0      FPU context (fxsave).
; @param        A1      Where to store the output FSW.
; @param        A2      Pointer to the 80-bit value.
; @param        A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch space for the FPU environment image

        fninit                          ; start from a freshly initialized FPU
        fld     tword [A2]              ; 80-bit operand -> st0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word [A1]

        fninit                          ; leave the FPU in the initialized state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32_FSW fcom
2300
2301
2302
2303;
2304;---------------------- 64-bit floating point operations ----------------------
2305;
2306
;;
; Converts a 64-bit floating point value to a 80-bit one (fpu register) by
; loading it with FLD.
;
; The FPU is initialized first, like in the other load workers, so the load
; runs on an empty register stack without pending exceptions instead of
; whatever state the host FPU happens to be in.
;
; @param        A0      FPU context (fxsave).
; @param        A1      Pointer to a IEMFPURESULT for the output.
; @param        A2      Pointer to the 64-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; scratch space for the FPU environment image

        fninit                          ; start from a freshly initialized FPU
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     qword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; clear the exception flags before storing the result
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU in the initialized state
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r64_to_r80
2329
2330
;;
; Stores a 80-bit floating point value (register) as a 64-bit one (memory),
; converting according to the guest control word loaded from A0.
;
; @param        A0      FPU context (fxsave).
; @param        A1      Where to return the output FSW.
; @param        A2      Where to store the 64-bit value.
; @param        A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch space for the FPU environment image

        fninit                          ; start from a freshly initialized FPU
        fld     tword [A3]              ; value to convert -> st0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fst     qword [A2]              ; non-popping store; the fninit below resets the FPU anyway

        fnstsw  word [A1]

        fninit                          ; leave the FPU in the initialized state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r64
2354
2355
;;
; Template for FPU instructions working on one 80-bit and one 64-bit floating
; point value, producing a 80-bit result and FSW.
;
; @param        1       The instruction
;
; @param        A0      FPU context (fxsave).
; @param        A1      Pointer to a IEMFPURESULT for the output.
; @param        A2      Pointer to the 80-bit value.
; @param        A3      Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; scratch space for the FPU environment image

        fninit                          ; start from a freshly initialized FPU
        fld     tword [A2]              ; 80-bit operand -> st0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      qword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; clear the exception flags before storing the result
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the FPU in the initialized state
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64 fadd
IEMIMPL_FPU_R80_BY_R64 fmul
IEMIMPL_FPU_R80_BY_R64 fsub
IEMIMPL_FPU_R80_BY_R64 fsubr
IEMIMPL_FPU_R80_BY_R64 fdiv
IEMIMPL_FPU_R80_BY_R64 fdivr
2392
;;
; Generates iemAImpl_<insn>_r80_by_r64 for an FPU instruction that takes an
; 80-bit value in ST0 and a 64-bit floating point memory operand and whose
; only output is the FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch stack space (presumably for the FCW/FSW macro - confirm).

        fninit                          ; Start from an empty x87 register stack.
        fld     tword [A2]              ; ST0 = 80-bit operand.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      qword [A3]

        fnstsw  word  [A1]              ; The status word is the whole result.

        fninit                          ; Leave the FPU in its reset state.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64_FSW fcom
2423
2424
2425
2426;
2427;---------------------- 80-bit floating point operations ----------------------
2428;
2429
;;
; Loads an 80-bit floating point value from memory into the FPU and returns
; it together with the resulting FSW.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit floating point value to load.
;
BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; Scratch stack space (presumably for the FCW/FSW macro - confirm).

        fninit                          ; Start from an empty x87 register stack.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     tword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Drop pending exceptions before the (waiting) fstp.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Leave the FPU in its reset state.
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r80_from_r80
2453
2454
;;
; Stores an 80-bit floating point register value to memory in 80-bit format.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 80-bit value.
; @param    A3      Pointer to the 80-bit register value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch stack space (presumably for the FCW/FSW macro - confirm).

        fninit                          ; Start from an empty x87 register stack.
        fld     tword [A3]              ; ST0 = register value to store.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fstp    tword [A2]

        fnstsw  word  [A1]              ; Report the status word the store produced.

        fninit                          ; Leave the FPU in its reset state.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r80
2478
2479
;;
; Generates iemAImpl_<insn>_r80_by_r80: an FPU instruction operating on two
; 80-bit values (ST0 and ST1 on the host), returning the 80-bit result from
; ST0 along with the FSW.
;
; @param    1       The instruction
; @param    2       The operand list to emit after the instruction; pass {}
;                   for instructions with implicit operands.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the first 80-bit value (ST0)
; @param    A3      Pointer to the second 80-bit value (STn).
;
%macro IEMIMPL_FPU_R80_BY_R80 2
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch stack space (presumably for the FCW/FSW macro - confirm).

        fninit                          ; Start from an empty x87 register stack.
        fld     tword [A3]              ; Pushed first, so it ends up in ST1.
        fld     tword [A2]              ; ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      %2

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Drop pending exceptions before the (waiting) fstp.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Leave the FPU in its reset state.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80 fadd,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fmul,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fsub,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fsubr,  {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fdiv,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fdivr,  {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fprem,  {}
IEMIMPL_FPU_R80_BY_R80 fprem1, {}
IEMIMPL_FPU_R80_BY_R80 fscale, {}
2520
2521
;;
; Generates iemAImpl_<insn>_r80_by_r80 for FPU instructions that combine ST1
; and ST0, leave the result in ST1 and pop the register stack (so the result
; is at the top of the stack afterwards).
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the first 80-bit value (ST1).
; @param    A3      Pointer to the second 80-bit value (ST0).
;
%macro IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch stack space (presumably for the FCW/FSW macro - confirm).

        fninit                          ; Start from an empty x87 register stack.
        fld     tword [A2]              ; Pushed first, so it ends up in ST1.
        fld     tword [A3]              ; ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Drop pending exceptions before the (waiting) fstp.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Leave the FPU in its reset state.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fpatan
IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2x
IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2xp1
2557
2558
;;
; Generates iemAImpl_<insn>_r80_by_r80 for FPU instructions that take two
; 80-bit values and whose only output is the FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the first 80-bit value.
; @param    A3      Pointer to the second 80-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R80_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch stack space (presumably for the FCW/FSW macro - confirm).

        fninit                          ; Start from an empty x87 register stack.
        fld     tword [A3]              ; Pushed first, so it ends up in ST1.
        fld     tword [A2]              ; ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      st0, st1

        fnstsw  word  [A1]              ; The status word is the whole result.

        fninit                          ; Leave the FPU in its reset state.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_FSW fcom
IEMIMPL_FPU_R80_BY_R80_FSW fucom
2591
2592
;;
; Generates iemAImpl_<insn>_r80_by_r80 for FPU compare instructions that take
; two 80-bit values and report their outcome in both the FSW and EFLAGS.
;
; @param    1       The instruction
;
; @returns  EFLAGS in EAX.
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the first 80-bit value.
; @param    A3      Pointer to the second 80-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R80_EFL 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; Scratch stack space (presumably for the FCW/FSW macro - confirm).

        fninit                          ; Start from an empty x87 register stack.
        fld     tword [A3]              ; Pushed first, so it ends up in ST1.
        fld     tword [A2]              ; ST0.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      st1

        fnstsw  word  [A1]              ; Does not touch EFLAGS, so the compare result survives.
        pushf                           ; Fetch the flags the instruction produced ...
        pop     xAX                     ; ... into the return register.

        fninit                          ; Leave the FPU in its reset state.
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_EFL fcomi
IEMIMPL_FPU_R80_BY_R80_EFL fucomi
2628
2629
;;
; Generates iemAImpl_<insn>_r80: a unary FPU instruction operating on one
; 80-bit value in ST0, returning the 80-bit result and the FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; Scratch stack space (presumably for the FCW/FSW macro - confirm).

        fninit                          ; Start from an empty x87 register stack.
        fld     tword [A2]              ; ST0 = operand.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Drop pending exceptions before the (waiting) fstp.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Leave the FPU in its reset state.
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80
%endmacro

IEMIMPL_FPU_R80 fchs
IEMIMPL_FPU_R80 fabs
IEMIMPL_FPU_R80 f2xm1
IEMIMPL_FPU_R80 fsqrt
IEMIMPL_FPU_R80 frndint
IEMIMPL_FPU_R80 fsin
IEMIMPL_FPU_R80 fcos
2666
2667
;;
; Generates iemAImpl_<insn>_r80 for unary FPU instructions that examine one
; 80-bit value in ST0 and whose only output is the FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; Scratch stack space (presumably for the FCW/FSW macro - confirm).

        fninit                          ; Start from an empty x87 register stack.
        fld     tword [A2]              ; ST0 = operand.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word  [A1]              ; The status word is the whole result.

        fninit                          ; Leave the FPU in its reset state.
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80
%endmacro

IEMIMPL_FPU_R80_FSW ftst
IEMIMPL_FPU_R80_FSW fxam
2698
2699
2700
;;
; Generates iemAImpl_<insn> for FPU instructions that push an 80-bit floating
; point constant; the constant and the resulting FSW are returned.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
;
%macro IEMIMPL_FPU_R80_CONST 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1, 8
        PROLOGUE_2_ARGS
        sub     xSP, 20h                ; Scratch stack space (presumably for the FCW/FSW macro - confirm).

        fninit                          ; Start from an empty x87 register stack.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1                              ; ST0 = the constant.

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex                          ; Drop pending exceptions before the (waiting) fstp.
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; Leave the FPU in its reset state.
        add     xSP, 20h
        EPILOGUE_2_ARGS
ENDPROC iemAImpl_ %+ %1                 ; Must name the same symbol as BEGINPROC_FASTCALL above.
%endmacro

IEMIMPL_FPU_R80_CONST fld1
IEMIMPL_FPU_R80_CONST fldl2t
IEMIMPL_FPU_R80_CONST fldl2e
IEMIMPL_FPU_R80_CONST fldpi
IEMIMPL_FPU_R80_CONST fldlg2
IEMIMPL_FPU_R80_CONST fldln2
IEMIMPL_FPU_R80_CONST fldz
2735
2736
;;
; Generates iemAImpl_<insn>_r80_r80: an FPU instruction that consumes one
; 80-bit value and produces two (it replaces ST0 and pushes another value).
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULTTWO for the output.
; @param    A2      Pointer to the 80-bit value.
;
%macro IEMIMPL_FPU_R80_R80 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; Scratch stack space (presumably for the FCW/FSW macro - confirm).

        fninit                          ; Start from an empty x87 register stack.
        fld     tword [A2]              ; ST0 = operand.
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word  [A1 + IEMFPURESULTTWO.FSW]
        fnclex                          ; Drop pending exceptions before each (waiting) fstp.
        fstp    tword [A1 + IEMFPURESULTTWO.r80Result2] ; Top of stack is the second result ...
        fnclex
        fstp    tword [A1 + IEMFPURESULTTWO.r80Result1] ; ... the value below it is the first.

        fninit                          ; Leave the FPU in its reset state.
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_r80
%endmacro

IEMIMPL_FPU_R80_R80 fptan
IEMIMPL_FPU_R80_R80 fxtract
IEMIMPL_FPU_R80_R80 fsincos
2771
2772
2773
2774
2775;---------------------- SSE and MMX Operations ----------------------
2776
;;
; Hooks emitted at the start and end of every MMX worker body.
; Currently empty placeholders.
;
; @todo what do we need to do for MMX?
;
%macro IEMIMPL_MMX_PROLOGUE 0
%endmacro
%macro IEMIMPL_MMX_EPILOGUE 0
%endmacro

;;
; Hooks emitted at the start and end of every SSE worker body.
; Currently empty placeholders.
;
; @todo what do we need to do for SSE?
;
%macro IEMIMPL_SSE_PROLOGUE 0
%endmacro
%macro IEMIMPL_SSE_EPILOGUE 0
%endmacro
2788
2789
;;
; Generates the _u64 (MMX) and _u128 (SSE) workers for a media instruction
; that combines two full sized register operands.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to the first media register size operand (input/output).
; @param    A2      Pointer to the second media register size operand (input).
;
%macro IEMIMPL_MEDIA_F2 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm0, [A1]               ; Destination operand (64 bits).
        movq    mm1, [A2]               ; Source operand (64 bits).
        %1      mm0, mm1
        movq    [A1], mm0               ; Write the result back over the first operand.

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm0, [A1]              ; Destination operand (128 bits, unaligned access).
        movdqu  xmm1, [A2]              ; Source operand (128 bits, unaligned access).
        %1      xmm0, xmm1
        movdqu  [A1], xmm0              ; Write the result back over the first operand.

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u128
%endmacro

IEMIMPL_MEDIA_F2 pxor
IEMIMPL_MEDIA_F2 pcmpeqb
IEMIMPL_MEDIA_F2 pcmpeqw
IEMIMPL_MEDIA_F2 pcmpeqd
2831
2832
;;
; Generates workers for a media instruction that combines one full sized
; register operand with the lower half of a second one.  Only half a
; register's worth of data is read from the second operand.
;
; @param    1       The instruction
; @param    2       1 if MMX is included, 0 if not.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to the first full sized media register operand (input/output).
; @param    A2      Pointer to the second half sized media register operand (input).
;
%macro IEMIMPL_MEDIA_F1L1 2
 %if %2 != 0
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm0, [A1]               ; Full 64-bit destination operand.
        movd    mm1, [A2]               ; Only 32 bits (the lower half) of the source.
        %1      mm0, mm1
        movq    [A1], mm0

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm0, [A1]              ; Full 128-bit destination operand.
        movq    xmm1, [A2]              ; Only 64 bits (the lower half) of the source.
        %1      xmm0, xmm1
        movdqu  [A1], xmm0

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u128
%endmacro

IEMIMPL_MEDIA_F1L1 punpcklbw,  1
IEMIMPL_MEDIA_F1L1 punpcklwd,  1
IEMIMPL_MEDIA_F1L1 punpckldq,  1
IEMIMPL_MEDIA_F1L1 punpcklqdq, 0
2877
2878
;;
; Generates workers for a media instruction that combines one full sized
; register operand with the upper half of a second one.
;
; The instruction itself picks out the upper half, so the second operand must
; be loaded in full; loading only half a register (as IEMIMPL_MEDIA_F1L1 does)
; would leave the upper half zero.
;
; @param    1       The instruction
; @param    2       1 if MMX is included, 0 if not.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to the first full sized media register operand (input/output).
; @param    A2      Pointer to the second full sized media register operand, where we
;                   will only use the upper half (input).
;
%macro IEMIMPL_MEDIA_F1H1 2
 %if %2 != 0
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm0, [A1]               ; Full 64-bit destination operand.
        movq    mm1, [A2]               ; Full 64-bit source; the instruction uses bits 63:32.
        %1      mm0, mm1
        movq    [A1], mm0

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm0, [A1]              ; Full 128-bit destination operand.
        movdqu  xmm1, [A2]              ; Full 128-bit source; the instruction uses bits 127:64.
        %1      xmm0, xmm1
        movdqu  [A1], xmm0

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u128
%endmacro

; The high-half unpackers must use the F1H1 variant: they read the upper half
; of the source operand, which the F1L1 variant never loads.
IEMIMPL_MEDIA_F1H1 punpckhbw,  1
IEMIMPL_MEDIA_F1H1 punpckhwd,  1
IEMIMPL_MEDIA_F1H1 punpckhdq,  1
IEMIMPL_MEDIA_F1H1 punpckhqdq, 0
2924
2925
2926;
2927; Shufflers with evil 8-bit immediates.
2928;
2929
;;
; PSHUFW worker: shuffles the words of a 64-bit MMX operand under control of
; an 8-bit immediate.
;
; The immediate is only known at run time, so the code calls into a table of
; 256 'pshufw mm0, mm1, imm8 / ret' stubs (5 bytes each), one per immediate.
;
; @param    A0      Not referenced by this code (presumably the FPU context as
;                   for the other media workers - confirm against the caller).
; @param    A1      Pointer to the 64-bit first operand; loaded into mm0 and
;                   overwritten with the result.
; @param    A2      Pointer to the 64-bit source operand (input).
; @param    A3      The 8-bit immediate; only bits 7:0 are used.
;
BEGINPROC_FASTCALL iemAImpl_pshufw, 16
        PROLOGUE_4_ARGS
        IEMIMPL_MMX_PROLOGUE

        movq    mm0, [A1]
        movq    mm1, [A2]
        and     A3, 0ffh                ; Only 256 stubs exist; upper bits of an 8-bit argument
                                        ; are not guaranteed to be zero, so mask before indexing.
        lea     T0, [A3 + A3*4]         ; sizeof(pshufw+ret) == 5
        lea     T1, [.imm0 xWrtRIP]
        lea     T1, [T1 + T0]           ; T1 = address of stub number A3.
        call    T1
        movq    [A1], mm0

        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_4_ARGS
%assign bImm 0
%rep 256
.imm %+ bImm:
        pshufw  mm0, mm1, bImm
        ret
 %assign bImm bImm + 1
%endrep
.immEnd:                                ; 256*5 == 0x500
dw 0xfaff  + (.immEnd - .imm0)          ; will cause warning if entries are too big.
dw 0x104ff - (.immEnd - .imm0)          ; will cause warning if entries are too small.
ENDPROC iemAImpl_pshufw
2955
2956
;;
; Generates iemAImpl_<insn> for the SSE shuffle instructions taking an 8-bit
; immediate.
;
; The immediate is only known at run time, so the code calls into a table of
; 256 '<insn> xmm0, xmm1, imm8 / ret' stubs (6 bytes each), one per immediate.
;
; @param    1       The instruction
;
; @param    A0      Not referenced by this code (presumably the FPU context as
;                   for the other media workers - confirm against the caller).
; @param    A1      Pointer to the 128-bit first operand; loaded into xmm0 and
;                   overwritten with the result.
; @param    A2      Pointer to the 128-bit source operand (input).
; @param    A3      The 8-bit immediate; only bits 7:0 are used.
;
%macro IEMIMPL_MEDIA_SSE_PSHUFXX 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1, 16
        PROLOGUE_4_ARGS
        IEMIMPL_SSE_PROLOGUE

        movdqu  xmm0, [A1]
        movdqu  xmm1, [A2]
        and     A3, 0ffh                ; Only 256 stubs exist; upper bits of an 8-bit argument
                                        ; are not guaranteed to be zero, so mask before indexing.
        lea     T1, [.imm0 xWrtRIP]
        lea     T0, [A3 + A3*2]         ; sizeof(pshufXX+ret) == 6: (A3 * 3) *2
        lea     T1, [T1 + T0*2]         ; T1 = address of stub number A3.
        call    T1
        movdqu  [A1], xmm0

        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_4_ARGS
 %assign bImm 0
 %rep 256
.imm %+ bImm:
        %1      xmm0, xmm1, bImm
        ret
  %assign bImm bImm + 1
 %endrep
.immEnd:                                ; 256*6 == 0x600
dw 0xf9ff  + (.immEnd - .imm0)          ; will cause warning if entries are too big.
dw 0x105ff - (.immEnd - .imm0)          ; will cause warning if entries are too small.
ENDPROC iemAImpl_ %+ %1
%endmacro

IEMIMPL_MEDIA_SSE_PSHUFXX pshufhw
IEMIMPL_MEDIA_SSE_PSHUFXX pshuflw
IEMIMPL_MEDIA_SSE_PSHUFXX pshufd
2988
2989
2990;
2991; Move byte mask.
2992;
2993
;;
; PMOVMSKB worker, MMX variant: produces the byte sign mask of a 64-bit
; operand and stores it, zero extended, in a 64-bit destination.
;
; @param    A0      Not referenced by this code (presumably the FPU context as
;                   for the other media workers - confirm against the caller).
; @param    A1      Pointer to the 64-bit destination.
; @param    A2      Pointer to the 64-bit source operand (input).
;
BEGINPROC_FASTCALL iemAImpl_pmovmskb_u64, 12
        PROLOGUE_3_ARGS
        IEMIMPL_MMX_PROLOGUE

        mov     T0, [A1]                ; NOTE(review): looks redundant, T0 is overwritten below - confirm.
        movq    mm1, [A2]
        pmovmskb T0, mm1                ; T0 = mask built from the byte sign bits.
        mov     [A1], T0
%ifdef RT_ARCH_X86
        mov     dword [A1 + 4], 0       ; T0 is only 32 bits wide here; clear the upper half by hand.
%endif
        IEMIMPL_MMX_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_pmovmskb_u64
3008
;;
; PMOVMSKB worker, SSE variant: produces the byte sign mask of a 128-bit
; operand and stores it, zero extended, in a 64-bit destination.
;
; @param    A0      Not referenced by this code (presumably the FPU context as
;                   for the other media workers - confirm against the caller).
; @param    A1      Pointer to the 64-bit destination.
; @param    A2      Pointer to the 128-bit source operand (input).
;
BEGINPROC_FASTCALL iemAImpl_pmovmskb_u128, 12
        PROLOGUE_3_ARGS
        IEMIMPL_SSE_PROLOGUE

        mov     T0, [A1]                ; NOTE(review): looks redundant, T0 is overwritten below - confirm.
        movdqu  xmm1, [A2]
        pmovmskb T0, xmm1               ; T0 = mask built from the byte sign bits.
        mov     [A1], T0
%ifdef RT_ARCH_X86
        mov     dword [A1 + 4], 0       ; T0 is only 32 bits wide here; clear the upper half by hand.
%endif
        IEMIMPL_SSE_EPILOGUE
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_pmovmskb_u128
3023
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette