VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm@ 47705

Last change on this file since 47705 was 47548, checked in by vboxsync on 2013-08-06

IEM: Bunch of fixes, mostly DOS related.

1; $Id: IEMAllAImpl.asm 47548 2013-08-06 03:58:21Z vboxsync $
2;; @file
3; IEM - Instruction Implementation in Assembly.
4;
5
6; Copyright (C) 2011-2012 Oracle Corporation
7;
8; This file is part of VirtualBox Open Source Edition (OSE), as
9; available from http://www.virtualbox.org. This file is free software;
10; you can redistribute it and/or modify it under the terms of the GNU
11; General Public License (GPL) as published by the Free Software
12; Foundation, in version 2 as it comes in the "COPYING" file of the
13; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15;
16
17
18;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19; Header Files ;
20;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
21%include "VBox/asmdefs.mac"
22%include "VBox/err.mac"
23%include "iprt/x86.mac"
24
25
26;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27; Defined Constants And Macros ;
28;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30;;
31; RET XX / RET wrapper for fastcall.
32;
33%macro RET_FASTCALL 1
34%ifdef RT_ARCH_X86
35 %ifdef RT_OS_WINDOWS
36 ret %1
37 %else
38 ret
39 %endif
40%else
41 ret
42%endif
43%endmacro
44
45;;
46; NAME for fastcall functions.
47;
48;; @todo 'global @fastcall@12' is still broken in yasm and requires dollar
49; escaping (or whatever the dollar is good for here). Thus the ugly
50; prefix argument.
51;
52%define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) NAME(a_Name)
53%ifdef RT_ARCH_X86
54 %ifdef RT_OS_WINDOWS
55 %undef NAME_FASTCALL
56 %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) a_Prefix %+ a_Name %+ @ %+ a_cbArgs
57 %endif
58%endif
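;
; For example, on 32-bit Windows NAME_FASTCALL(iemAImpl_add_u16, 12, @) expands to
; the MSC __fastcall style decoration @iemAImpl_add_u16@12, while on every other
; target it is simply NAME(iemAImpl_add_u16).
;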
59
60;;
61; BEGINPROC for fastcall functions.
62;
63; @param 1 The function name (C).
64; @param 2 The argument size on x86.
65;
66%macro BEGINPROC_FASTCALL 2
67 %ifdef ASM_FORMAT_PE
68 export %1=NAME_FASTCALL(%1,%2,$@)
69 %endif
70 %ifdef __NASM__
71 %ifdef ASM_FORMAT_OMF
72 export NAME(%1) NAME_FASTCALL(%1,%2,$@)
73 %endif
74 %endif
75 %ifndef ASM_FORMAT_BIN
76 global NAME_FASTCALL(%1,%2,$@)
77 %endif
78NAME_FASTCALL(%1,%2,@):
79%endmacro
80
81
82;
83; We employ some macro assembly here to hide the calling convention differences.
84;
85%ifdef RT_ARCH_AMD64
86 %macro PROLOGUE_1_ARGS 0
87 %endmacro
88 %macro EPILOGUE_1_ARGS 0
89 ret
90 %endmacro
91 %macro EPILOGUE_1_ARGS_EX 0
92 ret
93 %endmacro
94
95 %macro PROLOGUE_2_ARGS 0
96 %endmacro
97 %macro EPILOGUE_2_ARGS 0
98 ret
99 %endmacro
100 %macro EPILOGUE_2_ARGS_EX 1
101 ret
102 %endmacro
103
104 %macro PROLOGUE_3_ARGS 0
105 %endmacro
106 %macro EPILOGUE_3_ARGS 0
107 ret
108 %endmacro
109 %macro EPILOGUE_3_ARGS_EX 1
110 ret
111 %endmacro
112
113 %macro PROLOGUE_4_ARGS 0
114 %endmacro
115 %macro EPILOGUE_4_ARGS 0
116 ret
117 %endmacro
118 %macro EPILOGUE_4_ARGS_EX 1
119 ret
120 %endmacro
121
122 %ifdef ASM_CALL64_GCC
123 %define A0 rdi
124 %define A0_32 edi
125 %define A0_16 di
126 %define A0_8 dil
127
128 %define A1 rsi
129 %define A1_32 esi
130 %define A1_16 si
131 %define A1_8 sil
132
133 %define A2 rdx
134 %define A2_32 edx
135 %define A2_16 dx
136 %define A2_8 dl
137
138 %define A3 rcx
139 %define A3_32 ecx
140 %define A3_16 cx
141 %endif
142
143 %ifdef ASM_CALL64_MSC
144 %define A0 rcx
145 %define A0_32 ecx
146 %define A0_16 cx
147 %define A0_8 cl
148
149 %define A1 rdx
150 %define A1_32 edx
151 %define A1_16 dx
152 %define A1_8 dl
153
154 %define A2 r8
155 %define A2_32 r8d
156 %define A2_16 r8w
157 %define A2_8 r8b
158
159 %define A3 r9
160 %define A3_32 r9d
161 %define A3_16 r9w
162 %endif
163
164 %define T0 rax
165 %define T0_32 eax
166 %define T0_16 ax
167 %define T0_8 al
168
169 %define T1 r11
170 %define T1_32 r11d
171 %define T1_16 r11w
172 %define T1_8 r11b
173
174%else
175 ; x86
176 %macro PROLOGUE_1_ARGS 0
177 push edi
178 %endmacro
179 %macro EPILOGUE_1_ARGS 0
180 pop edi
181 ret 0
182 %endmacro
183 %macro EPILOGUE_1_ARGS_EX 1
184 pop edi
185 ret %1
186 %endmacro
187
188 %macro PROLOGUE_2_ARGS 0
189 push edi
190 %endmacro
191 %macro EPILOGUE_2_ARGS 0
192 pop edi
193 ret 0
194 %endmacro
195 %macro EPILOGUE_2_ARGS_EX 1
196 pop edi
197 ret %1
198 %endmacro
199
200 %macro PROLOGUE_3_ARGS 0
201 push ebx
202 mov ebx, [esp + 4 + 4]
203 push edi
204 %endmacro
205 %macro EPILOGUE_3_ARGS_EX 1
206 %if (%1) < 4
207 %error "With three args, at least 4 bytes must be removed from the stack upon return (32-bit)."
208 %endif
209 pop edi
210 pop ebx
211 ret %1
212 %endmacro
213 %macro EPILOGUE_3_ARGS 0
214 EPILOGUE_3_ARGS_EX 4
215 %endmacro
216
217 %macro PROLOGUE_4_ARGS 0
218 push ebx
219 push edi
220 push esi
221 mov ebx, [esp + 12 + 4 + 0]
222 mov esi, [esp + 12 + 4 + 4]
223 %endmacro
224 %macro EPILOGUE_4_ARGS_EX 1
225 %if (%1) < 8
226 %error "With four args, at least 8 bytes must be removed from the stack upon return (32-bit)."
227 %endif
228 pop esi
229 pop edi
230 pop ebx
231 ret %1
232 %endmacro
233 %macro EPILOGUE_4_ARGS 0
234 EPILOGUE_4_ARGS_EX 8
235 %endmacro
236
237 %define A0 ecx
238 %define A0_32 ecx
239 %define A0_16 cx
240 %define A0_8 cl
241
242 %define A1 edx
243 %define A1_32 edx
244 %define A1_16 dx
245 %define A1_8 dl
246
247 %define A2 ebx
248 %define A2_32 ebx
249 %define A2_16 bx
250 %define A2_8 bl
251
252 %define A3 esi
253 %define A3_32 esi
254 %define A3_16 si
255
256 %define T0 eax
257 %define T0_32 eax
258 %define T0_16 ax
259 %define T0_8 al
260
261 %define T1 edi
262 %define T1_32 edi
263 %define T1_16 di
264%endif
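;
; Illustration only (hypothetical function, not part of this file): with the
; aliases and macros above, a three argument fastcall implementation follows
; this pattern regardless of the host convention:
;
;   ; IEM_DECL_IMPL_DEF(void, iemAImpl_example,(uint32_t *pu32Dst, uint32_t u32Src, uint32_t *pEFlags));
;   BEGINPROC_FASTCALL iemAImpl_example, 12
;           PROLOGUE_3_ARGS                 ; A0=pu32Dst, A1=u32Src, A2=pEFlags
;           add     dword [A0], A1_32       ; the actual work, done via A0..A2 / T0..T1
;           EPILOGUE_3_ARGS                 ; restores saved regs; 'ret 4' on x86, plain 'ret' on AMD64
;   ENDPROC iemAImpl_example
;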
265
266
267;;
268; Load the relevant flags from [%1] if there are undefined flags (%3).
269;
270; @remarks Clobbers T0, stack. Changes EFLAGS.
271; @param A2 The register pointing to the flags.
272; @param 1 The parameter (A0..A3) pointing to the eflags.
273; @param 2 The set of modified flags.
274; @param 3 The set of undefined flags.
275;
276%macro IEM_MAYBE_LOAD_FLAGS 3
277 ;%if (%3) != 0
278 pushf ; store current flags
279 mov T0_32, [%1] ; load the guest flags
280 and dword [xSP], ~(%2 | %3) ; mask out the modified and undefined flags
281 and T0_32, (%2 | %3) ; select the modified and undefined flags.
282 or [xSP], T0 ; merge guest flags with host flags.
283 popf ; load the mixed flags.
284 ;%endif
285%endmacro
286
287;;
288; Update the flag.
289;
290; @remarks Clobbers T0, T1, stack.
291; @param 1 The register pointing to the EFLAGS.
292; @param 2 The mask of modified flags to save.
293; @param 3 The mask of undefined flags to (maybe) save.
294;
295%macro IEM_SAVE_FLAGS 3
296 %if (%2 | %3) != 0
297 pushf
298 pop T1
299 mov T0_32, [%1] ; flags
300 and T0_32, ~(%2 | %3) ; clear the modified & undefined flags.
301 and T1_32, (%2 | %3) ; select the modified and undefined flags.
302 or T0_32, T1_32 ; combine the flags.
303 mov [%1], T0_32 ; save the flags.
304 %endif
305%endmacro
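;
; The two macros above are always used as a bracket around the emulated
; instruction, as in the operator templates further down (sketch):
;
;   IEM_MAYBE_LOAD_FLAGS A2, <modified>, <undefined> ; merge guest flags into host EFLAGS
;   adc     dword [A0], A1_32                        ; emulated instruction sees and updates them
;   IEM_SAVE_FLAGS       A2, <modified>, <undefined> ; fold the result back into *pEFlags
;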
306
307
308;;
309; Macro for implementing a binary operator.
310;
311; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
312; variants, except on 32-bit systems where the 64-bit accesses require hand
313; coding.
314;
315; All the functions take a pointer to the destination memory operand in A0,
316; the source register operand in A1 and a pointer to eflags in A2.
317;
318; @param 1 The instruction mnemonic.
319; @param 2 Non-zero if there should be a locked version.
320; @param 3 The modified flags.
321; @param 4 The undefined flags.
322;
323%macro IEMIMPL_BIN_OP 4
324BEGINCODE
325BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
326 PROLOGUE_3_ARGS
327 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
328 %1 byte [A0], A1_8
329 IEM_SAVE_FLAGS A2, %3, %4
330 EPILOGUE_3_ARGS
331ENDPROC iemAImpl_ %+ %1 %+ _u8
332
333BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
334 PROLOGUE_3_ARGS
335 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
336 %1 word [A0], A1_16
337 IEM_SAVE_FLAGS A2, %3, %4
338 EPILOGUE_3_ARGS
339ENDPROC iemAImpl_ %+ %1 %+ _u16
340
341BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
342 PROLOGUE_3_ARGS
343 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
344 %1 dword [A0], A1_32
345 IEM_SAVE_FLAGS A2, %3, %4
346 EPILOGUE_3_ARGS
347ENDPROC iemAImpl_ %+ %1 %+ _u32
348
349 %ifdef RT_ARCH_AMD64
350BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
351 PROLOGUE_3_ARGS
352 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
353 %1 qword [A0], A1
354 IEM_SAVE_FLAGS A2, %3, %4
355 EPILOGUE_3_ARGS_EX 8
356ENDPROC iemAImpl_ %+ %1 %+ _u64
357 %else ; stub it for now - later, replace with hand coded stuff.
358BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
359 int3
360 ret
361ENDPROC iemAImpl_ %+ %1 %+ _u64
362 %endif ; !RT_ARCH_AMD64
363
364 %if %2 != 0 ; locked versions requested?
365
366BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 12
367 PROLOGUE_3_ARGS
368 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
369 lock %1 byte [A0], A1_8
370 IEM_SAVE_FLAGS A2, %3, %4
371 EPILOGUE_3_ARGS
372ENDPROC iemAImpl_ %+ %1 %+ _u8_locked
373
374BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
375 PROLOGUE_3_ARGS
376 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
377 lock %1 word [A0], A1_16
378 IEM_SAVE_FLAGS A2, %3, %4
379 EPILOGUE_3_ARGS
380ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
381
382BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
383 PROLOGUE_3_ARGS
384 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
385 lock %1 dword [A0], A1_32
386 IEM_SAVE_FLAGS A2, %3, %4
387 EPILOGUE_3_ARGS
388ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
389
390 %ifdef RT_ARCH_AMD64
391BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
392 PROLOGUE_3_ARGS
393 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
394 lock %1 qword [A0], A1
395 IEM_SAVE_FLAGS A2, %3, %4
396 EPILOGUE_3_ARGS_EX 8
397ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
398 %else ; stub it for now - later, replace with hand coded stuff.
399BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
400 int3
401 ret 8
402ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
403 %endif ; !RT_ARCH_AMD64
404 %endif ; locked
405%endmacro
406
407; instr, lock, modified-flags, undefined-flags.
408IEMIMPL_BIN_OP add, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
409IEMIMPL_BIN_OP adc, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
410IEMIMPL_BIN_OP sub, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
411IEMIMPL_BIN_OP sbb, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
412IEMIMPL_BIN_OP or, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
413IEMIMPL_BIN_OP xor, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
414IEMIMPL_BIN_OP and, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
415IEMIMPL_BIN_OP cmp, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
416IEMIMPL_BIN_OP test, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF
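;
; Each invocation above thus emits iemAImpl_<instr>_u8, _u16, _u32 and _u64 (the
; latter only a stub on 32-bit hosts), plus the matching _locked variants whenever
; the second parameter is non-zero - cmp and test have no locked forms.
;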
417
418
419;;
420; Macro for implementing a bit operator.
421;
422; This will generate code for the 16, 32 and 64 bit accesses with locked
423; variants, except on 32-bit systems where the 64-bit accesses require hand
424; coding.
425;
426; All the functions take a pointer to the destination memory operand in A0,
427; the source register operand in A1 and a pointer to eflags in A2.
428;
429; @param 1 The instruction mnemonic.
430; @param 2 Non-zero if there should be a locked version.
431; @param 3 The modified flags.
432; @param 4 The undefined flags.
433;
434%macro IEMIMPL_BIT_OP 4
435BEGINCODE
436BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
437 PROLOGUE_3_ARGS
438 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
439 %1 word [A0], A1_16
440 IEM_SAVE_FLAGS A2, %3, %4
441 EPILOGUE_3_ARGS
442ENDPROC iemAImpl_ %+ %1 %+ _u16
443
444BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
445 PROLOGUE_3_ARGS
446 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
447 %1 dword [A0], A1_32
448 IEM_SAVE_FLAGS A2, %3, %4
449 EPILOGUE_3_ARGS
450ENDPROC iemAImpl_ %+ %1 %+ _u32
451
452 %ifdef RT_ARCH_AMD64
453BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
454 PROLOGUE_3_ARGS
455 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
456 %1 qword [A0], A1
457 IEM_SAVE_FLAGS A2, %3, %4
458 EPILOGUE_3_ARGS_EX 8
459ENDPROC iemAImpl_ %+ %1 %+ _u64
460 %else ; stub it for now - later, replace with hand coded stuff.
461BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
462 int3
463 ret 8
464ENDPROC iemAImpl_ %+ %1 %+ _u64
465 %endif ; !RT_ARCH_AMD64
466
467 %if %2 != 0 ; locked versions requested?
468
469BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
470 PROLOGUE_3_ARGS
471 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
472 lock %1 word [A0], A1_16
473 IEM_SAVE_FLAGS A2, %3, %4
474 EPILOGUE_3_ARGS
475ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
476
477BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
478 PROLOGUE_3_ARGS
479 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
480 lock %1 dword [A0], A1_32
481 IEM_SAVE_FLAGS A2, %3, %4
482 EPILOGUE_3_ARGS
483ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
484
485 %ifdef RT_ARCH_AMD64
486BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
487 PROLOGUE_3_ARGS
488 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
489 lock %1 qword [A0], A1
490 IEM_SAVE_FLAGS A2, %3, %4
491 EPILOGUE_3_ARGS_EX 8
492ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
493 %else ; stub it for now - later, replace with hand coded stuff.
494BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
495 int3
496 ret 8
497ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
498 %endif ; !RT_ARCH_AMD64
499 %endif ; locked
500%endmacro
501IEMIMPL_BIT_OP bt, 0, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
502IEMIMPL_BIT_OP btc, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
503IEMIMPL_BIT_OP bts, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
504IEMIMPL_BIT_OP btr, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
505
506;;
507; Macro for implementing a bit search operator.
508;
509; This will generate code for the 16, 32 and 64 bit accesses, except on 32-bit
510; systems where the 64-bit accesses require hand coding.
511;
512; All the functions take a pointer to the destination memory operand in A0,
513; the source register operand in A1 and a pointer to eflags in A2.
514;
515; @param 1 The instruction mnemonic.
516; @param 2 The modified flags.
517; @param 3 The undefined flags.
518;
519%macro IEMIMPL_BIT_OP 3
520BEGINCODE
521BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
522 PROLOGUE_3_ARGS
523 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
524 %1 T0_16, A1_16
525 jz .unchanged_dst
526 mov [A0], T0_16
527.unchanged_dst:
528 IEM_SAVE_FLAGS A2, %2, %3
529 EPILOGUE_3_ARGS
530ENDPROC iemAImpl_ %+ %1 %+ _u16
531
532BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
533 PROLOGUE_3_ARGS
534 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
535 %1 T0_32, A1_32
536 jz .unchanged_dst
537 mov [A0], T0_32
538.unchanged_dst:
539 IEM_SAVE_FLAGS A2, %2, %3
540 EPILOGUE_3_ARGS
541ENDPROC iemAImpl_ %+ %1 %+ _u32
542
543 %ifdef RT_ARCH_AMD64
544BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
545 PROLOGUE_3_ARGS
546 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
547 %1 T0, A1
548 jz .unchanged_dst
549 mov [A0], T0
550.unchanged_dst:
551 IEM_SAVE_FLAGS A2, %2, %3
552 EPILOGUE_3_ARGS_EX 8
553ENDPROC iemAImpl_ %+ %1 %+ _u64
554 %else ; stub it for now - later, replace with hand coded stuff.
555BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
556 int3
557 ret 8
558ENDPROC iemAImpl_ %+ %1 %+ _u64
559 %endif ; !RT_ARCH_AMD64
560%endmacro
561IEMIMPL_BIT_OP bsf, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
562IEMIMPL_BIT_OP bsr, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
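;
; Note the jz .unchanged_dst in the template above: when the source operand is
; zero, bsf/bsr set ZF and the store back to [A0] is skipped, so the guest
; destination register keeps its old value (the architecturally undefined case).
;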
563
564
565;
566; IMUL is a similar yet different case (no lock, no mem dst).
567; The rDX:rAX variant of imul is handled together with mul further down.
568;
569BEGINCODE
570BEGINPROC_FASTCALL iemAImpl_imul_two_u16, 12
571 PROLOGUE_3_ARGS
572 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
573 imul A1_16, word [A0]
574 mov [A0], A1_16
575 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
576 EPILOGUE_3_ARGS
577ENDPROC iemAImpl_imul_two_u16
578
579BEGINPROC_FASTCALL iemAImpl_imul_two_u32, 12
580 PROLOGUE_3_ARGS
581 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
582 imul A1_32, dword [A0]
583 mov [A0], A1_32
584 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
585 EPILOGUE_3_ARGS
586ENDPROC iemAImpl_imul_two_u32
587
588BEGINPROC_FASTCALL iemAImpl_imul_two_u64, 16
589 PROLOGUE_3_ARGS
590%ifdef RT_ARCH_AMD64
591 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
592 imul A1, qword [A0]
593 mov [A0], A1
594 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
595%else
596 int3 ;; @todo implement me
597%endif
598 EPILOGUE_3_ARGS_EX 8
599ENDPROC iemAImpl_imul_two_u64
600
601
602;
603; XCHG for memory operands. This implies locking. No flag changes.
604;
605; Each function takes two arguments, first the pointer to the memory,
606; then the pointer to the register. They all return void.
607;
608BEGINCODE
609BEGINPROC_FASTCALL iemAImpl_xchg_u8, 8
610 PROLOGUE_2_ARGS
611 mov T0_8, [A1]
612 xchg [A0], T0_8
613 mov [A1], T0_8
614 EPILOGUE_2_ARGS
615ENDPROC iemAImpl_xchg_u8
616
617BEGINPROC_FASTCALL iemAImpl_xchg_u16, 8
618 PROLOGUE_2_ARGS
619 mov T0_16, [A1]
620 xchg [A0], T0_16
621 mov [A1], T0_16
622 EPILOGUE_2_ARGS
623ENDPROC iemAImpl_xchg_u16
624
625BEGINPROC_FASTCALL iemAImpl_xchg_u32, 8
626 PROLOGUE_2_ARGS
627 mov T0_32, [A1]
628 xchg [A0], T0_32
629 mov [A1], T0_32
630 EPILOGUE_2_ARGS
631ENDPROC iemAImpl_xchg_u32
632
633BEGINPROC_FASTCALL iemAImpl_xchg_u64, 8
634%ifdef RT_ARCH_AMD64
635 PROLOGUE_2_ARGS
636 mov T0, [A1]
637 xchg [A0], T0
638 mov [A1], T0
639 EPILOGUE_2_ARGS
640%else
641 int3
642 ret 0
643%endif
644ENDPROC iemAImpl_xchg_u64
645
646
647;
648; XADD for memory operands.
649;
650; Each function takes three arguments, first the pointer to the
651; memory/register, then the pointer to the register, and finally a pointer to
652; eflags. They all return void.
653;
654BEGINCODE
655BEGINPROC_FASTCALL iemAImpl_xadd_u8, 12
656 PROLOGUE_3_ARGS
657 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
658 mov T0_8, [A1]
659 xadd [A0], T0_8
660 mov [A1], T0_8
661 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
662 EPILOGUE_3_ARGS
663ENDPROC iemAImpl_xadd_u8
664
665BEGINPROC_FASTCALL iemAImpl_xadd_u16, 12
666 PROLOGUE_3_ARGS
667 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
668 mov T0_16, [A1]
669 xadd [A0], T0_16
670 mov [A1], T0_16
671 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
672 EPILOGUE_3_ARGS
673ENDPROC iemAImpl_xadd_u16
674
675BEGINPROC_FASTCALL iemAImpl_xadd_u32, 12
676 PROLOGUE_3_ARGS
677 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
678 mov T0_32, [A1]
679 xadd [A0], T0_32
680 mov [A1], T0_32
681 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
682 EPILOGUE_3_ARGS
683ENDPROC iemAImpl_xadd_u32
684
685BEGINPROC_FASTCALL iemAImpl_xadd_u64, 12
686%ifdef RT_ARCH_AMD64
687 PROLOGUE_3_ARGS
688 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
689 mov T0, [A1]
690 xadd [A0], T0
691 mov [A1], T0
692 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
693 EPILOGUE_3_ARGS
694%else
695 int3
696 ret 4
697%endif
698ENDPROC iemAImpl_xadd_u64
699
700BEGINPROC_FASTCALL iemAImpl_xadd_u8_locked, 12
701 PROLOGUE_3_ARGS
702 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
703 mov T0_8, [A1]
704 lock xadd [A0], T0_8
705 mov [A1], T0_8
706 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
707 EPILOGUE_3_ARGS
708ENDPROC iemAImpl_xadd_u8_locked
709
710BEGINPROC_FASTCALL iemAImpl_xadd_u16_locked, 12
711 PROLOGUE_3_ARGS
712 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
713 mov T0_16, [A1]
714 lock xadd [A0], T0_16
715 mov [A1], T0_16
716 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
717 EPILOGUE_3_ARGS
718ENDPROC iemAImpl_xadd_u16_locked
719
720BEGINPROC_FASTCALL iemAImpl_xadd_u32_locked, 12
721 PROLOGUE_3_ARGS
722 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
723 mov T0_32, [A1]
724 lock xadd [A0], T0_32
725 mov [A1], T0_32
726 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
727 EPILOGUE_3_ARGS
728ENDPROC iemAImpl_xadd_u32_locked
729
730BEGINPROC_FASTCALL iemAImpl_xadd_u64_locked, 12
731%ifdef RT_ARCH_AMD64
732 PROLOGUE_3_ARGS
733 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
734 mov T0, [A1]
735 lock xadd [A0], T0
736 mov [A1], T0
737 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
738 EPILOGUE_3_ARGS
739%else
740 int3
741 ret 4
742%endif
743ENDPROC iemAImpl_xadd_u64_locked
744
745
746;
747; CMPXCHG8B.
748;
749; These are tricky register wise, so the code is duplicated for each calling
750; convention.
751;
752; WARNING! This code makes ASSUMPTIONS about which registers T1 and T0 are mapped to!
753;
754; C-proto:
755; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
756; uint32_t *pEFlags));
757;
758BEGINCODE
759BEGINPROC_FASTCALL iemAImpl_cmpxchg8b, 16
760%ifdef RT_ARCH_AMD64
761 %ifdef ASM_CALL64_MSC
762 push rbx
763
764 mov r11, rdx ; pu64EaxEdx (is also T1)
765 mov r10, rcx ; pu64Dst
766
767 mov ebx, [r8]
768 mov ecx, [r8 + 4]
769 IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
770 mov eax, [r11]
771 mov edx, [r11 + 4]
772
773 lock cmpxchg8b [r10]
774
775 mov [r11], eax
776 mov [r11 + 4], edx
777 IEM_SAVE_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
778
779 pop rbx
780 ret
781 %else
782 push rbx
783
784 mov r10, rcx ; pEFlags
785 mov r11, rdx ; pu64EbxEcx (is also T1)
786
787 mov ebx, [r11]
788 mov ecx, [r11 + 4]
789 IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
790 mov eax, [rsi]
791 mov edx, [rsi + 4]
792
793 lock cmpxchg8b [rdi]
794
795 mov [rsi], eax
796 mov [rsi + 4], edx
797 IEM_SAVE_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
798
799 pop rbx
800 ret
801
802 %endif
803%else
804 push esi
805 push edi
806 push ebx
807 push ebp
808
809 mov edi, ecx ; pu64Dst
810 mov esi, edx ; pu64EaxEdx
811 mov ecx, [esp + 16 + 4 + 0] ; pu64EbxEcx
812 mov ebp, [esp + 16 + 4 + 4] ; pEFlags
813
814 mov ebx, [ecx]
815 mov ecx, [ecx + 4]
816 IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
817 mov eax, [esi]
818 mov edx, [esi + 4]
819
820 lock cmpxchg8b [edi]
821
822 mov [esi], eax
823 mov [esi + 4], edx
824 IEM_SAVE_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, edi)
825
826 pop ebp
827 pop ebx
828 pop edi
829 pop esi
830 ret 8
831%endif
832ENDPROC iemAImpl_cmpxchg8b
833
834BEGINPROC_FASTCALL iemAImpl_cmpxchg8b_locked, 16
835 ; Lazy bird always lock prefixes cmpxchg8b.
836 jmp NAME_FASTCALL(iemAImpl_cmpxchg8b,16,$@)
837ENDPROC iemAImpl_cmpxchg8b_locked
838
839
840
841;
842; CMPXCHG.
843;
844; WARNING! This code makes ASSUMPTIONS about which registers T1 and T0 are mapped to!
845;
846; C-proto:
847; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg,(uintX_t *puXDst, uintX_t puEax, uintX_t uReg, uint32_t *pEFlags));
848;
849BEGINCODE
850%macro IEMIMPL_CMPXCHG 2
851BEGINPROC_FASTCALL iemAImpl_cmpxchg_u8 %+ %2, 16
852 PROLOGUE_4_ARGS
853 IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
854 mov al, [A1]
855 %1 cmpxchg [A0], A2_8
856 mov [A1], al
857 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
858 EPILOGUE_4_ARGS
859ENDPROC iemAImpl_cmpxchg_u8 %+ %2
860
861BEGINPROC_FASTCALL iemAImpl_cmpxchg_u16 %+ %2, 16
862 PROLOGUE_4_ARGS
863 IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
864 mov ax, [A1]
865 %1 cmpxchg [A0], A2_16
866 mov [A1], ax
867 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
868 EPILOGUE_4_ARGS
869ENDPROC iemAImpl_cmpxchg_u16 %+ %2
870
871BEGINPROC_FASTCALL iemAImpl_cmpxchg_u32 %+ %2, 16
872 PROLOGUE_4_ARGS
873 IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
874 mov eax, [A1]
875 %1 cmpxchg [A0], A2_32
876 mov [A1], eax
877 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
878 EPILOGUE_4_ARGS
879ENDPROC iemAImpl_cmpxchg_u32 %+ %2
880
881BEGINPROC_FASTCALL iemAImpl_cmpxchg_u64 %+ %2, 16
882%ifdef RT_ARCH_AMD64
883 PROLOGUE_4_ARGS
884 IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
885 mov rax, [A1]
886 %1 cmpxchg [A0], A2
887 mov [A1], rax
888 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
889 EPILOGUE_4_ARGS
890%else
891 ;
892 ; Must use cmpxchg8b here. See also iemAImpl_cmpxchg8b.
893 ;
894 push esi
895 push edi
896 push ebx
897 push ebp
898
899 mov edi, ecx ; pu64Dst
900 mov esi, edx ; pu64Rax
901 mov ecx, [esp + 16 + 4 + 0] ; pu64Reg - Note! Pointer on 32-bit hosts!
902 mov ebp, [esp + 16 + 4 + 4] ; pEFlags
903
904 mov ebx, [ecx]
905 mov ecx, [ecx + 4]
906 IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
907 mov eax, [esi]
908 mov edx, [esi + 4]
909
910 lock cmpxchg8b [edi]
911
912 ; cmpxchg8b doesn't set CF, PF, AF, SF and OF, so we have to do that.
913 jz .cmpxchg8b_not_equal
914 cmp eax, eax ; just set the other flags.
915.store:
916 mov [esi], eax
917 mov [esi + 4], edx
918 IEM_SAVE_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, edi)
919
920 pop ebp
921 pop ebx
922 pop edi
923 pop esi
924 ret 8
925
926.cmpxchg8b_not_equal:
927 cmp [esi + 4], edx ;; @todo FIXME - verify 64-bit compare implementation
928 jne .store
929 cmp [esi], eax
930 jmp .store
931
932%endif
933ENDPROC iemAImpl_cmpxchg_u64 %+ %2
934%endmacro ; IEMIMPL_CMPXCHG
935
936IEMIMPL_CMPXCHG , ,
937IEMIMPL_CMPXCHG lock, _locked
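;
; The two instantiations above expand the same template twice: once with an empty
; prefix and suffix (plain cmpxchg: iemAImpl_cmpxchg_u8 .. _u64) and once with a
; 'lock' prefix and a '_locked' name suffix (iemAImpl_cmpxchg_u8_locked .. _u64_locked).
;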
938
939;;
940; Macro for implementing a unary operator.
941;
942; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
943; variants, except on 32-bit systems where the 64-bit accesses require hand
944; coding.
945;
946; All the functions take a pointer to the destination memory operand in A0 and
947; a pointer to eflags in A1.
948;
949; @param 1 The instruction mnemonic.
950; @param 2 The modified flags.
951; @param 3 The undefined flags.
952;
953%macro IEMIMPL_UNARY_OP 3
954BEGINCODE
955BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 8
956 PROLOGUE_2_ARGS
957 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
958 %1 byte [A0]
959 IEM_SAVE_FLAGS A1, %2, %3
960 EPILOGUE_2_ARGS
961ENDPROC iemAImpl_ %+ %1 %+ _u8
962
963BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 8
964 PROLOGUE_2_ARGS
965 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
966 lock %1 byte [A0]
967 IEM_SAVE_FLAGS A1, %2, %3
968 EPILOGUE_2_ARGS
969ENDPROC iemAImpl_ %+ %1 %+ _u8_locked
970
971BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 8
972 PROLOGUE_2_ARGS
973 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
974 %1 word [A0]
975 IEM_SAVE_FLAGS A1, %2, %3
976 EPILOGUE_2_ARGS
977ENDPROC iemAImpl_ %+ %1 %+ _u16
978
979BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 8
980 PROLOGUE_2_ARGS
981 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
982 lock %1 word [A0]
983 IEM_SAVE_FLAGS A1, %2, %3
984 EPILOGUE_2_ARGS
985ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
986
987BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 8
988 PROLOGUE_2_ARGS
989 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
990 %1 dword [A0]
991 IEM_SAVE_FLAGS A1, %2, %3
992 EPILOGUE_2_ARGS
993ENDPROC iemAImpl_ %+ %1 %+ _u32
994
995BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 8
996 PROLOGUE_2_ARGS
997 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
998 lock %1 dword [A0]
999 IEM_SAVE_FLAGS A1, %2, %3
1000 EPILOGUE_2_ARGS
1001ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
1002
1003 %ifdef RT_ARCH_AMD64
1004BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
1005 PROLOGUE_2_ARGS
1006 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1007 %1 qword [A0]
1008 IEM_SAVE_FLAGS A1, %2, %3
1009 EPILOGUE_2_ARGS
1010ENDPROC iemAImpl_ %+ %1 %+ _u64
1011
1012BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
1013 PROLOGUE_2_ARGS
1014 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1015 lock %1 qword [A0]
1016 IEM_SAVE_FLAGS A1, %2, %3
1017 EPILOGUE_2_ARGS
1018ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
1019 %else
1020 ; stub them for now.
1021BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
1022 int3
1023 ret 0
1024ENDPROC iemAImpl_ %+ %1 %+ _u64
1025BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
1026 int3
1027 ret 0
1028ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
1029 %endif
1030
1031%endmacro
1032
1033IEMIMPL_UNARY_OP inc, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
1034IEMIMPL_UNARY_OP dec, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
1035IEMIMPL_UNARY_OP neg, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
1036IEMIMPL_UNARY_OP not, 0, 0
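;
; Note that 'not' modifies no flags, so with both masks zero the IEM_SAVE_FLAGS in
; the template assembles to nothing (it is guarded by '%if (%2 | %3) != 0'), while
; IEM_MAYBE_LOAD_FLAGS still emits code because its guard is currently commented out.
;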
1037
1038
1039;;
1040; Macro for implementing a memory fence operation.
1041;
1042; No return value, no operands or anything.
1043;
1044; @param 1 The instruction.
1045;
1046%macro IEMIMPL_MEM_FENCE 1
1047BEGINCODE
1048BEGINPROC_FASTCALL iemAImpl_ %+ %1, 0
1049 %1
1050 ret
1051ENDPROC iemAImpl_ %+ %1
1052%endmacro
1053
1054IEMIMPL_MEM_FENCE lfence
1055IEMIMPL_MEM_FENCE sfence
1056IEMIMPL_MEM_FENCE mfence
1057
1058;;
1059; Alternative for non-SSE2 host.
1060;
1061BEGINPROC_FASTCALL iemAImpl_alt_mem_fence, 0
1062 push xAX
1063 xchg xAX, [xSP]
1064 add xSP, xCB
1065 ret
1066ENDPROC iemAImpl_alt_mem_fence
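;
; The push/xchg/add sequence above relies on xchg with a memory operand carrying an
; implicit LOCK: the locked access to the stack slot acts as a full memory barrier
; on hosts without mfence, while xAX and the stack pointer are left unchanged.
;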
1067
1068
1069
1070;;
1071; Macro for implementing a shift operation.
1072;
1073; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1074; 32-bit systems where the 64-bit accesses require hand coding.
1075;
1076; All the functions take a pointer to the destination memory operand in A0,
1077; the shift count in A1 and a pointer to eflags in A2.
1078;
1079; @param 1 The instruction mnemonic.
1080; @param 2 The modified flags.
1081; @param 3 The undefined flags.
1082;
1083; Makes ASSUMPTIONS about A0, A1 and A2 assignments.
1084;
1085%macro IEMIMPL_SHIFT_OP 3
1086BEGINCODE
1087BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
1088 PROLOGUE_3_ARGS
1089 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1090 %ifdef ASM_CALL64_GCC
1091 mov cl, A1_8
1092 %1 byte [A0], cl
1093 %else
1094 xchg A1, A0
1095 %1 byte [A1], cl
1096 %endif
1097 IEM_SAVE_FLAGS A2, %2, %3
1098 EPILOGUE_3_ARGS
1099ENDPROC iemAImpl_ %+ %1 %+ _u8
1100
1101BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
1102 PROLOGUE_3_ARGS
1103 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1104 %ifdef ASM_CALL64_GCC
1105 mov cl, A1_8
1106 %1 word [A0], cl
1107 %else
1108 xchg A1, A0
1109 %1 word [A1], cl
1110 %endif
1111 IEM_SAVE_FLAGS A2, %2, %3
1112 EPILOGUE_3_ARGS
1113ENDPROC iemAImpl_ %+ %1 %+ _u16
1114
1115BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
1116 PROLOGUE_3_ARGS
1117 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1118 %ifdef ASM_CALL64_GCC
1119 mov cl, A1_8
1120 %1 dword [A0], cl
1121 %else
1122 xchg A1, A0
1123 %1 dword [A1], cl
1124 %endif
1125 IEM_SAVE_FLAGS A2, %2, %3
1126 EPILOGUE_3_ARGS
1127ENDPROC iemAImpl_ %+ %1 %+ _u32
1128
1129 %ifdef RT_ARCH_AMD64
1130BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
1131 PROLOGUE_3_ARGS
1132 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1133 %ifdef ASM_CALL64_GCC
1134 mov cl, A1_8
1135 %1 qword [A0], cl
1136 %else
1137 xchg A1, A0
1138 %1 qword [A1], cl
1139 %endif
1140 IEM_SAVE_FLAGS A2, %2, %3
1141 EPILOGUE_3_ARGS
1142ENDPROC iemAImpl_ %+ %1 %+ _u64
1143 %else ; stub it for now - later, replace with hand coded stuff.
1144BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
1145 int3
1146 ret 4
1147ENDPROC iemAImpl_ %+ %1 %+ _u64
1148 %endif ; !RT_ARCH_AMD64
1149
1150%endmacro
1151
1152IEMIMPL_SHIFT_OP rol, (X86_EFL_OF | X86_EFL_CF), 0
1153IEMIMPL_SHIFT_OP ror, (X86_EFL_OF | X86_EFL_CF), 0
1154IEMIMPL_SHIFT_OP rcl, (X86_EFL_OF | X86_EFL_CF), 0
1155IEMIMPL_SHIFT_OP rcr, (X86_EFL_OF | X86_EFL_CF), 0
1156IEMIMPL_SHIFT_OP shl, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1157IEMIMPL_SHIFT_OP shr, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1158IEMIMPL_SHIFT_OP sar, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
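;
; About the xchg in the non-GCC paths of the template above: the shift count must
; end up in cl, but with the MSC and x86 fastcall conventions A0 is (r/e)cx and
; holds the operand pointer while the count arrives in A1. Exchanging A0 and A1
; therefore puts the count into cl and the pointer into A1 in a single instruction;
; the GCC path can simply do 'mov cl, A1_8' since A0 is rdi there.
;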
1159
1160
1161;;
1162; Macro for implementing a double precision shift operation.
1163;
1164; This will generate code for the 16, 32 and 64 bit accesses, except on
1165; 32-bit systems where the 64-bit accesses require hand coding.
1166;
1167; The functions take the destination operand (r/m) in A0, the source (reg) in
1168; A1, the shift count in A2 and a pointer to the eflags variable/register in A3.
1169;
1170; @param 1 The instruction mnemonic.
1171; @param 2 The modified flags.
1172; @param 3 The undefined flags.
1173;
1174; Makes ASSUMPTIONS about A0, A1, A2 and A3 assignments.
1175;
1176%macro IEMIMPL_SHIFT_DBL_OP 3
1177BEGINCODE
1178BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
1179 PROLOGUE_4_ARGS
1180 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1181 %ifdef ASM_CALL64_GCC
1182 xchg A3, A2
1183 %1 [A0], A1_16, cl
1184 xchg A3, A2
1185 %else
1186 xchg A0, A2
1187 %1 [A2], A1_16, cl
1188 %endif
1189 IEM_SAVE_FLAGS A3, %2, %3
1190 EPILOGUE_4_ARGS
1191ENDPROC iemAImpl_ %+ %1 %+ _u16
1192
1193BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
1194 PROLOGUE_4_ARGS
1195 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1196 %ifdef ASM_CALL64_GCC
1197 xchg A3, A2
1198 %1 [A0], A1_32, cl
1199 xchg A3, A2
1200 %else
1201 xchg A0, A2
1202 %1 [A2], A1_32, cl
1203 %endif
1204 IEM_SAVE_FLAGS A3, %2, %3
1205 EPILOGUE_4_ARGS
1206ENDPROC iemAImpl_ %+ %1 %+ _u32
1207
1208 %ifdef RT_ARCH_AMD64
1209BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
1210 PROLOGUE_4_ARGS
1211 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1212 %ifdef ASM_CALL64_GCC
1213 xchg A3, A2
1214 %1 [A0], A1, cl
1215 xchg A3, A2
1216 %else
1217 xchg A0, A2
1218 %1 [A2], A1, cl
1219 %endif
1220 IEM_SAVE_FLAGS A3, %2, %3
1221 EPILOGUE_4_ARGS_EX 12
1222ENDPROC iemAImpl_ %+ %1 %+ _u64
1223 %else ; stub it for now - later, replace with hand coded stuff.
1224BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
1225 int3
1226 ret 12
1227ENDPROC iemAImpl_ %+ %1 %+ _u64
1228 %endif ; !RT_ARCH_AMD64
1229
1230%endmacro
1231
1232IEMIMPL_SHIFT_DBL_OP shld, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1233IEMIMPL_SHIFT_DBL_OP shrd, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1234
1235
1236;;
1237; Macro for implementing multiplication operations.
1238;
1239; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1240; 32-bit systems where the 64-bit accesses require hand coding.
1241;
1242; The 8-bit function only operates on AX, so it takes no DX pointer. The other
1243; functions take a pointer to rAX in A0, a pointer to rDX in A1, the operand in
1244; A2 and a pointer to eflags in A3.
1245;
1246; The functions all return 0 so that the same caller code can be used for
1247; div/idiv as well as for the mul/imul implementation.
1248;
1249; @param 1 The instruction mnemonic.
1250; @param 2 The modified flags.
1251; @param 3 The undefined flags.
1252;
1253; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
1254;
1255%macro IEMIMPL_MUL_OP 3
1256BEGINCODE
1257BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
1258 PROLOGUE_3_ARGS
1259 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1260 mov al, [A0]
1261 %1 A1_8
1262 mov [A0], ax
1263 IEM_SAVE_FLAGS A2, %2, %3
1264 xor eax, eax
1265 EPILOGUE_3_ARGS
1266ENDPROC iemAImpl_ %+ %1 %+ _u8
1267
1268BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
1269 PROLOGUE_4_ARGS
1270 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1271 mov ax, [A0]
1272 %ifdef ASM_CALL64_GCC
1273 %1 A2_16
1274 mov [A0], ax
1275 mov [A1], dx
1276 %else
1277 mov T1, A1
1278 %1 A2_16
1279 mov [A0], ax
1280 mov [T1], dx
1281 %endif
1282 IEM_SAVE_FLAGS A3, %2, %3
1283 xor eax, eax
1284 EPILOGUE_4_ARGS
1285ENDPROC iemAImpl_ %+ %1 %+ _u16
1286
1287BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
1288 PROLOGUE_4_ARGS
1289 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1290 mov eax, [A0]
1291 %ifdef ASM_CALL64_GCC
1292 %1 A2_32
1293 mov [A0], eax
1294 mov [A1], edx
1295 %else
1296 mov T1, A1
1297 %1 A2_32
1298 mov [A0], eax
1299 mov [T1], edx
1300 %endif
1301 IEM_SAVE_FLAGS A3, %2, %3
1302 xor eax, eax
1303 EPILOGUE_4_ARGS
1304ENDPROC iemAImpl_ %+ %1 %+ _u32
1305
1306 %ifdef RT_ARCH_AMD64 ; The 32-bit host version lives in IEMAllAImplC.cpp.
1307BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
1308 PROLOGUE_4_ARGS
1309 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1310 mov rax, [A0]
1311 %ifdef ASM_CALL64_GCC
1312 %1 A2
1313 mov [A0], rax
1314 mov [A1], rdx
1315 %else
1316 mov T1, A1
1317 %1 A2
1318 mov [A0], rax
1319 mov [T1], rdx
1320 %endif
1321 IEM_SAVE_FLAGS A3, %2, %3
1322 xor eax, eax
1323 EPILOGUE_4_ARGS_EX 12
1324ENDPROC iemAImpl_ %+ %1 %+ _u64
1325 %endif ; !RT_ARCH_AMD64
1326
1327%endmacro
1328
1329IEMIMPL_MUL_OP mul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
1330IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
1331
1332
1333BEGINCODE
1334;;
1335; Worker function for negating the 64-bit number held in T1:T0 (two 32-bit registers)
1336; @uses None (T0,T1)
1337iemAImpl_negate_T0_T1_u32:
1338 push 0
1339 push 0
1340 xchg T0_32, [xSP]
1341 xchg T1_32, [xSP + xCB]
1342 sub T0_32, [xSP]
1343 sbb T1_32, [xSP + xCB]
1344 add xSP, xCB*2
1345 ret
1346
1347%ifdef RT_ARCH_AMD64
1348;;
1349; Worker function for negating the 128-bit number held in T1:T0 (two 64-bit registers)
1350; @uses None (T0,T1)
1351iemAImpl_negate_T0_T1_u64:
1352 push 0
1353 push 0
1354 xchg T0, [xSP]
1355 xchg T1, [xSP + xCB]
1356 sub T0, [xSP]
1357 sbb T1, [xSP + xCB]
1358 add xSP, xCB*2
1359 ret
1360%endif
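;
; Both workers compute the two's complement of the T1:T0 pair as a single value:
; two zeros are pushed, swapped with T0/T1, and '0 - value' is then formed with
; sub/sbb so the borrow propagates from the low half into the high half.
;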
1361
1362
1363;;
1364; Macro for implementing division operations.
1365;
1366; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1367; 32-bit systems where the 64-bit accesses require hand coding.
1368;
1369; The 8-bit function only operates on AX, so it takes no DX pointer. The other
1370; functions take a pointer to rAX in A0, a pointer to rDX in A1, the operand in
1371; A2 and a pointer to eflags in A3.
1372;
1373; The functions all return 0 on success and -1 if a divide error should be
1374; raised by the caller.
1375;
1376; @param 1 The instruction mnemonic.
1377; @param 2 The modified flags.
1378; @param 3 The undefined flags.
1379; @param 4 1 if signed, 0 if unsigned.
1380;
1381; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
1382;
1383%macro IEMIMPL_DIV_OP 4
1384BEGINCODE
1385BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
1386 PROLOGUE_3_ARGS
1387
1388 ; div by chainsaw check.
1389 test A1_8, A1_8
1390 jz .div_zero
1391
1392 ; Overflow check - unsigned division is simple to verify; we haven't
1393 ; found a simple way to check signed division yet, unfortunately.
1394 %if %4 == 0
1395 cmp [A0 + 1], A1_8
1396 jae .div_overflow
1397 %else
1398 mov T0_16, [A0] ; T0 = dividend
1399 mov T1, A1 ; T1 = saved divisor (because of missing T1_8 in 32-bit)
1400 test A1_8, A1_8
1401 js .divisor_negative
1402 test T0_16, T0_16
1403 jns .both_positive
1404 neg T0_16
1405.one_of_each: ; OK range is 2^(result-width - 1) * divisor + (divisor - 1).
1406 push T0 ; Start off like unsigned below.
1407 shr T0_16, 7
1408 cmp T0_8, A1_8
1409 pop T0
1410 jb .div_no_overflow
1411 ja .div_overflow
1412 and T0_8, 0x7f ; Special case for covering (divisor - 1).
1413 cmp T0_8, A1_8
1414 jae .div_overflow
1415 jmp .div_no_overflow
1416
1417.divisor_negative:
1418 neg A1_8
1419 test T0_16, T0_16
1420 jns .one_of_each
1421 neg T0_16
1422.both_positive: ; Same as unsigned shifted by sign indicator bit.
1423 shr T0_16, 7
1424 cmp T0_8, A1_8
1425 jae .div_overflow
1426.div_no_overflow:
1427 mov A1, T1 ; restore divisor
1428 %endif
1429
1430 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1431 mov ax, [A0]
1432 %1 A1_8
1433 mov [A0], ax
1434 IEM_SAVE_FLAGS A2, %2, %3
1435 xor eax, eax
1436
1437.return:
1438 EPILOGUE_3_ARGS
1439
1440.div_zero:
1441.div_overflow:
1442 mov eax, -1
1443 jmp .return
1444ENDPROC iemAImpl_ %+ %1 %+ _u8
1445
1446BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
1447 PROLOGUE_4_ARGS
1448
1449 ; div by chainsaw check.
1450 test A2_16, A2_16
1451 jz .div_zero
1452
1453 ; Overflow check - unsigned division is simple to verify; we haven't
1454 ; found a simple way to check signed division yet, unfortunately.
1455 %if %4 == 0
1456 cmp [A1], A2_16
1457 jae .div_overflow
1458 %else
1459 mov T0_16, [A1]
1460 shl T0_32, 16
1461 mov T0_16, [A0] ; T0 = dividend
1462 mov T1, A2 ; T1 = divisor
1463 test T1_16, T1_16
1464 js .divisor_negative
1465 test T0_32, T0_32
1466 jns .both_positive
1467 neg T0_32
1468.one_of_each: ; OK range is 2^(result-width - 1) * divisor + (divisor - 1).
1469 push T0 ; Start off like unsigned below.
1470 shr T0_32, 15
1471 cmp T0_16, T1_16
1472 pop T0
1473 jb .div_no_overflow
1474 ja .div_overflow
1475 and T0_16, 0x7fff ; Special case for covering (divisor - 1).
1476 cmp T0_16, T1_16
1477 jae .div_overflow
1478 jmp .div_no_overflow
1479
1480.divisor_negative:
1481 neg T1_16
1482 test T0_32, T0_32
1483 jns .one_of_each
1484 neg T0_32
1485.both_positive: ; Same as unsigned shifted by sign indicator bit.
1486 shr T0_32, 15
1487 cmp T0_16, T1_16
1488 jae .div_overflow
1489.div_no_overflow:
1490 %endif
1491
1492 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1493 %ifdef ASM_CALL64_GCC
1494 mov T1, A2
1495 mov ax, [A0]
1496 mov dx, [A1]
1497 %1 T1_16
1498 mov [A0], ax
1499 mov [A1], dx
1500 %else
1501 mov T1, A1
1502 mov ax, [A0]
1503 mov dx, [T1]
1504 %1 A2_16
1505 mov [A0], ax
1506 mov [T1], dx
1507 %endif
1508 IEM_SAVE_FLAGS A3, %2, %3
1509 xor eax, eax
1510
1511.return:
1512 EPILOGUE_4_ARGS
1513
1514.div_zero:
1515.div_overflow:
1516 mov eax, -1
1517 jmp .return
1518ENDPROC iemAImpl_ %+ %1 %+ _u16
1519
1520BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
1521 PROLOGUE_4_ARGS
1522
1523 ; div by chainsaw check.
1524 test A2_32, A2_32
1525 jz .div_zero
1526
1527 ; Overflow check - unsigned division is simple to verify; we haven't
1528 ; found a simple way to check signed division yet, unfortunately.
1529 %if %4 == 0
1530 cmp [A1], A2_32
1531 jae .div_overflow
1532 %else
1533 push A2 ; save A2 because we modify it (we're out of regs on x86).
1534 mov T0_32, [A0] ; T0 = dividend low
1535 mov T1_32, [A1] ; T1 = dividend high
1536 test A2_32, A2_32
1537 js .divisor_negative
1538 test T1_32, T1_32
1539 jns .both_positive
1540 call iemAImpl_negate_T0_T1_u32
1541.one_of_each: ; OK range is 2^(result-width - 1) * divisor + (divisor - 1).
1542 push T0 ; Start off like unsigned below.
1543 shl T1_32, 1
1544 shr T0_32, 31
1545 or T1_32, T0_32
1546 cmp T1_32, A2_32
1547 pop T0
1548 jb .div_no_overflow
1549 ja .div_overflow
1550 and T0_32, 0x7fffffff ; Special case for covering (divisor - 1).
1551 cmp T0_32, A2_32
1552 jae .div_overflow
1553 jmp .div_no_overflow
1554
1555.divisor_negative:
1556 neg A2_32
1557 test T1_32, T1_32
1558 jns .one_of_each
1559 call iemAImpl_negate_T0_T1_u32
1560.both_positive: ; Same as unsigned shifted by sign indicator bit.
1561 shl T1_32, 1
1562 shr T0_32, 31
1563 or T1_32, T0_32
1564 cmp T1_32, A2_32
1565 jae .div_overflow
1566.div_no_overflow:
1567 pop A2
1568 %endif
1569
1570 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1571 mov eax, [A0]
1572 %ifdef ASM_CALL64_GCC
1573 mov T1, A2
1574 mov eax, [A0]
1575 mov edx, [A1]
1576 %1 T1_32
1577 mov [A0], eax
1578 mov [A1], edx
1579 %else
1580 mov T1, A1
1581 mov eax, [A0]
1582 mov edx, [T1]
1583 %1 A2_32
1584 mov [A0], eax
1585 mov [T1], edx
1586 %endif
1587 IEM_SAVE_FLAGS A3, %2, %3
1588 xor eax, eax
1589
1590.return:
1591 EPILOGUE_4_ARGS
1592
1593.div_overflow:
1594 %if %4 != 0
1595 pop A2
1596 %endif
1597.div_zero:
1598 mov eax, -1
1599 jmp .return
1600ENDPROC iemAImpl_ %+ %1 %+ _u32
1601
1602 %ifdef RT_ARCH_AMD64 ; The 32-bit host version lives in IEMAllAImplC.cpp.
1603BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
1604 PROLOGUE_4_ARGS
1605
1606 test A2, A2
1607 jz .div_zero
1608 %if %4 == 0
1609 cmp [A1], A2
1610 jae .div_overflow
1611 %else
1612 push A2 ; save A2 because we modify it (we're out of regs on x86).
1613 mov T0, [A0] ; T0 = dividend low
1614 mov T1, [A1] ; T1 = dividend high
1615 test A2, A2
1616 js .divisor_negative
1617 test T1, T1
1618 jns .both_positive
1619 call iemAImpl_negate_T0_T1_u64
1620.one_of_each: ; OK range is 2^(result-width - 1) * divisor + (divisor - 1).
1621 push T0 ; Start off like unsigned below.
1622 shl T1, 1
1623 shr T0, 63
1624 or T1, T0
1625 cmp T1, A2
1626 pop T0
1627 jb .div_no_overflow
1628 ja .div_overflow
1629 mov T1, 0x7fffffffffffffff
1630 and T0, T1 ; Special case for covering (divisor - 1).
1631 cmp T0, A2
1632 jae .div_overflow
1633 jmp .div_no_overflow
1634
1635.divisor_negative:
1636 neg A2
1637 test T1, T1
1638 jns .one_of_each
1639 call iemAImpl_negate_T0_T1_u64
1640.both_positive: ; Same as unsigned shifted by sign indicator bit.
1641 shl T1, 1
1642 shr T0, 63
1643 or T1, T0
1644 cmp T1, A2
1645 jae .div_overflow
1646.div_no_overflow:
1647 pop A2
1648 %endif
1649
1650 IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1651 mov rax, [A0]
1652 %ifdef ASM_CALL64_GCC
1653 mov T1, A2
1654 mov rax, [A0]
1655 mov rdx, [A1]
1656 %1 T1
1657 mov [A0], rax
1658 mov [A1], rdx
1659 %else
1660 mov T1, A1
1661 mov rax, [A0]
1662 mov rdx, [T1]
1663 %1 A2
1664 mov [A0], rax
1665 mov [T1], rdx
1666 %endif
1667 IEM_SAVE_FLAGS A3, %2, %3
1668 xor eax, eax
1669
1670.return:
1671 EPILOGUE_4_ARGS_EX 12
1672
1673.div_overflow:
1674 %if %4 != 0
1675 pop A2
1676 %endif
1677.div_zero:
1678 mov eax, -1
1679 jmp .return
1680ENDPROC iemAImpl_ %+ %1 %+ _u64
1681 %endif ; !RT_ARCH_AMD64
1682
1683%endmacro
1684
1685IEMIMPL_DIV_OP div, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
1686IEMIMPL_DIV_OP idiv, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 1
1687
1688
1689;
1690; BSWAP. No flag changes.
1691;
1692; Each function takes one argument, a pointer to the value to bswap
1693; (input/output). They all return void.
1694;
1695BEGINPROC_FASTCALL iemAImpl_bswap_u16, 4
1696 PROLOGUE_1_ARGS
1697 mov T0_32, [A0] ; just in case any of the upper bits are used.
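 ; The 66h prefix below turns bswap into its 16-bit operand-size form, which the
 ; architecture leaves undefined; executing it anyway presumably lets the guest
 ; observe whatever the host CPU does for a 16-bit bswap.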
1698 db 66h
1699 bswap T0_32
1700 mov [A0], T0_32
1701 EPILOGUE_1_ARGS
1702ENDPROC iemAImpl_bswap_u16
1703
1704BEGINPROC_FASTCALL iemAImpl_bswap_u32, 4
1705 PROLOGUE_1_ARGS
1706 mov T0_32, [A0]
1707 bswap T0_32
1708 mov [A0], T0_32
1709 EPILOGUE_1_ARGS
1710ENDPROC iemAImpl_bswap_u32
1711
1712BEGINPROC_FASTCALL iemAImpl_bswap_u64, 4
1713%ifdef RT_ARCH_AMD64
1714 PROLOGUE_1_ARGS
1715 mov T0, [A0]
1716 bswap T0
1717 mov [A0], T0
1718 EPILOGUE_1_ARGS
1719%else
1720 PROLOGUE_1_ARGS
1721 mov T0, [A0]
1722 mov T1, [A0 + 4]
1723 bswap T0
1724 bswap T1
1725 mov [A0 + 4], T0
1726 mov [A0], T1
1727 EPILOGUE_1_ARGS
1728%endif
1729ENDPROC iemAImpl_bswap_u64
1730
1731
1732;;
1733; Initialize the FPU for the actual instruction being emulated; this means
1734; loading parts of the guest's control word and status word.
1735;
1736; @uses 24 bytes of stack.
1737; @param 1 Expression giving the address of the FXSTATE of the guest.
1738;
1739%macro FPU_LD_FXSTATE_FCW_AND_SAFE_FSW 1
1740 fnstenv [xSP]
1741
1742 ; FCW - for exception, precision and rounding control.
1743 movzx T0, word [%1 + X86FXSTATE.FCW]
1744 and T0, X86_FCW_MASK_ALL | X86_FCW_PC_MASK | X86_FCW_RC_MASK
1745 mov [xSP + X86FSTENV32P.FCW], T0_16
1746
1747 ; FSW - for undefined C0, C1, C2, and C3.
1748 movzx T1, word [%1 + X86FXSTATE.FSW]
1749 and T1, X86_FSW_C_MASK
1750 movzx T0, word [xSP + X86FSTENV32P.FSW]
1751 and T0, X86_FSW_TOP_MASK
1752 or T0, T1
1753 mov [xSP + X86FSTENV32P.FSW], T0_16
1754
1755 fldenv [xSP]
1756%endmacro
1757
1758
1759;;
1760; Need to move this as well somewhere better?
1761;
1762struc IEMFPURESULT
1763 .r80Result resw 5
1764 .FSW resw 1
1765endstruc
1766
1767
1768;;
1769; Need to move this as well somewhere better?
1770;
1771struc IEMFPURESULTTWO
1772 .r80Result1 resw 5
1773 .FSW resw 1
1774 .r80Result2 resw 5
1775endstruc
1776
1777
1778;
1779;---------------------- 16-bit signed integer operations ----------------------
1780;
1781
1782
1783;;
1784; Converts a 16-bit signed integer value to an 80-bit floating point value (fpu register).
1785;
1786; @param A0 FPU context (fxsave).
1787; @param A1 Pointer to a IEMFPURESULT for the output.
1788; @param A2 Pointer to the 16-bit signed integer value to convert.
1789;
1790BEGINPROC_FASTCALL iemAImpl_fild_i16_to_r80, 12
1791 PROLOGUE_3_ARGS
1792 sub xSP, 20h
1793
1794 fninit
1795 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1796 fild word [A2]
1797
1798 fnstsw word [A1 + IEMFPURESULT.FSW]
1799 fnclex
1800 fstp tword [A1 + IEMFPURESULT.r80Result]
1801
1802 fninit
1803 add xSP, 20h
1804 EPILOGUE_3_ARGS
1805ENDPROC iemAImpl_fild_i16_to_r80
1806
1807
1808;;
1809; Store an 80-bit floating point value (register) as a 16-bit signed integer (memory).
1810;
1811; @param A0 FPU context (fxsave).
1812; @param A1 Where to return the output FSW.
1813; @param A2 Where to store the 16-bit signed integer value.
1814; @param A3 Pointer to the 80-bit value.
1815;
1816BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i16, 16
1817 PROLOGUE_4_ARGS
1818 sub xSP, 20h
1819
1820 fninit
1821 fld tword [A3]
1822 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1823 fistp word [A2]
1824
1825 fnstsw word [A1]
1826
1827 fninit
1828 add xSP, 20h
1829 EPILOGUE_4_ARGS
1830ENDPROC iemAImpl_fist_r80_to_i16
1831
1832
1833;;
1834; Store an 80-bit floating point value (register) as a 16-bit signed integer
1835; (memory) with truncation.
1836;
1837; @param A0 FPU context (fxsave).
1838; @param A1 Where to return the output FSW.
1839; @param A2 Where to store the 16-bit signed integer value.
1840; @param A3 Pointer to the 80-bit value.
1841;
1842BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i16, 16
1843 PROLOGUE_4_ARGS
1844 sub xSP, 20h
1845
1846 fninit
1847 fld tword [A3]
1848 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1849 fisttp word [A2]
1850
1851 fnstsw word [A1]
1852
1853 fninit
1854 add xSP, 20h
1855 EPILOGUE_4_ARGS
1856ENDPROC iemAImpl_fistt_r80_to_i16
1857
1858
1859;;
1860; FPU instruction working on one 80-bit and one 16-bit signed integer value.
1861;
1862; @param 1 The instruction
1863;
1864; @param A0 FPU context (fxsave).
1865; @param A1 Pointer to a IEMFPURESULT for the output.
1866; @param A2 Pointer to the 80-bit value.
1867; @param A3 Pointer to the 16-bit value.
1868;
1869%macro IEMIMPL_FPU_R80_BY_I16 1
1870BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
1871 PROLOGUE_4_ARGS
1872 sub xSP, 20h
1873
1874 fninit
1875 fld tword [A2]
1876 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1877 %1 word [A3]
1878
1879 fnstsw word [A1 + IEMFPURESULT.FSW]
1880 fnclex
1881 fstp tword [A1 + IEMFPURESULT.r80Result]
1882
1883 fninit
1884 add xSP, 20h
1885 EPILOGUE_4_ARGS
1886ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
1887%endmacro
1888
1889IEMIMPL_FPU_R80_BY_I16 fiadd
1890IEMIMPL_FPU_R80_BY_I16 fimul
1891IEMIMPL_FPU_R80_BY_I16 fisub
1892IEMIMPL_FPU_R80_BY_I16 fisubr
1893IEMIMPL_FPU_R80_BY_I16 fidiv
1894IEMIMPL_FPU_R80_BY_I16 fidivr
1895
1896
1897;;
1898; FPU instruction working on one 80-bit and one 16-bit signed integer value,
1899; only returning FSW.
1900;
1901; @param 1 The instruction
1902;
1903; @param A0 FPU context (fxsave).
1904; @param A1 Where to store the output FSW.
1905; @param A2 Pointer to the 80-bit value.
1906; @param A3 Pointer to the 16-bit value.
1907;
1908%macro IEMIMPL_FPU_R80_BY_I16_FSW 1
1909BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
1910 PROLOGUE_4_ARGS
1911 sub xSP, 20h
1912
1913 fninit
1914 fld tword [A2]
1915 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1916 %1 word [A3]
1917
1918 fnstsw word [A1]
1919
1920 fninit
1921 add xSP, 20h
1922 EPILOGUE_4_ARGS
1923ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
1924%endmacro
1925
1926IEMIMPL_FPU_R80_BY_I16_FSW ficom
1927
1928
1929
1930;
1931;---------------------- 32-bit signed integer operations ----------------------
1932;
1933
1934
1935;;
1936; Converts a 32-bit signed integer value to an 80-bit floating point value (fpu register).
1937;
1938; @param A0 FPU context (fxsave).
1939; @param A1 Pointer to a IEMFPURESULT for the output.
1940; @param A2 Pointer to the 32-bit signed integer value to convert.
1941;
1942BEGINPROC_FASTCALL iemAImpl_fild_i32_to_r80, 12
1943 PROLOGUE_3_ARGS
1944 sub xSP, 20h
1945
1946 fninit
1947 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1948 fild dword [A2]
1949
1950 fnstsw word [A1 + IEMFPURESULT.FSW]
1951 fnclex
1952 fstp tword [A1 + IEMFPURESULT.r80Result]
1953
1954 fninit
1955 add xSP, 20h
1956 EPILOGUE_3_ARGS
1957ENDPROC iemAImpl_fild_i32_to_r80
1958
1959
1960;;
1961; Store an 80-bit floating point value (register) as a 32-bit signed integer (memory).
1962;
1963; @param A0 FPU context (fxsave).
1964; @param A1 Where to return the output FSW.
1965; @param A2 Where to store the 32-bit signed integer value.
1966; @param A3 Pointer to the 80-bit value.
1967;
1968BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i32, 16
1969 PROLOGUE_4_ARGS
1970 sub xSP, 20h
1971
1972 fninit
1973 fld tword [A3]
1974 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1975 fistp dword [A2]
1976
1977 fnstsw word [A1]
1978
1979 fninit
1980 add xSP, 20h
1981 EPILOGUE_4_ARGS
1982ENDPROC iemAImpl_fist_r80_to_i32
1983
1984
1985;;
1986; Store an 80-bit floating point value (register) as a 32-bit signed integer
1987; (memory) with truncation.
1988;
1989; @param A0 FPU context (fxsave).
1990; @param A1 Where to return the output FSW.
1991; @param A2 Where to store the 32-bit signed integer value.
1992; @param A3 Pointer to the 80-bit value.
1993;
1994BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i32, 16
1995 PROLOGUE_4_ARGS
1996 sub xSP, 20h
1997
1998 fninit
1999 fld tword [A3]
2000 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2001 fisttp dword [A2]
2002
2003 fnstsw word [A1]
2004
2005 fninit
2006 add xSP, 20h
2007 EPILOGUE_4_ARGS
2008ENDPROC iemAImpl_fistt_r80_to_i32
2009
2010
2011;;
2012; FPU instruction working on one 80-bit and one 32-bit signed integer value.
2013;
2014; @param 1 The instruction
2015;
2016; @param A0 FPU context (fxsave).
2017; @param A1 Pointer to a IEMFPURESULT for the output.
2018; @param A2 Pointer to the 80-bit value.
2019; @param A3 Pointer to the 32-bit value.
2020;
2021%macro IEMIMPL_FPU_R80_BY_I32 1
2022BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
2023 PROLOGUE_4_ARGS
2024 sub xSP, 20h
2025
2026 fninit
2027 fld tword [A2]
2028 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2029 %1 dword [A3]
2030
2031 fnstsw word [A1 + IEMFPURESULT.FSW]
2032 fnclex
2033 fstp tword [A1 + IEMFPURESULT.r80Result]
2034
2035 fninit
2036 add xSP, 20h
2037 EPILOGUE_4_ARGS
2038ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
2039%endmacro
2040
2041IEMIMPL_FPU_R80_BY_I32 fiadd
2042IEMIMPL_FPU_R80_BY_I32 fimul
2043IEMIMPL_FPU_R80_BY_I32 fisub
2044IEMIMPL_FPU_R80_BY_I32 fisubr
2045IEMIMPL_FPU_R80_BY_I32 fidiv
2046IEMIMPL_FPU_R80_BY_I32 fidivr
2047
2048
2049;;
2050; FPU instruction working on one 80-bit and one 32-bit signed integer value,
2051; only returning FSW.
2052;
2053; @param 1 The instruction
2054;
2055; @param A0 FPU context (fxsave).
2056; @param A1 Where to store the output FSW.
2057; @param A2 Pointer to the 80-bit value.
2058; @param A3 Pointer to the 32-bit value.
2059;
2060%macro IEMIMPL_FPU_R80_BY_I32_FSW 1
2061BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
2062 PROLOGUE_4_ARGS
2063 sub xSP, 20h
2064
2065 fninit
2066 fld tword [A2]
2067 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2068 %1 dword [A3]
2069
2070 fnstsw word [A1]
2071
2072 fninit
2073 add xSP, 20h
2074 EPILOGUE_4_ARGS
2075ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
2076%endmacro
2077
2078IEMIMPL_FPU_R80_BY_I32_FSW ficom
2079
2080
2081
2082;
2083;---------------------- 64-bit signed integer operations ----------------------
2084;
2085
2086
2087;;
2088; Converts a 64-bit signed integer value to an 80-bit floating point value (fpu register).
2089;
2090; @param A0 FPU context (fxsave).
2091; @param A1 Pointer to a IEMFPURESULT for the output.
2092; @param A2 Pointer to the 64-bit signed integer value to convert.
2093;
2094BEGINPROC_FASTCALL iemAImpl_fild_i64_to_r80, 12
2095 PROLOGUE_3_ARGS
2096 sub xSP, 20h
2097
2098 fninit
2099 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2100 fild qword [A2]
2101
2102 fnstsw word [A1 + IEMFPURESULT.FSW]
2103 fnclex
2104 fstp tword [A1 + IEMFPURESULT.r80Result]
2105
2106 fninit
2107 add xSP, 20h
2108 EPILOGUE_3_ARGS
2109ENDPROC iemAImpl_fild_i64_to_r80
2110
2111
2112;;
2113; Stores an 80-bit floating point value (register) as a 64-bit signed integer (memory).
2114;
2115; @param A0 FPU context (fxsave).
2116; @param A1 Where to return the output FSW.
2117; @param A2 Where to store the 64-bit signed integer value.
2118; @param A3 Pointer to the 80-bit value.
2119;
2120BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i64, 16
2121 PROLOGUE_4_ARGS
2122 sub xSP, 20h
2123
2124 fninit
2125 fld tword [A3]
2126 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2127 fistp qword [A2]
2128
2129 fnstsw word [A1]
2130
2131 fninit
2132 add xSP, 20h
2133 EPILOGUE_4_ARGS
2134ENDPROC iemAImpl_fist_r80_to_i64
2135
2136
2137;;
2138; Stores an 80-bit floating point value (register) as a 64-bit signed integer
2139; (memory) with truncation.
2140;
2141; @param A0 FPU context (fxsave).
2142; @param A1 Where to return the output FSW.
2143; @param A2 Where to store the 64-bit signed integer value.
2144; @param A3 Pointer to the 80-bit value.
2145;
2146BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i64, 16
2147 PROLOGUE_4_ARGS
2148 sub xSP, 20h
2149
2150 fninit
2151 fld tword [A3]
2152 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2153 fisttp qword [A2]
2154
2155 fnstsw word [A1]
2156
2157 fninit
2158 add xSP, 20h
2159 EPILOGUE_4_ARGS
2160ENDPROC iemAImpl_fistt_r80_to_i64
2161
2162
2163
2164;
2165;---------------------- 32-bit floating point operations ----------------------
2166;
2167
2168;;
2169; Converts a 32-bit floating point value to an 80-bit one (fpu register).
2170;
2171; @param A0 FPU context (fxsave).
2172; @param A1 Pointer to a IEMFPURESULT for the output.
2173; @param A2 Pointer to the 32-bit floating point value to convert.
2174;
2175BEGINPROC_FASTCALL iemAImpl_fld_r32_to_r80, 12
2176 PROLOGUE_3_ARGS
2177 sub xSP, 20h
2178
2179 fninit
2180 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2181 fld dword [A2]
2182
2183 fnstsw word [A1 + IEMFPURESULT.FSW]
2184 fnclex
2185 fstp tword [A1 + IEMFPURESULT.r80Result]
2186
2187 fninit
2188 add xSP, 20h
2189 EPILOGUE_3_ARGS
2190ENDPROC iemAImpl_fld_r32_to_r80
2191
2192
2193;;
2194; Stores an 80-bit floating point value (register) as a 32-bit one (memory).
2195;
2196; @param A0 FPU context (fxsave).
2197; @param A1 Where to return the output FSW.
2198; @param A2 Where to store the 32-bit value.
2199; @param A3 Pointer to the 80-bit value.
2200;
2201BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r32, 16
2202 PROLOGUE_4_ARGS
2203 sub xSP, 20h
2204
2205 fninit
2206 fld tword [A3]
2207 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2208 fst dword [A2]
2209
2210 fnstsw word [A1]
2211
2212 fninit
2213 add xSP, 20h
2214 EPILOGUE_4_ARGS
2215ENDPROC iemAImpl_fst_r80_to_r32
2216
2217
2218;;
2219; FPU instruction working on one 80-bit and one 32-bit floating point value.
2220;
2221; @param 1 The instruction
2222;
2223; @param A0 FPU context (fxsave).
2224; @param A1 Pointer to a IEMFPURESULT for the output.
2225; @param A2 Pointer to the 80-bit value.
2226; @param A3 Pointer to the 32-bit value.
2227;
2228%macro IEMIMPL_FPU_R80_BY_R32 1
2229BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
2230 PROLOGUE_4_ARGS
2231 sub xSP, 20h
2232
2233 fninit
2234 fld tword [A2]
2235 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2236 %1 dword [A3]
2237
2238 fnstsw word [A1 + IEMFPURESULT.FSW]
2239 fnclex
2240 fstp tword [A1 + IEMFPURESULT.r80Result]
2241
2242 fninit
2243 add xSP, 20h
2244 EPILOGUE_4_ARGS
2245ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
2246%endmacro
2247
2248IEMIMPL_FPU_R80_BY_R32 fadd
2249IEMIMPL_FPU_R80_BY_R32 fmul
2250IEMIMPL_FPU_R80_BY_R32 fsub
2251IEMIMPL_FPU_R80_BY_R32 fsubr
2252IEMIMPL_FPU_R80_BY_R32 fdiv
2253IEMIMPL_FPU_R80_BY_R32 fdivr
2254
2255
2256;;
2257; FPU instruction working on one 80-bit and one 32-bit floating point value,
2258; only returning FSW.
2259;
2260; @param 1 The instruction
2261;
2262; @param A0 FPU context (fxsave).
2263; @param A1 Where to store the output FSW.
2264; @param A2 Pointer to the 80-bit value.
2265; @param A3 Pointer to the 32-bit value.
2266;
2267%macro IEMIMPL_FPU_R80_BY_R32_FSW 1
2268BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
2269 PROLOGUE_4_ARGS
2270 sub xSP, 20h
2271
2272 fninit
2273 fld tword [A2]
2274 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2275 %1 dword [A3]
2276
2277 fnstsw word [A1]
2278
2279 fninit
2280 add xSP, 20h
2281 EPILOGUE_4_ARGS
2282ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
2283%endmacro
2284
2285IEMIMPL_FPU_R80_BY_R32_FSW fcom
2286
2287
2288
2289;
2290;---------------------- 64-bit floating point operations ----------------------
2291;
2292
2293;;
2294; Converts a 64-bit floating point value to an 80-bit one (fpu register).
2295;
2296; @param A0 FPU context (fxsave).
2297; @param A1 Pointer to a IEMFPURESULT for the output.
2298; @param A2 Pointer to the 64-bit floating point value to convert.
2299;
2300BEGINPROC_FASTCALL iemAImpl_fld_r64_to_r80, 12
2301 PROLOGUE_3_ARGS
2302 sub xSP, 20h
2303
 fninit
2304 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2305 fld qword [A2]
2306
2307 fnstsw word [A1 + IEMFPURESULT.FSW]
2308 fnclex
2309 fstp tword [A1 + IEMFPURESULT.r80Result]
2310
2311 fninit
2312 add xSP, 20h
2313 EPILOGUE_3_ARGS
2314ENDPROC iemAImpl_fld_r64_to_r80
2315
2316
2317;;
2318; Stores an 80-bit floating point value (register) as a 64-bit one (memory).
2319;
2320; @param A0 FPU context (fxsave).
2321; @param A1 Where to return the output FSW.
2322; @param A2 Where to store the 64-bit value.
2323; @param A3 Pointer to the 80-bit value.
2324;
2325BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r64, 16
2326 PROLOGUE_4_ARGS
2327 sub xSP, 20h
2328
2329 fninit
2330 fld tword [A3]
2331 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2332 fst qword [A2]
2333
2334 fnstsw word [A1]
2335
2336 fninit
2337 add xSP, 20h
2338 EPILOGUE_4_ARGS
2339ENDPROC iemAImpl_fst_r80_to_r64
2340
2341
2342;;
2343; FPU instruction working on one 80-bit and one 64-bit floating point value.
2344;
2345; @param 1 The instruction
2346;
2347; @param A0 FPU context (fxsave).
2348; @param A1 Pointer to a IEMFPURESULT for the output.
2349; @param A2 Pointer to the 80-bit value.
2350; @param A3 Pointer to the 64-bit value.
2351;
2352%macro IEMIMPL_FPU_R80_BY_R64 1
2353BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
2354 PROLOGUE_4_ARGS
2355 sub xSP, 20h
2356
2357 fninit
2358 fld tword [A2]
2359 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2360 %1 qword [A3]
2361
2362 fnstsw word [A1 + IEMFPURESULT.FSW]
2363 fnclex
2364 fstp tword [A1 + IEMFPURESULT.r80Result]
2365
2366 fninit
2367 add xSP, 20h
2368 EPILOGUE_4_ARGS
2369ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
2370%endmacro
2371
2372IEMIMPL_FPU_R80_BY_R64 fadd
2373IEMIMPL_FPU_R80_BY_R64 fmul
2374IEMIMPL_FPU_R80_BY_R64 fsub
2375IEMIMPL_FPU_R80_BY_R64 fsubr
2376IEMIMPL_FPU_R80_BY_R64 fdiv
2377IEMIMPL_FPU_R80_BY_R64 fdivr
2378
2379;;
2380; FPU instruction working on one 80-bit and one 64-bit floating point value,
2381; only returning FSW.
2382;
2383; @param 1 The instruction
2384;
2385; @param A0 FPU context (fxsave).
2386; @param A1 Where to store the output FSW.
2387; @param A2 Pointer to the 80-bit value.
2388; @param A3 Pointer to the 64-bit value.
2389;
2390%macro IEMIMPL_FPU_R80_BY_R64_FSW 1
2391BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
2392 PROLOGUE_4_ARGS
2393 sub xSP, 20h
2394
2395 fninit
2396 fld tword [A2]
2397 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2398 %1 qword [A3]
2399
2400 fnstsw word [A1]
2401
2402 fninit
2403 add xSP, 20h
2404 EPILOGUE_4_ARGS
2405ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
2406%endmacro
2407
2408IEMIMPL_FPU_R80_BY_R64_FSW fcom
2409
2410
2411
2412;
2413;---------------------- 80-bit floating point operations ----------------------
2414;
2415
2416;;
2417; Loads an 80-bit floating point register value from memory.
2418;
2419; @param A0 FPU context (fxsave).
2420; @param A1 Pointer to a IEMFPURESULT for the output.
2421; @param A2 Pointer to the 80-bit floating point value to load.
2422;
2423BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r80, 12
2424 PROLOGUE_3_ARGS
2425 sub xSP, 20h
2426
2427 fninit
2428 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2429 fld tword [A2]
2430
2431 fnstsw word [A1 + IEMFPURESULT.FSW]
2432 fnclex
2433 fstp tword [A1 + IEMFPURESULT.r80Result]
2434
2435 fninit
2436 add xSP, 20h
2437 EPILOGUE_3_ARGS
2438ENDPROC iemAImpl_fld_r80_from_r80
2439
2440
2441;;
2442; Stores an 80-bit floating point register to memory.
2443;
2444; @param A0 FPU context (fxsave).
2445; @param A1 Where to return the output FSW.
2446; @param A2 Where to store the 80-bit value.
2447; @param A3 Pointer to the 80-bit register value.
2448;
2449BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r80, 16
2450 PROLOGUE_4_ARGS
2451 sub xSP, 20h
2452
2453 fninit
2454 fld tword [A3]
2455 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2456 fstp tword [A2]
2457
2458 fnstsw word [A1]
2459
2460 fninit
2461 add xSP, 20h
2462 EPILOGUE_4_ARGS
2463ENDPROC iemAImpl_fst_r80_to_r80
2464
2465
2466;;
2467; FPU instruction working on two 80-bit floating point values.
2468;
2469; @param 1 The instruction
; @param 2 The register operand(s) for the instruction, e.g. {st0, st1}, or {} for
; instructions with implicit operands (fprem, fprem1, fscale).
2470;
2471; @param A0 FPU context (fxsave).
2472; @param A1 Pointer to a IEMFPURESULT for the output.
2473; @param A2 Pointer to the first 80-bit value (ST0)
2474; @param A3 Pointer to the second 80-bit value (STn).
2475;
2476%macro IEMIMPL_FPU_R80_BY_R80 2
2477BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2478 PROLOGUE_4_ARGS
2479 sub xSP, 20h
2480
2481 fninit
2482 fld tword [A3]
2483 fld tword [A2]
2484 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2485 %1 %2
2486
2487 fnstsw word [A1 + IEMFPURESULT.FSW]
2488 fnclex
2489 fstp tword [A1 + IEMFPURESULT.r80Result]
2490
2491 fninit
2492 add xSP, 20h
2493 EPILOGUE_4_ARGS
2494ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2495%endmacro
2496
2497IEMIMPL_FPU_R80_BY_R80 fadd, {st0, st1}
2498IEMIMPL_FPU_R80_BY_R80 fmul, {st0, st1}
2499IEMIMPL_FPU_R80_BY_R80 fsub, {st0, st1}
2500IEMIMPL_FPU_R80_BY_R80 fsubr, {st0, st1}
2501IEMIMPL_FPU_R80_BY_R80 fdiv, {st0, st1}
2502IEMIMPL_FPU_R80_BY_R80 fdivr, {st0, st1}
2503IEMIMPL_FPU_R80_BY_R80 fprem, {}
2504IEMIMPL_FPU_R80_BY_R80 fprem1, {}
2505IEMIMPL_FPU_R80_BY_R80 fscale, {}
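
;
; Expansion example (illustrative): 'IEMIMPL_FPU_R80_BY_R80 fadd, {st0, st1}' turns the
; operation line into 'fadd st0, st1', while the {} instantiations (fprem, fprem1,
; fscale) expand to the bare instruction, which implicitly uses ST0 and ST1.
;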
2506
2507
2508;;
2509; FPU instruction working on two 80-bit floating point values, ST1 and ST0,
2510; storing the result in ST1 and popping the stack.
2511;
2512; @param 1 The instruction
2513;
2514; @param A0 FPU context (fxsave).
2515; @param A1 Pointer to a IEMFPURESULT for the output.
2516; @param A2 Pointer to the first 80-bit value (ST1).
2517; @param A3 Pointer to the second 80-bit value (ST0).
2518;
2519%macro IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP 1
2520BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2521 PROLOGUE_4_ARGS
2522 sub xSP, 20h
2523
2524 fninit
2525 fld tword [A2]
2526 fld tword [A3]
2527 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2528 %1
2529
2530 fnstsw word [A1 + IEMFPURESULT.FSW]
2531 fnclex
2532 fstp tword [A1 + IEMFPURESULT.r80Result]
2533
2534 fninit
2535 add xSP, 20h
2536 EPILOGUE_4_ARGS
2537ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2538%endmacro
2539
2540IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fpatan
2541IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2xp1
2542
2543
2544;;
2545; FPU instruction working on two 80-bit floating point values, only
2546; returning FSW.
2547;
2548; @param 1 The instruction
2549;
2550; @param A0 FPU context (fxsave).
2551; @param A1 Pointer to a uint16_t for the resulting FSW.
2552; @param A2 Pointer to the first 80-bit value.
2553; @param A3 Pointer to the second 80-bit value.
2554;
2555%macro IEMIMPL_FPU_R80_BY_R80_FSW 1
2556BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2557 PROLOGUE_4_ARGS
2558 sub xSP, 20h
2559
2560 fninit
2561 fld tword [A3]
2562 fld tword [A2]
2563 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2564 %1 st0, st1
2565
2566 fnstsw word [A1]
2567
2568 fninit
2569 add xSP, 20h
2570 EPILOGUE_4_ARGS
2571ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2572%endmacro
2573
2574IEMIMPL_FPU_R80_BY_R80_FSW fcom
2575IEMIMPL_FPU_R80_BY_R80_FSW fucom
2576
2577
2578;;
2579; FPU instruction working on two 80-bit floating point values,
2580; returning FSW and EFLAGS (eax).
2581;
2582; @param 1 The instruction
2583;
2584; @returns EFLAGS in EAX.
2585; @param A0 FPU context (fxsave).
2586; @param A1 Pointer to a uint16_t for the resulting FSW.
2587; @param A2 Pointer to the first 80-bit value.
2588; @param A3 Pointer to the second 80-bit value.
2589;
2590%macro IEMIMPL_FPU_R80_BY_R80_EFL 1
2591BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2592 PROLOGUE_4_ARGS
2593 sub xSP, 20h
2594
2595 fninit
2596 fld tword [A3]
2597 fld tword [A2]
2598 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2599 %1 st1
2600
2601 fnstsw word [A1]
2602 pushf
2603 pop xAX
2604
2605 fninit
2606 add xSP, 20h
2607 EPILOGUE_4_ARGS
2608ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2609%endmacro
2610
2611IEMIMPL_FPU_R80_BY_R80_EFL fcomi
2612IEMIMPL_FPU_R80_BY_R80_EFL fucomi
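
;
; fcomi/fucomi set ZF, PF and CF directly from the comparison, so the pushf / pop xAX
; sequence above returns the resulting EFLAGS in eax (rax on 64-bit hosts), matching
; the @returns note.
;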
2613
2614
2615;;
2616; FPU instruction working on one 80-bit floating point value.
2617;
2618; @param 1 The instruction
2619;
2620; @param A0 FPU context (fxsave).
2621; @param A1 Pointer to a IEMFPURESULT for the output.
2622; @param A2 Pointer to the 80-bit value.
2623;
2624%macro IEMIMPL_FPU_R80 1
2625BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
2626 PROLOGUE_3_ARGS
2627 sub xSP, 20h
2628
2629 fninit
2630 fld tword [A2]
2631 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2632 %1
2633
2634 fnstsw word [A1 + IEMFPURESULT.FSW]
2635 fnclex
2636 fstp tword [A1 + IEMFPURESULT.r80Result]
2637
2638 fninit
2639 add xSP, 20h
2640 EPILOGUE_3_ARGS
2641ENDPROC iemAImpl_ %+ %1 %+ _r80
2642%endmacro
2643
2644IEMIMPL_FPU_R80 fchs
2645IEMIMPL_FPU_R80 fabs
2646IEMIMPL_FPU_R80 f2xm1
2647IEMIMPL_FPU_R80 fyl2x
2648IEMIMPL_FPU_R80 fsqrt
2649IEMIMPL_FPU_R80 frndint
2650IEMIMPL_FPU_R80 fsin
2651IEMIMPL_FPU_R80 fcos
2652
2653
2654;;
2655; FPU instruction working on one 80-bit floating point value, only
2656; returning FSW.
2657;
2658; @param 1 The instruction
2659;
2660; @param A0 FPU context (fxsave).
2661; @param A1 Pointer to a uint16_t for the resulting FSW.
2662; @param A2 Pointer to the 80-bit value.
2663;
2664%macro IEMIMPL_FPU_R80_FSW 1
2665BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
2666 PROLOGUE_3_ARGS
2667 sub xSP, 20h
2668
2669 fninit
2670 fld tword [A2]
2671 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2672 %1
2673
2674 fnstsw word [A1]
2675
2676 fninit
2677 add xSP, 20h
2678 EPILOGUE_3_ARGS
2679ENDPROC iemAImpl_ %+ %1 %+ _r80
2680%endmacro
2681
2682IEMIMPL_FPU_R80_FSW ftst
2683IEMIMPL_FPU_R80_FSW fxam
2684
2685
2686
2687;;
2688; FPU instruction loading an 80-bit floating point constant.
2689;
2690; @param 1 The instruction
2691;
2692; @param A0 FPU context (fxsave).
2693; @param A1 Pointer to a IEMFPURESULT for the output.
2694;
2695%macro IEMIMPL_FPU_R80_CONST 1
2696BEGINPROC_FASTCALL iemAImpl_ %+ %1, 8
2697 PROLOGUE_2_ARGS
2698 sub xSP, 20h
2699
2700 fninit
2701 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2702 %1
2703
2704 fnstsw word [A1 + IEMFPURESULT.FSW]
2705 fnclex
2706 fstp tword [A1 + IEMFPURESULT.r80Result]
2707
2708 fninit
2709 add xSP, 20h
2710 EPILOGUE_2_ARGS
2711ENDPROC iemAImpl_ %+ %1 %+
2712%endmacro
2713
2714IEMIMPL_FPU_R80_CONST fld1
2715IEMIMPL_FPU_R80_CONST fldl2t
2716IEMIMPL_FPU_R80_CONST fldl2e
2717IEMIMPL_FPU_R80_CONST fldpi
2718IEMIMPL_FPU_R80_CONST fldlg2
2719IEMIMPL_FPU_R80_CONST fldln2
2720IEMIMPL_FPU_R80_CONST fldz
2721
2722
2723;;
2724; FPU instruction working on one 80-bit floating point value, outputting two.
2725;
2726; @param 1 The instruction
2727;
2728; @param A0 FPU context (fxsave).
2729; @param A1 Pointer to a IEMFPURESULTTWO for the output.
2730; @param A2 Pointer to the 80-bit value.
2731;
2732%macro IEMIMPL_FPU_R80_R80 1
2733BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_r80, 12
2734 PROLOGUE_3_ARGS
2735 sub xSP, 20h
2736
2737 fninit
2738 fld tword [A2]
2739 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2740 %1
2741
2742 fnstsw word [A1 + IEMFPURESULTTWO.FSW]
2743 fnclex
2744 fstp tword [A1 + IEMFPURESULTTWO.r80Result2]
2745 fnclex
2746 fstp tword [A1 + IEMFPURESULTTWO.r80Result1]
2747
2748 fninit
2749 add xSP, 20h
2750 EPILOGUE_3_ARGS
2751ENDPROC iemAImpl_ %+ %1 %+ _r80_r80
2752%endmacro
2753
2754IEMIMPL_FPU_R80_R80 fptan
2755IEMIMPL_FPU_R80_R80 fxtract
2756IEMIMPL_FPU_R80_R80 fsincos
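
;
; Result ordering: the value %1 leaves in ST0 (the 1.0 pushed by fptan, the cosine
; from fsincos, the significand from fxtract) is popped into r80Result2 first; the
; second fstp then stores the remaining value (tangent, sine or exponent) into
; r80Result1.
;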
2757
2758
2759
2760
2761;---------------------- SSE and MMX Operations ----------------------
2762
2763;; @todo what do we need to do for MMX?
2764%macro IEMIMPL_MMX_PROLOGUE 0
2765%endmacro
2766%macro IEMIMPL_MMX_EPILOGUE 0
2767%endmacro
2768
2769;; @todo what do we need to do for SSE?
2770%macro IEMIMPL_SSE_PROLOGUE 0
2771%endmacro
2772%macro IEMIMPL_SSE_EPILOGUE 0
2773%endmacro
2774
2775
2776;;
2777; Media instruction working on two full sized registers.
2778;
2779; @param 1 The instruction
2780;
2781; @param A0 FPU context (fxsave).
2782; @param A1 Pointer to the first media register size operand (input/output).
2783; @param A2 Pointer to the second media register size operand (input).
2784;
2785%macro IEMIMPL_MEDIA_F2 1
2786BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
2787 PROLOGUE_3_ARGS
2788 IEMIMPL_MMX_PROLOGUE
2789
2790 movq mm0, [A1]
2791 movq mm1, [A2]
2792 %1 mm0, mm1
2793 movq [A1], mm0
2794
2795 IEMIMPL_MMX_EPILOGUE
2796 EPILOGUE_3_ARGS
2797ENDPROC iemAImpl_ %+ %1 %+ _u64
2798
2799BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
2800 PROLOGUE_3_ARGS
2801 IEMIMPL_SSE_PROLOGUE
2802
2803 movdqu xmm0, [A1]
2804 movdqu xmm1, [A2]
2805 %1 xmm0, xmm1
2806 movdqu [A1], xmm0
2807
2808 IEMIMPL_SSE_EPILOGUE
2809 EPILOGUE_3_ARGS
2810ENDPROC iemAImpl_ %+ %1 %+ _u128
2811%endmacro
2812
2813IEMIMPL_MEDIA_F2 pxor
2814IEMIMPL_MEDIA_F2 pcmpeqb
2815IEMIMPL_MEDIA_F2 pcmpeqw
2816IEMIMPL_MEDIA_F2 pcmpeqd
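
;
; Note: A1 is both input and output for these helpers. The _u64 forms work through
; mm0/mm1, the _u128 forms through xmm0/xmm1 using movdqu, so neither A1 nor A2 needs
; to be 16-byte aligned.
;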
2817
2818
2819;;
2820; Media instruction working on one full sized and one half sized register (lower half).
2821;
2822; @param 1 The instruction
2823; @param 2 1 if MMX is included, 0 if not.
2824;
2825; @param A0 FPU context (fxsave).
2826; @param A1 Pointer to the first full sized media register operand (input/output).
2827; @param A2 Pointer to the second half sized media register operand (input).
2828;
2829%macro IEMIMPL_MEDIA_F1L1 2
2830 %if %2 != 0
2831BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
2832 PROLOGUE_3_ARGS
2833 IEMIMPL_MMX_PROLOGUE
2834
2835 movq mm0, [A1]
2836 movd mm1, [A2]
2837 %1 mm0, mm1
2838 movq [A1], mm0
2839
2840 IEMIMPL_MMX_EPILOGUE
2841 EPILOGUE_3_ARGS
2842ENDPROC iemAImpl_ %+ %1 %+ _u64
2843 %endif
2844
2845BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
2846 PROLOGUE_3_ARGS
2847 IEMIMPL_SSE_PROLOGUE
2848
2849 movdqu xmm0, [A1]
2850 movq xmm1, [A2]
2851 %1 xmm0, xmm1
2852 movdqu [A1], xmm0
2853
2854 IEMIMPL_SSE_EPILOGUE
2855 EPILOGUE_3_ARGS
2856ENDPROC iemAImpl_ %+ %1 %+ _u128
2857%endmacro
2858
2859IEMIMPL_MEDIA_F1L1 punpcklbw, 1
2860IEMIMPL_MEDIA_F1L1 punpcklwd, 1
2861IEMIMPL_MEDIA_F1L1 punpckldq, 1
2862IEMIMPL_MEDIA_F1L1 punpcklqdq, 0
2863
2864
2865;;
2866; Media instruction working on one full sized and one half sized register (high half).
2867;
2868; @param 1 The instruction
2869; @param 2 1 if MMX is included, 0 if not.
2870;
2871; @param A0 FPU context (fxsave).
2872; @param A1 Pointer to the first full sized media register operand (input/output).
2873; @param A2 Pointer to the second full sized media register operand, where we
2874; will only use the upper half (input).
2875;
2876%macro IEMIMPL_MEDIA_F1H1 2
2877 %if %2 != 0
2878BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
2879 PROLOGUE_3_ARGS
2880 IEMIMPL_MMX_PROLOGUE
2881
2882 movq mm0, [A1]
2883 movq mm1, [A2]
2884 %1 mm0, mm1
2885 movq [A1], mm0
2886
2887 IEMIMPL_MMX_EPILOGUE
2888 EPILOGUE_3_ARGS
2889ENDPROC iemAImpl_ %+ %1 %+ _u64
2890 %endif
2891
2892BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u128, 12
2893 PROLOGUE_3_ARGS
2894 IEMIMPL_SSE_PROLOGUE
2895
2896 movdqu xmm0, [A1]
2897 movdqu xmm1, [A2]
2898 %1 xmm0, xmm1
2899 movdqu [A1], xmm0
2900
2901 IEMIMPL_SSE_EPILOGUE
2902 EPILOGUE_3_ARGS
2903ENDPROC iemAImpl_ %+ %1 %+ _u128
2904%endmacro
2905
2906IEMIMPL_MEDIA_F1H1 punpckhbw, 1
2907IEMIMPL_MEDIA_F1H1 punpckhwd, 1
2908IEMIMPL_MEDIA_F1H1 punpckhdq, 1
2909IEMIMPL_MEDIA_F1H1 punpckhqdq, 0
2910
2911
2912;
2913; Shufflers with evil 8-bit immediates.
2914;
2915
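; The shuffle immediate has to be encoded into the instruction itself, so each helper
; below generates a table of 256 small 'shuffle + ret' stubs (.imm0 thru .imm255) and
; computes the address of the stub matching the immediate in A3 (5 bytes per stub for
; pshufw, 6 for the SSE variants) before calling into the table.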
2916BEGINPROC_FASTCALL iemAImpl_pshufw, 16
2917 PROLOGUE_4_ARGS
2918 IEMIMPL_MMX_PROLOGUE
2919
2920 movq mm0, [A1]
2921 movq mm1, [A2]
2922 lea T0, [A3 + A3*4] ; sizeof(pshufw+ret) == 5
2923 lea T1, [.imm0 xWrtRIP]
2924 lea T1, [T1 + T0]
2925 call T1
2926 movq [A1], mm0
2927
2928 IEMIMPL_MMX_EPILOGUE
2929 EPILOGUE_4_ARGS
2930%assign bImm 0
2931%rep 256
2932.imm %+ bImm:
2933 pshufw mm0, mm1, bImm
2934 ret
2935 %assign bImm bImm + 1
2936%endrep
2937.immEnd: ; 256*5 == 0x500
2938dw 0xfaff + (.immEnd - .imm0) ; will cause warning if entries are too big.
2939dw 0x104ff - (.immEnd - .imm0) ; will cause warning if entries are too small.
2940ENDPROC iemAImpl_pshufw
2941
2942
2943%macro IEMIMPL_MEDIA_SSE_PSHUFXX 1
2944BEGINPROC_FASTCALL iemAImpl_ %+ %1, 16
2945 PROLOGUE_4_ARGS
2946 IEMIMPL_SSE_PROLOGUE
2947
2948 movdqu xmm0, [A1]
2949 movdqu xmm1, [A2]
2950 lea T1, [.imm0 xWrtRIP]
2951 lea T0, [A3 + A3*2] ; sizeof(pshufXX+ret) == 6: (A3 * 3) *2
2952 lea T1, [T1 + T0*2]
2953 call T1
2954 movdqu [A1], xmm0
2955
2956 IEMIMPL_SSE_EPILOGUE
2957 EPILOGUE_4_ARGS
2958 %assign bImm 0
2959 %rep 256
2960.imm %+ bImm:
2961 %1 xmm0, xmm1, bImm
2962 ret
2963 %assign bImm bImm + 1
2964 %endrep
2965.immEnd: ; 256*6 == 0x600
2966dw 0xf9ff + (.immEnd - .imm0) ; will cause warning if entries are too big.
2967dw 0x105ff - (.immEnd - .imm0) ; will cause warning if entries are too small.
2968ENDPROC iemAImpl_ %+ %1
2969%endmacro
2970
2971IEMIMPL_MEDIA_SSE_PSHUFXX pshufhw
2972IEMIMPL_MEDIA_SSE_PSHUFXX pshuflw
2973IEMIMPL_MEDIA_SSE_PSHUFXX pshufd
2974
2975
2976;
2977; Move byte mask.
2978;
2979
2980BEGINPROC_FASTCALL iemAImpl_pmovmskb_u64, 12
2981 PROLOGUE_3_ARGS
2982 IEMIMPL_MMX_PROLOGUE
2983
2984 mov T0, [A1]
2985 movq mm1, [A2]
2986 pmovmskb T0, mm1
2987 mov [A1], T0
2988%ifdef RT_ARCH_X86
2989 mov dword [A1 + 4], 0
2990%endif
2991 IEMIMPL_MMX_EPILOGUE
2992 EPILOGUE_3_ARGS
2993ENDPROC iemAImpl_pmovmskb_u64
2994
2995BEGINPROC_FASTCALL iemAImpl_pmovmskb_u128, 12
2996 PROLOGUE_3_ARGS
2997 IEMIMPL_SSE_PROLOGUE
2998
2999 mov T0, [A1]
3000 movdqu xmm1, [A2]
3001 pmovmskb T0, xmm1
3002 mov [A1], T0
3003%ifdef RT_ARCH_X86
3004 mov dword [A1 + 4], 0
3005%endif
3006 IEMIMPL_SSE_EPILOGUE
3007 EPILOGUE_3_ARGS
3008ENDPROC iemAImpl_pmovmskb_u128
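
;
; Note: pmovmskb only produces a small mask (8 bits from an MMX register, 16 bits from
; an SSE register), and on 32-bit hosts the upper dword of the 64-bit destination at A1
; is explicitly zeroed above since the scratch register is only 32 bits wide there.
;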
3009