VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm@ 45991

Last change on this file since 45991 was 42720, checked in by vboxsync, 12 years ago

IEM: Fixed BSF and BSR bug where they would set the destination register when the source was zero.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 68.6 KB
Line 
1; $Id: IEMAllAImpl.asm 42720 2012-08-09 17:25:25Z vboxsync $
2;; @file
3; IEM - Instruction Implementation in Assembly.
4;
5
6; Copyright (C) 2011-2012 Oracle Corporation
7;
8; This file is part of VirtualBox Open Source Edition (OSE), as
9; available from http://www.virtualbox.org. This file is free software;
10; you can redistribute it and/or modify it under the terms of the GNU
11; General Public License (GPL) as published by the Free Software
12; Foundation, in version 2 as it comes in the "COPYING" file of the
13; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15;
16
17
18;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19; Header Files ;
20;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
21%include "VBox/asmdefs.mac"
22%include "VBox/err.mac"
23%include "iprt/x86.mac"
24
25
26;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27; Defined Constants And Macros ;
28;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30;;
31; RET XX / RET wrapper for fastcall.
32; @param 1 The byte count handed to 'ret' in the one configuration that uses it.
33%macro RET_FASTCALL 1
34%ifdef RT_ARCH_X86
35 %ifdef RT_OS_WINDOWS
36 ret %1 ; RT_ARCH_X86 + RT_OS_WINDOWS: return and pop %1 bytes off the stack.
37 %else
38 ret ; other RT_ARCH_X86 hosts: plain return, %1 is ignored.
39 %endif
40%else
41 ret ; RT_ARCH_X86 not defined: plain return, %1 is ignored.
42%endif
43%endmacro
44
45;;
46; NAME for fastcall functions.
47; Expands to NAME(a_Name) unless both RT_ARCH_X86 and RT_OS_WINDOWS are defined.
48;; @todo 'global @fastcall@12' is still broken in yasm and requires dollar
49; escaping (or whatever the dollar is good for here). Thus the ugly
50; prefix argument.
51; With RT_ARCH_X86 + RT_OS_WINDOWS the result is a_Prefix, a_Name, '@' and a_cbArgs pasted together.
52%define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) NAME(a_Name)
53%ifdef RT_ARCH_X86
54 %ifdef RT_OS_WINDOWS
55 %undef NAME_FASTCALL
56 %define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) a_Prefix %+ a_Name %+ @ %+ a_cbArgs
57 %endif
58%endif
59
60;;
61; BEGINPROC for fastcall functions.
62; Emits the export/global directives selected by the output format, then the entry label.
63; @param 1 The function name (C).
64; @param 2 The argument size on x86.
65;
66%macro BEGINPROC_FASTCALL 2
67 %ifdef ASM_FORMAT_PE ; PE output: emit an export directive tying %1 to the NAME_FASTCALL name.
68 export %1=NAME_FASTCALL(%1,%2,$@)
69 %endif
70 %ifdef __NASM__
71 %ifdef ASM_FORMAT_OMF ; NASM with OMF output: two-operand export form.
72 export NAME(%1) NAME_FASTCALL(%1,%2,$@)
73 %endif
74 %endif
75 %ifndef ASM_FORMAT_BIN ; every format except ASM_FORMAT_BIN declares the symbol global.
76 global NAME_FASTCALL(%1,%2,$@)
77 %endif
78NAME_FASTCALL(%1,%2,@): ; the entry label; note the '@' prefix here versus '$@' in the directives above.
79%endmacro
80
81
82;
83; We employ some macro assembly here to hide the calling convention differences.
84;
85%ifdef RT_ARCH_AMD64
86 %macro PROLOGUE_1_ARGS 0
87 %endmacro
88 %macro EPILOGUE_1_ARGS 0
89 ret ; AMD64: arguments arrive in registers, nothing to restore or pop.
90 %endmacro
91 %macro EPILOGUE_1_ARGS_EX 0-1
92 ret ; the optional byte count is accepted for parity with the x86 variant and ignored here.
93 %endmacro
94
95 %macro PROLOGUE_2_ARGS 0
96 %endmacro
97 %macro EPILOGUE_2_ARGS 0
98 ret
99 %endmacro
100 %macro EPILOGUE_2_ARGS_EX 1
101 ret ; %1 (x86 stack byte count) is ignored on AMD64.
102 %endmacro
103
104 %macro PROLOGUE_3_ARGS 0
105 %endmacro
106 %macro EPILOGUE_3_ARGS 0
107 ret
108 %endmacro
109 %macro EPILOGUE_3_ARGS_EX 1
110 ret ; %1 (x86 stack byte count) is ignored on AMD64.
111 %endmacro
112
113 %macro PROLOGUE_4_ARGS 0
114 %endmacro
115 %macro EPILOGUE_4_ARGS 0
116 ret
117 %endmacro
118 %macro EPILOGUE_4_ARGS_EX 1
119 ret ; %1 (x86 stack byte count) is ignored on AMD64.
120 %endmacro
121
122 %ifdef ASM_CALL64_GCC ; argument registers A0..A3 = rdi, rsi, rdx, rcx.
123 %define A0 rdi
124 %define A0_32 edi
125 %define A0_16 di
126 %define A0_8 dil
127
128 %define A1 rsi
129 %define A1_32 esi
130 %define A1_16 si
131 %define A1_8 sil
132
133 %define A2 rdx
134 %define A2_32 edx
135 %define A2_16 dx
136 %define A2_8 dl
137
138 %define A3 rcx
139 %define A3_32 ecx
140 %define A3_16 cx
141 %endif ; note: no A3_8 alias is defined.
142
143 %ifdef ASM_CALL64_MSC ; argument registers A0..A3 = rcx, rdx, r8, r9.
144 %define A0 rcx
145 %define A0_32 ecx
146 %define A0_16 cx
147 %define A0_8 cl
148
149 %define A1 rdx
150 %define A1_32 edx
151 %define A1_16 dx
152 %define A1_8 dl
153
154 %define A2 r8
155 %define A2_32 r8d
156 %define A2_16 r8w
157 %define A2_8 r8b
158
159 %define A3 r9
160 %define A3_32 r9d
161 %define A3_16 r9w
162 %endif ; note: no A3_8 alias is defined.
163 ; T0 (rax) and T1 (r11) are the temporaries clobbered by the flag load/save macros.
164 %define T0 rax
165 %define T0_32 eax
166 %define T0_16 ax
167 %define T0_8 al
168
169 %define T1 r11
170 %define T1_32 r11d
171 %define T1_16 r11w
172 %define T1_8 r11b
173
174%else
175 ; x86: A0/A1 are ecx/edx; the 3- and 4-argument prologues load A2/A3 from the stack into ebx/esi.
176 %macro PROLOGUE_1_ARGS 0
177 push edi ; edi doubles as T1 (see the aliases below), so preserve the caller's value.
178 %endmacro
179 %macro EPILOGUE_1_ARGS 0
180 pop edi
181 ret 0
182 %endmacro
183 %macro EPILOGUE_1_ARGS_EX 1
184 pop edi
185 ret %1 ; pop %1 bytes of stack on return.
186 %endmacro
187
188 %macro PROLOGUE_2_ARGS 0
189 push edi ; preserve edi (T1).
190 %endmacro
191 %macro EPILOGUE_2_ARGS 0
192 pop edi
193 ret 0
194 %endmacro
195 %macro EPILOGUE_2_ARGS_EX 1
196 pop edi
197 ret %1 ; pop %1 bytes of stack on return.
198 %endmacro
199
200 %macro PROLOGUE_3_ARGS 0
201 push ebx ; preserve ebx before it becomes A2.
202 mov ebx, [esp + 4 + 4] ; A2 = dword above the saved ebx (4) and the return address (4).
203 push edi ; preserve edi (T1).
204 %endmacro
205 %macro EPILOGUE_3_ARGS_EX 1
206 %if (%1) < 4
207 %error "With three args, at least 4 bytes must be remove from the stack upon return (32-bit)."
208 %endif
209 pop edi ; restore in reverse order of PROLOGUE_3_ARGS.
210 pop ebx
211 ret %1
212 %endmacro
213 %macro EPILOGUE_3_ARGS 0
214 EPILOGUE_3_ARGS_EX 4 ; default: one 4-byte stack argument to pop.
215 %endmacro
216
217 %macro PROLOGUE_4_ARGS 0
218 push ebx ; preserve ebx (becomes A2).
219 push edi ; preserve edi (T1).
220 push esi ; preserve esi (becomes A3).
221 mov ebx, [esp + 12 + 4 + 0] ; A2 = first dword above the three saved registers (12) and return address (4).
222 mov esi, [esp + 12 + 4 + 4] ; A3 = the dword after that.
223 %endmacro
224 %macro EPILOGUE_4_ARGS_EX 1
225 %if (%1) < 8
226 %error "With four args, at least 8 bytes must be remove from the stack upon return (32-bit)."
227 %endif
228 pop esi ; restore in reverse order of PROLOGUE_4_ARGS.
229 pop edi
230 pop ebx
231 ret %1
232 %endmacro
233 %macro EPILOGUE_4_ARGS 0
234 EPILOGUE_4_ARGS_EX 8 ; default: two 4-byte stack arguments to pop.
235 %endmacro
236 ; Argument register aliases: A0 = ecx, A1 = edx, A2 = ebx, A3 = esi.
237 %define A0 ecx
238 %define A0_32 ecx
239 %define A0_16 cx
240 %define A0_8 cl
241
242 %define A1 edx
243 %define A1_32 edx
244 %define A1_16 dx
245 %define A1_8 dl
246
247 %define A2 ebx
248 %define A2_32 ebx
249 %define A2_16 bx
250 %define A2_8 bl
251
252 %define A3 esi
253 %define A3_32 esi
254 %define A3_16 si
255 ; Temporaries: T0 = eax, T1 = edi (no T1_8 alias is defined here).
256 %define T0 eax
257 %define T0_32 eax
258 %define T0_16 ax
259 %define T0_8 al
260
261 %define T1 edi
262 %define T1_32 edi
263 %define T1_16 di
264%endif
265
266
267;;
268; Load the modified (%2) and undefined (%3) guest flags from [%1] into the host EFLAGS.
269; The '%if (%3) != 0' guard below is commented out, so the load is always emitted.
270; @remarks Clobbers T0, stack. Changes EFLAGS.
271; @remarks All other host flag bits are kept as they were.
272; @param 1 The parameter (A0..A3) pointing to the eflags.
273; @param 2 The set of modified flags.
274; @param 3 The set of undefined flags.
275;
276%macro IEM_MAYBE_LOAD_FLAGS 3
277 ;%if (%3) != 0
278 pushf ; store current flags
279 mov T0_32, [%1] ; load the guest flags
280 and dword [xSP], ~(%2 | %3) ; mask out the modified and undefined flags
281 and T0_32, (%2 | %3) ; select the modified and undefined flags.
282 or [xSP], T0 ; merge guest flags with host flags.
283 popf ; load the mixed flags.
284 ;%endif
285%endmacro
286
287;;
288; Update the flags at [%1] with the modified (%2) and undefined (%3) bits of the host EFLAGS.
289; Emits no code at all when (%2 | %3) is zero.
290; @remarks Clobbers T0, T1, stack.
291; @param 1 The register pointing to the EFLAGS.
292; @param 2 The mask of modified flags to save.
293; @param 3 The mask of undefined flags to (maybe) save.
294;
295%macro IEM_SAVE_FLAGS 3
296 %if (%2 | %3) != 0
297 pushf ; capture the host flags produced by the emulated instruction...
298 pop T1 ; ...into T1.
299 mov T0_32, [%1] ; flags
300 and T0_32, ~(%2 | %3) ; clear the modified & undefined flags.
301 and T1_32, (%2 | %3) ; select the modified and undefined flags.
302 or T0_32, T1_32 ; combine the flags.
303 mov [%1], T0_32 ; save the flags.
304 %endif
305%endmacro
306
307
308;;
309; Macro for implementing a binary operator.
310;
311; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
312; variants, except on 32-bit systems where the 64-bit accesses require hand
313; coding (those are int3 stubs for now).
314;
315; All the functions take a pointer to the destination memory operand in A0,
316; the source register operand in A1 and a pointer to eflags in A2.
317;
318; @param 1 The instruction mnemonic.
319; @param 2 Non-zero if there should be a locked version.
320; @param 3 The modified flags.
321; @param 4 The undefined flags.
322;
323%macro IEMIMPL_BIN_OP 4
324BEGINCODE
325BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
326 PROLOGUE_3_ARGS
327 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
328 %1 byte [A0], A1_8
329 IEM_SAVE_FLAGS A2, %3, %4
330 EPILOGUE_3_ARGS
331ENDPROC iemAImpl_ %+ %1 %+ _u8
332
333BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
334 PROLOGUE_3_ARGS
335 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
336 %1 word [A0], A1_16
337 IEM_SAVE_FLAGS A2, %3, %4
338 EPILOGUE_3_ARGS
339ENDPROC iemAImpl_ %+ %1 %+ _u16
340
341BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
342 PROLOGUE_3_ARGS
343 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
344 %1 dword [A0], A1_32
345 IEM_SAVE_FLAGS A2, %3, %4
346 EPILOGUE_3_ARGS
347ENDPROC iemAImpl_ %+ %1 %+ _u32
348
349 %ifdef RT_ARCH_AMD64
350BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
351 PROLOGUE_3_ARGS
352 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
353 %1 qword [A0], A1
354 IEM_SAVE_FLAGS A2, %3, %4
355 EPILOGUE_3_ARGS_EX 8
356ENDPROC iemAImpl_ %+ %1 %+ _u64
357 %else ; stub it for now - later, replace with hand coded stuff.
358BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
359 int3
360 ret 8 ; 16 argument bytes, 8 of them on the stack - same as the locked stub below.
361ENDPROC iemAImpl_ %+ %1 %+ _u64
362 %endif ; !RT_ARCH_AMD64
363
364 %if %2 != 0 ; locked versions requested?
365
366BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 12
367 PROLOGUE_3_ARGS
368 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
369 lock %1 byte [A0], A1_8
370 IEM_SAVE_FLAGS A2, %3, %4
371 EPILOGUE_3_ARGS
372ENDPROC iemAImpl_ %+ %1 %+ _u8_locked
373
374BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
375 PROLOGUE_3_ARGS
376 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
377 lock %1 word [A0], A1_16
378 IEM_SAVE_FLAGS A2, %3, %4
379 EPILOGUE_3_ARGS
380ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
381
382BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
383 PROLOGUE_3_ARGS
384 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
385 lock %1 dword [A0], A1_32
386 IEM_SAVE_FLAGS A2, %3, %4
387 EPILOGUE_3_ARGS
388ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
389
390 %ifdef RT_ARCH_AMD64
391BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
392 PROLOGUE_3_ARGS
393 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
394 lock %1 qword [A0], A1
395 IEM_SAVE_FLAGS A2, %3, %4
396 EPILOGUE_3_ARGS_EX 8
397ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
398 %else ; stub it for now - later, replace with hand coded stuff.
399BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
400 int3
401 ret 8
402ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
403 %endif ; !RT_ARCH_AMD64
404 %endif ; locked
405%endmacro
406
407; instr, lock, modified-flags, undefined-flags.
408IEMIMPL_BIN_OP add, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
409IEMIMPL_BIN_OP adc, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
410IEMIMPL_BIN_OP sub, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
411IEMIMPL_BIN_OP sbb, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
412IEMIMPL_BIN_OP or, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF,
413IEMIMPL_BIN_OP xor, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF,
414IEMIMPL_BIN_OP and, 1, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF,
415IEMIMPL_BIN_OP cmp, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
416IEMIMPL_BIN_OP test, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), X86_EFL_AF,
417
418
419;;
420; Macro for implementing a bit operator.
421;
422; This will generate code for the 16, 32 and 64 bit accesses with locked
423; variants, except on 32-bit systems where the 64-bit accesses require hand
424; coding (those are int3 stubs for now).
425;
426; All the functions take a pointer to the destination memory operand in A0,
427; the source register operand in A1 and a pointer to eflags in A2.
428;
429; @param 1 The instruction mnemonic.
430; @param 2 Non-zero if there should be a locked version.
431; @param 3 The modified flags.
432; @param 4 The undefined flags.
433;
434%macro IEMIMPL_BIT_OP 4
435BEGINCODE
436BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
437 PROLOGUE_3_ARGS
438 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
439 %1 word [A0], A1_16
440 IEM_SAVE_FLAGS A2, %3, %4
441 EPILOGUE_3_ARGS
442ENDPROC iemAImpl_ %+ %1 %+ _u16
443
444BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
445 PROLOGUE_3_ARGS
446 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
447 %1 dword [A0], A1_32
448 IEM_SAVE_FLAGS A2, %3, %4
449 EPILOGUE_3_ARGS
450ENDPROC iemAImpl_ %+ %1 %+ _u32
451
452 %ifdef RT_ARCH_AMD64
453BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
454 PROLOGUE_3_ARGS
455 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
456 %1 qword [A0], A1
457 IEM_SAVE_FLAGS A2, %3, %4
458 EPILOGUE_3_ARGS_EX 8
459ENDPROC iemAImpl_ %+ %1 %+ _u64
460 %else ; stub it for now - later, replace with hand coded stuff.
461BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
462 int3
463 ret 8
464ENDPROC iemAImpl_ %+ %1 %+ _u64
465 %endif ; !RT_ARCH_AMD64
466
467 %if %2 != 0 ; locked versions requested?
468
469BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
470 PROLOGUE_3_ARGS
471 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
472 lock %1 word [A0], A1_16
473 IEM_SAVE_FLAGS A2, %3, %4
474 EPILOGUE_3_ARGS
475ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
476
477BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
478 PROLOGUE_3_ARGS
479 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
480 lock %1 dword [A0], A1_32
481 IEM_SAVE_FLAGS A2, %3, %4
482 EPILOGUE_3_ARGS
483ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
484
485 %ifdef RT_ARCH_AMD64
486BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
487 PROLOGUE_3_ARGS
488 IEM_MAYBE_LOAD_FLAGS A2, %3, %4
489 lock %1 qword [A0], A1
490 IEM_SAVE_FLAGS A2, %3, %4
491 EPILOGUE_3_ARGS_EX 8
492ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
493 %else ; stub it for now - later, replace with hand coded stuff.
494BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
495 int3
496 ret 8
497ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
498 %endif ; !RT_ARCH_AMD64
499 %endif ; locked
500%endmacro
501IEMIMPL_BIT_OP bt, 0, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
502IEMIMPL_BIT_OP btc, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
503IEMIMPL_BIT_OP bts, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
504IEMIMPL_BIT_OP btr, 1, (X86_EFL_CF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
505
506;;
507; Macro for implementing a bit search operator.
508; (Three-parameter overload; it shares the name IEMIMPL_BIT_OP with the four-parameter macro.)
509; This will generate code for the 16, 32 and 64 bit accesses, except on 32-bit
510; systems where the 64-bit accesses require hand coding.
511;
512; All the functions take a pointer to the destination memory operand in A0,
513; the source register operand in A1 and a pointer to eflags in A2.
514; The destination is only written when the instruction leaves ZF clear.
515; @param 1 The instruction mnemonic.
516; @param 2 The modified flags.
517; @param 3 The undefined flags.
518;
519%macro IEMIMPL_BIT_OP 3
520BEGINCODE
521BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
522 PROLOGUE_3_ARGS
523 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
524 %1 T0_16, A1_16 ; result goes to a temporary first.
525 jz .unchanged_dst ; ZF set: skip the store and leave [A0] untouched.
526 mov [A0], T0_16
527.unchanged_dst:
528 IEM_SAVE_FLAGS A2, %2, %3
529 EPILOGUE_3_ARGS
530ENDPROC iemAImpl_ %+ %1 %+ _u16
531
532BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
533 PROLOGUE_3_ARGS
534 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
535 %1 T0_32, A1_32 ; result goes to a temporary first.
536 jz .unchanged_dst ; ZF set: skip the store and leave [A0] untouched.
537 mov [A0], T0_32
538.unchanged_dst:
539 IEM_SAVE_FLAGS A2, %2, %3
540 EPILOGUE_3_ARGS
541ENDPROC iemAImpl_ %+ %1 %+ _u32
542
543 %ifdef RT_ARCH_AMD64
544BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
545 PROLOGUE_3_ARGS
546 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
547 %1 T0, A1 ; result goes to a temporary first.
548 jz .unchanged_dst ; ZF set: skip the store and leave [A0] untouched.
549 mov [A0], T0
550.unchanged_dst:
551 IEM_SAVE_FLAGS A2, %2, %3
552 EPILOGUE_3_ARGS_EX 8
553ENDPROC iemAImpl_ %+ %1 %+ _u64
554 %else ; stub it for now - later, replace with hand coded stuff.
555BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
556 int3
557 ret 8
558ENDPROC iemAImpl_ %+ %1 %+ _u64
559 %endif ; !RT_ARCH_AMD64
560%endmacro
561IEMIMPL_BIT_OP bsf, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
562IEMIMPL_BIT_OP bsr, (X86_EFL_ZF), (X86_EFL_OF | X86_EFL_SF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
563
564
565;
566; IMUL is also a similar but yet different case (no lock, no mem dst).
567; The rDX:rAX variant of imul is handled together with mul further down.
568;
569BEGINCODE
570BEGINPROC_FASTCALL iemAImpl_imul_two_u16, 12 ; A0 = pointer to the 16-bit destination; A1 = multiplier value; A2 = pointer to eflags.
571 PROLOGUE_3_ARGS
572 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
573 imul A1_16, word [A0] ; A1_16 = A1_16 * [A0]
574 mov [A0], A1_16 ; write the product back to the destination.
575 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
576 EPILOGUE_3_ARGS
577ENDPROC iemAImpl_imul_two_u16
578
579BEGINPROC_FASTCALL iemAImpl_imul_two_u32, 12 ; A0 = pointer to the 32-bit destination; A1 = multiplier value; A2 = pointer to eflags.
580 PROLOGUE_3_ARGS
581 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
582 imul A1_32, dword [A0] ; A1_32 = A1_32 * [A0]
583 mov [A0], A1_32 ; write the product back to the destination.
584 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
585 EPILOGUE_3_ARGS
586ENDPROC iemAImpl_imul_two_u32
587
588BEGINPROC_FASTCALL iemAImpl_imul_two_u64, 16 ; A0 = pointer to the 64-bit destination; A1 = multiplier value; A2 = pointer to eflags.
589 PROLOGUE_3_ARGS
590%ifdef RT_ARCH_AMD64
591 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
592 imul A1, qword [A0] ; A1 = A1 * [A0]
593 mov [A0], A1 ; write the product back to the destination.
594 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
595%else
596 int3 ;; @todo implement me
597%endif
598 EPILOGUE_3_ARGS_EX 8 ; shared by both branches; the byte count only matters on 32-bit x86.
599ENDPROC iemAImpl_imul_two_u64
600
601
602;
603; XCHG for memory operands. This implies locking. No flag changes.
604;
605; Each function takes two arguments, first the pointer to the memory,
606; then the pointer to the register. They all return void.
607;
608BEGINCODE
609BEGINPROC_FASTCALL iemAImpl_xchg_u8, 8 ; A0 = pointer to the memory operand; A1 = pointer to the register value.
610 PROLOGUE_2_ARGS
611 mov T0_8, [A1] ; fetch the register value.
612 xchg [A0], T0_8 ; swap it with memory; T0_8 now holds the old memory value.
613 mov [A1], T0_8 ; hand the old memory value back through A1.
614 EPILOGUE_2_ARGS
615ENDPROC iemAImpl_xchg_u8
616
617BEGINPROC_FASTCALL iemAImpl_xchg_u16, 8 ; A0 = pointer to the memory operand; A1 = pointer to the register value.
618 PROLOGUE_2_ARGS
619 mov T0_16, [A1] ; fetch the register value.
620 xchg [A0], T0_16 ; swap it with memory; T0_16 now holds the old memory value.
621 mov [A1], T0_16 ; hand the old memory value back through A1.
622 EPILOGUE_2_ARGS
623ENDPROC iemAImpl_xchg_u16
624
625BEGINPROC_FASTCALL iemAImpl_xchg_u32, 8 ; A0 = pointer to the memory operand; A1 = pointer to the register value.
626 PROLOGUE_2_ARGS
627 mov T0_32, [A1] ; fetch the register value.
628 xchg [A0], T0_32 ; swap it with memory; T0_32 now holds the old memory value.
629 mov [A1], T0_32 ; hand the old memory value back through A1.
630 EPILOGUE_2_ARGS
631ENDPROC iemAImpl_xchg_u32
632
633BEGINPROC_FASTCALL iemAImpl_xchg_u64, 8 ; A0 = pointer to the memory operand; A1 = pointer to the register value.
634%ifdef RT_ARCH_AMD64
635 PROLOGUE_2_ARGS
636 mov T0, [A1] ; fetch the register value.
637 xchg [A0], T0 ; swap it with memory; T0 now holds the old memory value.
638 mov [A1], T0 ; hand the old memory value back through A1.
639 EPILOGUE_2_ARGS
640%else
641 int3 ; not implemented when RT_ARCH_AMD64 is undefined: trap.
642 ret 0
643%endif
644ENDPROC iemAImpl_xchg_u64
645
646
647;
648; XADD for memory operands.
649;
650; Each function takes three arguments, first the pointer to the
651; memory/register, then the pointer to the register, and finally a pointer to
652; eflags. They all return void.
653;
654BEGINCODE
655BEGINPROC_FASTCALL iemAImpl_xadd_u8, 12 ; A0 = pointer to the destination; A1 = pointer to the register value; A2 = pointer to eflags.
656 PROLOGUE_3_ARGS
657 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
658 mov T0_8, [A1] ; fetch the register value.
659 xadd [A0], T0_8 ; [A0] += T0_8; T0_8 receives the previous [A0].
660 mov [A1], T0_8 ; hand the previous destination value back through A1.
661 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
662 EPILOGUE_3_ARGS
663ENDPROC iemAImpl_xadd_u8
664
665BEGINPROC_FASTCALL iemAImpl_xadd_u16, 12 ; A0 = pointer to the destination; A1 = pointer to the register value; A2 = pointer to eflags.
666 PROLOGUE_3_ARGS
667 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
668 mov T0_16, [A1] ; fetch the register value.
669 xadd [A0], T0_16 ; [A0] += T0_16; T0_16 receives the previous [A0].
670 mov [A1], T0_16 ; hand the previous destination value back through A1.
671 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
672 EPILOGUE_3_ARGS
673ENDPROC iemAImpl_xadd_u16
674
675BEGINPROC_FASTCALL iemAImpl_xadd_u32, 12 ; A0 = pointer to the destination; A1 = pointer to the register value; A2 = pointer to eflags.
676 PROLOGUE_3_ARGS
677 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
678 mov T0_32, [A1] ; fetch the register value.
679 xadd [A0], T0_32 ; [A0] += T0_32; T0_32 receives the previous [A0].
680 mov [A1], T0_32 ; hand the previous destination value back through A1.
681 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
682 EPILOGUE_3_ARGS
683ENDPROC iemAImpl_xadd_u32
684
685BEGINPROC_FASTCALL iemAImpl_xadd_u64, 12 ; A0 = pointer to the destination; A1 = pointer to the register value; A2 = pointer to eflags.
686%ifdef RT_ARCH_AMD64
687 PROLOGUE_3_ARGS
688 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
689 mov T0, [A1] ; fetch the register value.
690 xadd [A0], T0 ; [A0] += T0; T0 receives the previous [A0].
691 mov [A1], T0 ; hand the previous destination value back through A1.
692 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
693 EPILOGUE_3_ARGS
694%else
695 int3 ; not implemented when RT_ARCH_AMD64 is undefined: trap.
696 ret 4
697%endif
698ENDPROC iemAImpl_xadd_u64
699
700BEGINPROC_FASTCALL iemAImpl_xadd_u8_locked, 12 ; same contract as iemAImpl_xadd_u8 but with a lock prefix on the xadd.
701 PROLOGUE_3_ARGS
702 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
703 mov T0_8, [A1] ; fetch the register value.
704 lock xadd [A0], T0_8 ; [A0] += T0_8; T0_8 receives the previous [A0].
705 mov [A1], T0_8 ; hand the previous destination value back through A1.
706 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
707 EPILOGUE_3_ARGS
708ENDPROC iemAImpl_xadd_u8_locked
709
710BEGINPROC_FASTCALL iemAImpl_xadd_u16_locked, 12 ; same contract as iemAImpl_xadd_u16 but with a lock prefix on the xadd.
711 PROLOGUE_3_ARGS
712 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
713 mov T0_16, [A1] ; fetch the register value.
714 lock xadd [A0], T0_16 ; [A0] += T0_16; T0_16 receives the previous [A0].
715 mov [A1], T0_16 ; hand the previous destination value back through A1.
716 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
717 EPILOGUE_3_ARGS
718ENDPROC iemAImpl_xadd_u16_locked
719
720BEGINPROC_FASTCALL iemAImpl_xadd_u32_locked, 12 ; same contract as iemAImpl_xadd_u32 but with a lock prefix on the xadd.
721 PROLOGUE_3_ARGS
722 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
723 mov T0_32, [A1] ; fetch the register value.
724 lock xadd [A0], T0_32 ; [A0] += T0_32; T0_32 receives the previous [A0].
725 mov [A1], T0_32 ; hand the previous destination value back through A1.
726 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
727 EPILOGUE_3_ARGS
728ENDPROC iemAImpl_xadd_u32_locked
729
730BEGINPROC_FASTCALL iemAImpl_xadd_u64_locked, 12 ; same contract as iemAImpl_xadd_u64 but with a lock prefix on the xadd.
731%ifdef RT_ARCH_AMD64
732 PROLOGUE_3_ARGS
733 IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
734 mov T0, [A1] ; fetch the register value.
735 lock xadd [A0], T0 ; [A0] += T0; T0 receives the previous [A0].
736 mov [A1], T0 ; hand the previous destination value back through A1.
737 IEM_SAVE_FLAGS A2, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
738 EPILOGUE_3_ARGS
739%else
740 int3 ; not implemented when RT_ARCH_AMD64 is undefined: trap.
741 ret 4
742%endif
743ENDPROC iemAImpl_xadd_u64_locked
744
745
746;
747; CMPXCHG8B.
748;
749; These are tricky register wise, so the code is duplicated for each calling
750; convention.
751;
752; WARNING! This code makes ASSUMPTIONS about which registers T1 and T0 are mapped to!
753;
754; C-proto:
755; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
756; uint32_t *pEFlags));
757;
758BEGINCODE
759BEGINPROC_FASTCALL iemAImpl_cmpxchg8b, 16 ; all three variants below use a lock prefix on cmpxchg8b.
760%ifdef RT_ARCH_AMD64
761 %ifdef ASM_CALL64_MSC
762 push rbx ; rbx is overwritten with the low half of *pu64EbxEcx below.
763
764 mov r11, rdx ; pu64EaxEdx (is also T1)
765 mov r10, rcx ; pu64Dst
766
767 mov ebx, [r8] ; ecx:ebx = *pu64EbxEcx (the replacement value).
768 mov ecx, [r8 + 4]
769 IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
770 mov eax, [r11] ; edx:eax = *pu64EaxEdx (the comparand); loaded after the flags macro clobbered eax.
771 mov edx, [r11 + 4]
772
773 lock cmpxchg8b [r10]
774
775 mov [r11], eax ; write edx:eax back before IEM_SAVE_FLAGS clobbers r11.
776 mov [r11 + 4], edx
777 IEM_SAVE_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
778
779 pop rbx
780 ret
781 %else
782 push rbx ; rbx is overwritten with the low half of *pu64EbxEcx below.
783
784 mov r10, rcx ; pEFlags
785 mov r11, rdx ; pu64EbxEcx (is also T1)
786
787 mov ebx, [r11] ; ecx:ebx = *pu64EbxEcx (the replacement value).
788 mov ecx, [r11 + 4]
789 IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
790 mov eax, [rsi] ; edx:eax = *pu64EaxEdx (the comparand).
791 mov edx, [rsi + 4]
792
793 lock cmpxchg8b [rdi]
794
795 mov [rsi], eax ; write edx:eax back to *pu64EaxEdx.
796 mov [rsi + 4], edx
797 IEM_SAVE_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11)
798
799 pop rbx
800 ret
801
802 %endif
803%else
804 push esi ; four registers saved: the stack arguments start at esp + 16 + 4.
805 push edi
806 push ebx
807 push ebp
808
809 mov edi, ecx ; pu64Dst
810 mov esi, edx ; pu64EaxEdx
811 mov ecx, [esp + 16 + 4 + 0] ; pu64EbxEcx
812 mov ebp, [esp + 16 + 4 + 4] ; pEFlags
813
814 mov ebx, [ecx] ; ecx:ebx = *pu64EbxEcx; ebx first because ecx is the pointer.
815 mov ecx, [ecx + 4]
816 IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
817 mov eax, [esi] ; edx:eax = *pu64EaxEdx (the comparand).
818 mov edx, [esi + 4]
819
820 lock cmpxchg8b [edi]
821
822 mov [esi], eax ; write edx:eax back to *pu64EaxEdx.
823 mov [esi + 4], edx
824 IEM_SAVE_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, edi)
825
826 pop ebp
827 pop ebx
828 pop edi
829 pop esi
830 ret 8 ; pop the two stack arguments.
831%endif
832ENDPROC iemAImpl_cmpxchg8b
833
834BEGINPROC_FASTCALL iemAImpl_cmpxchg8b_locked, 16
835 ; Lazy bird always lock prefixes cmpxchg8b, so the locked variant just jumps to the plain one.
836 jmp NAME_FASTCALL(iemAImpl_cmpxchg8b,16,$@)
837ENDPROC iemAImpl_cmpxchg8b_locked
838
839
840
841;
842; CMPXCHG.
843;
844; WARNING! This code makes ASSUMPTIONS about which registers T1 and T0 are mapped to!
845; @param 1 Instruction prefix for cmpxchg ('lock' or empty). @param 2 Function name suffix.
846; C-proto (the accumulator is passed by pointer and updated in place):
847; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg,(uintX_t *puXDst, uintX_t *puEax, uintX_t uReg, uint32_t *pEFlags));
848; On 32-bit hosts the 64-bit variant receives uReg by pointer and always uses a locked cmpxchg8b.
849BEGINCODE
850%macro IEMIMPL_CMPXCHG 2
851BEGINPROC_FASTCALL iemAImpl_cmpxchg_u8 %+ %2, 16
852 PROLOGUE_4_ARGS
853 IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
854 mov al, [A1] ; load the accumulator after the flags macro has clobbered eax.
855 %1 cmpxchg [A0], A2_8
856 mov [A1], al ; write the (possibly updated) accumulator back.
857 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
858 EPILOGUE_4_ARGS
859ENDPROC iemAImpl_cmpxchg_u8 %+ %2
860
861BEGINPROC_FASTCALL iemAImpl_cmpxchg_u16 %+ %2, 16
862 PROLOGUE_4_ARGS
863 IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
864 mov ax, [A1]
865 %1 cmpxchg [A0], A2_16
866 mov [A1], ax
867 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
868 EPILOGUE_4_ARGS
869ENDPROC iemAImpl_cmpxchg_u16 %+ %2
870
871BEGINPROC_FASTCALL iemAImpl_cmpxchg_u32 %+ %2, 16
872 PROLOGUE_4_ARGS
873 IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
874 mov eax, [A1]
875 %1 cmpxchg [A0], A2_32
876 mov [A1], eax
877 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
878 EPILOGUE_4_ARGS
879ENDPROC iemAImpl_cmpxchg_u32 %+ %2
880
881BEGINPROC_FASTCALL iemAImpl_cmpxchg_u64 %+ %2, 16
882%ifdef RT_ARCH_AMD64
883 PROLOGUE_4_ARGS
884 IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
885 mov rax, [A1] ; the 64-bit form compares against all of rax, so load the full comparand.
886 %1 cmpxchg [A0], A2
887 mov [A1], rax ; write the full 64-bit accumulator back.
888 IEM_SAVE_FLAGS A3, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
889 EPILOGUE_4_ARGS
890%else
891 ;
892 ; Must use cmpxchg8b here. See also iemAImpl_cmpxchg8b.
893 ;
894 push esi
895 push edi
896 push ebx
897 push ebp
898
899 mov edi, ecx ; pu64Dst
900 mov esi, edx ; pu64Rax
901 mov ecx, [esp + 16 + 4 + 0] ; pu64Reg - Note! Pointer on 32-bit hosts!
902 mov ebp, [esp + 16 + 4 + 4] ; pEFlags
903
904 mov ebx, [ecx]
905 mov ecx, [ecx + 4]
906 IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0 (eax)
907 mov eax, [esi]
908 mov edx, [esi + 4]
909
910 lock cmpxchg8b [edi]
911
912 ; cmpxchg8b doesn't set CF, PF, AF, SF and OF, so we have to do that.
913 jnz .cmpxchg8b_not_equal ; cmpxchg8b clears ZF on mismatch; only then do the real compare.
914 cmp eax, eax ; equal: just set the other flags (leaves ZF set).
915.store:
916 mov [esi], eax ; mov leaves the flags from the compare intact.
917 mov [esi + 4], edx
918 IEM_SAVE_FLAGS ebp, (X86_EFL_ZF | X86_EFL_CF | X86_EFL_PF | X86_EFL_AF | X86_EFL_SF | X86_EFL_OF), 0 ; clobbers T0+T1 (eax, edi)
919
920 pop ebp
921 pop ebx
922 pop edi
923 pop esi
924 ret 8
925
926.cmpxchg8b_not_equal: ; edx:eax now holds the destination value, [esi] still the old accumulator.
927 cmp [esi + 4], edx ;; @todo FIXME - verify 64-bit compare implementation
928 jne .store
929 cmp [esi], eax
930 jmp .store
931
932%endif
933ENDPROC iemAImpl_cmpxchg_u64 %+ %2
934%endmacro ; IEMIMPL_CMPXCHG
935
936IEMIMPL_CMPXCHG , ,
937IEMIMPL_CMPXCHG lock, _locked
938
939;;
940; Macro for implementing a unary operator.
941;
942; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
943; variants, except on 32-bit systems where the 64-bit accesses require hand
944; coding (those are int3 stubs for now).
945;
946; All the functions take a pointer to the destination memory operand in A0
947; and a pointer to eflags in A1.
948;
949; @param 1 The instruction mnemonic.
950; @param 2 The modified flags.
951; @param 3 The undefined flags.
952;
953%macro IEMIMPL_UNARY_OP 3
954BEGINCODE
955BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 8
956 PROLOGUE_2_ARGS
957 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
958 %1 byte [A0]
959 IEM_SAVE_FLAGS A1, %2, %3
960 EPILOGUE_2_ARGS
961ENDPROC iemAImpl_ %+ %1 %+ _u8
962
963BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 8
964 PROLOGUE_2_ARGS
965 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
966 lock %1 byte [A0]
967 IEM_SAVE_FLAGS A1, %2, %3
968 EPILOGUE_2_ARGS
969ENDPROC iemAImpl_ %+ %1 %+ _u8_locked
970
971BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 8
972 PROLOGUE_2_ARGS
973 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
974 %1 word [A0]
975 IEM_SAVE_FLAGS A1, %2, %3
976 EPILOGUE_2_ARGS
977ENDPROC iemAImpl_ %+ %1 %+ _u16
978
979BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 8
980 PROLOGUE_2_ARGS
981 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
982 lock %1 word [A0]
983 IEM_SAVE_FLAGS A1, %2, %3
984 EPILOGUE_2_ARGS
985ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
986
987BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 8
988 PROLOGUE_2_ARGS
989 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
990 %1 dword [A0]
991 IEM_SAVE_FLAGS A1, %2, %3
992 EPILOGUE_2_ARGS
993ENDPROC iemAImpl_ %+ %1 %+ _u32
994
995BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 8
996 PROLOGUE_2_ARGS
997 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
998 lock %1 dword [A0]
999 IEM_SAVE_FLAGS A1, %2, %3
1000 EPILOGUE_2_ARGS
1001ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
1002
1003 %ifdef RT_ARCH_AMD64
1004BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
1005 PROLOGUE_2_ARGS
1006 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1007 %1 qword [A0]
1008 IEM_SAVE_FLAGS A1, %2, %3
1009 EPILOGUE_2_ARGS
1010ENDPROC iemAImpl_ %+ %1 %+ _u64
1011
1012BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
1013 PROLOGUE_2_ARGS
1014 IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1015 lock %1 qword [A0]
1016 IEM_SAVE_FLAGS A1, %2, %3
1017 EPILOGUE_2_ARGS
1018ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
1019 %else
1020 ; stub them for now.
1021BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
1022 int3
1023 ret 0
1024ENDPROC iemAImpl_ %+ %1 %+ _u64
1025BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
1026 int3
1027 ret 0
1028ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
1029 %endif
1030
1031%endmacro
1032
1033IEMIMPL_UNARY_OP inc, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
1034IEMIMPL_UNARY_OP dec, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF), 0
1035IEMIMPL_UNARY_OP neg, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF), 0
1036IEMIMPL_UNARY_OP not, 0, 0
1037
1038
1039
1040;;
1041; Macro for implementing a shift operation.
1042;
1043; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1044; 32-bit systems where the 64-bit accesses require hand coding.
1045;
1046; All the functions take a pointer to the destination memory operand in A0,
1047; the shift count in A1 and a pointer to eflags in A2.
1048;
1049; @param 1 The instruction mnemonic.
1050; @param 2 The modified flags.
1051; @param 3 The undefined flags.
1052;
1053; Makes ASSUMPTIONS about A0, A1 and A2 assignments (the count has to end up in cl).
1054;
1055%macro IEMIMPL_SHIFT_OP 3
1056BEGINCODE
1057BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
1058 PROLOGUE_3_ARGS
1059 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1060 %ifdef ASM_CALL64_GCC
1061 mov cl, A1_8 ; A0 is rdi here, so copy the count into cl explicitly.
1062 %1 byte [A0], cl
1063 %else
1064 xchg A1, A0 ; A0 is rcx/ecx here: after the swap the count is in cl and the pointer in A1.
1065 %1 byte [A1], cl
1066 %endif
1067 IEM_SAVE_FLAGS A2, %2, %3
1068 EPILOGUE_3_ARGS
1069ENDPROC iemAImpl_ %+ %1 %+ _u8
1070
1071BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
1072 PROLOGUE_3_ARGS
1073 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1074 %ifdef ASM_CALL64_GCC
1075 mov cl, A1_8 ; copy the count into cl.
1076 %1 word [A0], cl
1077 %else
1078 xchg A1, A0 ; count into cl, pointer into A1.
1079 %1 word [A1], cl
1080 %endif
1081 IEM_SAVE_FLAGS A2, %2, %3
1082 EPILOGUE_3_ARGS
1083ENDPROC iemAImpl_ %+ %1 %+ _u16
1084
1085BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
1086 PROLOGUE_3_ARGS
1087 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1088 %ifdef ASM_CALL64_GCC
1089 mov cl, A1_8 ; copy the count into cl.
1090 %1 dword [A0], cl
1091 %else
1092 xchg A1, A0 ; count into cl, pointer into A1.
1093 %1 dword [A1], cl
1094 %endif
1095 IEM_SAVE_FLAGS A2, %2, %3
1096 EPILOGUE_3_ARGS
1097ENDPROC iemAImpl_ %+ %1 %+ _u32
1098
1099 %ifdef RT_ARCH_AMD64
1100BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
1101 PROLOGUE_3_ARGS
1102 IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1103 %ifdef ASM_CALL64_GCC
1104 mov cl, A1_8 ; copy the count into cl.
1105 %1 qword [A0], cl
1106 %else
1107 xchg A1, A0 ; count into cl, pointer into A1.
1108 %1 qword [A1], cl
1109 %endif
1110 IEM_SAVE_FLAGS A2, %2, %3
1111 EPILOGUE_3_ARGS
1112ENDPROC iemAImpl_ %+ %1 %+ _u64
1113 %else ; stub it for now - later, replace with hand coded stuff.
1114BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
1115 int3
1116 ret 4
1117ENDPROC iemAImpl_ %+ %1 %+ _u64
1118 %endif ; !RT_ARCH_AMD64
1119
1120%endmacro
1121
1122IEMIMPL_SHIFT_OP rol, (X86_EFL_OF | X86_EFL_CF), 0
1123IEMIMPL_SHIFT_OP ror, (X86_EFL_OF | X86_EFL_CF), 0
1124IEMIMPL_SHIFT_OP rcl, (X86_EFL_OF | X86_EFL_CF), 0
1125IEMIMPL_SHIFT_OP rcr, (X86_EFL_OF | X86_EFL_CF), 0
1126IEMIMPL_SHIFT_OP shl, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1127IEMIMPL_SHIFT_OP shr, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1128IEMIMPL_SHIFT_OP sar, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1129
1130
;;
; Generates the double precision shift workers iemAImpl_<mnemonic>_u16, _u32
; and _u64 for one instruction.
;
; The 64-bit worker is only implemented when RT_ARCH_AMD64 is defined; other
; builds get an int3 stub which still has to be replaced by hand coded stuff.
;
; Every worker takes the destination operand (r/m) in A0, the source (reg) in
; A1, the shift count in A2 and a pointer to the eflags variable/register in
; A3.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1, A2 and A3 assignments: the count must be in
; CL when the instruction executes.  With ASM_CALL64_GCC this is arranged by
; temporarily exchanging A3 and A2 (and swapping them back so that A3 can be
; used for saving the flags); otherwise A0 and A2 are exchanged and the
; destination is addressed through A2.
;
%macro IEMIMPL_SHIFT_DBL_OP 3
BEGINCODE

; --- 16-bit operands ---
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        xchg    A3, A2                  ; count -> cl
        %1      [A0], A1_16, cl
        xchg    A3, A2                  ; restore the eflags pointer in A3
 %else
        xchg    A0, A2                  ; count -> cl, destination -> A2
        %1      [A2], A1_16, cl
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

; --- 32-bit operands ---
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        xchg    A3, A2
        %1      [A0], A1_32, cl
        xchg    A3, A2
 %else
        xchg    A0, A2
        %1      [A2], A1_32, cl
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

; --- 64-bit operands ---
 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
  %ifdef ASM_CALL64_GCC
        xchg    A3, A2
        %1      [A0], A1, cl
        xchg    A3, A2
  %else
        xchg    A0, A2
        %1      [A2], A1, cl
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        int3
        ret     12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

%endmacro

IEMIMPL_SHIFT_DBL_OP shld, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
IEMIMPL_SHIFT_DBL_OP shrd, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_PF | X86_EFL_CF), (X86_EFL_AF)
1204
1205
;;
; Generates the multiplication workers iemAImpl_<mnemonic>_u8, _u16, _u32 and
; _u64 for one instruction.
;
; The 64-bit worker is only implemented when RT_ARCH_AMD64 is defined; other
; builds get an int3 stub which still has to be replaced by hand coded stuff.
;
; The 8-bit worker only operates on AX, so it takes no DX pointer: A0 points
; to AX, A1 holds the operand and A2 points to eflags.  The wider workers take
; a pointer to rAX in A0, a pointer to rDX in A1, the operand in A2 and a
; pointer to eflags in A3.
;
; All workers return 0 in eax, giving them the same return convention as the
; div/idiv workers so the caller can treat both families alike.
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.  Outside
; ASM_CALL64_GCC the rDX pointer (A1) is copied to T1 before multiplying,
; presumably because A1 is xDX there and gets overwritten by the result -
; confirm against the A1 definition.
;
%macro IEMIMPL_MUL_OP 3
BEGINCODE

; --- 8-bit: AX = AL * operand ---
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS
        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     al, [A0]
        %1      A1_8
        mov     [A0], ax                ; the full 16-bit product goes back to AX
        IEM_SAVE_FLAGS       A2, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u8

; --- 16-bit: DX:AX = AX * operand ---
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     ax, [A0]
 %ifdef ASM_CALL64_GCC
        %1      A2_16
        mov     [A0], ax
        mov     [A1], dx
 %else
        mov     T1, A1                  ; keep the rDX pointer out of harm's way
        %1      A2_16
        mov     [A0], ax
        mov     [T1], dx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u16

; --- 32-bit: EDX:EAX = EAX * operand ---
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     eax, [A0]
 %ifdef ASM_CALL64_GCC
        %1      A2_32
        mov     [A0], eax
        mov     [A1], edx
 %else
        mov     T1, A1
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _u32

; --- 64-bit: RDX:RAX = RAX * operand ---
 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS
        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
        mov     rax, [A0]
  %ifdef ASM_CALL64_GCC
        %1      A2
        mov     [A0], rax
        mov     [A1], rdx
  %else
        mov     T1, A1
        %1      A2
        mov     [A0], rax
        mov     [T1], rdx
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; return 0
        EPILOGUE_4_ARGS_EX 12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        int3
        ret     12
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

%endmacro

IEMIMPL_MUL_OP mul,  (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
IEMIMPL_MUL_OP imul, (X86_EFL_OF | X86_EFL_CF), (X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF)
1306
1307
;;
; Macro for implementing a division operations.
;
; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
; 32-bit system where the 64-bit accesses requires hand coding.
;
; The 8-bit function only operates on AX, so it takes no DX pointer: A0 points
; to AX, A1 is the divisor and A2 points to eflags.  The other functions take
; a pointer to rAX in A0, a pointer to rDX in A1, the divisor in A2 and a
; pointer to eflags in A3.
;
; The functions all return 0 on success and -1 if a divide error should be
; raised by the caller.  Only a zero divisor is detected so far; quotient
; overflow is not checked yet (see the @todo markers).
;
; @param        1       The instruction mnemonic.
; @param        2       The modified flags.
; @param        3       The undefined flags.
;
; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.  One of A1/A2
; is moved to T1 before xDX is loaded with the high half of the dividend:
; the divisor (A2) with ASM_CALL64_GCC, the rDX pointer (A1) otherwise -
; presumably whichever of them lives in xDX for that convention.
;
%macro IEMIMPL_DIV_OP 3
BEGINCODE
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
        PROLOGUE_3_ARGS

        ; Zero divisor check - the divisor is A1 in the 8-bit variant.
        test    A1_8, A1_8
        jz      .div_zero
        ;; @todo test for overflow

        IEM_MAYBE_LOAD_FLAGS A2, %2, %3
        mov     ax, [A0]
        %1      A1_8
        mov     [A0], ax
        IEM_SAVE_FLAGS       A2, %2, %3
        xor     eax, eax                ; success

.return:
        EPILOGUE_3_ARGS

.div_zero:
        mov     eax, -1                 ; tell the caller to raise a divide error
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u8

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16
        PROLOGUE_4_ARGS

        ; Zero divisor check.  The divisor is A2; A1 is the pointer to rDX and
        ; must not be used for this test.
        test    A2_16, A2_16
        jz      .div_zero
        ;; @todo test for overflow

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2                  ; save the divisor before dx is loaded
        mov     ax, [A0]
        mov     dx, [A1]
        %1      T1_16
        mov     [A0], ax
        mov     [A1], dx
 %else
        mov     T1, A1                  ; save the rDX pointer before dx is loaded
        mov     ax, [A0]
        mov     dx, [T1]
        %1      A2_16
        mov     [A0], ax
        mov     [T1], dx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; success

.return:
        EPILOGUE_4_ARGS

.div_zero:
        mov     eax, -1
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u16

BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16
        PROLOGUE_4_ARGS

        ; Zero divisor check (divisor is A2, not the rDX pointer in A1).
        test    A2_32, A2_32
        jz      .div_zero
        ;; @todo test for overflow

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
 %ifdef ASM_CALL64_GCC
        mov     T1, A2
        mov     eax, [A0]
        mov     edx, [A1]
        %1      T1_32
        mov     [A0], eax
        mov     [A1], edx
 %else
        mov     T1, A1
        mov     eax, [A0]
        mov     edx, [T1]
        %1      A2_32
        mov     [A0], eax
        mov     [T1], edx
 %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; success

.return:
        EPILOGUE_4_ARGS

.div_zero:
        mov     eax, -1
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u32

 %ifdef RT_ARCH_AMD64
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        PROLOGUE_4_ARGS

        ; Zero divisor check (divisor is A2, not the rDX pointer in A1).
        test    A2, A2
        jz      .div_zero
        ;; @todo test for overflow

        IEM_MAYBE_LOAD_FLAGS A3, %2, %3
  %ifdef ASM_CALL64_GCC
        mov     T1, A2
        mov     rax, [A0]
        mov     rdx, [A1]
        %1      T1
        mov     [A0], rax
        mov     [A1], rdx
  %else
        mov     T1, A1
        mov     rax, [A0]
        mov     rdx, [T1]
        %1      A2
        mov     [A0], rax
        mov     [T1], rdx
  %endif
        IEM_SAVE_FLAGS       A3, %2, %3
        xor     eax, eax                ; success

.return:
        EPILOGUE_4_ARGS_EX 12

.div_zero:
        mov     eax, -1
        jmp     .return
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %else ; stub it for now - later, replace with hand coded stuff.
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
        int3
        ; NOTE(review): the sibling 64-bit stubs in this file use 'ret 12' for
        ; a 20 byte fastcall argument list; confirm whether this one should too.
        ret
ENDPROC iemAImpl_ %+ %1 %+ _u64
 %endif ; !RT_ARCH_AMD64

%endmacro

IEMIMPL_DIV_OP div,  0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
IEMIMPL_DIV_OP idiv, 0, (X86_EFL_OF | X86_EFL_SF | X86_EFL_ZF | X86_EFL_AF | X86_EFL_PF | X86_EFL_CF)
1466
1467
1468;
1469; BSWAP. No flag changes.
1470;
1471; Each function takes one argument, pointer to the value to bswap
1472; (input/output). They all return void.
1473;
;;
; BSWAP with a 16-bit operand size prefix.
;
; The 66h byte is emitted by hand in front of a 32-bit BSWAP so that the host
; executes the operand-size-prefixed encoding.  A full dword is read from and
; written back to [A0], just in case any of the upper bits are used.
;
; @param        A0      Pointer to the value to bswap (input/output).
;
BEGINPROC_FASTCALL iemAImpl_bswap_u16, 4
        PROLOGUE_1_ARGS
        mov     T0_32, [A0]             ; load all 32 bits
        db      66h                     ; operand size prefix for the bswap below
        bswap   T0_32
        mov     [A0], T0_32             ; store all 32 bits back
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u16
1482
;;
; BSWAP on a 32-bit value.  No flag changes.
;
; @param        A0      Pointer to the value to bswap (input/output).
;
BEGINPROC_FASTCALL iemAImpl_bswap_u32, 4
        PROLOGUE_1_ARGS
        mov     T0_32, [A0]             ; fetch
        bswap   T0_32                   ; reverse the byte order
        mov     [A0], T0_32             ; write back in place
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u32
1490
;;
; BSWAP on a 64-bit value.  No flag changes.
;
; With RT_ARCH_AMD64 a single 64-bit bswap is used.  Otherwise the two halves
; at [A0] and [A0 + 4] are byte swapped individually and stored back crosswise,
; which yields the same 64-bit result.
;
; @param        A0      Pointer to the value to bswap (input/output).
;
BEGINPROC_FASTCALL iemAImpl_bswap_u64, 4
        PROLOGUE_1_ARGS
%ifdef RT_ARCH_AMD64
        mov     T0, [A0]
        bswap   T0
        mov     [A0], T0
%else
        mov     T0, [A0]                ; low half
        mov     T1, [A0 + 4]            ; high half
        bswap   T0
        bswap   T1
        mov     [A0 + 4], T0            ; swapped low half becomes the high half
        mov     [A0], T1                ; swapped high half becomes the low half
%endif
        EPILOGUE_1_ARGS
ENDPROC iemAImpl_bswap_u64
1509
1510
;;
; Prepares the host FPU for the instruction being emulated by loading parts of
; the guest's control word and status word.
;
; The current FPU environment is stored at [xSP], its FCW and FSW fields are
; patched and the result is loaded back:
;   - FCW: the guest FCW restricted to the exception mask, precision control
;     and rounding control bits.
;   - FSW: the guest condition code bits (C0..C3) combined with the TOP field
;     that the host FPU currently has.
;
; @uses     The stack memory at [xSP] for the fnstenv/fldenv image (28 bytes in
;           the 32-bit protected mode layout per the Intel SDM); the caller
;           must have reserved it.  The workers in this file reserve 20h bytes.
; @uses     T0 and T1.
; @param    1       Expression giving the address of the FXSTATE of the guest.
;
%macro FPU_LD_FXSTATE_FCW_AND_SAFE_FSW 1
        fnstenv [xSP]

        ; FCW - for exception, precision and rounding control.
        movzx   T0, word [%1 + X86FXSTATE.FCW]
        and     T0, X86_FCW_MASK_ALL | X86_FCW_PC_MASK | X86_FCW_RC_MASK
        mov     [xSP + X86FSTENV32P.FCW], T0_16

        ; FSW - guest C0, C1, C2 and C3 merged with the host's current TOP.
        movzx   T1, word [%1 + X86FXSTATE.FSW]
        and     T1, X86_FSW_C_MASK
        movzx   T0, word [xSP + X86FSTENV32P.FSW]
        and     T0, X86_FSW_TOP_MASK
        or      T0, T1
        mov     [xSP + X86FSTENV32P.FSW], T0_16

        fldenv  [xSP]
%endmacro
1536
1537
;;
; Layout of the result structure filled in by the FPU workers that produce one
; 80-bit value plus a status word.
;
; @todo Need to move this as well somewhere better?
;
struc IEMFPURESULT
    .r80Result      resw 5              ; 80-bit floating point result (10 bytes)
    .FSW            resw 1              ; resulting FPU status word
endstruc
1545
1546
;;
; Layout of a result structure holding two 80-bit values with the status word
; in between them.
;
; @todo Need to move this as well somewhere better?
;
struc IEMFPURESULTTWO
    .r80Result1     resw 5              ; first 80-bit result (10 bytes)
    .FSW            resw 1              ; resulting FPU status word
    .r80Result2     resw 5              ; second 80-bit result (10 bytes)
endstruc
1555
1556
1557;
1558;---------------------- 16-bit signed integer operations ----------------------
1559;
1560
1561
;;
; Converts a 16-bit signed integer value to a 80-bit floating point one (fpu
; register) using FILD.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 16-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i16_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit                          ; reset the host FPU before use
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    word [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]   ; status first, then ...
        fnclex                                  ; ... drop the exception flags ...
        fstp    tword [A1 + IEMFPURESULT.r80Result] ; ... and store the value.

        fninit                          ; leave the host FPU reset
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i16_to_r80
1585
1586
;;
; Stores a 80-bit floating point value (register) as a 16-bit signed integer
; (memory) using the guest's control word settings.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 16-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit
        fld     tword [A3]              ; value to convert -> ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   word [A2]

        fnstsw  word [A1]               ; hand the resulting status word back

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i16
1610
1611
;;
; Store a 80-bit floating point value (register) as a 16-bit signed integer
; (memory) with truncation.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 16-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit
        fld     tword [A3]              ; value to convert -> ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        ; The destination is a 16-bit integer, so the word form must be used.
        ; A dword store would write two bytes past the caller's variable and
        ; apply the 32-bit instead of the 16-bit range check.
        fisttp  word [A2]

        fnstsw  word [A1]               ; hand the resulting status word back

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i16
1636
1637
;;
; Generates iemAImpl_<instruction>_r80_by_i16: an FPU instruction taking one
; 80-bit value (loaded into ST0) and one 16-bit signed integer memory operand,
; returning both the result value and FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit
        fld     tword [A2]              ; ST0 = 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16 fiadd
IEMIMPL_FPU_R80_BY_I16 fimul
IEMIMPL_FPU_R80_BY_I16 fisub
IEMIMPL_FPU_R80_BY_I16 fisubr
IEMIMPL_FPU_R80_BY_I16 fidiv
IEMIMPL_FPU_R80_BY_I16 fidivr
1674
1675
;;
; Generates iemAImpl_<instruction>_r80_by_i16: an FPU instruction taking one
; 80-bit value (loaded into ST0) and one 16-bit signed integer memory operand,
; only returning FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 16-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I16_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit
        fld     tword [A2]              ; ST0 = 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      word [A3]

        fnstsw  word [A1]               ; only the status word is of interest

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
%endmacro

IEMIMPL_FPU_R80_BY_I16_FSW ficom
1706
1707
1708
1709;
1710;---------------------- 32-bit signed integer operations ----------------------
1711;
1712
1713
;;
; Converts a 32-bit signed integer value to a 80-bit floating point one (fpu
; register) using FILD.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 32-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i32_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit                          ; reset the host FPU before use
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    dword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the host FPU reset
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i32_to_r80
1737
1738
;;
; Stores a 80-bit floating point value (register) as a 32-bit signed integer
; (memory) using the guest's control word settings.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit
        fld     tword [A3]              ; value to convert -> ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   dword [A2]

        fnstsw  word [A1]               ; hand the resulting status word back

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i32
1762
1763
;;
; Stores a 80-bit floating point value (register) as a 32-bit signed integer
; (memory) with truncation (FISTTP).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit
        fld     tword [A3]              ; value to convert -> ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  dword [A2]

        fnstsw  word [A1]               ; hand the resulting status word back

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i32
1788
1789
;;
; Generates iemAImpl_<instruction>_r80_by_i32: an FPU instruction taking one
; 80-bit value (loaded into ST0) and one 32-bit signed integer memory operand,
; returning both the result value and FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit
        fld     tword [A2]              ; ST0 = 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32 fiadd
IEMIMPL_FPU_R80_BY_I32 fimul
IEMIMPL_FPU_R80_BY_I32 fisub
IEMIMPL_FPU_R80_BY_I32 fisubr
IEMIMPL_FPU_R80_BY_I32 fidiv
IEMIMPL_FPU_R80_BY_I32 fidivr
1826
1827
;;
; Generates iemAImpl_<instruction>_r80_by_i32: an FPU instruction taking one
; 80-bit value (loaded into ST0) and one 32-bit signed integer memory operand,
; only returning FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_I32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit
        fld     tword [A2]              ; ST0 = 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word [A1]               ; only the status word is of interest

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
%endmacro

IEMIMPL_FPU_R80_BY_I32_FSW ficom
1858
1859
1860
1861;
1862;---------------------- 64-bit signed integer operations ----------------------
1863;
1864
1865
;;
; Converts a 64-bit signed integer value to a 80-bit floating point one (fpu
; register) using FILD.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 64-bit signed integer value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fild_i64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit                          ; reset the host FPU before use
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fild    qword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the host FPU reset
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fild_i64_to_r80
1889
1890
;;
; Stores a 80-bit floating point value (register) as a 64-bit signed integer
; (memory) using the guest's control word settings.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit
        fld     tword [A3]              ; value to convert -> ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fistp   qword [A2]

        fnstsw  word [A1]               ; hand the resulting status word back

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fist_r80_to_i64
1914
1915
;;
; Stores a 80-bit floating point value (register) as a 64-bit signed integer
; (memory) with truncation (FISTTP).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit signed integer value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit
        fld     tword [A3]              ; value to convert -> ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fisttp  qword [A2]

        fnstsw  word [A1]               ; hand the resulting status word back

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fistt_r80_to_i64
1940
1941
1942
1943;
1944;---------------------- 32-bit floating point operations ----------------------
1945;
1946
;;
; Loads a 32-bit floating point value and returns it converted to the 80-bit
; format (fpu register) together with the resulting FSW.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 32-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r32_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit                          ; reset the host FPU before use
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     dword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the host FPU reset
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r32_to_r80
1970
1971
;;
; Stores a 80-bit floating point value (register) as a 32-bit one (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 32-bit value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit
        fld     tword [A3]              ; value to store -> ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fst     dword [A2]              ; non-popping store; the fninit below cleans up

        fnstsw  word [A1]               ; hand the resulting status word back

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r32
1995
1996
;;
; Generates iemAImpl_<instruction>_r80_by_r32: an FPU instruction taking one
; 80-bit value (loaded into ST0) and one 32-bit floating point memory operand,
; returning both the result value and FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit
        fld     tword [A2]              ; ST0 = 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32 fadd
IEMIMPL_FPU_R80_BY_R32 fmul
IEMIMPL_FPU_R80_BY_R32 fsub
IEMIMPL_FPU_R80_BY_R32 fsubr
IEMIMPL_FPU_R80_BY_R32 fdiv
IEMIMPL_FPU_R80_BY_R32 fdivr
2033
2034
;;
; Generates iemAImpl_<instruction>_r80_by_r32: an FPU instruction taking one
; 80-bit value (loaded into ST0) and one 32-bit floating point memory operand,
; only returning FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 32-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R32_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit
        fld     tword [A2]              ; ST0 = 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      dword [A3]

        fnstsw  word [A1]               ; only the status word is of interest

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
%endmacro

IEMIMPL_FPU_R80_BY_R32_FSW fcom
2065
2066
2067
2068;
2069;---------------------- 64-bit floating point operations ----------------------
2070;
2071
;;
; Converts a 64-bit floating point value to a 80-bit one (fpu register).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 64-bit floating point value to convert.
;
BEGINPROC_FASTCALL iemAImpl_fld_r64_to_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        ; Reset the host FPU first, exactly like the other load workers do.
        ; Without this the fld below runs on whatever register stack state
        ; the host FPU happens to be in (e.g. no free register for the push).
        fninit
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     qword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the host FPU reset
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r64_to_r80
2094
2095
;;
; Stores a 80-bit floating point value (register) as a 64-bit one (memory).
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 64-bit value.
; @param    A3      Pointer to the 80-bit value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit
        fld     tword [A3]              ; value to store -> ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fst     qword [A2]              ; non-popping store; the fninit below cleans up

        fnstsw  word [A1]               ; hand the resulting status word back

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r64
2119
2120
;;
; Generates iemAImpl_<instruction>_r80_by_r64: an FPU instruction taking one
; 80-bit value (loaded into ST0) and one 64-bit floating point memory operand,
; returning both the result value and FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit
        fld     tword [A2]              ; ST0 = 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      qword [A3]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64 fadd
IEMIMPL_FPU_R80_BY_R64 fmul
IEMIMPL_FPU_R80_BY_R64 fsub
IEMIMPL_FPU_R80_BY_R64 fsubr
IEMIMPL_FPU_R80_BY_R64 fdiv
IEMIMPL_FPU_R80_BY_R64 fdivr
2157
;;
; Generates iemAImpl_<instruction>_r80_by_r64: an FPU instruction taking one
; 80-bit value (loaded into ST0) and one 64-bit floating point memory operand,
; only returning FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to store the output FSW.
; @param    A2      Pointer to the 80-bit value.
; @param    A3      Pointer to the 64-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R64_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit
        fld     tword [A2]              ; ST0 = 80-bit operand
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      qword [A3]

        fnstsw  word [A1]               ; only the status word is of interest

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
%endmacro

IEMIMPL_FPU_R80_BY_R64_FSW fcom
2188
2189
2190
2191;
2192;---------------------- 80-bit floating point operations ----------------------
2193;
2194
;;
; Loads a 80-bit floating point register value from memory and returns it
; together with the resulting FSW.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the 80-bit floating point value to load.
;
BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r80, 12
        PROLOGUE_3_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit                          ; reset the host FPU before use
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fld     tword [A2]

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit                          ; leave the host FPU reset
        add     xSP, 20h
        EPILOGUE_3_ARGS
ENDPROC iemAImpl_fld_r80_from_r80
2218
2219
;;
; Stores a 80-bit floating point register to memory.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Where to return the output FSW.
; @param    A2      Where to store the 80-bit value.
; @param    A3      Pointer to the 80-bit register value.
;
BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit
        fld     tword [A3]              ; register value -> ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        fstp    tword [A2]

        fnstsw  word [A1]               ; hand the resulting status word back

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_fst_r80_to_r80
2243
2244
;;
; Generates iemAImpl_<instruction>_r80_by_r80: an FPU instruction working on
; two 80-bit floating point values, returning the new ST0 and FSW.
;
; The second value is loaded first so that it ends up in ST1 and the first
; value in ST0 when the instruction executes.
;
; @param    1       The instruction
; @param    2       The operand list to pass to the instruction; may be empty
;                   for instructions with implicit operands.
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the first 80-bit value (ST0)
; @param    A3      Pointer to the second 80-bit value (STn).
;
%macro IEMIMPL_FPU_R80_BY_R80 2
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit
        fld     tword [A3]              ; becomes ST1 after the next load
        fld     tword [A2]              ; ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      %2

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80 fadd,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fmul,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fsub,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fsubr,  {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fdiv,   {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fdivr,  {st0, st1}
IEMIMPL_FPU_R80_BY_R80 fprem,  {}
IEMIMPL_FPU_R80_BY_R80 fprem1, {}
IEMIMPL_FPU_R80_BY_R80 fscale, {}
2285
2286
;;
; Generates iemAImpl_<instruction>_r80_by_r80: an FPU instruction working on
; two 80-bit floating point values, ST1 and ST0, storing the result in ST1 and
; popping the stack.
;
; The first value is loaded first here, so it ends up in ST1 and the second
; value in ST0.  After the instruction has popped, the result is the top of
; stack and is fetched with fstp.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a IEMFPURESULT for the output.
; @param    A2      Pointer to the first 80-bit value (ST1).
; @param    A3      Pointer to the second 80-bit value (ST0).
;
%macro IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit
        fld     tword [A2]              ; becomes ST1 after the next load
        fld     tword [A3]              ; ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1

        fnstsw  word  [A1 + IEMFPURESULT.FSW]
        fnclex
        fstp    tword [A1 + IEMFPURESULT.r80Result]

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fpatan
IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2xp1
2321
2322
;;
; Generates iemAImpl_<instruction>_r80_by_r80: an FPU instruction working on
; two 80-bit floating point values (first in ST0, second in ST1), only
; returning FSW.
;
; @param    1       The instruction
;
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the first 80-bit value.
; @param    A3      Pointer to the second 80-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R80_FSW 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit
        fld     tword [A3]              ; becomes ST1 after the next load
        fld     tword [A2]              ; ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      st0, st1

        fnstsw  word [A1]               ; only the status word is of interest

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_FSW fcom
IEMIMPL_FPU_R80_BY_R80_FSW fucom
2355
2356
;;
; Generates iemAImpl_<instruction>_r80_by_r80: an FPU instruction working on
; two 80-bit floating point values (first in ST0, second in ST1), returning
; FSW and EFLAGS (eax).
;
; The host flags are captured with pushf/pop right after the instruction and
; the FSW store, i.e. before the stack pointer adjustment at the end of the
; function can modify them.
;
; @param    1       The instruction
;
; @returns  EFLAGS in EAX.
; @param    A0      FPU context (fxsave).
; @param    A1      Pointer to a uint16_t for the resulting FSW.
; @param    A2      Pointer to the first 80-bit value.
; @param    A3      Pointer to the second 80-bit value.
;
%macro IEMIMPL_FPU_R80_BY_R80_EFL 1
BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
        PROLOGUE_4_ARGS
        sub     xSP, 20h                ; room for the FPU environment image

        fninit
        fld     tword [A3]              ; becomes ST1 after the next load
        fld     tword [A2]              ; ST0
        FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
        %1      st1

        fnstsw  word [A1]
        pushf                           ; grab the flags set by the compare ...
        pop     xAX                     ; ... as the return value

        fninit
        add     xSP, 20h
        EPILOGUE_4_ARGS
ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
%endmacro

IEMIMPL_FPU_R80_BY_R80_EFL fcomi
IEMIMPL_FPU_R80_BY_R80_EFL fucomi
2392
2393
2394;;
2395; FPU instruction working on one 80-bit floating point value.
2396; The generated procedure is named iemAImpl_<instruction>_r80. Only [A2] is loaded, so it is the sole FPU stack entry (ST0) when the instruction runs.
2397; @param 1 The instruction (expanded without operands).
2398;
2399; @param A0 FPU context (fxsave).
2400; @param A1 Pointer to an IEMFPURESULT for the output (FSW and r80Result are written).
2401; @param A2 Pointer to the 80-bit value.
2402;
2403%macro IEMIMPL_FPU_R80 1
2404BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
2405 PROLOGUE_3_ARGS
2406 sub xSP, 20h ; 20h bytes of stack, released again before the epilogue; presumably scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW - TODO confirm.
2407; Set up the FPU: start clean and push the single operand. The FCW/FSW helper is defined elsewhere; judging by its name it applies control/status bits from the context at A0 - confirm.
2408 fninit
2409 fld tword [A2] ; ST0.
2410 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2411 %1 ; The wrapped instruction (macro parameter 1).
2412; Report through the IEMFPURESULT at A1: the status word first, then the value popped from the top of the stack.
2413 fnstsw word [A1 + IEMFPURESULT.FSW]
2414 fnclex ; The status word is already saved; clear the exception flags before the fstp below.
2415 fstp tword [A1 + IEMFPURESULT.r80Result]
2416; Leave the FPU reinitialized and give the stack space back.
2417 fninit
2418 add xSP, 20h
2419 EPILOGUE_3_ARGS
2420ENDPROC iemAImpl_ %+ %1 %+ _r80
2421%endmacro
2422; Instantiations. NOTE(review): fyl2x architecturally takes two operands (ST1 and ST0), yet this template loads only one value - confirm whether it belongs with the two-operand ST1/ST0 helpers instead.
2423IEMIMPL_FPU_R80 fchs
2424IEMIMPL_FPU_R80 fabs
2425IEMIMPL_FPU_R80 f2xm1
2426IEMIMPL_FPU_R80 fyl2x
2427IEMIMPL_FPU_R80 fsqrt
2428IEMIMPL_FPU_R80 frndint
2429IEMIMPL_FPU_R80 fsin
2430IEMIMPL_FPU_R80 fcos
2431
2432
2433;;
2434; FPU instruction working on one 80-bit floating point value, only
2435; returning FSW.
2436; The generated procedure is named iemAImpl_<instruction>_r80. No floating point result is stored; only the status word is written to [A1].
2437; @param 1 The instruction (expanded without operands).
2438;
2439; @param A0 FPU context (fxsave).
2440; @param A1 Pointer to a uint16_t for the resulting FSW.
2441; @param A2 Pointer to the 80-bit value (loaded as ST0).
2442;
2443%macro IEMIMPL_FPU_R80_FSW 1
2444BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
2445 PROLOGUE_3_ARGS
2446 sub xSP, 20h ; 20h bytes of stack, released again before the epilogue; presumably scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW - TODO confirm.
2447; Set up the FPU: start clean and push the single operand. The FCW/FSW helper is defined elsewhere; judging by its name it applies control/status bits from the context at A0 - confirm.
2448 fninit
2449 fld tword [A2] ; ST0.
2450 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2451 %1 ; The wrapped instruction (macro parameter 1).
2452; The status word is the only output of this helper.
2453 fnstsw word [A1]
2454; Leave the FPU reinitialized and give the stack space back.
2455 fninit
2456 add xSP, 20h
2457 EPILOGUE_3_ARGS
2458ENDPROC iemAImpl_ %+ %1 %+ _r80
2459%endmacro
2460; Instantiations: procedures named iemAImpl_ftst_r80 and iemAImpl_fxam_r80.
2461IEMIMPL_FPU_R80_FSW ftst
2462IEMIMPL_FPU_R80_FSW fxam
2463
2464
2465
2466;;
2467; FPU instruction loading a 80-bit floating point constant.
2468; The generated procedure is named iemAImpl_<instruction> (no suffix). Nothing is pushed beforehand: the instruction itself puts the constant on the FPU stack.
2469; @param 1 The instruction (expanded without operands).
2470;
2471; @param A0 FPU context (fxsave).
2472; @param A1 Pointer to an IEMFPURESULT for the output (FSW and r80Result are written).
2473;
2474%macro IEMIMPL_FPU_R80_CONST 1
2475BEGINPROC_FASTCALL iemAImpl_ %+ %1, 8
2476 PROLOGUE_2_ARGS
2477 sub xSP, 20h ; 20h bytes of stack, released again before the epilogue; presumably scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW - TODO confirm.
2478; Set up the FPU: start clean with an empty stack. The FCW/FSW helper is defined elsewhere; judging by its name it applies control/status bits from the context at A0 - confirm.
2479 fninit
2480 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2481 %1 ; The wrapped constant-load instruction (macro parameter 1).
2482; Report through the IEMFPURESULT at A1: the status word first, then the constant popped from the top of the stack.
2483 fnstsw word [A1 + IEMFPURESULT.FSW]
2484 fnclex ; The status word is already saved; clear the exception flags before the fstp below.
2485 fstp tword [A1 + IEMFPURESULT.r80Result]
2486; Leave the FPU reinitialized and give the stack space back.
2487 fninit
2488 add xSP, 20h
2489 EPILOGUE_2_ARGS
2490ENDPROC iemAImpl_ %+ %1
2491%endmacro
2492; Instantiations: one procedure per constant-load instruction, e.g. iemAImpl_fld1.
2493IEMIMPL_FPU_R80_CONST fld1
2494IEMIMPL_FPU_R80_CONST fldl2t
2495IEMIMPL_FPU_R80_CONST fldl2e
2496IEMIMPL_FPU_R80_CONST fldpi
2497IEMIMPL_FPU_R80_CONST fldlg2
2498IEMIMPL_FPU_R80_CONST fldln2
2499IEMIMPL_FPU_R80_CONST fldz
2500
2501
2502;;
2503; FPU instruction working on one 80-bit floating point value, outputting two.
2504; The generated procedure is named iemAImpl_<instruction>_r80_r80. After the instruction runs, the top of the FPU stack is stored as r80Result2 and the entry below it as r80Result1.
2505; @param 1 The instruction (expanded without operands).
2506;
2507; @param A0 FPU context (fxsave).
2508; @param A1 Pointer to an IEMFPURESULTTWO for the output (FSW, r80Result1 and r80Result2 are written).
2509; @param A2 Pointer to the 80-bit value (loaded as ST0).
2510;
2511%macro IEMIMPL_FPU_R80_R80 1
2512BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_r80, 12
2513 PROLOGUE_3_ARGS
2514 sub xSP, 20h ; 20h bytes of stack, released again before the epilogue; presumably scratch for FPU_LD_FXSTATE_FCW_AND_SAFE_FSW - TODO confirm.
2515; Set up the FPU: start clean and push the single operand. The FCW/FSW helper is defined elsewhere; judging by its name it applies control/status bits from the context at A0 - confirm.
2516 fninit
2517 fld tword [A2] ; ST0.
2518 FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2519 %1 ; The wrapped instruction (macro parameter 1).
2520; Report through the IEMFPURESULTTWO at A1: the status word first, then the two values popped off the stack, top first.
2521 fnstsw word [A1 + IEMFPURESULTTWO.FSW]
2522 fnclex ; The status word is already saved; clear the exception flags before the fstp below.
2523 fstp tword [A1 + IEMFPURESULTTWO.r80Result2] ; Top of stack.
2524 fnclex ; Cleared again before the second store; presumably in case the first fstp itself flagged an exception - confirm.
2525 fstp tword [A1 + IEMFPURESULTTWO.r80Result1] ; The entry that was below the top.
2526; Leave the FPU reinitialized and give the stack space back.
2527 fninit
2528 add xSP, 20h
2529 EPILOGUE_3_ARGS
2530ENDPROC iemAImpl_ %+ %1 %+ _r80_r80
2531%endmacro
2532; Instantiations: procedures named iemAImpl_fptan_r80_r80, iemAImpl_fxtract_r80_r80 and iemAImpl_fsincos_r80_r80.
2533IEMIMPL_FPU_R80_R80 fptan
2534IEMIMPL_FPU_R80_R80 fxtract
2535IEMIMPL_FPU_R80_R80 fsincos
2536
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette