IEMAllAImpl.asm@ 42699

Last change on this file since 42699 was 42699, checked in by vboxsync, 12 years ago
IEMAllAImpl.asm: Fixed the wrong 'RET X' stuff in 32-bit mode once and for all.
Property svn:eol-style set to `native` Property svn:keywords set to `Author Date Id Revision`
File size: 68.5 KB

Line
1	; $Id: IEMAllAImpl.asm 42699 2012-08-08 23:38:11Z vboxsync $
2	;; @file
3	; IEM - Instruction Implementation in Assembly.
4	;
5
6	; Copyright (C) 2011-2012 Oracle Corporation
7	;
8	; This file is part of VirtualBox Open Source Edition (OSE), as
9	; available from http://www.virtualbox.org. This file is free software;
10	; you can redistribute it and/or modify it under the terms of the GNU
11	; General Public License (GPL) as published by the Free Software
12	; Foundation, in version 2 as it comes in the "COPYING" file of the
13	; VirtualBox OSE distribution. VirtualBox OSE is distributed in the
14	; hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
15	;
16
17
18	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
19	; Header Files ;
20	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
21	%include "VBox/asmdefs.mac"
22	%include "VBox/err.mac"
23	%include "iprt/x86.mac"
24
25
26	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
27	; Defined Constants And Macros ;
28	;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29
30	;;
31	; RET XX / RET wrapper for fastcall: emits 'ret <byte count>' on 32-bit Windows and a plain 'ret' everywhere else.
32	; @param 1 The number of stack bytes to pop on return; only used when both RT_ARCH_X86 and RT_OS_WINDOWS are defined.
33	%macro RET_FASTCALL 1
34	%ifdef RT_ARCH_X86
35	%ifdef RT_OS_WINDOWS
36	ret %1 ; 32-bit Windows: the callee removes the given number of argument bytes.
37	%else
38	ret ; other 32-bit hosts: the byte count is ignored by this macro.
39	%endif
40	%else
41	ret ; not RT_ARCH_X86: the byte count is ignored.
42	%endif
43	%endmacro
44
45	;;
46	; NAME for fastcall functions.
47	;
48	;; @todo 'global @fastcall@12' is still broken in yasm and requires dollar
49	; escaping (or whatever the dollar is good for here). Thus the ugly
50	; prefix argument.
51	;
52	%define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) NAME(a_Name)
53	%ifdef RT_ARCH_X86
54	%ifdef RT_OS_WINDOWS
55	%undef NAME_FASTCALL
56	%define NAME_FASTCALL(a_Name, a_cbArgs, a_Prefix) a_Prefix %+ a_Name %+ @ %+ a_cbArgs
57	%endif
58	%endif
59
60	;;
61	; BEGINPROC for fastcall functions: exports/globalizes the (possibly decorated) name and emits the entry label.
62	;
63	; @param 1 The function name (C).
64	; @param 2 The argument size on x86 (only ends up in the symbol name when NAME_FASTCALL decorates it).
65	;
66	%macro BEGINPROC_FASTCALL 2
67	%ifdef ASM_FORMAT_PE
68	export %1=NAME_FASTCALL(%1,%2,$@) ; PE output: export the plain C name as an alias of the fastcall symbol.
69	%endif
70	%ifdef __NASM__
71	%ifdef ASM_FORMAT_OMF
72	export NAME(%1) NAME_FASTCALL(%1,%2,$@) ; NASM + OMF output: same idea using the OMF export syntax.
73	%endif
74	%endif
75	%ifndef ASM_FORMAT_BIN
76	global NAME_FASTCALL(%1,%2,$@) ; '$@' prefix here, plain '@' on the label below - see the @todo on NAME_FASTCALL.
77	%endif
78	NAME_FASTCALL(%1,%2,@): ; the function entry point label.
79	%endmacro
80
81
82	;
83	; We employ some macro assembly here to hide the calling convention differences.
84	;
85	%ifdef RT_ARCH_AMD64
86	%macro PROLOGUE_1_ARGS 0
87	%endmacro
88	%macro EPILOGUE_1_ARGS 0
89	ret
90	%endmacro
91	%macro EPILOGUE_1_ARGS_EX 0-1
92	ret ; AMD64: no stack arguments to pop; the optional byte count (required by the 32-bit variant) is accepted and ignored.
93	%endmacro
94
95	%macro PROLOGUE_2_ARGS 0
96	%endmacro
97	%macro EPILOGUE_2_ARGS 0
98	ret
99	%endmacro
100	%macro EPILOGUE_2_ARGS_EX 1
101	ret
102	%endmacro
103
104	%macro PROLOGUE_3_ARGS 0
105	%endmacro
106	%macro EPILOGUE_3_ARGS 0
107	ret
108	%endmacro
109	%macro EPILOGUE_3_ARGS_EX 1
110	ret
111	%endmacro
112
113	%macro PROLOGUE_4_ARGS 0
114	%endmacro
115	%macro EPILOGUE_4_ARGS 0
116	ret
117	%endmacro
118	%macro EPILOGUE_4_ARGS_EX 1
119	ret
120	%endmacro
121
122	%ifdef ASM_CALL64_GCC
123	%define A0 rdi
124	%define A0_32 edi
125	%define A0_16 di
126	%define A0_8 dil
127
128	%define A1 rsi
129	%define A1_32 esi
130	%define A1_16 si
131	%define A1_8 sil
132
133	%define A2 rdx
134	%define A2_32 edx
135	%define A2_16 dx
136	%define A2_8 dl
137
138	%define A3 rcx
139	%define A3_32 ecx
140	%define A3_16 cx
141	%endif
142
143	%ifdef ASM_CALL64_MSC
144	%define A0 rcx
145	%define A0_32 ecx
146	%define A0_16 cx
147	%define A0_8 cl
148
149	%define A1 rdx
150	%define A1_32 edx
151	%define A1_16 dx
152	%define A1_8 dl
153
154	%define A2 r8
155	%define A2_32 r8d
156	%define A2_16 r8w
157	%define A2_8 r8b
158
159	%define A3 r9
160	%define A3_32 r9d
161	%define A3_16 r9w
162	%endif
163
164	%define T0 rax
165	%define T0_32 eax
166	%define T0_16 ax
167	%define T0_8 al
168
169	%define T1 r11
170	%define T1_32 r11d
171	%define T1_16 r11w
172	%define T1_8 r11b
173
174	%else
175	; x86
176	%macro PROLOGUE_1_ARGS 0
177	push edi
178	%endmacro
179	%macro EPILOGUE_1_ARGS 0
180	pop edi
181	ret 0
182	%endmacro
183	%macro EPILOGUE_1_ARGS_EX 1
184	pop edi
185	ret %1
186	%endmacro
187
188	%macro PROLOGUE_2_ARGS 0
189	push edi
190	%endmacro
191	%macro EPILOGUE_2_ARGS 0
192	pop edi
193	ret 0
194	%endmacro
195	%macro EPILOGUE_2_ARGS_EX 1
196	pop edi
197	ret %1
198	%endmacro
199
200	%macro PROLOGUE_3_ARGS 0
201	push ebx ; A2 is mapped to ebx on x86, so save the caller's value first.
202	mov ebx, [esp + 4 + 4] ; A2 = first stack argument: skip the saved ebx (4) and the return address (4).
203	push edi ; T1 is mapped to edi on x86; save it as well.
204	%endmacro
205	%macro EPILOGUE_3_ARGS_EX 1
206	%if (%1) < 4
207	%error "With three args, at least 4 bytes must be remove from the stack upon return (32-bit)."
208	%endif
209	pop edi ; undo the prologue pushes in reverse order.
210	pop ebx
211	ret %1 ; return and pop the given number of argument bytes.
212	%endmacro
213	%macro EPILOGUE_3_ARGS 0
214	EPILOGUE_3_ARGS_EX 4 ; the default case: exactly one 4 byte stack argument to remove.
215	%endmacro
216
217	%macro PROLOGUE_4_ARGS 0
218	push ebx ; A2 is mapped to ebx on x86.
219	push edi ; T1 is mapped to edi on x86.
220	push esi ; A3 is mapped to esi on x86.
221	mov ebx, [esp + 12 + 4 + 0] ; A2 = first stack argument: skip the three pushes (12) and the return address (4).
222	mov esi, [esp + 12 + 4 + 4] ; A3 = second stack argument.
223	%endmacro
224	%macro EPILOGUE_4_ARGS_EX 1
225	%if (%1) < 8
226	%error "With four args, at least 8 bytes must be remove from the stack upon return (32-bit)."
227	%endif
228	pop esi ; undo the prologue pushes in reverse order.
229	pop edi
230	pop ebx
231	ret %1 ; return and pop the given number of argument bytes.
232	%endmacro
233	%macro EPILOGUE_4_ARGS 0
234	EPILOGUE_4_ARGS_EX 8 ; the default case: two 4 byte stack arguments to remove.
235	%endmacro
236
237	%define A0 ecx
238	%define A0_32 ecx
239	%define A0_16 cx
240	%define A0_8 cl
241
242	%define A1 edx
243	%define A1_32 edx
244	%define A1_16 dx
245	%define A1_8 dl
246
247	%define A2 ebx
248	%define A2_32 ebx
249	%define A2_16 bx
250	%define A2_8 bl
251
252	%define A3 esi
253	%define A3_32 esi
254	%define A3_16 si
255
256	%define T0 eax
257	%define T0_32 eax
258	%define T0_16 ax
259	%define T0_8 al
260
261	%define T1 edi
262	%define T1_32 edi
263	%define T1_16 di
264	%endif
265
266
267	;;
268	; Load the modified (%2) and undefined (%3) guest flags from [%1] into the host EFLAGS.
269	; The '%if (%3) != 0' guard in the body is commented out, so the load is currently unconditional.
270	; @remarks Clobbers T0, uses the stack temporarily (pushf/popf). Changes EFLAGS.
271	; @remarks Callers in this file pass A1, A2, A3 or a plain register (r9, r10, ebp) as parameter 1.
272	; @param 1 The parameter (A0..A3) pointing to the eflags.
273	; @param 2 The set of modified flags.
274	; @param 3 The set of undefined flags.
275	;
276	%macro IEM_MAYBE_LOAD_FLAGS 3
277	;%if (%3) != 0
278	pushf ; store current flags
279	mov T0_32, [%1] ; load the guest flags
280	and dword [xSP], ~(%2 \| %3) ; mask out the modified and undefined flags
281	and T0_32, (%2 \| %3) ; select the modified and undefined flags.
282	or [xSP], T0 ; merge guest flags with host flags.
283	popf ; load the mixed flags.
284	;%endif
285	%endmacro
286
287	;;
288	; Write the modified and undefined flags from the host EFLAGS back into the guest eflags at [%1].
289	; Nothing is emitted when both masks are zero.
290	; @remarks Clobbers T0, T1, stack.
291	; @param 1 The register pointing to the EFLAGS.
292	; @param 2 The mask of modified flags to save.
293	; @param 3 The mask of undefined flags to (maybe) save.
294	;
295	%macro IEM_SAVE_FLAGS 3
296	%if (%2 \| %3) != 0
297	pushf ; grab the host flags as left by the emulated instruction...
298	pop T1 ; ...into T1.
299	mov T0_32, [%1] ; flags
300	and T0_32, ~(%2 \| %3) ; clear the modified & undefined flags.
301	and T1_32, (%2 \| %3) ; select the modified and undefined flags.
302	or T0_32, T1_32 ; combine the flags.
303	mov [%1], T0_32 ; save the flags.
304	%endif
305	%endmacro
306
307
308	;;
309	; Macro for implementing a binary operator.
310	;
311	; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
312	; variants, except on 32-bit system where the 64-bit accesses requires hand
313	; coding.
314	;
315	; All the functions takes a pointer to the destination memory operand in A0,
316	; the source register operand in A1 and a pointer to eflags in A2.
317	;
318	; @param 1 The instruction mnemonic.
319	; @param 2 Non-zero if there should be a locked version.
320	; @param 3 The modified flags.
321	; @param 4 The undefined flags.
322	;
323	%macro IEMIMPL_BIN_OP 4
324	BEGINCODE
325	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
326	PROLOGUE_3_ARGS
327	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
328	%1 byte [A0], A1_8
329	IEM_SAVE_FLAGS A2, %3, %4
330	EPILOGUE_3_ARGS
331	ENDPROC iemAImpl_ %+ %1 %+ _u8
332
333	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
334	PROLOGUE_3_ARGS
335	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
336	%1 word [A0], A1_16
337	IEM_SAVE_FLAGS A2, %3, %4
338	EPILOGUE_3_ARGS
339	ENDPROC iemAImpl_ %+ %1 %+ _u16
340
341	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
342	PROLOGUE_3_ARGS
343	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
344	%1 dword [A0], A1_32
345	IEM_SAVE_FLAGS A2, %3, %4
346	EPILOGUE_3_ARGS
347	ENDPROC iemAImpl_ %+ %1 %+ _u32
348
349	%ifdef RT_ARCH_AMD64
350	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
351	PROLOGUE_3_ARGS
352	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
353	%1 qword [A0], A1
354	IEM_SAVE_FLAGS A2, %3, %4
355	EPILOGUE_3_ARGS_EX 8
356	ENDPROC iemAImpl_ %+ %1 %+ _u64
357	%else ; stub it for now - later, replace with hand coded stuff.
358	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
359	int3 ; 64-bit operand not implemented on 32-bit hosts yet - trap if this is ever reached.
360	ret 8 ; 16 bytes of arguments: pop 8 from the stack, like the _u64_locked stub and EPILOGUE_4_ARGS do.
361	ENDPROC iemAImpl_ %+ %1 %+ _u64
362	%endif ; !RT_ARCH_AMD64
363
364	%if %2 != 0 ; locked versions requested?
365
366	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 12
367	PROLOGUE_3_ARGS
368	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
369	lock %1 byte [A0], A1_8
370	IEM_SAVE_FLAGS A2, %3, %4
371	EPILOGUE_3_ARGS
372	ENDPROC iemAImpl_ %+ %1 %+ _u8_locked
373
374	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
375	PROLOGUE_3_ARGS
376	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
377	lock %1 word [A0], A1_16
378	IEM_SAVE_FLAGS A2, %3, %4
379	EPILOGUE_3_ARGS
380	ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
381
382	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
383	PROLOGUE_3_ARGS
384	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
385	lock %1 dword [A0], A1_32
386	IEM_SAVE_FLAGS A2, %3, %4
387	EPILOGUE_3_ARGS
388	ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
389
390	%ifdef RT_ARCH_AMD64
391	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
392	PROLOGUE_3_ARGS
393	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
394	lock %1 qword [A0], A1
395	IEM_SAVE_FLAGS A2, %3, %4
396	EPILOGUE_3_ARGS_EX 8
397	ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
398	%else ; stub it for now - later, replace with hand coded stuff.
399	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
400	int3
401	ret 8
402	ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
403	%endif ; !RT_ARCH_AMD64
404	%endif ; locked
405	%endmacro
406
407	; instr,lock,modified-flags.
408	IEMIMPL_BIN_OP add, 1, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
409	IEMIMPL_BIN_OP adc, 1, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
410	IEMIMPL_BIN_OP sub, 1, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
411	IEMIMPL_BIN_OP sbb, 1, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
412	IEMIMPL_BIN_OP or, 1, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_PF \| X86_EFL_CF), X86_EFL_AF,
413	IEMIMPL_BIN_OP xor, 1, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_PF \| X86_EFL_CF), X86_EFL_AF,
414	IEMIMPL_BIN_OP and, 1, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_PF \| X86_EFL_CF), X86_EFL_AF,
415	IEMIMPL_BIN_OP cmp, 0, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
416	IEMIMPL_BIN_OP test, 0, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_PF \| X86_EFL_CF), X86_EFL_AF,
417
418
419	;;
420	; Macro for implementing a bit operator.
421	;
422	; This will generate code for the 16, 32 and 64 bit accesses with locked
423	; variants, except on 32-bit system where the 64-bit accesses requires hand
424	; coding.
425	;
426	; All the functions takes a pointer to the destination memory operand in A0,
427	; the source register operand in A1 and a pointer to eflags in A2.
428	;
429	; @param 1 The instruction mnemonic.
430	; @param 2 Non-zero if there should be a locked version.
431	; @param 3 The modified flags.
432	; @param 4 The undefined flags.
433	;
434	%macro IEMIMPL_BIT_OP 4
435	BEGINCODE
436	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
437	PROLOGUE_3_ARGS
438	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
439	%1 word [A0], A1_16
440	IEM_SAVE_FLAGS A2, %3, %4
441	EPILOGUE_3_ARGS
442	ENDPROC iemAImpl_ %+ %1 %+ _u16
443
444	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
445	PROLOGUE_3_ARGS
446	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
447	%1 dword [A0], A1_32
448	IEM_SAVE_FLAGS A2, %3, %4
449	EPILOGUE_3_ARGS
450	ENDPROC iemAImpl_ %+ %1 %+ _u32
451
452	%ifdef RT_ARCH_AMD64
453	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
454	PROLOGUE_3_ARGS
455	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
456	%1 qword [A0], A1
457	IEM_SAVE_FLAGS A2, %3, %4
458	EPILOGUE_3_ARGS_EX 8
459	ENDPROC iemAImpl_ %+ %1 %+ _u64
460	%else ; stub it for now - later, replace with hand coded stuff.
461	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
462	int3
463	ret 8
464	ENDPROC iemAImpl_ %+ %1 %+ _u64
465	%endif ; !RT_ARCH_AMD64
466
467	%if %2 != 0 ; locked versions requested?
468
469	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 12
470	PROLOGUE_3_ARGS
471	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
472	lock %1 word [A0], A1_16
473	IEM_SAVE_FLAGS A2, %3, %4
474	EPILOGUE_3_ARGS
475	ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
476
477	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 12
478	PROLOGUE_3_ARGS
479	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
480	lock %1 dword [A0], A1_32
481	IEM_SAVE_FLAGS A2, %3, %4
482	EPILOGUE_3_ARGS
483	ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
484
485	%ifdef RT_ARCH_AMD64
486	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
487	PROLOGUE_3_ARGS
488	IEM_MAYBE_LOAD_FLAGS A2, %3, %4
489	lock %1 qword [A0], A1
490	IEM_SAVE_FLAGS A2, %3, %4
491	EPILOGUE_3_ARGS_EX 8
492	ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
493	%else ; stub it for now - later, replace with hand coded stuff.
494	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 16
495	int3
496	ret 8
497	ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
498	%endif ; !RT_ARCH_AMD64
499	%endif ; locked
500	%endmacro
501	IEMIMPL_BIT_OP bt, 0, (X86_EFL_CF), (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
502	IEMIMPL_BIT_OP btc, 1, (X86_EFL_CF), (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
503	IEMIMPL_BIT_OP bts, 1, (X86_EFL_CF), (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
504	IEMIMPL_BIT_OP btr, 1, (X86_EFL_CF), (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
505
506	;;
507	; Macro for implementing a bit search operator.
508	;
509	; This will generate code for the 16, 32 and 64 bit accesses, except on 32-bit
510	; systems where the 64-bit accesses require hand coding.
511	;
512	; All the functions take a pointer to the destination memory operand in A0,
513	; the source register operand in A1 and a pointer to eflags in A2.
	;
	; The destination is only written when the search found a set bit.  With a
	; zero source the instruction sets ZF and T0 may still hold the masked guest
	; flags left behind by IEM_MAYBE_LOAD_FLAGS, so the store is skipped and the
	; guest destination is left unchanged.
	;
	; This three parameter macro shares its name with the four parameter
	; IEMIMPL_BIT_OP; the assembler picks one by parameter count.
514	;
515	; @param 1 The instruction mnemonic.
516	; @param 2 The modified flags.
517	; @param 3 The undefined flags.
518	;
519	%macro IEMIMPL_BIT_OP 3
520	BEGINCODE
521	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
522	PROLOGUE_3_ARGS
523	IEM_MAYBE_LOAD_FLAGS A2, %2, %3
524	%1 T0_16, A1_16
	jz .unchanged_dst ; zero source: no bit found, T0 holds no result.
525	mov [A0], T0_16
	.unchanged_dst: ; mov leaves the flags alone, so ZF is still valid for the save below.
526	IEM_SAVE_FLAGS A2, %2, %3
527	EPILOGUE_3_ARGS
528	ENDPROC iemAImpl_ %+ %1 %+ _u16
529
530	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
531	PROLOGUE_3_ARGS
532	IEM_MAYBE_LOAD_FLAGS A2, %2, %3
533	%1 T0_32, A1_32
	jz .unchanged_dst ; zero source: no bit found, T0 holds no result.
534	mov [A0], T0_32
	.unchanged_dst:
535	IEM_SAVE_FLAGS A2, %2, %3
536	EPILOGUE_3_ARGS
537	ENDPROC iemAImpl_ %+ %1 %+ _u32
538
539	%ifdef RT_ARCH_AMD64
540	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
541	PROLOGUE_3_ARGS
542	IEM_MAYBE_LOAD_FLAGS A2, %2, %3
543	%1 T0, A1
	jz .unchanged_dst ; zero source: no bit found, T0 holds no result.
544	mov [A0], T0
	.unchanged_dst:
545	IEM_SAVE_FLAGS A2, %2, %3
546	EPILOGUE_3_ARGS_EX 8
547	ENDPROC iemAImpl_ %+ %1 %+ _u64
548	%else ; stub it for now - later, replace with hand coded stuff.
549	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 16
550	int3
551	ret 8
552	ENDPROC iemAImpl_ %+ %1 %+ _u64
553	%endif ; !RT_ARCH_AMD64
554	%endmacro
555	IEMIMPL_BIT_OP bsf, (X86_EFL_ZF), (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF)
556	IEMIMPL_BIT_OP bsr, (X86_EFL_ZF), (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF)
557
558
559	;
560	; IMUL is also a similar but yet different case (no lock, no mem dst).
561	; The rDX:rAX variant of imul is handled together with mul further down.
562	;
563	BEGINCODE
564	BEGINPROC_FASTCALL iemAImpl_imul_two_u16, 12
565	PROLOGUE_3_ARGS
566	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_CF), (X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
567	imul A1_16, word [A0]
568	mov [A0], A1_16
569	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_CF), (X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
570	EPILOGUE_3_ARGS
571	ENDPROC iemAImpl_imul_two_u16
572
573	BEGINPROC_FASTCALL iemAImpl_imul_two_u32, 12
574	PROLOGUE_3_ARGS
575	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_CF), (X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
576	imul A1_32, dword [A0]
577	mov [A0], A1_32
578	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_CF), (X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
579	EPILOGUE_3_ARGS
580	ENDPROC iemAImpl_imul_two_u32
581
582	BEGINPROC_FASTCALL iemAImpl_imul_two_u64, 16
583	PROLOGUE_3_ARGS
584	%ifdef RT_ARCH_AMD64
585	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_CF), (X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
586	imul A1, qword [A0]
587	mov [A0], A1
588	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_CF), (X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
589	%else
590	int3 ;; @todo implement me
591	%endif
592	EPILOGUE_3_ARGS_EX 8
593	ENDPROC iemAImpl_imul_two_u64
594
595
596	;
597	; XCHG for memory operands. This implies locking. No flag changes.
598	;
599	; Each function takes two arguments, first the pointer to the memory,
600	; then the pointer to the register. They all return void.
601	;
602	BEGINCODE
603	BEGINPROC_FASTCALL iemAImpl_xchg_u8, 8
604	PROLOGUE_2_ARGS
605	mov T0_8, [A1]
606	xchg [A0], T0_8
607	mov [A1], T0_8
608	EPILOGUE_2_ARGS
609	ENDPROC iemAImpl_xchg_u8
610
611	BEGINPROC_FASTCALL iemAImpl_xchg_u16, 8
612	PROLOGUE_2_ARGS
613	mov T0_16, [A1]
614	xchg [A0], T0_16
615	mov [A1], T0_16
616	EPILOGUE_2_ARGS
617	ENDPROC iemAImpl_xchg_u16
618
619	BEGINPROC_FASTCALL iemAImpl_xchg_u32, 8
620	PROLOGUE_2_ARGS
621	mov T0_32, [A1]
622	xchg [A0], T0_32
623	mov [A1], T0_32
624	EPILOGUE_2_ARGS
625	ENDPROC iemAImpl_xchg_u32
626
627	BEGINPROC_FASTCALL iemAImpl_xchg_u64, 8
628	%ifdef RT_ARCH_AMD64
629	PROLOGUE_2_ARGS
630	mov T0, [A1]
631	xchg [A0], T0
632	mov [A1], T0
633	EPILOGUE_2_ARGS
634	%else
635	int3
636	ret 0
637	%endif
638	ENDPROC iemAImpl_xchg_u64
639
640
641	;
642	; XADD for memory operands.
643	;
644	; Each function takes three arguments, first the pointer to the
645	; memory/register, then the pointer to the register, and finally a pointer to
646	; eflags. They all return void.
647	;
648	BEGINCODE
649	BEGINPROC_FASTCALL iemAImpl_xadd_u8, 12
650	PROLOGUE_3_ARGS
651	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
652	mov T0_8, [A1]
653	xadd [A0], T0_8
654	mov [A1], T0_8
655	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
656	EPILOGUE_3_ARGS
657	ENDPROC iemAImpl_xadd_u8
658
659	BEGINPROC_FASTCALL iemAImpl_xadd_u16, 12
660	PROLOGUE_3_ARGS
661	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
662	mov T0_16, [A1]
663	xadd [A0], T0_16
664	mov [A1], T0_16
665	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
666	EPILOGUE_3_ARGS
667	ENDPROC iemAImpl_xadd_u16
668
669	BEGINPROC_FASTCALL iemAImpl_xadd_u32, 12
670	PROLOGUE_3_ARGS
671	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
672	mov T0_32, [A1]
673	xadd [A0], T0_32
674	mov [A1], T0_32
675	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
676	EPILOGUE_3_ARGS
677	ENDPROC iemAImpl_xadd_u32
678
679	BEGINPROC_FASTCALL iemAImpl_xadd_u64, 12
680	%ifdef RT_ARCH_AMD64
681	PROLOGUE_3_ARGS
682	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
683	mov T0, [A1]
684	xadd [A0], T0
685	mov [A1], T0
686	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
687	EPILOGUE_3_ARGS
688	%else
689	int3
690	ret 4
691	%endif
692	ENDPROC iemAImpl_xadd_u64
693
694	BEGINPROC_FASTCALL iemAImpl_xadd_u8_locked, 12
695	PROLOGUE_3_ARGS
696	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
697	mov T0_8, [A1]
698	lock xadd [A0], T0_8
699	mov [A1], T0_8
700	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
701	EPILOGUE_3_ARGS
702	ENDPROC iemAImpl_xadd_u8_locked
703
704	BEGINPROC_FASTCALL iemAImpl_xadd_u16_locked, 12
705	PROLOGUE_3_ARGS
706	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
707	mov T0_16, [A1]
708	lock xadd [A0], T0_16
709	mov [A1], T0_16
710	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
711	EPILOGUE_3_ARGS
712	ENDPROC iemAImpl_xadd_u16_locked
713
714	BEGINPROC_FASTCALL iemAImpl_xadd_u32_locked, 12
715	PROLOGUE_3_ARGS
716	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
717	mov T0_32, [A1]
718	lock xadd [A0], T0_32
719	mov [A1], T0_32
720	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
721	EPILOGUE_3_ARGS
722	ENDPROC iemAImpl_xadd_u32_locked
723
724	BEGINPROC_FASTCALL iemAImpl_xadd_u64_locked, 12
725	%ifdef RT_ARCH_AMD64
726	PROLOGUE_3_ARGS
727	IEM_MAYBE_LOAD_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
728	mov T0, [A1]
729	lock xadd [A0], T0
730	mov [A1], T0
731	IEM_SAVE_FLAGS A2, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
732	EPILOGUE_3_ARGS
733	%else
734	int3
735	ret 4
736	%endif
737	ENDPROC iemAImpl_xadd_u64_locked
738
739
740	;
741	; CMPXCHG8B.
742	;
743	; These are tricky register wise, so the code is duplicated for each calling
744	; convention.
745	;
746	; WARNING! This code makes ASSUMPTIONS about which registers T1 and T0 are mapped to!
747	;
748	; C-proto:
749	; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg8b,(uint64_t *pu64Dst, PRTUINT64U pu64EaxEdx, PRTUINT64U pu64EbxEcx,
750	; uint32_t *pEFlags));
751	;
752	BEGINCODE
753	BEGINPROC_FASTCALL iemAImpl_cmpxchg8b, 16
754	%ifdef RT_ARCH_AMD64
755	%ifdef ASM_CALL64_MSC
756	push rbx ; ebx is needed for the low half of the replacement value; restored before returning.
757
758	mov r11, rdx ; pu64EaxEdx (is also T1) - moved because edx is an implicit cmpxchg8b operand.
759	mov r10, rcx ; pu64Dst - moved because ecx is an implicit cmpxchg8b operand.
760
761	mov ebx, [r8] ; ecx:ebx = the replacement value from pu64EbxEcx.
762	mov ecx, [r8 + 4]
763	IEM_MAYBE_LOAD_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
764	mov eax, [r11] ; edx:eax = the comparand from pu64EaxEdx (loaded after the flag macro clobbered eax).
765	mov edx, [r11 + 4]
766
767	lock cmpxchg8b [r10]
768
769	mov [r11], eax ; write edx:eax back to pu64EaxEdx (mov does not touch the flags).
770	mov [r11 + 4], edx
771	IEM_SAVE_FLAGS r9, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11) - r11 is no longer needed here.
772
773	pop rbx
774	ret
775	%else ; !ASM_CALL64_MSC - arguments arrive in rdi, rsi, rdx, rcx (see the ASM_CALL64_GCC A0..A3 defines).
776	push rbx ; ebx is needed for the low half of the replacement value; restored before returning.
777
778	mov r10, rcx ; pEFlags - moved because ecx is an implicit cmpxchg8b operand.
779	mov r11, rdx ; pu64EbxEcx (is also T1) - moved because edx is an implicit cmpxchg8b operand.
780
781	mov ebx, [r11] ; ecx:ebx = the replacement value from pu64EbxEcx.
782	mov ecx, [r11 + 4]
783	IEM_MAYBE_LOAD_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
784	mov eax, [rsi] ; edx:eax = the comparand from pu64EaxEdx.
785	mov edx, [rsi + 4]
786
787	lock cmpxchg8b [rdi]
788
789	mov [rsi], eax ; write edx:eax back to pu64EaxEdx (mov does not touch the flags).
790	mov [rsi + 4], edx
791	IEM_SAVE_FLAGS r10, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, r11) - r11 is no longer needed here.
792
793	pop rbx
794	ret
795
796	%endif
797	%else ; !RT_ARCH_AMD64 - 32-bit: two arguments in ecx/edx, two on the stack.
798	push esi
799	push edi
800	push ebx
801	push ebp
802
803	mov edi, ecx ; pu64Dst
804	mov esi, edx ; pu64EaxEdx
805	mov ecx, [esp + 16 + 4 + 0] ; pu64EbxEcx - skip the four pushes (16) and the return address (4).
806	mov ebp, [esp + 16 + 4 + 4] ; pEFlags
807
808	mov ebx, [ecx] ; ecx:ebx = the replacement value; ecx is both pointer and destination, so it is loaded last.
809	mov ecx, [ecx + 4]
810	IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0 (eax)
811	mov eax, [esi] ; edx:eax = the comparand from pu64EaxEdx.
812	mov edx, [esi + 4]
813
814	lock cmpxchg8b [edi]
815
816	mov [esi], eax ; write edx:eax back to pu64EaxEdx (mov does not touch the flags).
817	mov [esi + 4], edx
818	IEM_SAVE_FLAGS ebp, (X86_EFL_ZF), 0 ; clobbers T0+T1 (eax, edi) - edi is no longer needed here.
819
820	pop ebp
821	pop ebx
822	pop edi
823	pop esi
824	ret 8 ; pop the two stack arguments.
825	%endif
826	ENDPROC iemAImpl_cmpxchg8b
827
828	BEGINPROC_FASTCALL iemAImpl_cmpxchg8b_locked, 16
829	; Lazy bird always lock prefixes cmpxchg8b.
830	jmp NAME_FASTCALL(iemAImpl_cmpxchg8b,16,$@)
831	ENDPROC iemAImpl_cmpxchg8b_locked
832
833
834
835	;
836	; CMPXCHG.
837	;
838	; WARNING! This code makes ASSUMPTIONS about which registers T1 and T0 are mapped to!
839	;
840	; C-proto:
841	; IEM_DECL_IMPL_DEF(void, iemAImpl_cmpxchg,(uintX_t *puXDst, uintX_t *puEax, uintX_t uReg, uint32_t *pEFlags));
842	;
843	BEGINCODE
844	%macro IEMIMPL_CMPXCHG 2
845	BEGINPROC_FASTCALL iemAImpl_cmpxchg_u8 %+ %2, 16
846	PROLOGUE_4_ARGS
847	IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0 (eax)
848	mov al, [A1]
849	%1 cmpxchg [A0], A2_8
850	mov [A1], al
851	IEM_SAVE_FLAGS A3, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
852	EPILOGUE_4_ARGS
853	ENDPROC iemAImpl_cmpxchg_u8 %+ %2
854
855	BEGINPROC_FASTCALL iemAImpl_cmpxchg_u16 %+ %2, 16
856	PROLOGUE_4_ARGS
857	IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0 (eax)
858	mov ax, [A1]
859	%1 cmpxchg [A0], A2_16
860	mov [A1], ax
861	IEM_SAVE_FLAGS A3, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
862	EPILOGUE_4_ARGS
863	ENDPROC iemAImpl_cmpxchg_u16 %+ %2
864
865	BEGINPROC_FASTCALL iemAImpl_cmpxchg_u32 %+ %2, 16
866	PROLOGUE_4_ARGS
867	IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0 (eax)
868	mov eax, [A1]
869	%1 cmpxchg [A0], A2_32
870	mov [A1], eax
871	IEM_SAVE_FLAGS A3, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
872	EPILOGUE_4_ARGS
873	ENDPROC iemAImpl_cmpxchg_u32 %+ %2
874
875	BEGINPROC_FASTCALL iemAImpl_cmpxchg_u64 %+ %2, 16
876	%ifdef RT_ARCH_AMD64
877	PROLOGUE_4_ARGS
878	IEM_MAYBE_LOAD_FLAGS A3, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0 (eax)
879	mov rax, [A1] ; load the whole 64-bit comparand - a qword cmpxchg compares all of rax.
880	%1 cmpxchg [A0], A2
881	mov [A1], rax ; hand back the full 64-bit accumulator (holds the memory value if the compare failed).
882	IEM_SAVE_FLAGS A3, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0+T1 (eax, r11/edi)
883	EPILOGUE_4_ARGS
884	%else
885	;
886	; Must use cmpxchg8b here. See also iemAImpl_cmpxchg8b.
887	;
888	push esi
889	push edi
890	push ebx
891	push ebp
892
893	mov edi, ecx ; pu64Dst
894	mov esi, edx ; pu64Rax
895	mov ecx, [esp + 16 + 4 + 0] ; pu64Reg - Note! Pointer on 32-bit hosts!
896	mov ebp, [esp + 16 + 4 + 4] ; pEFlags
897
898	mov ebx, [ecx] ; ecx:ebx = the replacement value.
899	mov ecx, [ecx + 4]
900	IEM_MAYBE_LOAD_FLAGS ebp, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0 (eax)
901	mov eax, [esi] ; edx:eax = the comparand.
902	mov edx, [esi + 4]
903
904	lock cmpxchg8b [edi] ; always locked here, also in the variant without the _locked suffix.
905
906	; cmpxchg8b doesn't set CF, PF, AF, SF and OF, so we have to do that.
907	jnz .cmpxchg8b_not_equal ; ZF clear means the compare failed and edx:eax now holds the memory value.
908	cmp eax, eax ; just set the other flags.
909	.store:
910	mov [esi], eax ; mov leaves the flags alone, so the compare result survives until the save below.
911	mov [esi + 4], edx
912	IEM_SAVE_FLAGS ebp, (X86_EFL_ZF \| X86_EFL_CF \| X86_EFL_PF \| X86_EFL_AF \| X86_EFL_SF \| X86_EFL_OF), 0 ; clobbers T0+T1 (eax, edi)
913
914	pop ebp
915	pop ebx
916	pop edi
917	pop esi
918	ret 8
919
920	.cmpxchg8b_not_equal:
	; [esi] still holds the old comparand: compare it with the memory value, high dword first.
921	cmp [esi + 4], edx ;; @todo FIXME - verify 64-bit compare implementation
922	jne .store
923	cmp [esi], eax
924	jmp .store
925
926	%endif
927	ENDPROC iemAImpl_cmpxchg_u64 %+ %2
928	%endmacro ; IEMIMPL_CMPXCHG
929
930	IEMIMPL_CMPXCHG , ,
931	IEMIMPL_CMPXCHG lock, _locked
932
933	;;
934	; Macro for implementing a unary operator.
935	;
936	; This will generate code for the 8, 16, 32 and 64 bit accesses with locked
937	; variants, except on 32-bit system where the 64-bit accesses requires hand
938	; coding.
939	;
940	; All the functions takes a pointer to the destination memory operand in A0,
941	; the source register operand in A1 and a pointer to eflags in A2.
942	;
943	; @param 1 The instruction mnemonic.
944	; @param 2 The modified flags.
945	; @param 3 The undefined flags.
946	;
947	%macro IEMIMPL_UNARY_OP 3
948	BEGINCODE
949	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 8
950	PROLOGUE_2_ARGS
951	IEM_MAYBE_LOAD_FLAGS A1, %2, %3
952	%1 byte [A0]
953	IEM_SAVE_FLAGS A1, %2, %3
954	EPILOGUE_2_ARGS
955	ENDPROC iemAImpl_ %+ %1 %+ _u8
956
957	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8_locked, 8
958	PROLOGUE_2_ARGS
959	IEM_MAYBE_LOAD_FLAGS A1, %2, %3
960	lock %1 byte [A0]
961	IEM_SAVE_FLAGS A1, %2, %3
962	EPILOGUE_2_ARGS
963	ENDPROC iemAImpl_ %+ %1 %+ _u8_locked
964
965	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 8
966	PROLOGUE_2_ARGS
967	IEM_MAYBE_LOAD_FLAGS A1, %2, %3
968	%1 word [A0]
969	IEM_SAVE_FLAGS A1, %2, %3
970	EPILOGUE_2_ARGS
971	ENDPROC iemAImpl_ %+ %1 %+ _u16
972
973	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16_locked, 8
974	PROLOGUE_2_ARGS
975	IEM_MAYBE_LOAD_FLAGS A1, %2, %3
976	lock %1 word [A0]
977	IEM_SAVE_FLAGS A1, %2, %3
978	EPILOGUE_2_ARGS
979	ENDPROC iemAImpl_ %+ %1 %+ _u16_locked
980
981	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 8
982	PROLOGUE_2_ARGS
983	IEM_MAYBE_LOAD_FLAGS A1, %2, %3
984	%1 dword [A0]
985	IEM_SAVE_FLAGS A1, %2, %3
986	EPILOGUE_2_ARGS
987	ENDPROC iemAImpl_ %+ %1 %+ _u32
988
989	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32_locked, 8
990	PROLOGUE_2_ARGS
991	IEM_MAYBE_LOAD_FLAGS A1, %2, %3
992	lock %1 dword [A0]
993	IEM_SAVE_FLAGS A1, %2, %3
994	EPILOGUE_2_ARGS
995	ENDPROC iemAImpl_ %+ %1 %+ _u32_locked
996
997	%ifdef RT_ARCH_AMD64
998	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
999	PROLOGUE_2_ARGS
1000	IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1001	%1 qword [A0]
1002	IEM_SAVE_FLAGS A1, %2, %3
1003	EPILOGUE_2_ARGS
1004	ENDPROC iemAImpl_ %+ %1 %+ _u64
1005
1006	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
1007	PROLOGUE_2_ARGS
1008	IEM_MAYBE_LOAD_FLAGS A1, %2, %3
1009	lock %1 qword [A0]
1010	IEM_SAVE_FLAGS A1, %2, %3
1011	EPILOGUE_2_ARGS
1012	ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
1013	%else
1014	; stub them for now.
1015	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 8
1016	int3
1017	ret 0
1018	ENDPROC iemAImpl_ %+ %1 %+ _u64
1019	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64_locked, 8
1020	int3
1021	ret 0
1022	ENDPROC iemAImpl_ %+ %1 %+ _u64_locked
1023	%endif
1024
1025	%endmacro
1026
1027	IEMIMPL_UNARY_OP inc, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF), 0
1028	IEMIMPL_UNARY_OP dec, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF), 0
1029	IEMIMPL_UNARY_OP neg, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF), 0
1030	IEMIMPL_UNARY_OP not, 0, 0
1031
1032
1033
1034	;;
1035	; Macro for implementing a shift operation.
1036	;
1037	; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1038	; 32-bit system where the 64-bit accesses requires hand coding.
1039	;
1040	; All the functions takes a pointer to the destination memory operand in A0,
1041	; the shift count in A1 and a pointer to eflags in A2.
1042	;
1043	; @param 1 The instruction mnemonic.
1044	; @param 2 The modified flags.
1045	; @param 3 The undefined flags.
1046	;
1047	; Makes ASSUMPTIONS about A0, A1 and A2 assignments.
1048	;
1049	%macro IEMIMPL_SHIFT_OP 3
1050	BEGINCODE
1051	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12
1052	PROLOGUE_3_ARGS
1053	IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1054	%ifdef ASM_CALL64_GCC
1055	mov cl, A1_8 ; the count must be in cl; with this convention A0 (rdi) and A2 (rdx) are not rcx, so nothing needed is lost.
1056	%1 byte [A0], cl
1057	%else
1058	xchg A1, A0 ; A0 is rcx/ecx for ASM_CALL64_MSC and x86: swap so the count lands in cl and the pointer in A1.
1059	%1 byte [A1], cl
1060	%endif
1061	IEM_SAVE_FLAGS A2, %2, %3
1062	EPILOGUE_3_ARGS
1063	ENDPROC iemAImpl_ %+ %1 %+ _u8
1064
1065	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 12
1066	PROLOGUE_3_ARGS
1067	IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1068	%ifdef ASM_CALL64_GCC
1069	mov cl, A1_8
1070	%1 word [A0], cl
1071	%else
1072	xchg A1, A0
1073	%1 word [A1], cl
1074	%endif
1075	IEM_SAVE_FLAGS A2, %2, %3
1076	EPILOGUE_3_ARGS
1077	ENDPROC iemAImpl_ %+ %1 %+ _u16
1078
1079	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 12
1080	PROLOGUE_3_ARGS
1081	IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1082	%ifdef ASM_CALL64_GCC
1083	mov cl, A1_8
1084	%1 dword [A0], cl
1085	%else
1086	xchg A1, A0
1087	%1 dword [A1], cl
1088	%endif
1089	IEM_SAVE_FLAGS A2, %2, %3
1090	EPILOGUE_3_ARGS
1091	ENDPROC iemAImpl_ %+ %1 %+ _u32
1092
1093	%ifdef RT_ARCH_AMD64
1094	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
1095	PROLOGUE_3_ARGS
1096	IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1097	%ifdef ASM_CALL64_GCC
1098	mov cl, A1_8
1099	%1 qword [A0], cl
1100	%else
1101	xchg A1, A0
1102	%1 qword [A1], cl
1103	%endif
1104	IEM_SAVE_FLAGS A2, %2, %3
1105	EPILOGUE_3_ARGS
1106	ENDPROC iemAImpl_ %+ %1 %+ _u64
1107	%else ; stub it for now - later, replace with hand coded stuff.
1108	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 12
1109	int3
1110	ret 4
1111	ENDPROC iemAImpl_ %+ %1 %+ _u64
1112	%endif ; !RT_ARCH_AMD64
1113
1114	%endmacro
1115
1116	IEMIMPL_SHIFT_OP rol, (X86_EFL_OF \| X86_EFL_CF), 0
1117	IEMIMPL_SHIFT_OP ror, (X86_EFL_OF \| X86_EFL_CF), 0
1118	IEMIMPL_SHIFT_OP rcl, (X86_EFL_OF \| X86_EFL_CF), 0
1119	IEMIMPL_SHIFT_OP rcr, (X86_EFL_OF \| X86_EFL_CF), 0
1120	IEMIMPL_SHIFT_OP shl, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_PF \| X86_EFL_CF), (X86_EFL_AF)
1121	IEMIMPL_SHIFT_OP shr, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_PF \| X86_EFL_CF), (X86_EFL_AF)
1122	IEMIMPL_SHIFT_OP sar, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_PF \| X86_EFL_CF), (X86_EFL_AF)
1123
1124
1125	;;
1126	; Macro for implementing a double precision shift operation.
1127	;
1128	; This will generate code for the 16, 32 and 64 bit accesses, except on
1129	; 32-bit system where the 64-bit accesses requires hand coding.
1130	;
1131	; The functions takes the destination operand (r/m) in A0, the source (reg) in
1132	; A1, the shift count in A2 and a pointer to the eflags variable/register in A3.
1133	;
1134	; @param 1 The instruction mnemonic.
1135	; @param 2 The modified flags.
1136	; @param 3 The undefined flags.
1137	;
1138	; Makes ASSUMPTIONS about A0, A1, A2 and A3 assignments.
1139	;
1140	%macro IEMIMPL_SHIFT_DBL_OP 3
1141	BEGINCODE
1142	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16 ; 16-bit operand variant
1143	PROLOGUE_4_ARGS
1144	IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1145	%ifdef ASM_CALL64_GCC
1146	xchg A3, A2 ; presumably A3 is xCX here: move the count into CL, parking the eflags pointer in A2
1147	%1 [A0], A1_16, cl
1148	xchg A3, A2 ; swap back so A3 is the eflags pointer again for IEM_SAVE_FLAGS
1149	%else
1150	xchg A0, A2 ; presumably A0 is xCX here: count goes to CL, destination pointer to A2
1151	%1 [A2], A1_16, cl
1152	%endif
1153	IEM_SAVE_FLAGS A3, %2, %3
1154	EPILOGUE_4_ARGS
1155	ENDPROC iemAImpl_ %+ %1 %+ _u16
1156
1157	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16 ; 32-bit operand variant
1158	PROLOGUE_4_ARGS
1159	IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1160	%ifdef ASM_CALL64_GCC
1161	xchg A3, A2 ; count into CL, eflags pointer parked in A2
1162	%1 [A0], A1_32, cl
1163	xchg A3, A2 ; restore A3 for IEM_SAVE_FLAGS
1164	%else
1165	xchg A0, A2 ; count into CL, destination pointer into A2
1166	%1 [A2], A1_32, cl
1167	%endif
1168	IEM_SAVE_FLAGS A3, %2, %3
1169	EPILOGUE_4_ARGS
1170	ENDPROC iemAImpl_ %+ %1 %+ _u32
1171
1172	%ifdef RT_ARCH_AMD64
1173	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20 ; 64-bit operand variant, only assembled for AMD64
1174	PROLOGUE_4_ARGS
1175	IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1176	%ifdef ASM_CALL64_GCC
1177	xchg A3, A2 ; count into CL, eflags pointer parked in A2
1178	%1 [A0], A1, cl
1179	xchg A3, A2 ; restore A3 for IEM_SAVE_FLAGS
1180	%else
1181	xchg A0, A2 ; count into CL, destination pointer into A2
1182	%1 [A2], A1, cl
1183	%endif
1184	IEM_SAVE_FLAGS A3, %2, %3
1185	EPILOGUE_4_ARGS_EX 12
1186	ENDPROC iemAImpl_ %+ %1 %+ _u64
1187	%else ; stub it for now - later, replace with hand coded stuff.
1188	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
1189	int3 ; breakpoint: the 64-bit variant is not implemented on 32-bit hosts
1190	ret 12 ; NOTE(review): raw 'ret 12' instead of the RET_FASTCALL wrapper - confirm this is right for non-Windows x86
1191	ENDPROC iemAImpl_ %+ %1 %+ _u64
1192	%endif ; !RT_ARCH_AMD64
1193
1194	%endmacro
1195
1196	IEMIMPL_SHIFT_DBL_OP shld, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_PF \| X86_EFL_CF), (X86_EFL_AF) ; AF is passed as undefined
1197	IEMIMPL_SHIFT_DBL_OP shrd, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_PF \| X86_EFL_CF), (X86_EFL_AF)
1198
1199
1200	;;
1201	; Macro for implementing a multiplication operations.
1202	;
1203	; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1204	; 32-bit system where the 64-bit accesses requires hand coding.
1205	;
1206	; The 8-bit function only operates on AX, so it takes no DX pointer. The other
1207	; functions takes a pointer to rAX in A0, rDX in A1, the operand in A2 and a
1208	; pointer to eflags in A3.
1209	;
1210	; The functions all return 0 so the caller can be used for div/idiv as well as
1211	; for the mul/imul implementation.
1212	;
1213	; @param 1 The instruction mnemonic.
1214	; @param 2 The modified flags.
1215	; @param 3 The undefined flags.
1216	;
1217	; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
1218	;
1219	%macro IEMIMPL_MUL_OP 3
1220	BEGINCODE
1221	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12 ; 8-bit variant: A0 = pointer to AX, A1 = operand, A2 = pointer to eflags
1222	PROLOGUE_3_ARGS
1223	IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1224	mov al, [A0] ; only the low byte of the guest AX is an input
1225	%1 A1_8
1226	mov [A0], ax ; the 16-bit product replaces the guest AX
1227	IEM_SAVE_FLAGS A2, %2, %3
1228	xor eax, eax ; return 0
1229	EPILOGUE_3_ARGS
1230	ENDPROC iemAImpl_ %+ %1 %+ _u8
1231
1232	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16 ; 16-bit variant
1233	PROLOGUE_4_ARGS
1234	IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1235	mov ax, [A0]
1236	%ifdef ASM_CALL64_GCC
1237	%1 A2_16
1238	mov [A0], ax
1239	mov [A1], dx
1240	%else
1241	mov T1, A1 ; keep the rDX pointer in T1; presumably A1 is xDX here, which %1 overwrites
1242	%1 A2_16
1243	mov [A0], ax
1244	mov [T1], dx
1245	%endif
1246	IEM_SAVE_FLAGS A3, %2, %3
1247	xor eax, eax ; return 0
1248	EPILOGUE_4_ARGS
1249	ENDPROC iemAImpl_ %+ %1 %+ _u16
1250
1251	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16 ; 32-bit variant
1252	PROLOGUE_4_ARGS
1253	IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1254	mov eax, [A0]
1255	%ifdef ASM_CALL64_GCC
1256	%1 A2_32
1257	mov [A0], eax
1258	mov [A1], edx
1259	%else
1260	mov T1, A1 ; keep the rDX pointer in T1 across the multiply
1261	%1 A2_32
1262	mov [A0], eax
1263	mov [T1], edx
1264	%endif
1265	IEM_SAVE_FLAGS A3, %2, %3
1266	xor eax, eax ; return 0
1267	EPILOGUE_4_ARGS
1268	ENDPROC iemAImpl_ %+ %1 %+ _u32
1269
1270	%ifdef RT_ARCH_AMD64
1271	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20 ; 64-bit variant, only assembled for AMD64
1272	PROLOGUE_4_ARGS
1273	IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1274	mov rax, [A0]
1275	%ifdef ASM_CALL64_GCC
1276	%1 A2
1277	mov [A0], rax
1278	mov [A1], rdx
1279	%else
1280	mov T1, A1 ; keep the rDX pointer in T1 across the multiply
1281	%1 A2
1282	mov [A0], rax
1283	mov [T1], rdx
1284	%endif
1285	IEM_SAVE_FLAGS A3, %2, %3
1286	xor eax, eax ; return 0
1287	EPILOGUE_4_ARGS_EX 12
1288	ENDPROC iemAImpl_ %+ %1 %+ _u64
1289	%else ; stub it for now - later, replace with hand coded stuff.
1290	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
1291	int3 ; breakpoint: the 64-bit variant is not implemented on 32-bit hosts
1292	ret 12 ; NOTE(review): raw 'ret 12' instead of the RET_FASTCALL wrapper - confirm this is right for non-Windows x86
1293	ENDPROC iemAImpl_ %+ %1 %+ _u64
1294	%endif ; !RT_ARCH_AMD64
1295
1296	%endmacro
1297
1298	IEMIMPL_MUL_OP mul, (X86_EFL_OF \| X86_EFL_CF), (X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF) ; OF/CF modified, the rest passed as undefined
1299	IEMIMPL_MUL_OP imul, (X86_EFL_OF \| X86_EFL_CF), (X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF)
1300
1301
1302	;;
1303	; Macro for implementing a division operations.
1304	;
1305	; This will generate code for the 8, 16, 32 and 64 bit accesses, except on
1306	; 32-bit system where the 64-bit accesses requires hand coding.
1307	;
1308	; The 8-bit function only operates on AX, so it takes no DX pointer. The other
1309	; functions takes a pointer to rAX in A0, rDX in A1, the operand in A2 and a
1310	; pointer to eflags in A3.
1311	;
1312	; The functions all return 0 on success and -1 if a divide error should be
1313	; raised by the caller.
1314	;
1315	; @param 1 The instruction mnemonic.
1316	; @param 2 The modified flags.
1317	; @param 3 The undefined flags.
1318	;
1319	; Makes ASSUMPTIONS about A0, A1, A2, A3, T0 and T1 assignments.
1320	;
1321	%macro IEMIMPL_DIV_OP 3
1322	BEGINCODE
1323	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u8, 12 ; 8-bit variant: A0 = pointer to AX, A1 = divisor, A2 = pointer to eflags
1324	PROLOGUE_3_ARGS
1325
1326	test A1_8, A1_8 ; no DX pointer in the 8-bit variant, so the divisor is A1
1327	jz .div_zero
1328	;; @todo test for overflow
1329
1330	IEM_MAYBE_LOAD_FLAGS A2, %2, %3
1331	mov ax, [A0] ; the whole guest AX is the dividend
1332	%1 A1_8
1333	mov [A0], ax ; quotient and remainder go back to the guest AX
1334	IEM_SAVE_FLAGS A2, %2, %3
1335	xor eax, eax ; return 0 = success
1336
1337	.return:
1338	EPILOGUE_3_ARGS
1339
1340	.div_zero:
1341	mov eax, -1 ; return -1 = caller must raise a divide error
1342	jmp .return
1343	ENDPROC iemAImpl_ %+ %1 %+ _u8
1344
1345	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u16, 16 ; 16-bit variant: A0 = pointer to rAX, A1 = pointer to rDX, A2 = divisor, A3 = pointer to eflags
1346	PROLOGUE_4_ARGS
1347
1348	test A2_16, A2_16 ; the divisor is A2; A1 is the rDX pointer and must not be tested
1349	jz .div_zero
1350	;; @todo test for overflow
1351
1352	IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1353	%ifdef ASM_CALL64_GCC
1354	mov T1, A2 ; move the divisor out of A2 before rDX is loaded with the dividend
1355	mov ax, [A0]
1356	mov dx, [A1]
1357	%1 T1_16
1358	mov [A0], ax
1359	mov [A1], dx
1360	%else
1361	mov T1, A1 ; keep the rDX pointer in T1 before rDX is loaded with the dividend
1362	mov ax, [A0]
1363	mov dx, [T1]
1364	%1 A2_16
1365	mov [A0], ax
1366	mov [T1], dx
1367	%endif
1368	IEM_SAVE_FLAGS A3, %2, %3
1369	xor eax, eax ; return 0 = success
1370
1371	.return:
1372	EPILOGUE_4_ARGS
1373
1374	.div_zero:
1375	mov eax, -1 ; return -1 = caller must raise a divide error
1376	jmp .return
1377	ENDPROC iemAImpl_ %+ %1 %+ _u16
1378
1379	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u32, 16 ; 32-bit variant, same argument layout as the 16-bit one
1380	PROLOGUE_4_ARGS
1381
1382	test A2_32, A2_32 ; the divisor is A2; A1 is the rDX pointer and must not be tested
1383	jz .div_zero
1384	;; @todo test for overflow
1385
1386	IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1387	; rAX and rDX are loaded inside the ABI specific branches below.
1388	%ifdef ASM_CALL64_GCC
1389	mov T1, A2 ; move the divisor out of A2 before rDX is loaded with the dividend
1390	mov eax, [A0]
1391	mov edx, [A1]
1392	%1 T1_32
1393	mov [A0], eax
1394	mov [A1], edx
1395	%else
1396	mov T1, A1 ; keep the rDX pointer in T1 before rDX is loaded with the dividend
1397	mov eax, [A0]
1398	mov edx, [T1]
1399	%1 A2_32
1400	mov [A0], eax
1401	mov [T1], edx
1402	%endif
1403	IEM_SAVE_FLAGS A3, %2, %3
1404	xor eax, eax ; return 0 = success
1405
1406	.return:
1407	EPILOGUE_4_ARGS
1408
1409	.div_zero:
1410	mov eax, -1 ; return -1 = caller must raise a divide error
1411	jmp .return
1412	ENDPROC iemAImpl_ %+ %1 %+ _u32
1413
1414	%ifdef RT_ARCH_AMD64
1415	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20 ; 64-bit variant, only assembled for AMD64
1416	PROLOGUE_4_ARGS
1417
1418	test A2, A2 ; the divisor is A2; A1 is the rDX pointer and must not be tested
1419	jz .div_zero
1420	;; @todo test for overflow
1421
1422	IEM_MAYBE_LOAD_FLAGS A3, %2, %3
1423	; rAX and rDX are loaded inside the ABI specific branches below.
1424	%ifdef ASM_CALL64_GCC
1425	mov T1, A2 ; move the divisor out of A2 before rDX is loaded with the dividend
1426	mov rax, [A0]
1427	mov rdx, [A1]
1428	%1 T1
1429	mov [A0], rax
1430	mov [A1], rdx
1431	%else
1432	mov T1, A1 ; keep the rDX pointer in T1 before rDX is loaded with the dividend
1433	mov rax, [A0]
1434	mov rdx, [T1]
1435	%1 A2
1436	mov [A0], rax
1437	mov [T1], rdx
1438	%endif
1439	IEM_SAVE_FLAGS A3, %2, %3
1440	xor eax, eax ; return 0 = success
1441
1442	.return:
1443	EPILOGUE_4_ARGS_EX 12
1444
1445	.div_zero:
1446	mov eax, -1 ; return -1 = caller must raise a divide error
1447	jmp .return
1448	ENDPROC iemAImpl_ %+ %1 %+ _u64
1449	%else ; stub it for now - later, replace with hand coded stuff.
1450	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _u64, 20
1451	int3 ; breakpoint: the 64-bit variant is not implemented on 32-bit hosts
1452	RET_FASTCALL 12 ; 12 bytes of stack arguments, as in the mul/shld stubs; plain 'ret' where the wrapper says so
1453	ENDPROC iemAImpl_ %+ %1 %+ _u64
1454	%endif ; !RT_ARCH_AMD64
1455
1456	%endmacro
1457
1458	IEMIMPL_DIV_OP div, 0, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF) ; no modified flags, all six passed as undefined
1459	IEMIMPL_DIV_OP idiv, 0, (X86_EFL_OF \| X86_EFL_SF \| X86_EFL_ZF \| X86_EFL_AF \| X86_EFL_PF \| X86_EFL_CF)
1460
1461
1462	;
1463	; BSWAP. No flag changes.
1464	;
1465	; Each function takes one argument, pointer to the value to bswap
1466	; (input/output). They all return void.
1467	;
1468	BEGINPROC_FASTCALL iemAImpl_bswap_u16, 4 ; A0 = pointer to the value (input/output)
1469	PROLOGUE_1_ARGS
1470	mov T0_32, [A0] ; just in case any of the upper bits are used.
1471	db 66h ; hand-emitted operand-size prefix for the bswap on the next line
1472	bswap T0_32
1473	mov [A0], T0_32 ; a full 32 bits are written back, matching the 32-bit load above
1474	EPILOGUE_1_ARGS
1475	ENDPROC iemAImpl_bswap_u16
1476
1477	BEGINPROC_FASTCALL iemAImpl_bswap_u32, 4 ; A0 = pointer to the 32-bit value (input/output)
1478	PROLOGUE_1_ARGS
1479	mov T0_32, [A0] ; load the value
1480	bswap T0_32 ; reverse its byte order
1481	mov [A0], T0_32 ; store it back in place
1482	EPILOGUE_1_ARGS
1483	ENDPROC iemAImpl_bswap_u32
1484
1485	BEGINPROC_FASTCALL iemAImpl_bswap_u64, 4 ; A0 = pointer to the 64-bit value (input/output)
1486	%ifdef RT_ARCH_AMD64
1487	PROLOGUE_1_ARGS
1488	mov T0, [A0] ; AMD64: a single 64-bit bswap does the job
1489	bswap T0
1490	mov [A0], T0
1491	EPILOGUE_1_ARGS
1492	%else
1493	PROLOGUE_1_ARGS
1494	mov T0, [A0] ; 32-bit host: T0 = low dword
1495	mov T1, [A0 + 4] ; T1 = high dword
1496	bswap T0
1497	bswap T1
1498	mov [A0 + 4], T0 ; the swapped low dword becomes the new high dword
1499	mov [A0], T1 ; the swapped high dword becomes the new low dword
1500	EPILOGUE_1_ARGS
1501	%endif
1502	ENDPROC iemAImpl_bswap_u64
1503
1504
1505	;;
1506	; Initialize the FPU for the actual instruction being emulated, this means
1507	; loading parts of the guest's control word and status word.
1508	;
1509	; @uses The FNSTENV image at [xSP]; callers reserve 20h bytes. NOTE(review): was documented as 24 bytes, the 32-bit protected mode image is 28 - confirm.
1510	; @param 1 Expression giving the address of the FXSTATE of the guest.
1511	;
1512	%macro FPU_LD_FXSTATE_FCW_AND_SAFE_FSW 1
1513	fnstenv [xSP] ; dump the current FPU environment into the stack scratch area
1514
1515	; FCW - for exception, precision and rounding control.
1516	movzx T0, word [%1 + X86FXSTATE.FCW]
1517	and T0, X86_FCW_MASK_ALL \| X86_FCW_PC_MASK \| X86_FCW_RC_MASK
1518	mov [xSP + X86FSTENV32P.FCW], T0_16 ; patch the masked guest FCW into the image
1519
1520	; FSW - for undefined C0, C1, C2, and C3.
1521	movzx T1, word [%1 + X86FXSTATE.FSW]
1522	and T1, X86_FSW_C_MASK ; T1 = the guest's condition code bits
1523	movzx T0, word [xSP + X86FSTENV32P.FSW]
1524	and T0, X86_FSW_TOP_MASK ; T0 = TOP from the status word just saved
1525	or T0, T1
1526	mov [xSP + X86FSTENV32P.FSW], T0_16 ; patch the combined FSW into the image
1527
1528	fldenv [xSP] ; make the patched environment the active one
1529	%endmacro
1530
1531
1532	;;
1533	; Need to move this as well somewhere better?
1534	;
1535	struc IEMFPURESULT
1536	.r80Result resw 5 ; 5 words = 10 bytes; accessed as a tword by the FPU helpers in this file
1537	.FSW resw 1 ; 16-bit status word, filled in with fnstsw by the FPU helpers in this file
1538	endstruc
1539
1540
1541	;;
1542	; Need to move this as well somewhere better?
1543	;
1544	struc IEMFPURESULTTWO
1545	.r80Result1 resw 5 ; first 10 byte slot (5 words)
1546	.FSW resw 1 ; 16-bit status word slot, placed between the two result slots
1547	.r80Result2 resw 5 ; second 10 byte slot (5 words)
1548	endstruc
1549
1550
1551	;
1552	;---------------------- 16-bit signed integer operations ----------------------
1553	;
1554
1555
1556	;;
1557	; Converts a 16-bit signed integer value to a 80-bit floating point one (fpu register).
1558	;
1559	; @param A0 FPU context (fxsave).
1560	; @param A1 Pointer to a IEMFPURESULT for the output.
1561	; @param A2 Pointer to the 16-bit signed integer value to convert.
1562	;
1563	BEGINPROC_FASTCALL iemAImpl_fild_i16_to_r80, 12
1564	PROLOGUE_3_ARGS
1565	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
1566
1567	fninit ; start from an initialised host FPU
1568	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1569	fild word [A2] ; load the integer as ST0
1570
1571	fnstsw word [A1 + IEMFPURESULT.FSW] ; capture the status word produced by the load
1572	fnclex ; presumably so the fstp below cannot trip over a pending exception
1573	fstp tword [A1 + IEMFPURESULT.r80Result]
1574
1575	fninit ; reset the host FPU again before returning
1576	add xSP, 20h
1577	EPILOGUE_3_ARGS
1578	ENDPROC iemAImpl_fild_i16_to_r80
1579
1580
1581	;;
1582	; Store a 80-bit floating point value (register) as a 16-bit signed integer (memory).
1583	;
1584	; @param A0 FPU context (fxsave).
1585	; @param A1 Where to return the output FSW.
1586	; @param A2 Where to store the 16-bit signed integer value.
1587	; @param A3 Pointer to the 80-bit value.
1588	;
1589	BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i16, 16
1590	PROLOGUE_4_ARGS
1591	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
1592
1593	fninit ; start from an initialised host FPU
1594	fld tword [A3] ; ST0 = the 80-bit input value
1595	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1596	fistp word [A2] ; store ST0 as a 16-bit integer and pop
1597
1598	fnstsw word [A1] ; hand the resulting status word back to the caller
1599
1600	fninit ; reset the host FPU again before returning
1601	add xSP, 20h
1602	EPILOGUE_4_ARGS
1603	ENDPROC iemAImpl_fist_r80_to_i16
1604
1605
1606	;;
1607	; Store a 80-bit floating point value (register) as a 16-bit signed integer
1608	; (memory) with truncation.
1609	;
1610	; @param A0 FPU context (fxsave).
1611	; @param A1 Where to return the output FSW.
1612	; @param A2 Where to store the 16-bit signed integer value.
1613	; @param A3 Pointer to the 80-bit value.
1614	;
1615	BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i16, 16
1616	PROLOGUE_4_ARGS
1617	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
1618
1619	fninit ; start from an initialised host FPU
1620	fld tword [A3] ; ST0 = the 80-bit input value
1621	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1622	fisttp word [A2] ; truncating store; must be word sized, a dword store would write past the 16-bit destination
1623
1624	fnstsw word [A1] ; hand the resulting status word back to the caller
1625
1626	fninit ; reset the host FPU again before returning
1627	add xSP, 20h
1628	EPILOGUE_4_ARGS
1629	ENDPROC iemAImpl_fistt_r80_to_i16
1630
1631
1632	;;
1633	; FPU instruction working on one 80-bit and one 16-bit signed integer value.
1634	;
1635	; @param 1 The instruction
1636	;
1637	; @param A0 FPU context (fxsave).
1638	; @param A1 Pointer to a IEMFPURESULT for the output.
1639	; @param A2 Pointer to the 80-bit value.
1640	; @param A3 Pointer to the 16-bit value.
1641	;
1642	%macro IEMIMPL_FPU_R80_BY_I16 1
1643	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
1644	PROLOGUE_4_ARGS
1645	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
1646
1647	fninit ; start from an initialised host FPU
1648	fld tword [A2] ; ST0 = the 80-bit operand
1649	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1650	%1 word [A3] ; apply the instruction with the 16-bit integer memory operand
1651
1652	fnstsw word [A1 + IEMFPURESULT.FSW] ; capture the status word of the operation
1653	fnclex ; presumably so the fstp below cannot trip over a pending exception
1654	fstp tword [A1 + IEMFPURESULT.r80Result]
1655
1656	fninit ; reset the host FPU again before returning
1657	add xSP, 20h
1658	EPILOGUE_4_ARGS
1659	ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
1660	%endmacro
1661
1662	IEMIMPL_FPU_R80_BY_I16 fiadd ; defines iemAImpl_fiadd_r80_by_i16, and likewise for the lines below
1663	IEMIMPL_FPU_R80_BY_I16 fimul
1664	IEMIMPL_FPU_R80_BY_I16 fisub
1665	IEMIMPL_FPU_R80_BY_I16 fisubr
1666	IEMIMPL_FPU_R80_BY_I16 fidiv
1667	IEMIMPL_FPU_R80_BY_I16 fidivr
1668
1669
1670	;;
1671	; FPU instruction working on one 80-bit and one 16-bit signed integer value,
1672	; only returning FSW.
1673	;
1674	; @param 1 The instruction
1675	;
1676	; @param A0 FPU context (fxsave).
1677	; @param A1 Where to store the output FSW.
1678	; @param A2 Pointer to the 80-bit value.
1679	; @param A3 Pointer to the 16-bit value.
1680	;
1681	%macro IEMIMPL_FPU_R80_BY_I16_FSW 1
1682	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i16, 16
1683	PROLOGUE_4_ARGS
1684	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
1685
1686	fninit ; start from an initialised host FPU
1687	fld tword [A2] ; ST0 = the 80-bit operand
1688	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1689	%1 word [A3] ; apply the instruction with the 16-bit integer memory operand
1690
1691	fnstsw word [A1] ; the status word is the only output
1692
1693	fninit ; reset the host FPU again before returning
1694	add xSP, 20h
1695	EPILOGUE_4_ARGS
1696	ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i16
1697	%endmacro
1698
1699	IEMIMPL_FPU_R80_BY_I16_FSW ficom ; defines iemAImpl_ficom_r80_by_i16
1700
1701
1702
1703	;
1704	;---------------------- 32-bit signed integer operations ----------------------
1705	;
1706
1707
1708	;;
1709	; Converts a 32-bit signed integer value to a 80-bit floating point one (fpu register).
1710	;
1711	; @param A0 FPU context (fxsave).
1712	; @param A1 Pointer to a IEMFPURESULT for the output.
1713	; @param A2 Pointer to the 32-bit signed integer value to convert.
1714	;
1715	BEGINPROC_FASTCALL iemAImpl_fild_i32_to_r80, 12
1716	PROLOGUE_3_ARGS
1717	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
1718
1719	fninit ; start from an initialised host FPU
1720	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1721	fild dword [A2] ; load the integer as ST0
1722
1723	fnstsw word [A1 + IEMFPURESULT.FSW] ; capture the status word produced by the load
1724	fnclex ; presumably so the fstp below cannot trip over a pending exception
1725	fstp tword [A1 + IEMFPURESULT.r80Result]
1726
1727	fninit ; reset the host FPU again before returning
1728	add xSP, 20h
1729	EPILOGUE_3_ARGS
1730	ENDPROC iemAImpl_fild_i32_to_r80
1731
1732
1733	;;
1734	; Store a 80-bit floating point value (register) as a 32-bit signed integer (memory).
1735	;
1736	; @param A0 FPU context (fxsave).
1737	; @param A1 Where to return the output FSW.
1738	; @param A2 Where to store the 32-bit signed integer value.
1739	; @param A3 Pointer to the 80-bit value.
1740	;
1741	BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i32, 16
1742	PROLOGUE_4_ARGS
1743	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
1744
1745	fninit ; start from an initialised host FPU
1746	fld tword [A3] ; ST0 = the 80-bit input value
1747	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1748	fistp dword [A2] ; store ST0 as a 32-bit integer and pop
1749
1750	fnstsw word [A1] ; hand the resulting status word back to the caller
1751
1752	fninit ; reset the host FPU again before returning
1753	add xSP, 20h
1754	EPILOGUE_4_ARGS
1755	ENDPROC iemAImpl_fist_r80_to_i32
1756
1757
1758	;;
1759	; Store a 80-bit floating point value (register) as a 32-bit signed integer
1760	; (memory) with truncation.
1761	;
1762	; @param A0 FPU context (fxsave).
1763	; @param A1 Where to return the output FSW.
1764	; @param A2 Where to store the 32-bit signed integer value.
1765	; @param A3 Pointer to the 80-bit value.
1766	;
1767	BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i32, 16
1768	PROLOGUE_4_ARGS
1769	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
1770
1771	fninit ; start from an initialised host FPU
1772	fld tword [A3] ; ST0 = the 80-bit input value
1773	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1774	fisttp dword [A2] ; truncating store of ST0 as a 32-bit integer, then pop
1775
1776	fnstsw word [A1] ; hand the resulting status word back to the caller
1777
1778	fninit ; reset the host FPU again before returning
1779	add xSP, 20h
1780	EPILOGUE_4_ARGS
1781	ENDPROC iemAImpl_fistt_r80_to_i32
1782
1783
1784	;;
1785	; FPU instruction working on one 80-bit and one 32-bit signed integer value.
1786	;
1787	; @param 1 The instruction
1788	;
1789	; @param A0 FPU context (fxsave).
1790	; @param A1 Pointer to a IEMFPURESULT for the output.
1791	; @param A2 Pointer to the 80-bit value.
1792	; @param A3 Pointer to the 32-bit value.
1793	;
1794	%macro IEMIMPL_FPU_R80_BY_I32 1
1795	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
1796	PROLOGUE_4_ARGS
1797	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
1798
1799	fninit ; start from an initialised host FPU
1800	fld tword [A2] ; ST0 = the 80-bit operand
1801	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1802	%1 dword [A3] ; apply the instruction with the 32-bit integer memory operand
1803
1804	fnstsw word [A1 + IEMFPURESULT.FSW] ; capture the status word of the operation
1805	fnclex ; presumably so the fstp below cannot trip over a pending exception
1806	fstp tword [A1 + IEMFPURESULT.r80Result]
1807
1808	fninit ; reset the host FPU again before returning
1809	add xSP, 20h
1810	EPILOGUE_4_ARGS
1811	ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
1812	%endmacro
1813
1814	IEMIMPL_FPU_R80_BY_I32 fiadd ; defines iemAImpl_fiadd_r80_by_i32, and likewise for the lines below
1815	IEMIMPL_FPU_R80_BY_I32 fimul
1816	IEMIMPL_FPU_R80_BY_I32 fisub
1817	IEMIMPL_FPU_R80_BY_I32 fisubr
1818	IEMIMPL_FPU_R80_BY_I32 fidiv
1819	IEMIMPL_FPU_R80_BY_I32 fidivr
1820
1821
1822	;;
1823	; FPU instruction working on one 80-bit and one 32-bit signed integer value,
1824	; only returning FSW.
1825	;
1826	; @param 1 The instruction
1827	;
1828	; @param A0 FPU context (fxsave).
1829	; @param A1 Where to store the output FSW.
1830	; @param A2 Pointer to the 80-bit value.
1831	; @param A3 Pointer to the 32-bit value.
1832	;
1833	%macro IEMIMPL_FPU_R80_BY_I32_FSW 1
1834	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_i32, 16
1835	PROLOGUE_4_ARGS
1836	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
1837
1838	fninit ; start from an initialised host FPU
1839	fld tword [A2] ; ST0 = the 80-bit operand
1840	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1841	%1 dword [A3] ; apply the instruction with the 32-bit integer memory operand
1842
1843	fnstsw word [A1] ; the status word is the only output
1844
1845	fninit ; reset the host FPU again before returning
1846	add xSP, 20h
1847	EPILOGUE_4_ARGS
1848	ENDPROC iemAImpl_ %+ %1 %+ _r80_by_i32
1849	%endmacro
1850
1851	IEMIMPL_FPU_R80_BY_I32_FSW ficom ; defines iemAImpl_ficom_r80_by_i32
1852
1853
1854
1855	;
1856	;---------------------- 64-bit signed integer operations ----------------------
1857	;
1858
1859
1860	;;
1861	; Converts a 64-bit signed integer value to a 80-bit floating point one (fpu register).
1862	;
1863	; @param A0 FPU context (fxsave).
1864	; @param A1 Pointer to a IEMFPURESULT for the output.
1865	; @param A2 Pointer to the 64-bit signed integer value to convert.
1866	;
1867	BEGINPROC_FASTCALL iemAImpl_fild_i64_to_r80, 12
1868	PROLOGUE_3_ARGS
1869	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
1870
1871	fninit ; start from an initialised host FPU
1872	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1873	fild qword [A2] ; load the integer as ST0
1874
1875	fnstsw word [A1 + IEMFPURESULT.FSW] ; capture the status word produced by the load
1876	fnclex ; presumably so the fstp below cannot trip over a pending exception
1877	fstp tword [A1 + IEMFPURESULT.r80Result]
1878
1879	fninit ; reset the host FPU again before returning
1880	add xSP, 20h
1881	EPILOGUE_3_ARGS
1882	ENDPROC iemAImpl_fild_i64_to_r80
1883
1884
1885	;;
1886	; Store a 80-bit floating point value (register) as a 64-bit signed integer (memory).
1887	;
1888	; @param A0 FPU context (fxsave).
1889	; @param A1 Where to return the output FSW.
1890	; @param A2 Where to store the 64-bit signed integer value.
1891	; @param A3 Pointer to the 80-bit value.
1892	;
1893	BEGINPROC_FASTCALL iemAImpl_fist_r80_to_i64, 16
1894	PROLOGUE_4_ARGS
1895	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
1896
1897	fninit ; start from an initialised host FPU
1898	fld tword [A3] ; ST0 = the 80-bit input value
1899	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1900	fistp qword [A2] ; store ST0 as a 64-bit integer and pop
1901
1902	fnstsw word [A1] ; hand the resulting status word back to the caller
1903
1904	fninit ; reset the host FPU again before returning
1905	add xSP, 20h
1906	EPILOGUE_4_ARGS
1907	ENDPROC iemAImpl_fist_r80_to_i64
1908
1909
1910	;;
1911	; Store a 80-bit floating point value (register) as a 64-bit signed integer
1912	; (memory) with truncation.
1913	;
1914	; @param A0 FPU context (fxsave).
1915	; @param A1 Where to return the output FSW.
1916	; @param A2 Where to store the 64-bit signed integer value.
1917	; @param A3 Pointer to the 80-bit value.
1918	;
1919	BEGINPROC_FASTCALL iemAImpl_fistt_r80_to_i64, 16
1920	PROLOGUE_4_ARGS
1921	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
1922
1923	fninit ; start from an initialised host FPU
1924	fld tword [A3] ; ST0 = the 80-bit input value
1925	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1926	fisttp qword [A2] ; truncating store of ST0 as a 64-bit integer, then pop
1927
1928	fnstsw word [A1] ; hand the resulting status word back to the caller
1929
1930	fninit ; reset the host FPU again before returning
1931	add xSP, 20h
1932	EPILOGUE_4_ARGS
1933	ENDPROC iemAImpl_fistt_r80_to_i64
1934
1935
1936
1937	;
1938	;---------------------- 32-bit floating point operations ----------------------
1939	;
1940
1941	;;
1942	; Converts a 32-bit floating point value to a 80-bit one (fpu register).
1943	;
1944	; @param A0 FPU context (fxsave).
1945	; @param A1 Pointer to a IEMFPURESULT for the output.
1946	; @param A2 Pointer to the 32-bit floating point value to convert.
1947	;
1948	BEGINPROC_FASTCALL iemAImpl_fld_r32_to_r80, 12
1949	PROLOGUE_3_ARGS
1950	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
1951
1952	fninit ; start from an initialised host FPU
1953	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1954	fld dword [A2] ; load the 32-bit float as ST0
1955
1956	fnstsw word [A1 + IEMFPURESULT.FSW] ; capture the status word produced by the load
1957	fnclex ; presumably so the fstp below cannot trip over a pending exception
1958	fstp tword [A1 + IEMFPURESULT.r80Result]
1959
1960	fninit ; reset the host FPU again before returning
1961	add xSP, 20h
1962	EPILOGUE_3_ARGS
1963	ENDPROC iemAImpl_fld_r32_to_r80
1964
1965
1966	;;
1967	; Store a 80-bit floating point value (register) as a 32-bit one (memory).
1968	;
1969	; @param A0 FPU context (fxsave).
1970	; @param A1 Where to return the output FSW.
1971	; @param A2 Where to store the 32-bit value.
1972	; @param A3 Pointer to the 80-bit value.
1973	;
1974	BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r32, 16
1975	PROLOGUE_4_ARGS
1976	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
1977
1978	fninit ; start from an initialised host FPU
1979	fld tword [A3] ; ST0 = the 80-bit input value
1980	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
1981	fst dword [A2] ; non-popping store; the fninit below discards the register stack anyway
1982
1983	fnstsw word [A1] ; hand the resulting status word back to the caller
1984
1985	fninit ; reset the host FPU again before returning
1986	add xSP, 20h
1987	EPILOGUE_4_ARGS
1988	ENDPROC iemAImpl_fst_r80_to_r32
1989
1990
1991	;;
1992	; FPU instruction working on one 80-bit and one 32-bit floating point value.
1993	;
1994	; @param 1 The instruction
1995	;
1996	; @param A0 FPU context (fxsave).
1997	; @param A1 Pointer to a IEMFPURESULT for the output.
1998	; @param A2 Pointer to the 80-bit value.
1999	; @param A3 Pointer to the 32-bit value.
2000	;
2001	%macro IEMIMPL_FPU_R80_BY_R32 1
2002	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
2003	PROLOGUE_4_ARGS
2004	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
2005
2006	fninit ; start from an initialised host FPU
2007	fld tword [A2] ; ST0 = the 80-bit operand
2008	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2009	%1 dword [A3] ; apply the instruction with the 32-bit float memory operand
2010
2011	fnstsw word [A1 + IEMFPURESULT.FSW] ; capture the status word of the operation
2012	fnclex ; presumably so the fstp below cannot trip over a pending exception
2013	fstp tword [A1 + IEMFPURESULT.r80Result]
2014
2015	fninit ; reset the host FPU again before returning
2016	add xSP, 20h
2017	EPILOGUE_4_ARGS
2018	ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
2019	%endmacro
2020
2021	IEMIMPL_FPU_R80_BY_R32 fadd ; defines iemAImpl_fadd_r80_by_r32, and likewise for the lines below
2022	IEMIMPL_FPU_R80_BY_R32 fmul
2023	IEMIMPL_FPU_R80_BY_R32 fsub
2024	IEMIMPL_FPU_R80_BY_R32 fsubr
2025	IEMIMPL_FPU_R80_BY_R32 fdiv
2026	IEMIMPL_FPU_R80_BY_R32 fdivr
2027
2028
2029	;;
2030	; FPU instruction working on one 80-bit and one 32-bit floating point value,
2031	; only returning FSW.
2032	;
2033	; @param 1 The instruction
2034	;
2035	; @param A0 FPU context (fxsave).
2036	; @param A1 Where to store the output FSW.
2037	; @param A2 Pointer to the 80-bit value.
2038	; @param A3 Pointer to the 32-bit value.
2039	;
2040	%macro IEMIMPL_FPU_R80_BY_R32_FSW 1
2041	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r32, 16
2042	PROLOGUE_4_ARGS
2043	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
2044
2045	fninit ; start from an initialised host FPU
2046	fld tword [A2] ; ST0 = the 80-bit operand
2047	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2048	%1 dword [A3] ; apply the instruction with the 32-bit float memory operand
2049
2050	fnstsw word [A1] ; the status word is the only output
2051
2052	fninit ; reset the host FPU again before returning
2053	add xSP, 20h
2054	EPILOGUE_4_ARGS
2055	ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r32
2056	%endmacro
2057
2058	IEMIMPL_FPU_R80_BY_R32_FSW fcom ; defines iemAImpl_fcom_r80_by_r32
2059
2060
2061
2062	;
2063	;---------------------- 64-bit floating point operations ----------------------
2064	;
2065
2066	;;
2067	; Converts a 64-bit floating point value to a 80-bit one (fpu register).
2068	;
2069	; @param A0 FPU context (fxsave).
2070	; @param A1 Pointer to a IEMFPURESULT for the output.
2071	; @param A2 Pointer to the 64-bit floating point value to convert.
2072	;
2073	BEGINPROC_FASTCALL iemAImpl_fld_r64_to_r80, 12
2074	PROLOGUE_3_ARGS
2075	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
2076	fninit ; start from an initialised host FPU, as the other load helpers in this file do
2077	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2078	fld qword [A2] ; load the 64-bit float as ST0
2079
2080	fnstsw word [A1 + IEMFPURESULT.FSW] ; capture the status word produced by the load
2081	fnclex ; presumably so the fstp below cannot trip over a pending exception
2082	fstp tword [A1 + IEMFPURESULT.r80Result]
2083
2084	fninit ; reset the host FPU again before returning
2085	add xSP, 20h
2086	EPILOGUE_3_ARGS
2087	ENDPROC iemAImpl_fld_r64_to_r80
2088
2089
2090	;;
2091	; Store a 80-bit floating point value (register) as a 64-bit one (memory).
2092	;
2093	; @param A0 FPU context (fxsave).
2094	; @param A1 Where to return the output FSW.
2095	; @param A2 Where to store the 64-bit value.
2096	; @param A3 Pointer to the 80-bit value.
2097	;
2098	BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r64, 16
2099	PROLOGUE_4_ARGS
2100	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
2101
2102	fninit ; start from an initialised host FPU
2103	fld tword [A3] ; ST0 = the 80-bit input value
2104	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2105	fst qword [A2] ; non-popping store; the fninit below discards the register stack anyway
2106
2107	fnstsw word [A1] ; hand the resulting status word back to the caller
2108
2109	fninit ; reset the host FPU again before returning
2110	add xSP, 20h
2111	EPILOGUE_4_ARGS
2112	ENDPROC iemAImpl_fst_r80_to_r64
2113
2114
2115	;;
2116	; FPU instruction working on one 80-bit and one 64-bit floating point value.
2117	;
2118	; @param 1 The instruction
2119	;
2120	; @param A0 FPU context (fxsave).
2121	; @param A1 Pointer to a IEMFPURESULT for the output.
2122	; @param A2 Pointer to the 80-bit value.
2123	; @param A3 Pointer to the 64-bit value.
2124	;
2125	%macro IEMIMPL_FPU_R80_BY_R64 1
2126	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
2127	PROLOGUE_4_ARGS
2128	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
2129
2130	fninit ; start from an initialised host FPU
2131	fld tword [A2] ; ST0 = the 80-bit operand
2132	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2133	%1 qword [A3] ; apply the instruction with the 64-bit float memory operand
2134
2135	fnstsw word [A1 + IEMFPURESULT.FSW] ; capture the status word of the operation
2136	fnclex ; presumably so the fstp below cannot trip over a pending exception
2137	fstp tword [A1 + IEMFPURESULT.r80Result]
2138
2139	fninit ; reset the host FPU again before returning
2140	add xSP, 20h
2141	EPILOGUE_4_ARGS
2142	ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
2143	%endmacro
2144
2145	IEMIMPL_FPU_R80_BY_R64 fadd ; defines iemAImpl_fadd_r80_by_r64, and likewise for the lines below
2146	IEMIMPL_FPU_R80_BY_R64 fmul
2147	IEMIMPL_FPU_R80_BY_R64 fsub
2148	IEMIMPL_FPU_R80_BY_R64 fsubr
2149	IEMIMPL_FPU_R80_BY_R64 fdiv
2150	IEMIMPL_FPU_R80_BY_R64 fdivr
2151
2152	;;
2153	; FPU instruction working on one 80-bit and one 64-bit floating point value,
2154	; only returning FSW.
2155	;
2156	; @param 1 The instruction
2157	;
2158	; @param A0 FPU context (fxsave).
2159	; @param A1 Where to store the output FSW.
2160	; @param A2 Pointer to the 80-bit value.
2161	; @param A3 Pointer to the 64-bit value.
2162	;
2163	%macro IEMIMPL_FPU_R80_BY_R64_FSW 1
2164	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r64, 16
2165	PROLOGUE_4_ARGS
2166	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
2167
2168	fninit ; start from an initialised host FPU
2169	fld tword [A2] ; ST0 = the 80-bit operand
2170	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2171	%1 qword [A3] ; apply the instruction with the 64-bit float memory operand
2172
2173	fnstsw word [A1] ; the status word is the only output
2174
2175	fninit ; reset the host FPU again before returning
2176	add xSP, 20h
2177	EPILOGUE_4_ARGS
2178	ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r64
2179	%endmacro
2180
2181	IEMIMPL_FPU_R80_BY_R64_FSW fcom ; defines iemAImpl_fcom_r80_by_r64
2182
2183
2184
2185	;
2186	;---------------------- 80-bit floating point operations ----------------------
2187	;
2188
2189	;;
2190	; Loads a 80-bit floating point register value from memory.
2191	;
2192	; @param A0 FPU context (fxsave).
2193	; @param A1 Pointer to a IEMFPURESULT for the output.
2194	; @param A2 Pointer to the 80-bit floating point value to load.
2195	;
2196	BEGINPROC_FASTCALL iemAImpl_fld_r80_from_r80, 12
2197	PROLOGUE_3_ARGS
2198	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
2199
2200	fninit ; start from an initialised host FPU
2201	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2202	fld tword [A2] ; load the 80-bit value as ST0
2203
2204	fnstsw word [A1 + IEMFPURESULT.FSW] ; capture the status word produced by the load
2205	fnclex ; presumably so the fstp below cannot trip over a pending exception
2206	fstp tword [A1 + IEMFPURESULT.r80Result]
2207
2208	fninit ; reset the host FPU again before returning
2209	add xSP, 20h
2210	EPILOGUE_3_ARGS
2211	ENDPROC iemAImpl_fld_r80_from_r80
2212
2213
2214	;;
2215	; Store a 80-bit floating point register to memory
2216	;
2217	; @param A0 FPU context (fxsave).
2218	; @param A1 Where to return the output FSW.
2219	; @param A2 Where to store the 80-bit value.
2220	; @param A3 Pointer to the 80-bit register value.
2221	;
2222	BEGINPROC_FASTCALL iemAImpl_fst_r80_to_r80, 16
2223	PROLOGUE_4_ARGS
2224	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
2225
2226	fninit ; start from an initialised host FPU
2227	fld tword [A3] ; ST0 = the 80-bit register value
2228	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2229	fstp tword [A2] ; popping store, unlike the fst used by the r32/r64 store helpers
2230
2231	fnstsw word [A1] ; hand the resulting status word back to the caller
2232
2233	fninit ; reset the host FPU again before returning
2234	add xSP, 20h
2235	EPILOGUE_4_ARGS
2236	ENDPROC iemAImpl_fst_r80_to_r80
2237
2238
2239	;;
2240	; FPU instruction working on two 80-bit floating point values.
2241	;
2242	; @param 1 The instruction
2243	;
2244	; @param A0 FPU context (fxsave).
2245	; @param A1 Pointer to a IEMFPURESULT for the output.
2246	; @param A2 Pointer to the first 80-bit value (ST0)
2247	; @param A3 Pointer to the second 80-bit value (STn).
2248	;
2249	%macro IEMIMPL_FPU_R80_BY_R80 2
2250	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2251	PROLOGUE_4_ARGS
2252	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
2253
2254	fninit ; start from an initialised host FPU
2255	fld tword [A3] ; pushed first, so it ends up as ST1
2256	fld tword [A2] ; pushed last, so it is ST0
2257	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2258	%1 %2 ; %2 is the operand list, which is empty for some instructions
2259
2260	fnstsw word [A1 + IEMFPURESULT.FSW] ; capture the status word of the operation
2261	fnclex ; presumably so the fstp below cannot trip over a pending exception
2262	fstp tword [A1 + IEMFPURESULT.r80Result] ; the result is taken from ST0
2263
2264	fninit ; reset the host FPU again before returning
2265	add xSP, 20h
2266	EPILOGUE_4_ARGS
2267	ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2268	%endmacro
2269
2270	IEMIMPL_FPU_R80_BY_R80 fadd, {st0, st1} ; braces keep "st0, st1" together as one macro argument
2271	IEMIMPL_FPU_R80_BY_R80 fmul, {st0, st1}
2272	IEMIMPL_FPU_R80_BY_R80 fsub, {st0, st1}
2273	IEMIMPL_FPU_R80_BY_R80 fsubr, {st0, st1}
2274	IEMIMPL_FPU_R80_BY_R80 fdiv, {st0, st1}
2275	IEMIMPL_FPU_R80_BY_R80 fdivr, {st0, st1}
2276	IEMIMPL_FPU_R80_BY_R80 fprem, {} ; instantiated with an empty operand list
2277	IEMIMPL_FPU_R80_BY_R80 fprem1, {}
2278	IEMIMPL_FPU_R80_BY_R80 fscale, {}
2279
2280
2281	;;
2282	; FPU instruction working on two 80-bit floating point values, ST1 and ST0,
2283	; storing the result in ST1 and popping the stack.
2284	;
2285	; @param 1 The instruction
2286	;
2287	; @param A0 FPU context (fxsave).
2288	; @param A1 Pointer to a IEMFPURESULT for the output.
2289	; @param A2 Pointer to the first 80-bit value (ST1).
2290	; @param A3 Pointer to the second 80-bit value (ST0).
2291	;
2292	%macro IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP 1
2293	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2294	PROLOGUE_4_ARGS
2295	sub xSP, 20h ; scratch space for the environment image the FPU_LD macro writes at [xSP]
2296
2297	fninit ; start from an initialised host FPU
2298	fld tword [A2] ; pushed first, so it ends up as ST1
2299	fld tword [A3] ; pushed last, so it is ST0
2300	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0
2301	%1 ; no explicit operands; per the header it stores into ST1 and pops
2302
2303	fnstsw word [A1 + IEMFPURESULT.FSW] ; capture the status word of the operation
2304	fnclex ; presumably so the fstp below cannot trip over a pending exception
2305	fstp tword [A1 + IEMFPURESULT.r80Result] ; after the pop the result is the new ST0
2306
2307	fninit ; reset the host FPU again before returning
2308	add xSP, 20h
2309	EPILOGUE_4_ARGS
2310	ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2311	%endmacro
2312
2313	IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fpatan ; defines iemAImpl_fpatan_r80_by_r80
2314	IEMIMPL_FPU_R80_BY_R80_ST1_ST0_POP fyl2xp1 ; defines iemAImpl_fyl2xp1_r80_by_r80
2315
2316
2317	;;
2318	; FPU instruction working on two 80-bit floating point values, only
2319	; returning FSW.
2320	; Each use of this macro emits one function named iemAImpl_<instruction>_r80_by_r80.
2321	; @param 1 The instruction (emitted with the operands "st0, st1").
2322	;
2323	; @param A0 FPU context (fxsave).
2324	; @param A1 Pointer to a uint16_t for the resulting FSW.
2325	; @param A2 Pointer to the first 80-bit value (loaded last, so it ends up in ST0).
2326	; @param A3 Pointer to the second 80-bit value (loaded first, so it ends up in ST1).
2327	;
2328	%macro IEMIMPL_FPU_R80_BY_R80_FSW 1
2329	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2330	PROLOGUE_4_ARGS
2331	sub xSP, 20h                               ; reserve 20h bytes of stack; presumably scratch for the FCW/FSW loader below - TODO confirm
2332
2333	fninit                                     ; reset the host FPU so the register stack starts out empty
2334	fld tword [A3]                             ; push the second value; it becomes ST1 after the next load
2335	fld tword [A2]                             ; push the first value; it is now ST0
2336	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0         ; defined elsewhere; by its name it applies FCW and a sanitized FSW from the fxsave area
2337	%1 st0, st1                                ; the instruction being implemented, on ST0 and ST1
2338
2339	fnstsw word [A1]                           ; the status word is the only output; nothing is popped or stored
2340
2341	fninit                                     ; reset the FPU, discarding both loaded values
2342	add xSP, 20h                               ; release the stack reserved above
2343	EPILOGUE_4_ARGS
2344	ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2345	%endmacro
2346
2347	IEMIMPL_FPU_R80_BY_R80_FSW fcom     ; emits iemAImpl_fcom_r80_by_r80
2348	IEMIMPL_FPU_R80_BY_R80_FSW fucom    ; emits iemAImpl_fucom_r80_by_r80
2349
2350
2351	;;
2352	; FPU instruction working on two 80-bit floating point values,
2353	; returning FSW and EFLAGS (eax).
2354	; Each use of this macro emits one function named iemAImpl_<instruction>_r80_by_r80.
2355	; @param 1 The instruction (emitted with the single operand "st1").
2356	;
2357	; @returns EFLAGS in EAX.
2358	; @param A0 FPU context (fxsave).
2359	; @param A1 Pointer to a uint16_t for the resulting FSW.
2360	; @param A2 Pointer to the first 80-bit value (loaded last, so it ends up in ST0).
2361	; @param A3 Pointer to the second 80-bit value (loaded first, so it ends up in ST1).
2362	;
2363	%macro IEMIMPL_FPU_R80_BY_R80_EFL 1
2364	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_by_r80, 16
2365	PROLOGUE_4_ARGS
2366	sub xSP, 20h                               ; reserve 20h bytes of stack; presumably scratch for the FCW/FSW loader below - TODO confirm
2367
2368	fninit                                     ; reset the host FPU so the register stack starts out empty
2369	fld tword [A3]                             ; push the second value; it becomes ST1 after the next load
2370	fld tword [A2]                             ; push the first value; it is now ST0
2371	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0         ; defined elsewhere; by its name it applies FCW and a sanitized FSW from the fxsave area
2372	%1 st1                                     ; the instruction being implemented, with ST1 as explicit operand
2373
2374	fnstsw word [A1]                           ; store the resulting status word for the caller
2375	pushf                                      ; snapshot the flags now: the add on xSP further down modifies them
2376	pop xAX                                    ; the flags snapshot becomes the return value
2377
2378	fninit                                     ; reset the FPU, discarding both loaded values
2379	add xSP, 20h                               ; release the stack reserved above
2380	EPILOGUE_4_ARGS
2381	ENDPROC iemAImpl_ %+ %1 %+ _r80_by_r80
2382	%endmacro
2383
2384	IEMIMPL_FPU_R80_BY_R80_EFL fcomi     ; emits iemAImpl_fcomi_r80_by_r80
2385	IEMIMPL_FPU_R80_BY_R80_EFL fucomi    ; emits iemAImpl_fucomi_r80_by_r80
2386
2387
2388	;;
2389	; FPU instruction working on one 80-bit floating point value.
2390	; Each use of this macro emits one function named iemAImpl_<instruction>_r80.
2391	; @param 1 The instruction (emitted without explicit operands).
2392	;
2393	; @param A0 FPU context (fxsave).
2394	; @param A1 Pointer to an IEMFPURESULT for the output (FSW and r80Result are written).
2395	; @param A2 Pointer to the 80-bit value (loaded into ST0).
2396	;
2397	%macro IEMIMPL_FPU_R80 1
2398	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
2399	PROLOGUE_3_ARGS
2400	sub xSP, 20h                               ; reserve 20h bytes of stack; presumably scratch for the FCW/FSW loader below - TODO confirm
2401
2402	fninit                                     ; reset the host FPU so the register stack starts out empty
2403	fld tword [A2]                             ; the single input value is now ST0; no other register is loaded
2404	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0         ; defined elsewhere; by its name it applies FCW and a sanitized FSW from the fxsave area
2405	%1                                         ; the instruction being implemented
2406
2407	fnstsw word [A1 + IEMFPURESULT.FSW]        ; record the status word before the store below can alter it
2408	fnclex                                     ; clear exception flags; presumably so the store cannot trip on a pending one
2409	fstp tword [A1 + IEMFPURESULT.r80Result]   ; pop ST0 into the result buffer
2410
2411	fninit                                     ; reset the FPU again before returning
2412	add xSP, 20h                               ; release the stack reserved above
2413	EPILOGUE_3_ARGS
2414	ENDPROC iemAImpl_ %+ %1 %+ _r80
2415	%endmacro
2416
2417	IEMIMPL_FPU_R80 fchs
2418	IEMIMPL_FPU_R80 fabs
2419	IEMIMPL_FPU_R80 f2xm1
2420	IEMIMPL_FPU_R80 fyl2x      ; NOTE(review): fyl2x also reads ST1, but this macro loads only one value - confirm this instance is intended
2421	IEMIMPL_FPU_R80 fsqrt
2422	IEMIMPL_FPU_R80 frndint
2423	IEMIMPL_FPU_R80 fsin
2424	IEMIMPL_FPU_R80 fcos
2425
2426
2427	;;
2428	; FPU instruction working on one 80-bit floating point value, only
2429	; returning FSW.
2430	; Each use of this macro emits one function named iemAImpl_<instruction>_r80.
2431	; @param 1 The instruction (emitted without explicit operands).
2432	;
2433	; @param A0 FPU context (fxsave).
2434	; @param A1 Pointer to a uint16_t for the resulting FSW.
2435	; @param A2 Pointer to the 80-bit value (loaded into ST0).
2436	;
2437	%macro IEMIMPL_FPU_R80_FSW 1
2438	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80, 12
2439	PROLOGUE_3_ARGS
2440	sub xSP, 20h                               ; reserve 20h bytes of stack; presumably scratch for the FCW/FSW loader below - TODO confirm
2441
2442	fninit                                     ; reset the host FPU so the register stack starts out empty
2443	fld tword [A2]                             ; the single input value is now ST0
2444	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0         ; defined elsewhere; by its name it applies FCW and a sanitized FSW from the fxsave area
2445	%1                                         ; the instruction being implemented
2446
2447	fnstsw word [A1]                           ; the status word is the only output; nothing is popped or stored
2448
2449	fninit                                     ; reset the FPU, discarding the loaded value
2450	add xSP, 20h                               ; release the stack reserved above
2451	EPILOGUE_3_ARGS
2452	ENDPROC iemAImpl_ %+ %1 %+ _r80
2453	%endmacro
2454
2455	IEMIMPL_FPU_R80_FSW ftst    ; emits iemAImpl_ftst_r80
2456	IEMIMPL_FPU_R80_FSW fxam    ; emits iemAImpl_fxam_r80
2457
2458
2459
2460	;;
2461	; FPU instruction loading an 80-bit floating point constant.
2462	; Each use of this macro emits one function named iemAImpl_<instruction>.
2463	; @param 1 The instruction (emitted without explicit operands).
2464	;
2465	; @param A0 FPU context (fxsave).
2466	; @param A1 Pointer to an IEMFPURESULT for the output (FSW and r80Result are written).
2467	;
2468	%macro IEMIMPL_FPU_R80_CONST 1
2469	BEGINPROC_FASTCALL iemAImpl_ %+ %1, 8
2470	PROLOGUE_2_ARGS
2471	sub xSP, 20h                               ; reserve 20h bytes of stack; presumably scratch for the FCW/FSW loader below - TODO confirm
2472
2473	fninit                                     ; reset the host FPU so the register stack starts out empty
2474	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0         ; defined elsewhere; by its name it applies FCW and a sanitized FSW from the fxsave area
2475	%1                                         ; the constant-loading instruction; no input value is loaded beforehand
2476
2477	fnstsw word [A1 + IEMFPURESULT.FSW]        ; record the status word before the store below can alter it
2478	fnclex                                     ; clear exception flags; presumably so the store cannot trip on a pending one
2479	fstp tword [A1 + IEMFPURESULT.r80Result]   ; pop ST0 into the result buffer
2480
2481	fninit                                     ; reset the FPU again before returning
2482	add xSP, 20h                               ; release the stack reserved above
2483	EPILOGUE_2_ARGS
2484	ENDPROC iemAImpl_ %+ %1
2485	%endmacro
2486
2487	IEMIMPL_FPU_R80_CONST fld1
2488	IEMIMPL_FPU_R80_CONST fldl2t
2489	IEMIMPL_FPU_R80_CONST fldl2e
2490	IEMIMPL_FPU_R80_CONST fldpi
2491	IEMIMPL_FPU_R80_CONST fldlg2
2492	IEMIMPL_FPU_R80_CONST fldln2
2493	IEMIMPL_FPU_R80_CONST fldz
2494
2495
2496	;;
2497	; FPU instruction working on one 80-bit floating point value, outputting two.
2498	; Each use of this macro emits one function named iemAImpl_<instruction>_r80_r80.
2499	; @param 1 The instruction (emitted without explicit operands).
2500	;
2501	; @param A0 FPU context (fxsave).
2502	; @param A1 Pointer to an IEMFPURESULTTWO for the output (FSW, r80Result1 and r80Result2 are written).
2503	; @param A2 Pointer to the 80-bit value (loaded into ST0).
2504	;
2505	%macro IEMIMPL_FPU_R80_R80 1
2506	BEGINPROC_FASTCALL iemAImpl_ %+ %1 %+ _r80_r80, 12
2507	PROLOGUE_3_ARGS
2508	sub xSP, 20h                                   ; reserve 20h bytes of stack; presumably scratch for the FCW/FSW loader below - TODO confirm
2509
2510	fninit                                         ; reset the host FPU so the register stack starts out empty
2511	fld tword [A2]                                 ; the single input value is now ST0
2512	FPU_LD_FXSTATE_FCW_AND_SAFE_FSW A0             ; defined elsewhere; by its name it applies FCW and a sanitized FSW from the fxsave area
2513	%1                                             ; the instruction being implemented
2514
2515	fnstsw word [A1 + IEMFPURESULTTWO.FSW]         ; record the status word before the stores below can alter it
2516	fnclex                                         ; clear exception flags; presumably so the store cannot trip on a pending one
2517	fstp tword [A1 + IEMFPURESULTTWO.r80Result2]   ; the top of stack (ST0) is popped into r80Result2
2518	fnclex                                         ; clear again; presumably in case the first store flagged something
2519	fstp tword [A1 + IEMFPURESULTTWO.r80Result1]   ; the next value (formerly ST1) is popped into r80Result1
2520
2521	fninit                                         ; reset the FPU again before returning
2522	add xSP, 20h                                   ; release the stack reserved above
2523	EPILOGUE_3_ARGS
2524	ENDPROC iemAImpl_ %+ %1 %+ _r80_r80
2525	%endmacro
2526
2527	IEMIMPL_FPU_R80_R80 fptan      ; emits iemAImpl_fptan_r80_r80
2528	IEMIMPL_FPU_R80_R80 fxtract    ; emits iemAImpl_fxtract_r80_r80
2529	IEMIMPL_FPU_R80_R80 fsincos    ; emits iemAImpl_fsincos_r80_r80
2530

Note: See TracBrowser for help on using the repository browser.

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllAImpl.asm@ 42699

Download in other formats: