VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/target-x86/IEMAllN8veRecompFuncs-x86.h@ 108260

Last change on this file since 108260 was 108204, checked in by vboxsync, 3 months ago

VMM/IEM: Moving x86 target specific files to VMMAll/target-x86/... jiraref:VBP-1531

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 540.3 KB
1/* $Id: IEMAllN8veRecompFuncs-x86.h 108204 2025-02-13 16:26:48Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits, x86 target.
4 */
5
6/*
7 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#ifdef IN_RING0
38# define VBOX_VMM_TARGET_X86
39#endif
40#include <VBox/vmm/iem.h>
41#include <VBox/vmm/cpum.h>
42#include <VBox/vmm/dbgf.h>
43#include "IEMInternal.h"
44#include <VBox/vmm/vmcc.h>
45#include <VBox/log.h>
46#include <VBox/err.h>
47#include <VBox/dis.h>
48#include <VBox/param.h>
49#include <iprt/assert.h>
50#include <iprt/heap.h>
51#include <iprt/mem.h>
52#include <iprt/string.h>
53#if defined(RT_ARCH_AMD64)
54# include <iprt/x86.h>
55#elif defined(RT_ARCH_ARM64)
56# include <iprt/armv8.h>
57#endif
58
59#include "IEMInline.h"
60#include "IEMThreadedFunctions.h"
61#include "IEMN8veRecompiler.h"
62#include "IEMN8veRecompilerEmit.h"
63#include "IEMN8veRecompilerTlbLookup.h"
64#include "IEMNativeFunctions.h"
65#include "VMMAll/target-x86/IEMAllN8veEmit-x86.h"
66
67
68/*
69 * Narrow down the configs here to avoid wasting time on unused ones.
70 * Note! Same checks in IEMAllThrdRecompiler.cpp.
71 */
72
73#ifndef IEM_WITH_CODE_TLB
74# error The code TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_DATA_TLB
78# error The data TLB must be enabled for the recompiler.
79#endif
80
81#ifndef IEM_WITH_SETJMP
82# error The setjmp approach must be enabled for the recompiler.
83#endif
84
85
86/*********************************************************************************************************************************
87* Code emitters for flushing pending guest register writes and sanity checks *
88*********************************************************************************************************************************/
89
90#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
91
92# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
93/**
94 * Updates IEMCPU::uPcUpdatingDebug.
95 */
96DECL_INLINE_THROW(uint32_t) iemNativeEmitPcDebugAdd(PIEMRECOMPILERSTATE pReNative, uint32_t off, int64_t offDisp, uint8_t cBits)
97{
98# ifdef RT_ARCH_AMD64
99 if (pReNative->Core.fDebugPcInitialized && cBits >= 32)
100 {
101 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
102 if ((int32_t)offDisp == offDisp || cBits != 64)
103 {
104 /* add [q]word [pVCpu->iem.s.uPcUpdatingDebug], imm32/imm8 */
105 if (cBits == 64)
106 pCodeBuf[off++] = X86_OP_REX_W;
107 pCodeBuf[off++] = (int8_t)offDisp == offDisp ? 0x83 : 0x81;
108 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
109 if ((int8_t)offDisp == offDisp)
110 pCodeBuf[off++] = (int8_t)offDisp;
111 else
112 {
113 *(int32_t *)&pCodeBuf[off] = (int32_t)offDisp;
114 off += sizeof(int32_t);
115 }
116 }
117 else
118 {
119 /* mov tmp0, imm64 */
120 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, offDisp);
121
122 /* add [pVCpu->iem.s.uPcUpdatingDebug], tmp0 */
123 if (cBits == 64)
124 pCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 >= 8 ? X86_OP_REX_R : 0);
125 else if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
126 pCodeBuf[off++] = X86_OP_REX_R;
127 pCodeBuf[off++] = 0x01;
128 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0 & 7,
129 RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
130 }
131 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
132 return off;
133 }
134# endif
135
136 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
137 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 12);
138
139 if (pReNative->Core.fDebugPcInitialized)
140 {
141 Log4(("uPcUpdatingDebug+=%ld cBits=%d off=%#x\n", offDisp, cBits, off));
142 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
143 }
144 else
145 {
146 Log4(("uPcUpdatingDebug=rip+%ld cBits=%d off=%#x\n", offDisp, cBits, off));
147 pReNative->Core.fDebugPcInitialized = true;
148 off = iemNativeEmitLoadGprWithGstRegExT<kIemNativeGstReg_Pc>(pCodeBuf, off, idxTmpReg);
149 }
150
151 if (cBits == 64)
152 off = iemNativeEmitAddGprImmEx(pCodeBuf, off, idxTmpReg, offDisp, IEMNATIVE_REG_FIXED_TMP0);
153 else
154 {
155 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxTmpReg, (int32_t)offDisp, IEMNATIVE_REG_FIXED_TMP0);
156 if (cBits == 16)
157 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, UINT16_MAX);
158 }
159
160 off = iemNativeEmitStoreGprToVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug),
161 IEMNATIVE_REG_FIXED_TMP0);
162
163 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
164 iemNativeRegFreeTmp(pReNative, idxTmpReg);
165 return off;
166}
167
168
169# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
170DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
171{
172 /* Compare the shadow with the context value; they should match. */
173 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
174 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
175 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
176 return off;
177}
178# endif
179
180#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
181
182/**
183 * Flushes delayed write of a specific guest register.
184 *
185 * This must be called prior to calling CImpl functions and any helpers that use
186 * the guest state (such as those raising exceptions).
187 *
188 * This optimization has not yet been implemented. The first target would be
189 * RIP updates, since these are the most common ones.
190 */
191template<IEMNATIVEGSTREGREF a_enmClass>
192DECL_INLINE_THROW(uint32_t)
193iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
194{
195#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
196 /* If it ever becomes possible to reference the PC register here, the writeback needs to be done first. */
197#endif
198
199#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
200# if 0 /** @todo r=aeichner EFLAGS writeback delay. */
201 if RT_CONSTEXPR_IF(a_enmClass == kIemNativeGstRegRef_EFlags)
202 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
203 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
204# else
205 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
206# endif
207
208 if RT_CONSTEXPR_IF(a_enmClass == kIemNativeGstRegRef_Gpr)
209 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
210 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
211#endif
212
213 if RT_CONSTEXPR_IF(a_enmClass == kIemNativeGstRegRef_XReg)
214 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
215 {
216 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
217 /* Flush the shadows as the register needs to be reloaded (there is currently
218 no guarantee that the referenced register doesn't change). */
219 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
220
221 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
222 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
223 }
224
225 return off;
226}
227
228
229
230/*********************************************************************************************************************************
231* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
232*********************************************************************************************************************************/
233
234#undef IEM_MC_BEGIN /* unused */
235#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
236 { \
237 Assert(pReNative->Core.bmVars == 0); \
238 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
239 Assert(pReNative->Core.bmStack == 0); \
240 pReNative->fMc = (a_fMcFlags); \
241 pReNative->fCImpl = (a_fCImplFlags); \
242 pReNative->cArgsX = (a_cArgsIncludingHidden)
243
244/** We have to get to the end in recompilation mode, as otherwise we won't
245 * generate code for all the IEM_MC_IF_XXX branches. */
246#define IEM_MC_END() \
247 iemNativeVarFreeAll(pReNative); \
248 } return off
249
250
251
252/*********************************************************************************************************************************
253* Liveness Stubs *
254*********************************************************************************************************************************/
255
256#define IEM_MC_LIVENESS_GREG_INPUT(a_iGReg) ((void)0)
257#define IEM_MC_LIVENESS_GREG_CLOBBER(a_iGReg) ((void)0)
258#define IEM_MC_LIVENESS_GREG_MODIFY(a_iGReg) ((void)0)
259
260#define IEM_MC_LIVENESS_MREG_INPUT(a_iMReg) ((void)0)
261#define IEM_MC_LIVENESS_MREG_CLOBBER(a_iMReg) ((void)0)
262#define IEM_MC_LIVENESS_MREG_MODIFY(a_iMReg) ((void)0)
263
264#define IEM_MC_LIVENESS_XREG_INPUT(a_iXReg) ((void)0)
265#define IEM_MC_LIVENESS_XREG_CLOBBER(a_iXReg) ((void)0)
266#define IEM_MC_LIVENESS_XREG_MODIFY(a_iXReg) ((void)0)
267
268#define IEM_MC_LIVENESS_MXCSR_INPUT() ((void)0)
269#define IEM_MC_LIVENESS_MXCSR_CLOBBER() ((void)0)
270#define IEM_MC_LIVENESS_MXCSR_MODIFY() ((void)0)
271
272
273/*********************************************************************************************************************************
274* Native Emitter Support. *
275*********************************************************************************************************************************/
276
277#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
278
279#define IEM_MC_NATIVE_ELSE() } else {
280
281#define IEM_MC_NATIVE_ENDIF() } ((void)0)
282
283
284#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
285 off = a_fnEmitter(pReNative, off)
286
287#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
288 off = a_fnEmitter(pReNative, off, (a0))
289
290#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
291 off = a_fnEmitter(pReNative, off, (a0), (a1))
292
293#define IEM_MC_NATIVE_EMIT_2_EX(a_fnEmitter, a0, a1) \
294 off = a_fnEmitter(pReNative, off, pCallEntry->idxInstr, (a0), (a1))
295
296#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
297 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
298
299#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
300 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
301
302#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
303 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
304
305#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
306 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
307
308#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
309 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
310
311#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
312 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
313
314
315#ifndef RT_ARCH_AMD64
316# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
317#else
318/** @note This is a naive approach that ASSUMES that the register isn't
319 * allocated, so it only works safely for the first allocation(s) in
320 * an MC block. */
321# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
322 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
323
324DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg,
325 uint32_t off, bool fAllocated);
326
327DECL_INLINE_THROW(uint32_t)
328iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
329{
330 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
331 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
332 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
333
334# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
335 /* Must flush the register if it holds pending writes. */
336 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
337 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
338 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
339# endif
340
341 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off, false /*fAllocated*/);
342 return off;
343}
344
345#endif /* RT_ARCH_AMD64 */
346
347
348
349/*********************************************************************************************************************************
350* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
351*********************************************************************************************************************************/
352
353#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
354 pReNative->fMc = 0; \
355 pReNative->fCImpl = (a_fFlags); \
356 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, \
357 a_cbInstr) /** @todo not used ... */
358
359
360#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
361 pReNative->fMc = 0; \
362 pReNative->fCImpl = (a_fFlags); \
363 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
364
365DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
366 uint8_t idxInstr, uint64_t a_fGstShwFlush,
367 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
368{
369 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
370}
371
372
373#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
374 pReNative->fMc = 0; \
375 pReNative->fCImpl = (a_fFlags); \
376 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
377 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
378
379DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
380 uint8_t idxInstr, uint64_t a_fGstShwFlush,
381 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
382{
383 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
384}
385
386
387#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
388 pReNative->fMc = 0; \
389 pReNative->fCImpl = (a_fFlags); \
390 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
391 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
392
393DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
394 uint8_t idxInstr, uint64_t a_fGstShwFlush,
395 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
396 uint64_t uArg2)
397{
398 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
399}
400
401
402
403/*********************************************************************************************************************************
404* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
405*********************************************************************************************************************************/
406
407/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
408 * and the other _WITH_FLAGS MCs; see iemRegFinishClearingRF. */
409DECL_INLINE_THROW(uint32_t)
410iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
411{
412 /*
413 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
414 * return with a special status code and make the execution loop deal with
415 * it. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
416 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
417 * could continue w/o interruption, it will probably drop into the
418 * debugger, so it is not worth the effort of trying to service it here and
419 * we just lump it in with the handling of the others.
420 *
421 * To simplify the code and the register state management even more (wrt the
422 * immediate in the AND operation), we always update the flags and skip the
423 * conditional jump associated with the extra check.
424 */
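 /* In plain C the emitted logic is roughly the following sketch (fEFlags standing in for the
  * guest EFLAGS value, including the internal CPUMCTX status bits):
  *     if (fEFlags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
  *         exit the TB via the ReturnWithFlags path;
  *     fEFlags &= ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);  // done unconditionally, no extra test + jump
  *     store fEFlags back into CPUMCTX;                    // via the EFLAGS shadow register
  */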
425 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
426 <= UINT32_MAX);
427#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
428 AssertMsg( pReNative->idxCurCall == 0
429 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
430 IEMLIVENESSBIT_IDX_EFL_OTHER)),
431 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
432 IEMLIVENESSBIT_IDX_EFL_OTHER)));
433#endif
434
435 /*
436 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
437 * any pending register writes must be flushed.
438 */
439 off = iemNativeRegFlushPendingWrites(pReNative, off);
440
441 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
442 RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER),
443 RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER));
444 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_ReturnWithFlags>(pReNative, off, idxEflReg,
445 X86_EFL_TF
446 | CPUMCTX_DBG_HIT_DRX_MASK
447 | CPUMCTX_DBG_DBGF_MASK);
448 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
449 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_EFlags>(pReNative, off, idxEflReg);
450
451 /* Free but don't flush the EFLAGS register. */
452 iemNativeRegFreeTmp(pReNative, idxEflReg);
453
454 return off;
455}
456
457
458/** Helper for iemNativeEmitFinishInstructionWithStatus. */
459DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
460{
461 unsigned const offOpcodes = pCallEntry->offOpcode;
462 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
463 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
464 {
465 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
466 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
467 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
468 }
469 AssertFailedReturn(NIL_RTGCPHYS);
470}
471
472
473/** The VINF_SUCCESS dummy. */
474template<int const a_rcNormal, bool const a_fIsJump>
475DECL_FORCE_INLINE_THROW(uint32_t)
476iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
477 int32_t const offJump)
478{
479 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
480 if (a_rcNormal != VINF_SUCCESS)
481 {
482#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
483 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
484#else
485 RT_NOREF_PV(pCallEntry);
486#endif
487
488 /* As this code returns from the TB, any pending register writes must be flushed. */
489 off = iemNativeRegFlushPendingWrites(pReNative, off);
490
491 /*
492 * If we're in a conditional, mark the current branch as exiting so we
493 * can disregard its state when we hit the IEM_MC_ENDIF.
494 */
495 iemNativeMarkCurCondBranchAsExiting(pReNative);
496
497 /*
498 * Use the lookup table for getting to the next TB quickly.
499 * Note! In this code path there can only be one entry at present.
500 */
501 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
502 PCIEMTB const pTbOrg = pReNative->pTbOrg;
503 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
504 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
505
506#if 0
507 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
508 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
509 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
510 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
511 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
512
513 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreak);
514
515#else
516 /* Load the index as argument #1 for the helper call at the given label. */
517 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
518
519 /*
520 * Figure out the physical address of the current instruction and see
521 * whether the next instruction we're about to execute is in the same
522 * page so we can optimistically skip TLB loading.
523 *
524 * - This is safe for all cases in FLAT mode.
525 * - In segmented modes it is complicated, given that a negative
526 * jump may underflow EIP and a forward jump may overflow or run into
527 * CS.LIM and trigger a #GP. The only thing we can get away with
528 * now at compile time is forward jumps w/o CS.LIM checks, since the
529 * lack of CS.LIM checks means we're good for the entire physical page
530 * we're executing on and another 15 bytes before we run into CS.LIM.
531 */
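 /* A worked example of the page check below, assuming 4 KiB guest pages and a non-jump finish
  * (offJump = 0): with GCPhysPcCurrent = 0x104ffa and cbOpcode = 2, GCPhysPcNext is 0x104ffc;
  * both addresses are in guest page 0x104 and the current page has 6 >= 2 opcode bytes left,
  * so the TLB-less lookup exit can be used. A 2-byte instruction starting at page offset 0xfff
  * (the 'je -56h' case noted below) fails the second test and takes the TLB variant instead. */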
532 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
533# if 0 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. See also iemNativeHlpReturnBreakViaLookup. */
534 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS)
535# endif
536 )
537 {
538 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
539 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
540 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
541 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
542
543 {
544 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
545 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
546
547 /* Load the key lookup flags into the 2nd argument for the helper call.
548 - This is safe wrt CS limit checking since we're only here for FLAT modes.
549 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
550 interrupt shadow.
551 - The NMI inhibiting is more questionable, though... */
552 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
553 * Should we copy it into fExec to simplify this? OTOH, it's just a
554 * couple of extra instructions if EFLAGS are already in a register. */
555 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
556 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
557
558 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
559 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookup>(pReNative, off);
560 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithIrq>(pReNative, off);
561 }
562 }
563 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
564 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithTlb>(pReNative, off);
565 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq>(pReNative, off);
566#endif
567 }
568 return off;
569}
570
571
572#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
573 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
574 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
575
576#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
577 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
578 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
579 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
580
581/** Same as iemRegAddToRip64AndFinishingNoFlags. */
582DECL_INLINE_THROW(uint32_t)
583iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
584{
585#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
586# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
587 if (!pReNative->Core.offPc)
588 off = iemNativeEmitLoadGprWithGstShadowRegT<kIemNativeGstReg_Pc>(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG);
589# endif
590
591 /* Allocate a temporary PC register. */
592 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
593
594 /* Perform the addition and store the result. */
595 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
596 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
597
598 /* Free but don't flush the PC register. */
599 iemNativeRegFreeTmp(pReNative, idxPcReg);
600#endif
601
602#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
603 pReNative->Core.offPc += cbInstr;
604 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
605# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
606 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 64);
607 off = iemNativeEmitPcDebugCheck(pReNative, off);
608# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
609 off = iemNativePcAdjustCheck(pReNative, off);
610# endif
611 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
612#endif
613
614 return off;
615}
616
617
618#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
619 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
620 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
621
622#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
623 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
624 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
625 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
626
627/** Same as iemRegAddToEip32AndFinishingNoFlags. */
628DECL_INLINE_THROW(uint32_t)
629iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
630{
631#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
632# ifdef IEMNATIVE_REG_FIXED_PC_DBG
633 if (!pReNative->Core.offPc)
634 off = iemNativeEmitLoadGprWithGstShadowRegT<kIemNativeGstReg_Pc>(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG);
635# endif
636
637 /* Allocate a temporary PC register. */
638 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
639
640 /* Perform the addition and store the result. */
641 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
642 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
643
644 /* Free but don't flush the PC register. */
645 iemNativeRegFreeTmp(pReNative, idxPcReg);
646#endif
647
648#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
649 pReNative->Core.offPc += cbInstr;
650 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
651# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
652 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 32);
653 off = iemNativeEmitPcDebugCheck(pReNative, off);
654# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
655 off = iemNativePcAdjustCheck(pReNative, off);
656# endif
657 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
658#endif
659
660 return off;
661}
662
663
664#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
665 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
666 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
667
668#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
669 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
670 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
671 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
672
673/** Same as iemRegAddToIp16AndFinishingNoFlags. */
674DECL_INLINE_THROW(uint32_t)
675iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
676{
677#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
678# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
679 if (!pReNative->Core.offPc)
680 off = iemNativeEmitLoadGprWithGstShadowRegT<kIemNativeGstReg_Pc>(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG);
681# endif
682
683 /* Allocate a temporary PC register. */
684 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
685
686 /* Perform the addition and store the result. */
687 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
688 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
689 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
690
691 /* Free but don't flush the PC register. */
692 iemNativeRegFreeTmp(pReNative, idxPcReg);
693#endif
694
695#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
696 pReNative->Core.offPc += cbInstr;
697 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
698# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
699 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 16);
700 off = iemNativeEmitPcDebugCheck(pReNative, off);
701# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
702 off = iemNativePcAdjustCheck(pReNative, off);
703# endif
704 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
705#endif
706
707 return off;
708}
709
710
711/*********************************************************************************************************************************
712* Common code for changing PC/RIP/EIP/IP. *
713*********************************************************************************************************************************/
714
715/**
716 * Emits code to check if the content of @a idxAddrReg is a canonical address,
717 * raising a \#GP(0) if it isn't.
718 *
719 * @returns New code buffer offset, UINT32_MAX on failure.
720 * @param pReNative The native recompile state.
721 * @param off The code buffer offset.
722 * @param idxAddrReg The host register with the address to check.
723 * @param idxInstr The current instruction.
724 */
725DECL_FORCE_INLINE_THROW(uint32_t)
726iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
727{
728 /*
729 * Make sure we don't have any outstanding guest register writes as we may
730 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
731 */
732 off = iemNativeRegFlushPendingWrites(pReNative, off);
733
734#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
735 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
736#else
737 RT_NOREF(idxInstr);
738#endif
739
740#ifdef RT_ARCH_AMD64
741 /*
742 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
743 * return raisexcpt();
744 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
745 */
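 /* A compile-time illustration of the trick above: adding 0x8000 to the high dword folds both
  * canonical halves into the 0x0000..0xffff range, so only non-canonical addresses survive the
  * shift by 16. The constants below are merely example addresses. */
 AssertCompile((((uint32_t)(UINT64_C(0x00007fffffffffff) >> 32) + UINT32_C(0x8000)) >> 16) == 0); /* canonical, low half */
 AssertCompile((((uint32_t)(UINT64_C(0xffff800000000000) >> 32) + UINT32_C(0x8000)) >> 16) == 0); /* canonical, high half */
 AssertCompile((((uint32_t)(UINT64_C(0x0000800000000000) >> 32) + UINT32_C(0x8000)) >> 16) != 0); /* non-canonical -> #GP(0) */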
746 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
747
748 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
749 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
750 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
751 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
752 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
753
754 iemNativeRegFreeTmp(pReNative, iTmpReg);
755
756#elif defined(RT_ARCH_ARM64)
757 /*
758 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
759 * return raisexcpt();
760 * ----
761 * mov x1, 0x800000000000
762 * add x1, x0, x1
763 * cmp xzr, x1, lsr 48
764 * b.ne .Lraisexcpt
765 */
766 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
767
768 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
769 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
770 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
771 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
772
773 iemNativeRegFreeTmp(pReNative, iTmpReg);
774
775#else
776# error "Port me"
777#endif
778 return off;
779}
780
781
782/**
783 * Emits code to check if the content of @a idxAddrReg is a canonical address,
784 * raising a \#GP(0) if it isn't.
785 *
786 * Caller makes sure everything is flushed, except maybe PC.
787 *
788 * @returns New code buffer offset, UINT32_MAX on failure.
789 * @param pReNative The native recompile state.
790 * @param off The code buffer offset.
791 * @param idxAddrReg The host register with the address to check.
792 * @param offDisp The relative displacement that has already been
793 * added to idxAddrReg and must be subtracted if
794 * raising a \#GP(0).
795 * @param idxInstr The current instruction.
796 */
797DECL_FORCE_INLINE_THROW(uint32_t)
798iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
799 uint8_t idxAddrReg, int64_t offDisp, uint8_t idxInstr)
800{
801#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
802 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
803#endif
804
805#ifdef RT_ARCH_AMD64
806 /*
807 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
808 * return raisexcpt();
809 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
810 */
811 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
812
813 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
814 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
815 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
816 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
817
818#elif defined(RT_ARCH_ARM64)
819 /*
820 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
821 * return raisexcpt();
822 * ----
823 * mov x1, 0x800000000000
824 * add x1, x0, x1
825 * cmp xzr, x1, lsr 48
826 * b.ne .Lraisexcpt
827 */
828 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
829
830 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
831 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
832 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
833#else
834# error "Port me"
835#endif
836
837 /* Jump to the #GP code (hoping static prediction considers forward branches as not-taken). */
838 uint32_t const offFixup1 = off;
839 off = iemNativeEmitJnzToFixed(pReNative, off, off /*8-bit jump suffices*/);
840
841 /* jump .Lnoexcept; Skip the #GP code. */
842 uint32_t const offFixup2 = off;
843 off = iemNativeEmitJmpToFixed(pReNative, off, off /*8-bit jump suffices*/);
844
845 /* .Lraisexcpt: */
846 iemNativeFixupFixedJump(pReNative, offFixup1, off);
847#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
848 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
849#else
850 RT_NOREF(idxInstr);
851#endif
852
853 /* Undo the PC adjustment and store the old PC value. */
854 off = iemNativeEmitSubGprImm(pReNative, off, idxAddrReg, offDisp, iTmpReg);
855 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxAddrReg);
856
857 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
858
859 /* .Lnoexcept: */
860 iemNativeFixupFixedJump(pReNative, offFixup2, off);
861
862 iemNativeRegFreeTmp(pReNative, iTmpReg);
863 return off;
864}
865
866
867/**
868 * Emits code to check if the content of @a idxAddrReg is a canonical address,
869 * raising a \#GP(0) if it isn't.
870 *
871 * Caller makes sure everything is flushed, except maybe PC.
872 *
873 * @returns New code buffer offset, UINT32_MAX on failure.
874 * @param pReNative The native recompile state.
875 * @param off The code buffer offset.
876 * @param idxAddrReg The host register with the address to check.
877 * @param idxOldPcReg Register holding the old PC that offPc is relative
878 * to if available, otherwise UINT8_MAX.
879 * @param idxInstr The current instruction.
880 */
881DECL_FORCE_INLINE_THROW(uint32_t)
882iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
883 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
884{
885#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
886 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
887#endif
888
889#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
890# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
891 if (!pReNative->Core.offPc)
892# endif
893 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
894#else
895 RT_NOREF(idxInstr);
896#endif
897
898#ifdef RT_ARCH_AMD64
899 /*
900 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
901 * return raisexcpt();
902 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
903 */
904 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
905
906 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
907 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
908 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
909 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
910
911#elif defined(RT_ARCH_ARM64)
912 /*
913 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
914 * return raisexcpt();
915 * ----
916 * mov x1, 0x800000000000
917 * add x1, x0, x1
918 * cmp xzr, x1, lsr 48
919 * b.ne .Lraisexcpt
920 */
921 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
922
923 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
924 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
925 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
926#else
927# error "Port me"
928#endif
929
930#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
931 if (pReNative->Core.offPc)
932 {
933 /** @todo On x86, it is said that conditional jumps forward are statically
934 * predicted as not taken, so this isn't a very good construct.
935 * Investigate whether it makes sense to invert it and add another
936 * jump. Also, find out wtf the static predictor does here on arm! */
937 uint32_t const offFixup = off;
938 off = iemNativeEmitJzToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
939
940 /* .Lraisexcpt: */
941# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
942 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
943# endif
944 /* We need to update cpum.GstCtx.rip. */
945 if (idxOldPcReg == UINT8_MAX)
946 {
947 idxOldPcReg = iTmpReg;
948 off = iemNativeEmitLoadGprWithGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxOldPcReg);
949 }
950 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
951 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxOldPcReg);
952
953 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
954 iemNativeFixupFixedJump(pReNative, offFixup, off);
955 }
956 else
957#endif
958 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
959
960 iemNativeRegFreeTmp(pReNative, iTmpReg);
961
962 return off;
963}
964
965
966/**
967 * Emits code to check that the content of @a idxAddrReg is within the limit
968 * of CS, raising a \#GP(0) if it isn't.
969 *
970 * @returns New code buffer offset; throws VBox status code on error.
971 * @param pReNative The native recompile state.
972 * @param off The code buffer offset.
973 * @param idxAddrReg The host register (32-bit) with the address to
974 * check.
975 * @param idxInstr The current instruction.
976 */
977DECL_FORCE_INLINE_THROW(uint32_t)
978iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
979 uint8_t idxAddrReg, uint8_t idxInstr)
980{
981 /*
982 * Make sure we don't have any outstanding guest register writes as we may
983 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
984 */
985 off = iemNativeRegFlushPendingWrites(pReNative, off);
986
987#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
988 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
989#else
990 RT_NOREF(idxInstr);
991#endif
992
993 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
994 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
995 kIemNativeGstRegUse_ReadOnly);
996
997 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
998 off = iemNativeEmitTbExitJa<kIemNativeLabelType_RaiseGp0>(pReNative, off);
999
1000 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
1001 return off;
1002}
1003
1004
1005
1006
1007/**
1008 * Emits code to check that the content of @a idxAddrReg is within the limit
1009 * of CS, raising a \#GP(0) if it isn't.
1010 *
1011 * Caller makes sure everything is flushed, except maybe PC.
1012 *
1013 * @returns New code buffer offset; throws VBox status code on error.
1014 * @param pReNative The native recompile state.
1015 * @param off The code buffer offset.
1016 * @param idxAddrReg The host register (32-bit) with the address to
1017 * check.
1018 * @param idxOldPcReg Register holding the old PC that offPc is relative
1019 * to if available, otherwise UINT8_MAX.
1020 * @param idxInstr The current instruction.
1021 */
1022DECL_FORCE_INLINE_THROW(uint32_t)
1023iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1024 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
1025{
1026#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1027 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
1028#endif
1029
1030#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1031# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1032 if (!pReNative->Core.offPc)
1033# endif
1034 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1035#else
1036 RT_NOREF(idxInstr);
1037#endif
1038
1039 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
1040 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
1041 kIemNativeGstRegUse_ReadOnly);
1042
1043 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
1044#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1045 if (pReNative->Core.offPc)
1046 {
1047 uint32_t const offFixup = off;
1048 off = iemNativeEmitJbeToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
1049
1050 /* Raising a GP(0), but first we need to update cpum.GstCtx.rip. */
1051 if (idxOldPcReg == UINT8_MAX)
1052 {
1053 idxOldPcReg = idxAddrReg;
1054 off = iemNativeEmitLoadGprWithGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxOldPcReg);
1055 }
1056 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
1057 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxOldPcReg);
1058# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1059 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1060# endif
1061 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
1062 iemNativeFixupFixedJump(pReNative, offFixup, off);
1063 }
1064 else
1065#endif
1066 off = iemNativeEmitTbExitJa<kIemNativeLabelType_RaiseGp0>(pReNative, off);
1067
1068 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
1069 return off;
1070}
1071
1072
1073/*********************************************************************************************************************************
1074* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
1075*********************************************************************************************************************************/
1076
1077#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1078 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1079 (a_enmEffOpSize), pCallEntry->idxInstr); \
1080 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1081
1082#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1083 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1084 (a_enmEffOpSize), pCallEntry->idxInstr); \
1085 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1086 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1087
1088#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
1089 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1090 IEMMODE_16BIT, pCallEntry->idxInstr); \
1091 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1092
1093#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1094 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1095 IEMMODE_16BIT, pCallEntry->idxInstr); \
1096 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1097 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1098
1099#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
1100 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1101 IEMMODE_64BIT, pCallEntry->idxInstr); \
1102 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1103
1104#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1105 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1106 IEMMODE_64BIT, pCallEntry->idxInstr); \
1107 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1108 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1109
1110
1111#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1112 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1113 (a_enmEffOpSize), pCallEntry->idxInstr); \
1114 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1115
1116#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1117 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1118 (a_enmEffOpSize), pCallEntry->idxInstr); \
1119 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1120 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1121
1122#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG(a_i16, a_cbInstr, a_rcNormal) \
1123 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1124 IEMMODE_16BIT, pCallEntry->idxInstr); \
1125 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1126
1127#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1128 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1129 IEMMODE_16BIT, pCallEntry->idxInstr); \
1130 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1131 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1132
1133#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG(a_i32, a_cbInstr, a_rcNormal) \
1134 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1135 IEMMODE_64BIT, pCallEntry->idxInstr); \
1136 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1137
1138#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1139 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1140 IEMMODE_64BIT, pCallEntry->idxInstr); \
1141 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1142 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1143
1144/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
1145 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
1146 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
1147template<bool const a_fWithinPage>
1148DECL_INLINE_THROW(uint32_t)
1149iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1150 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1151{
1152 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
1153#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1154 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1155 if (a_fWithinPage && enmEffOpSize == IEMMODE_64BIT)
1156 {
1157 /* No #GP checking required, just update offPc and get on with it. */
1158 pReNative->Core.offPc += (int64_t)offDisp + cbInstr;
1159# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1160 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1161# endif
1162 }
1163 else
1164#endif
1165 {
1166 /* Flush all but PC iff we're doing a 64-bit update here and this isn't within a page. */
1167 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT && !a_fWithinPage))
1168 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1169
1170 /* Allocate a temporary PC register. */
1171 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1172 kIemNativeGstRegUse_ForUpdate);
1173
1174 /* Perform the addition. */
1175 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr + pReNative->Core.offPc);
1176
1177 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
1178 {
1179 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't.
1180 We can skip this if the target is within the same page. */
1181 if (!a_fWithinPage)
1182 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(pReNative, off, idxPcReg,
1183 (int64_t)offDisp + cbInstr, idxInstr);
1184 }
1185 else
1186 {
1187 /* Just truncate the result to 16-bit IP. */
1188 Assert(enmEffOpSize == IEMMODE_16BIT);
1189 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1190 }
1191
1192#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1193# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1194 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1195 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1196# endif
1197 /* Since we've already got the new PC value in idxPcReg, we can just as
1198 well write it out and reset offPc to zero. Otherwise, we'd need to use
1199 a copy of the shadow PC, which will cost another move instruction here. */
1200# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS)
1201 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
1202 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
1203 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
1204 Log4(("iemNativeEmitRip64RelativeJumpAndFinishingNoFlags: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u cCondDepth=%d\n",
1205 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped, pReNative->cCondDepth));
1206 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
1207# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1208 iemNativeDbgInfoAddNativeOffset(pReNative, off);
1209 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
1210# endif
1211# endif
1212 pReNative->Core.offPc = 0;
1213#endif
1214
1215 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
1216
1217 /* Free but don't flush the PC register. */
1218 iemNativeRegFreeTmp(pReNative, idxPcReg);
1219 }
1220 return off;
1221}
1222
1223
1224#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1225 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1226 (a_enmEffOpSize), pCallEntry->idxInstr); \
1227 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1228
1229#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1230 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1231 (a_enmEffOpSize), pCallEntry->idxInstr); \
1232 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1233 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1234
1235#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
1236 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1237 IEMMODE_16BIT, pCallEntry->idxInstr); \
1238 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1239
1240#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1241 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1242 IEMMODE_16BIT, pCallEntry->idxInstr); \
1243 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1244 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1245
1246#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
1247 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1248 IEMMODE_32BIT, pCallEntry->idxInstr); \
1249 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1250
1251#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1252 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1253 IEMMODE_32BIT, pCallEntry->idxInstr); \
1254 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1255 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1256
1257
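/* Note: the _FLAT variants below instantiate the emitter with a_fFlat=true, which
   lets it skip the CS.LIM check (and, except for 16-bit operand size, the up-front
   register flush), since a flat 32-bit code segment spans the whole 4 GiB range. */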
1258#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1259 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1260 (a_enmEffOpSize), pCallEntry->idxInstr); \
1261 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1262
1263#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1264 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1265 (a_enmEffOpSize), pCallEntry->idxInstr); \
1266 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1267 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1268
1269#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT(a_i16, a_cbInstr, a_rcNormal) \
1270 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1271 IEMMODE_16BIT, pCallEntry->idxInstr); \
1272 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1273
1274#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1275 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1276 IEMMODE_16BIT, pCallEntry->idxInstr); \
1277 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1278 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1279
1280#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT(a_i32, a_cbInstr, a_rcNormal) \
1281 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1282 IEMMODE_32BIT, pCallEntry->idxInstr); \
1283 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1284
1285#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1286 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1287 IEMMODE_32BIT, pCallEntry->idxInstr); \
1288 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1289 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1290
1291/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
1292 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
1293 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
1294template<bool const a_fFlat>
1295DECL_INLINE_THROW(uint32_t)
1296iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1297 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1298{
1299 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
1300#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1301 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1302#endif
1303
1304 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1305 if (!a_fFlat || enmEffOpSize == IEMMODE_16BIT)
1306 {
1307 off = iemNativeRegFlushPendingWrites(pReNative, off);
1308#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1309 Assert(pReNative->Core.offPc == 0);
1310#endif
1311 }
1312
1313 /* Allocate a temporary PC register. */
1314 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1315
1316 /* Perform the addition. */
1317    /* The addition is identical whether delayed PC updating is enabled or not;
1318       Core.offPc is simply folded into the immediate. */
1319    off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr + (int32_t)pReNative->Core.offPc);
1322
1323 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
1324 if (enmEffOpSize == IEMMODE_16BIT)
1325 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1326
1327    /* Perform limit checking, potentially raising #GP(0) and exiting the TB. */
1328 if (!a_fFlat)
1329 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1330
1331 /* Commit it. */
1332#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1333 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, enmEffOpSize == IEMMODE_32BIT ? 32 : 16);
1334 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1335#endif
1336
1337 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
1338#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1339 pReNative->Core.offPc = 0;
1340#endif
1341
1342 /* Free but don't flush the PC register. */
1343 iemNativeRegFreeTmp(pReNative, idxPcReg);
1344
1345 return off;
1346}
1347
1348
1349#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
1350 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1351 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1352
1353#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
1354 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1355 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1356 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1357
1358#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
1359 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1360 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1361
1362#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1363 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1364 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1365 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1366
1367#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
1368 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1369 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1370
1371#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1372 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1373 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1374 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1375
1376/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
1377DECL_INLINE_THROW(uint32_t)
1378iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1379 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
1380{
1381 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1382 off = iemNativeRegFlushPendingWrites(pReNative, off);
1383
1384#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1385 Assert(pReNative->Core.offPc == 0);
1386 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1387#endif
1388
1389 /* Allocate a temporary PC register. */
1390 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1391
1392 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
1393 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
1394 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1395 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1396#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1397 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, 16);
1398 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1399#endif
1400 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
1401
1402 /* Free but don't flush the PC register. */
1403 iemNativeRegFreeTmp(pReNative, idxPcReg);
1404
1405 return off;
1406}
1407
1408
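/* Summary of the relative jump emitters above: the 64-bit variant checks that the
   new RIP is canonical (unless the target stays within the same page), the 32-bit
   variant checks the new EIP against CS.LIM (unless flat), and the 16-bit variant
   always truncates to IP and checks CS.LIM. */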
1409
1410/*********************************************************************************************************************************
1411* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH).                                 *
1412*********************************************************************************************************************************/
1413
1414/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
1415#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
1416 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1417
1418/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
1419#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
1420 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1421
1422/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
1423#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
1424 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1425
1426/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
1427 * clears flags. */
1428#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
1429 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
1430 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1431
1432/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
1433 * clears flags. */
1434#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
1435 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
1436 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1437
1438/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
1439 * clears flags. */
1440#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
1441 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
1442 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1443
1444#undef IEM_MC_SET_RIP_U16_AND_FINISH
1445
1446
1447/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
1448#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
1449 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1450
1451/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
1452#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
1453 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1454
1455/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
1456 * clears flags. */
1457#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
1458 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
1459 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1460
1461/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
1462 * and clears flags. */
1463#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
1464 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
1465 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1466
1467#undef IEM_MC_SET_RIP_U32_AND_FINISH
1468
1469
1470/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
1471#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
1472 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1473
1474/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
1475 * and clears flags. */
1476#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
1477 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
1478 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1479
1480#undef IEM_MC_SET_RIP_U64_AND_FINISH
1481
1482
1483/** Same as iemRegRipJumpU16AndFinishNoFlags,
1484 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
1485DECL_INLINE_THROW(uint32_t)
1486iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
1487 uint8_t idxInstr, uint8_t cbVar)
1488{
1489 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1490 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1491
1492    /* If we can't rule out a #GP(0) below, flush all dirty registers except for
1493       PC, which will be handled specially by the two workers below if they raise a #GP(0). */
1494 bool const fMayRaiseGp0 = (f64Bit && cbVar > sizeof(uint32_t)) || (!f64Bit && !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1495 uint8_t const idxOldPcReg = fMayRaiseGp0
1496 ? iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc)
1497 : UINT8_MAX;
1498 if (fMayRaiseGp0)
1499 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1500
1501 /* Get a register with the new PC loaded from idxVarPc.
1502       Note! This ASSUMES that the high bits of the GPR are zeroed. */
1503 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
1504
1505 /* Check that the target is within CS.LIM / is canonical (may #GP(0) + exit TB). */
1506 if (fMayRaiseGp0)
1507 {
1508 if (f64Bit)
1509 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1510 else
1511 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1512 }
1513
1514 /* Store the result. */
1515 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
1516
1517#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1518 pReNative->Core.offPc = 0;
1519 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1520# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1521 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1522 pReNative->Core.fDebugPcInitialized = true;
1523 Log4(("uPcUpdatingDebug=rip off=%#x\n", off));
1524# endif
1525#endif
1526
1527 if (idxOldPcReg != UINT8_MAX)
1528 iemNativeRegFreeTmp(pReNative, idxOldPcReg);
1529 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1530    /** @todo implicitly free the variable? */
1531
1532 return off;
1533}
1534
1535
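/* Note on the fMayRaiseGp0 logic above: a 64-bit jump to a 16-bit or 32-bit value
   is always canonical (the value is zero extended), and in flat 32-bit mode CS.LIM
   covers the entire 4 GiB range, so no #GP(0) is possible and the old-PC capture
   plus flush can be skipped in those cases. */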
1536
1537/*********************************************************************************************************************************
1538* Emitters for changing PC/RIP/EIP/IP with an indirect or relative call (IEM_MC_IND_CALL_UXX_AND_FINISH) (requires stack emitters). *
1539*********************************************************************************************************************************/
1540
1541/** @todo These helpers naturally belong to the stack push API, but we already need them up here (we could of course move
1542 *        them below the stack emitters, but then they wouldn't be close to the rest of the PC/RIP handling...). */
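/**
 * Helper for the 16-bit stack pointer case of a push: subtracts @a cbMem from
 * the 16-bit SP held in the low bits of @a idxRegRsp (wrapping within 64 KiB)
 * and places the zero-extended result in @a idxRegEffSp to be used as the
 * effective stack address.
 */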
1543DECL_FORCE_INLINE_THROW(uint32_t)
1544iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1545{
1546 /* Use16BitSp: */
1547#ifdef RT_ARCH_AMD64
1548 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1549 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1550#else
1551 /* sub regeff, regrsp, #cbMem */
1552 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
1553 /* and regeff, regeff, #0xffff */
1554 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1555 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
1556    /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0, keeping the other RSP bits. */
1557 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
1558#endif
1559 return off;
1560}
1561
1562
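/**
 * Helper for the 32-bit stack pointer case of a push: subtracts @a cbMem from
 * the 32-bit ESP in @a idxRegRsp and copies the result to @a idxRegEffSp as
 * the effective stack address.
 */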
1563DECL_FORCE_INLINE(uint32_t)
1564iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1565{
1566 /* Use32BitSp: */
1567 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
1568 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1569 return off;
1570}
1571
1572
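/**
 * Emits code for pushing the value in @a idxRegPc onto the guest stack,
 * updating RSP/ESP/SP accordingly.
 *
 * @tparam  a_cBitsVar   Width of the value being pushed: 16, 32 or 64.
 * @tparam  a_cBitsFlat  Zero for segmented (SS based) stacks, otherwise 32 or
 *                       64 when the stack is flat and RSP can be used directly.
 * @param   pfnFunction  The stack store helper to call on the TLB miss path.
 * @param   idxInstr     The current instruction number, for diagnostics.
 */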
1573template<uint8_t const a_cBitsVar, uint8_t const a_cBitsFlat>
1574DECL_INLINE_THROW(uint32_t)
1575iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
1576 uintptr_t pfnFunction, uint8_t idxInstr)
1577{
1578 AssertCompile(a_cBitsVar == 16 || a_cBitsVar == 32 || a_cBitsVar == 64);
1579 AssertCompile(a_cBitsFlat == 0 || a_cBitsFlat == 32 || a_cBitsFlat == 64);
1580
1581 /*
1582 * Assert sanity.
1583 */
1584#ifdef VBOX_STRICT
1585 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
1586 {
1587 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
1588 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
1589 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
1590 Assert( pfnFunction
1591 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 32) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1592 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 32) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
1593 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 64) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1594 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 64) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
1595 : UINT64_C(0xc000b000a0009000) ));
1596 }
1597 else
1598 Assert( pfnFunction
1599 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
1600 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
1601 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
1602 : UINT64_C(0xc000b000a0009000) ));
1603#endif
1604
1605#ifdef VBOX_STRICT
1606 /*
1607 * Check that the fExec flags we've got make sense.
1608 */
1609 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1610#endif
1611
1612 /*
1613 * To keep things simple we have to commit any pending writes first as we
1614 * may end up making calls.
1615 */
1616 /** @todo we could postpone this till we make the call and reload the
1617 * registers after returning from the call. Not sure if that's sensible or
1618 * not, though. */
1619 off = iemNativeRegFlushPendingWrites(pReNative, off);
1620
1621 /*
1622 * First we calculate the new RSP and the effective stack pointer value.
1623 * For 64-bit mode and flat 32-bit these two are the same.
1624 * (Code structure is very similar to that of PUSH)
1625 */
1626 RT_CONSTEXPR
1627 uint8_t const cbMem = a_cBitsVar / 8;
1628 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1629 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1630 uint8_t const idxRegEffSp = a_cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1631 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1632 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
1633 {
1634 Assert(idxRegEffSp == idxRegRsp);
1635 Assert(a_cBitsFlat == 32 || a_cBitsFlat == 64);
1636 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1637 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
1638 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1639 else
1640 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1641 }
1642 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1643 {
1644 Assert(idxRegEffSp != idxRegRsp);
1645 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1646 kIemNativeGstRegUse_ReadOnly);
1647#ifdef RT_ARCH_AMD64
1648 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1649#else
1650 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1651#endif
1652 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1653 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1654 offFixupJumpToUseOtherBitSp = off;
1655 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1656 {
1657 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1658 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1659 }
1660 else
1661 {
1662 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1663 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1664 }
1665 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1666 }
1667 /* SpUpdateEnd: */
1668 uint32_t const offLabelSpUpdateEnd = off;
1669
1670 /*
1671 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1672 * we're skipping lookup).
1673 */
1674 uint8_t const iSegReg = a_cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1675 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
1676 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1677 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1678 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1679 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1680 : UINT32_MAX;
1681 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1682
1683
1684 if (!TlbState.fSkip)
1685 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1686 else
1687 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1688
1689 /*
1690 * Use16BitSp:
1691 */
1692 if RT_CONSTEXPR_IF(a_cBitsFlat == 0)
1693 {
1694#ifdef RT_ARCH_AMD64
1695 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1696#else
1697 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1698#endif
1699 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1700 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1701 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1702 else
1703 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1704 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1705 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1706 }
1707
1708 /*
1709 * TlbMiss:
1710 *
1711 * Call helper to do the pushing.
1712 */
1713 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1714
1715#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1716 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1717#else
1718 RT_NOREF(idxInstr);
1719#endif
1720
1721 /* Save variables in volatile registers. */
1722 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1723 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1724 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1725 | (RT_BIT_32(idxRegPc));
1726 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1727
1728 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1729 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1730 {
1731 /* Swap them using ARG0 as temp register: */
1732 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1733 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1734 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1735 }
1736 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1737 {
1738 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1739 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1740
1741 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1742 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1743 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1744 }
1745 else
1746 {
1747 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1748 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1749
1750 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1751 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1752 }
1753
1754#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
1755 /* Do delayed EFLAGS calculations. */
1756 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
1757 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
1758#endif
1759
1760 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1761 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1762
1763 /* Done setting up parameters, make the call. */
1764 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
1765
1766 /* Restore variables and guest shadow registers to volatile registers. */
1767 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1768 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1769
1770#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1771 if (!TlbState.fSkip)
1772 {
1773 /* end of TlbMiss - Jump to the done label. */
1774 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1775 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1776
1777 /*
1778 * TlbLookup:
1779 */
1780 off = iemNativeEmitTlbLookup<true, cbMem, cbMem - 1, IEM_ACCESS_TYPE_WRITE>(pReNative, off, &TlbState, iSegReg,
1781 idxLabelTlbLookup, idxLabelTlbMiss,
1782 idxRegMemResult);
1783
1784 /*
1785 * Emit code to do the actual storing / fetching.
1786 */
1787 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1788# ifdef IEM_WITH_TLB_STATISTICS
1789 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1790 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1791# endif
1792 AssertCompile(cbMem == 2 || cbMem == 4 || cbMem == 8);
1793 if RT_CONSTEXPR_IF(cbMem == 2)
1794 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1795 else if RT_CONSTEXPR_IF(cbMem == 4)
1796 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1797 else
1798 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1799
1800 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1801 TlbState.freeRegsAndReleaseVars(pReNative);
1802
1803 /*
1804 * TlbDone:
1805 *
1806 * Commit the new RSP value.
1807 */
1808 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1809 }
1810#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1811
1812#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1813 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Rsp>(pReNative, off, idxRegRsp);
1814#endif
1815 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1816 if (idxRegEffSp != idxRegRsp)
1817 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1818
1819 return off;
1820}
1821
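/* The call emitters below instantiate iemNativeEmitStackPushRip as <16, 0> for
   16-bit pushes, <32, 0> for segmented 32-bit pushes and <64, 64> for 64-bit flat
   pushes, pairing each with the matching stack store helper. */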
1822
1823/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1824#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1825 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1826
1827/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1828 * clears flags. */
1829#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1830 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1831 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1832
1833/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1834#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1835 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1836
1837/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1838 * clears flags. */
1839#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1840 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1841 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1842
1843#undef IEM_MC_IND_CALL_U16_AND_FINISH
1844
1845
1846/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1847#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1848 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1849
1850/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1851 * clears flags. */
1852#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1853 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1854 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1855
1856#undef IEM_MC_IND_CALL_U32_AND_FINISH
1857
1858
1859/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1860 * an extra parameter, for use in 64-bit code. */
1861#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1862 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1863
1864
1865/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1866 * an extra parameter, for use in 64-bit code and we need to check and clear
1867 * flags. */
1868#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1869 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1870 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1871
1872#undef IEM_MC_IND_CALL_U64_AND_FINISH
1873
1874/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1875 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1876DECL_INLINE_THROW(uint32_t)
1877iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1878 uint8_t idxInstr, uint8_t cbVar)
1879{
1880 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1881 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1882
1883 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1884 off = iemNativeRegFlushPendingWrites(pReNative, off);
1885
1886#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1887 Assert(pReNative->Core.offPc == 0);
1888 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1889#endif
1890
1891 /* Get a register with the new PC loaded from idxVarPc.
1892       Note! This ASSUMES that the high bits of the GPR are zeroed. */
1893 uint8_t const idxPcRegNew = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1894
1895 /* Check limit (may #GP(0) + exit TB). */
1896 if (!f64Bit)
1897/** @todo we can skip this test in FLAT 32-bit mode. */
1898 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1899 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1900 else if (cbVar > sizeof(uint32_t))
1901 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1902
1903#if 1
1904 /* Allocate a temporary PC register, we don't want it shadowed. */
1905 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1906 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1907#else
1908 /* Allocate a temporary PC register. */
1909 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1910 true /*fNoVolatileRegs*/);
1911#endif
1912
1913 /* Perform the addition and push the variable to the guest stack. */
1914 /** @todo Flat variants for PC32 variants. */
1915 switch (cbVar)
1916 {
1917 case sizeof(uint16_t):
1918 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1919 /* Truncate the result to 16-bit IP. */
1920 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1921 off = iemNativeEmitStackPushRip<16, 0>(pReNative, off, idxPcReg, (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1922 break;
1923 case sizeof(uint32_t):
1924 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1925 /** @todo In FLAT mode we can use the flat variant. */
1926 off = iemNativeEmitStackPushRip<32, 0>(pReNative, off, idxPcReg, (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1927 break;
1928 case sizeof(uint64_t):
1929 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1930 off = iemNativeEmitStackPushRip<64, 64>(pReNative, off, idxPcReg, (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1931 break;
1932 default:
1933 AssertFailed();
1934 }
1935
1936 /* RSP got changed, so do this again. */
1937 off = iemNativeRegFlushPendingWrites(pReNative, off);
1938
1939 /* Store the result. */
1940 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcRegNew);
1941#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1942 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1943 pReNative->Core.fDebugPcInitialized = true;
1944 Log4(("uPcUpdatingDebug=rip/indirect-call off=%#x\n", off));
1945#endif
1946
1947#if 1
1948 /* Need to transfer the shadow information to the new RIP register. */
1949 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1950#else
1951 /* Sync the new PC. */
1952 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxPcRegNew);
1953#endif
1954 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1955 iemNativeRegFreeTmp(pReNative, idxPcReg);
1956    /** @todo implicitly free the variable? */
1957
1958 return off;
1959}
1960
1961
1962/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1963 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1964#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1965 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1966
1967/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1968 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
1969 * flags. */
1970#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
1971 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
1972 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1973
1974/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1975 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1976#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
1977 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1978
1979/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1980 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1981 * flags. */
1982#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
1983 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
1984 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1985
1986/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1987 * an extra parameter, for use in 64-bit code. */
1988#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
1989 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1990
1991/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1992 * an extra parameter, for use in 64-bit code and we need to check and clear
1993 * flags. */
1994#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
1995 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
1996 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1997
1998#undef IEM_MC_REL_CALL_S16_AND_FINISH
1999
2000/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2001 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2002DECL_INLINE_THROW(uint32_t)
2003iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
2004 uint8_t idxInstr)
2005{
2006 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2007 off = iemNativeRegFlushPendingWrites(pReNative, off);
2008
2009#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2010 Assert(pReNative->Core.offPc == 0);
2011 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2012#endif
2013
2014    /* Allocate temporary registers for the old and new PC. */
2015 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2016 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
2017 uint8_t const idxPcRegNew = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
2018
2019 /* Calculate the new RIP. */
2020 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2021 /* Truncate the result to 16-bit IP. */
2022 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
2023 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2024 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2025
2026 /* Truncate the result to 16-bit IP. */
2027 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
2028
2029 /* Check limit (may #GP(0) + exit TB). */
2030 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2031
2032 /* Perform the addition and push the variable to the guest stack. */
2033 off = iemNativeEmitStackPushRip<16, 0>(pReNative, off, idxPcRegOld, (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
2034
2035 /* RSP got changed, so flush again. */
2036 off = iemNativeRegFlushPendingWrites(pReNative, off);
2037
2038 /* Store the result. */
2039 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcRegNew);
2040#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2041 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2042 pReNative->Core.fDebugPcInitialized = true;
2043 Log4(("uPcUpdatingDebug=rip/rel-call-16 off=%#x offDisp=%d\n", off, offDisp));
2044#endif
2045
2046 /* Need to transfer the shadow information to the new RIP register. */
2047 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2048 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2049 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2050
2051 return off;
2052}
2053
2054
2055/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2056 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
2057#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
2058 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
2059
2060/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2061 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
2062 * flags. */
2063#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
2064 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
2065 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2066
2067#undef IEM_MC_REL_CALL_S32_AND_FINISH
2068
2069/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2070 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2071DECL_INLINE_THROW(uint32_t)
2072iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
2073 uint8_t idxInstr)
2074{
2075 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2076 off = iemNativeRegFlushPendingWrites(pReNative, off);
2077
2078#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2079 Assert(pReNative->Core.offPc == 0);
2080 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2081#endif
2082
2083    /* Allocate temporary registers for the old and new PC. */
2084 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2085 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2086 uint8_t const idxPcRegNew = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
2087
2088 /* Update the EIP to get the return address. */
2089 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2090
2091    /* Load the address, add the displacement and check it against the CS segment limit, raising #GP(0) + exit TB if it's beyond the limit. */
2092 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2093 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2094 /** @todo we can skip this test in FLAT 32-bit mode. */
2095 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2096
2097    /* Push the return address onto the guest stack. */
2098 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
2099 off = iemNativeEmitStackPushRip<32, 0>(pReNative, off, idxPcRegOld, (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
2100
2101 /* RSP got changed, so do this again. */
2102 off = iemNativeRegFlushPendingWrites(pReNative, off);
2103
2104 /* Store the result. */
2105 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcRegNew);
2106#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2107 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2108 pReNative->Core.fDebugPcInitialized = true;
2109 Log4(("uPcUpdatingDebug=eip/rel-call-32 off=%#x offDisp=%d\n", off, offDisp));
2110#endif
2111
2112 /* Need to transfer the shadow information to the new RIP register. */
2113 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2114 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2115 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2116
2117 return off;
2118}
2119
2120
2121/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2122 * an extra parameter, for use in 64-bit code. */
2123#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
2124 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
2125
2126/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2127 * an extra parameter, for use in 64-bit code and we need to check and clear
2128 * flags. */
2129#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
2130 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
2131 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2132
2133#undef IEM_MC_REL_CALL_S64_AND_FINISH
2134
2135/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2136 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2137DECL_INLINE_THROW(uint32_t)
2138iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
2139 uint8_t idxInstr)
2140{
2141 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2142 off = iemNativeRegFlushPendingWrites(pReNative, off);
2143
2144#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2145 Assert(pReNative->Core.offPc == 0);
2146 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2147#endif
2148
2149    /* Allocate temporary registers for the old and new PC. */
2150 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2151 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2152 uint8_t const idxPcRegNew = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
2153
2154 /* Update the RIP to get the return address. */
2155 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
2156
2157 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2158 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
2159 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
2160 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2161
2162    /* Push the return address onto the guest stack. */
2163 off = iemNativeEmitStackPushRip<64, 64>(pReNative, off, idxPcRegOld, (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
2164
2165 /* RSP got changed, so do this again. */
2166 off = iemNativeRegFlushPendingWrites(pReNative, off);
2167
2168 /* Store the result. */
2169 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcRegNew);
2170#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2171 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2172 pReNative->Core.fDebugPcInitialized = true;
2173    Log4(("uPcUpdatingDebug=rip/rel-call-64 off=%#x offDisp=%RI64\n", off, offDisp));
2174#endif
2175
2176 /* Need to transfer the shadow information to the new RIP register. */
2177 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2178 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2179 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2180
2181 return off;
2182}
2183
2184
2185/*********************************************************************************************************************************
2186* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters).        *
2187*********************************************************************************************************************************/
2188
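/**
 * Helper for the 16-bit stack pointer case of a near return: loads the
 * zero-extended 16-bit SP into @a idxRegEffSp as the effective address of the
 * return address, then advances the 16-bit SP in @a idxRegRsp by
 * @a cbMem + @a cbPopAdd (wrapping within 64 KiB), using @a idxRegTmp as
 * scratch on ARM64.
 */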
2189DECL_FORCE_INLINE_THROW(uint32_t)
2190iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2191 uint16_t cbPopAdd, uint8_t idxRegTmp)
2192{
2193 /* Use16BitSp: */
2194#ifdef RT_ARCH_AMD64
2195 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2196 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
2197 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
2198 RT_NOREF(idxRegTmp);
2199
2200#elif defined(RT_ARCH_ARM64)
2201 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
2202 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
2203    /* add tmp, regrsp, #(cbMem + cbPopAdd) */
2204 uint16_t const cbCombined = cbMem + cbPopAdd;
2205 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
2206 if (cbCombined >= RT_BIT_32(12))
2207 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
2208 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2209 /* and tmp, tmp, #0xffff */
2210 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2211 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
2212    /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
2213 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
2214
2215#else
2216# error "Port me"
2217#endif
2218 return off;
2219}
2220
2221
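/**
 * Helper for the 32-bit stack pointer case of a near return: loads ESP into
 * @a idxRegEffSp as the effective address of the return address and advances
 * the 32-bit ESP in @a idxRegRsp by @a cbMem + @a cbPopAdd.
 */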
2222DECL_FORCE_INLINE_THROW(uint32_t)
2223iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2224 uint16_t cbPopAdd)
2225{
2226 /* Use32BitSp: */
2227 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2228 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
2229 return off;
2230}
2231
2232
2233/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
2234#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_cbPopArgs, a_cbInstr) \
2235 off = iemNativeEmitRetn<IEMMODE_16BIT, false>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr)
2236
2237/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
2238#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2239 Assert((a_enmEffOpSize) == IEMMODE_32BIT || (a_enmEffOpSize) == IEMMODE_16BIT); \
2240 off = (a_enmEffOpSize) == IEMMODE_32BIT \
2241 ? iemNativeEmitRetn<IEMMODE_32BIT, false>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr) \
2242 : iemNativeEmitRetn<IEMMODE_16BIT, false>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr)
2243
2244/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
2245#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2246 Assert((a_enmEffOpSize) == IEMMODE_64BIT || (a_enmEffOpSize) == IEMMODE_16BIT); \
2247 off = (a_enmEffOpSize) == IEMMODE_64BIT \
2248 ? iemNativeEmitRetn<IEMMODE_64BIT, true>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr) \
2249 : iemNativeEmitRetn<IEMMODE_16BIT, true>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr)
2250
2251/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
2252 * clears flags. */
2253#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbPopArgs, a_cbInstr) \
2254 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_cbPopArgs, a_cbInstr); \
2255 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2256
2257/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
2258 * clears flags. */
2259#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2260 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_cbPopArgs, a_cbInstr, a_enmEffOpSize); \
2261 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2262
2263/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
2264 * clears flags. */
2265#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2266 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_cbPopArgs, a_cbInstr, a_enmEffOpSize); \
2267 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2268
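/* Note: a_enmEffOpSize selects the width of the return address being popped
   (16, 32 or 64 bits), a_f64Bit indicates 64-bit code, and cbPopArgs is the
   extra byte count of a 'retn imm16' that is added to the stack pointer after
   popping the return address. */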
2269/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
2270template<IEMMODE const a_enmEffOpSize, bool const a_f64Bit>
2271DECL_INLINE_THROW(uint32_t)
2272iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPopArgs, uint8_t idxInstr)
2273{
2274 RT_NOREF(cbInstr);
2275 AssertCompile(a_enmEffOpSize == IEMMODE_64BIT || a_enmEffOpSize == IEMMODE_32BIT || a_enmEffOpSize == IEMMODE_16BIT);
2276
2277#ifdef VBOX_STRICT
2278 /*
2279 * Check that the fExec flags we've got make sense.
2280 */
2281 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
2282#endif
2283
2284 /*
2285 * To keep things simple we have to commit any pending writes first as we
2286 * may end up making calls.
2287 */
2288 off = iemNativeRegFlushPendingWrites(pReNative, off);
2289
2290 /*
2291 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
2292 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
2293 * directly as the effective stack pointer.
2294 *
2295 * (Code structure is very similar to that of PUSH)
2296 *
2297     * Note! As a simplification, we treat opsize overridden returns (o16 ret)
2298     *       in FLAT 32-bit mode as if we weren't in FLAT mode, since these
2299     *       aren't commonly used (or useful) and thus aren't worth optimizing.
2300     *
2301     * Note! For non-flat modes the guest RSP is allocated for calculation
2302     *       rather than for update, because the shadowed register would
2303     *       otherwise remain modified even if the return address raises a
2304     *       #GP(0) for being outside the CS limit, yielding a wrong stack
2305     *       pointer value in the guest (see the near return testcase in
2306     *       bs3-cpu-basic-2).  If no exception is thrown, the shadowing is
2307     *       transferred to the new register returned by iemNativeRegAllocTmpForGuestReg() at the end.
2308 */
2309 RT_CONSTEXPR
2310 uint8_t const cbMem = a_enmEffOpSize == IEMMODE_64BIT
2311 ? sizeof(uint64_t)
2312 : a_enmEffOpSize == IEMMODE_32BIT
2313 ? sizeof(uint32_t)
2314 : sizeof(uint16_t);
2315/** @todo the basic flatness could be detected by the threaded compiler step
2316 * like for the other macros... worth it? */
2317 bool const fFlat = a_enmEffOpSize == IEMMODE_64BIT
2318 || (a_enmEffOpSize == IEMMODE_32BIT /* see note */ && IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
2319 uintptr_t const pfnFunction = a_enmEffOpSize == IEMMODE_64BIT
2320 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
2321 : fFlat
2322 ? (uintptr_t)iemNativeHlpStackFlatFetchU32
2323 : a_enmEffOpSize == IEMMODE_32BIT
2324 ? (uintptr_t)iemNativeHlpStackFetchU32
2325 : (uintptr_t)iemNativeHlpStackFetchU16;
2326 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
2327 fFlat ? kIemNativeGstRegUse_ForUpdate
2328 : kIemNativeGstRegUse_Calculation,
2329 true /*fNoVolatileRegs*/);
2330 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
2331 /** @todo can do a better job picking the register here. For cbMem >= 4 this
2332 * will be the resulting register value. */
2333 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
2334
2335 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
2336 if (fFlat)
2337 Assert(idxRegEffSp == idxRegRsp);
2338 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
2339 {
2340 Assert(idxRegEffSp != idxRegRsp);
2341 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
2342 kIemNativeGstRegUse_ReadOnly);
2343#ifdef RT_ARCH_AMD64
2344 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2345#else
2346 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2347#endif
2348 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
2349 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
2350 offFixupJumpToUseOtherBitSp = off;
2351 if RT_CONSTEXPR_IF(a_enmEffOpSize == IEMMODE_32BIT)
2352 {
2353 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
2354 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs);
2355 }
2356 else
2357 {
2358 Assert(a_enmEffOpSize == IEMMODE_16BIT);
2359 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
2360 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs,
2361 idxRegMemResult);
2362 }
2363 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2364 }
2365 /* SpUpdateEnd: */
2366 uint32_t const offLabelSpUpdateEnd = off;
2367
2368 /*
2369 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
2370 * we're skipping lookup).
2371 */
2372 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
2373 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
2374 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
2375 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
2376 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
2377 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
2378 : UINT32_MAX;
2379
2380 if (!TlbState.fSkip)
2381 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
2382 else
2383 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
2384
2385 /*
2386      * UseOtherBitSp:
2387 */
2388 if (!fFlat)
2389 {
2390#ifdef RT_ARCH_AMD64
2391 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2392#else
2393 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2394#endif
2395 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
2396 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
2397 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs,
2398 idxRegMemResult);
2399 else
2400 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs);
2401 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
2402 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2403 }
2404
2405 /*
2406 * TlbMiss:
2407 *
2408      * Call helper to do the fetching.
2409 */
2410 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
2411
2412#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2413 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2414#else
2415 RT_NOREF(idxInstr);
2416#endif
2417
2418 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
2419 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
2420 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
2421 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
2422
2423
2424 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
2425 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
2426 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
2427
2428#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2429 /* Do delayed EFLAGS calculations. */
2430 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
2431#endif
2432
2433 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
2434 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2435
2436 /* Done setting up parameters, make the call. */
2437 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
2438
2439 /* Move the return register content to idxRegMemResult. */
2440 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
2441 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
2442
2443 /* Restore variables and guest shadow registers to volatile registers. */
2444 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
2445 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
2446
2447#ifdef IEMNATIVE_WITH_TLB_LOOKUP
2448 if (!TlbState.fSkip)
2449 {
2450 /* end of TlbMiss - Jump to the done label. */
2451 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
2452 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
2453
2454 /*
2455 * TlbLookup:
2456 */
2457 off = iemNativeEmitTlbLookup<true, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ>(pReNative, off, &TlbState, iSegReg,
2458 idxLabelTlbLookup, idxLabelTlbMiss,
2459 idxRegMemResult);
2460
2461 /*
2462          * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
2463 */
2464 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2465# ifdef IEM_WITH_TLB_STATISTICS
2466 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
2467 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
2468# endif
2469 switch (cbMem)
2470 {
2471 case 2:
2472 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2473 break;
2474 case 4:
2475 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2476 break;
2477 case 8:
2478 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2479 break;
2480 default:
2481 AssertFailed();
2482 }
2483
2484 TlbState.freeRegsAndReleaseVars(pReNative);
2485
2486 /*
2487 * TlbDone:
2488 *
2489      * Set the new RSP value (FLAT accesses need to calculate it first) and
2490 * commit the popped register value.
2491 */
2492 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
2493 }
2494#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
2495
2496 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
2497 if RT_CONSTEXPR_IF(!a_f64Bit)
2498/** @todo we can skip this test in FLAT 32-bit mode. */
2499 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2500 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2501 else if RT_CONSTEXPR_IF(a_enmEffOpSize == IEMMODE_64BIT)
2502 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2503
2504 /* Complete RSP calculation for FLAT mode. */
2505 if (idxRegEffSp == idxRegRsp)
2506 {
2507 if RT_CONSTEXPR_IF(a_enmEffOpSize == IEMMODE_64BIT)
2508 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPopArgs);
2509 else
2510 {
2511 Assert(a_enmEffOpSize == IEMMODE_32BIT);
2512 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPopArgs);
2513 }
2514 }
2515
2516 /* Commit the result and clear any current guest shadows for RIP. */
2517 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Rsp>(pReNative, off, idxRegRsp);
2518 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>( pReNative, off, idxRegMemResult);
2519 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
2520#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2521 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2522 pReNative->Core.fDebugPcInitialized = true;
2523 Log4(("uPcUpdatingDebug=rip/ret off=%#x\n", off));
2524#endif
2525
2526 /* Need to transfer the shadowing information to the host register containing the updated value now. */
2527 if (!fFlat)
2528 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
2529
2530 iemNativeRegFreeTmp(pReNative, idxRegRsp);
2531 if (idxRegEffSp != idxRegRsp)
2532 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
2533 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
2534 return off;
2535}
2536
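/*
 * Illustrative sketch only (not part of the build): the guest-visible
 * semantics the code emitted above implements for the non-FLAT, 16-bit
 * stack case, written as plain C.  The function and callback names are
 * made up for this example; they merely show the order of operations:
 * fetch the return address first, then advance SP past the return
 * address and the popped argument bytes, leaving the upper RSP bits
 * untouched because only SP is used when SS.D is clear.
 */
#if 0
static uint16_t iemExampleRetnNearU16(uint64_t *puRsp, uint16_t cbPopArgs,
                                      uint16_t (*pfnFetchU16)(uint16_t offSp))
{
    uint16_t uSp    = (uint16_t)*puRsp;                                /* SS.D=0: only SP is used. */
    uint16_t uNewIp = pfnFetchU16(uSp);                                /* pop the return address. */
    uSp             = (uint16_t)(uSp + sizeof(uint16_t) + cbPopArgs);  /* skip retaddr + imm16 args. */
    *puRsp          = (*puRsp & UINT64_C(0xffffffffffff0000)) | uSp;   /* keep the upper RSP bits. */
    return uNewIp;                       /* checked against CS.LIM before RIP/RSP are committed. */
}
#endif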
2537
2538/*********************************************************************************************************************************
2539* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
2540*********************************************************************************************************************************/
2541
2542#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
2543 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2544
2545/**
2546 * Emits code to check if a \#NM exception should be raised.
2547 *
2548 * @returns New code buffer offset, UINT32_MAX on failure.
2549 * @param pReNative The native recompile state.
2550 * @param off The code buffer offset.
2551 * @param idxInstr The current instruction.
2552 */
2553DECL_INLINE_THROW(uint32_t)
2554iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2555{
2556 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
2557
2558 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
2559 {
2560 /*
2561 * Make sure we don't have any outstanding guest register writes as we may
2562          * raise an #NM and all guest registers must be up to date in CPUMCTX.
2563 */
2564 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2565 off = iemNativeRegFlushPendingWrites(pReNative, off);
2566
2567#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2568 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2569#else
2570 RT_NOREF(idxInstr);
2571#endif
2572
2573 /* Allocate a temporary CR0 register. */
2574 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2575 kIemNativeGstRegUse_ReadOnly);
2576
2577 /*
2578          * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
2579 * return raisexcpt();
2580 */
2581 /* Test and jump. */
2582 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseNm>(pReNative, off, idxCr0Reg,
2583 X86_CR0_EM | X86_CR0_TS);
2584
2585 /* Free but don't flush the CR0 register. */
2586 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2587
2588 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
2589 }
2590 else
2591 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
2592
2593 return off;
2594}
2595
2596
2597#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
2598 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2599
2600/**
2601 * Emits code to check if a \#NM exception should be raised.
2602 *
2603 * @returns New code buffer offset, UINT32_MAX on failure.
2604 * @param pReNative The native recompile state.
2605 * @param off The code buffer offset.
2606 * @param idxInstr The current instruction.
2607 */
2608DECL_INLINE_THROW(uint32_t)
2609iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2610{
2611 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
2612
2613 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
2614 {
2615 /*
2616 * Make sure we don't have any outstanding guest register writes as we may
2617          * raise an #NM and all guest registers must be up to date in CPUMCTX.
2618 */
2619 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2620 off = iemNativeRegFlushPendingWrites(pReNative, off);
2621
2622#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2623 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2624#else
2625 RT_NOREF(idxInstr);
2626#endif
2627
2628 /* Allocate a temporary CR0 register. */
2629 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2630 kIemNativeGstRegUse_Calculation);
2631
2632 /*
2633          * if ((cr0 & (X86_CR0_MP | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))
2634 * return raisexcpt();
2635 */
2636 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2637 /* Test and jump. */
2638 off = iemNativeEmitTbExitIfGpr32EqualsImm<kIemNativeLabelType_RaiseNm>(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2639
2640 /* Free the CR0 register. */
2641 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2642
2643 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2644 }
2645 else
2646 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2647
2648 return off;
2649}
2650
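/*
 * Worked comparison of the two #NM checks above (CR0 values assumed for
 * illustration): the plain variant exits via RaiseNm whenever CR0.EM or
 * CR0.TS is set, while the WAIT/FWAIT variant only does so when CR0.MP
 * and CR0.TS are both set:
 *
 *      CR0.EM  CR0.MP  CR0.TS   plain check   wait check
 *        0       0       0        no #NM        no #NM
 *        0       1       1        #NM           #NM
 *        0       0       1        #NM           no #NM   (MP clear)
 *        1       0       0        #NM           no #NM
 */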
2651
2652#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2653 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2654
2655/**
2656 * Emits code to check if a \#MF exception should be raised.
2657 *
2658 * @returns New code buffer offset, UINT32_MAX on failure.
2659 * @param pReNative The native recompile state.
2660 * @param off The code buffer offset.
2661 * @param idxInstr The current instruction.
2662 */
2663DECL_INLINE_THROW(uint32_t)
2664iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2665{
2666 /*
2667 * Make sure we don't have any outstanding guest register writes as we may
2668      * raise an #MF and all guest registers must be up to date in CPUMCTX.
2669 */
2670 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2671 off = iemNativeRegFlushPendingWrites(pReNative, off);
2672
2673#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2674 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2675#else
2676 RT_NOREF(idxInstr);
2677#endif
2678
2679 /* Allocate a temporary FSW register. */
2680 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
2681 kIemNativeGstRegUse_ReadOnly);
2682
2683 /*
2684      * if ((FSW & X86_FSW_ES) != 0)
2685 * return raisexcpt();
2686 */
2687 /* Test and jump. */
2688 off = iemNativeEmitTbExitIfBitSetInGpr<kIemNativeLabelType_RaiseMf>(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT);
2689
2690 /* Free but don't flush the FSW register. */
2691 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2692
2693 return off;
2694}
2695
2696
2697#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2698 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2699
2700/**
2701  * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2702 *
2703 * @returns New code buffer offset, UINT32_MAX on failure.
2704 * @param pReNative The native recompile state.
2705 * @param off The code buffer offset.
2706 * @param idxInstr The current instruction.
2707 */
2708DECL_INLINE_THROW(uint32_t)
2709iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2710{
2711 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2712
2713 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2714 {
2715 /*
2716 * Make sure we don't have any outstanding guest register writes as we may
2717          * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2718 */
2719 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2720 off = iemNativeRegFlushPendingWrites(pReNative, off);
2721
2722#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2723 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2724#else
2725 RT_NOREF(idxInstr);
2726#endif
2727
2728 /* Allocate a temporary CR0 and CR4 register. */
2729 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2730 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2731 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2732
2733 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2734#ifdef RT_ARCH_AMD64
2735 /*
2736 * We do a modified test here:
2737          * if (!((((cr4 & X86_CR4_OSFXSR) | cr0) & (X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR)) ^ X86_CR4_OSFXSR)) { likely }
2738 * else { goto RaiseSseRelated; }
2739 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
2740          * all targets except the 386, which doesn't support SSE, so this should
2741 * be a safe assumption.
2742 */
2743 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2744 1+6+3+3+7+7+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2745 //pCodeBuf[off++] = 0xcc;
2746 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2747 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2748 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2749 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2750 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2751 off = iemNativeEmitTbExitJccEx<kIemNativeLabelType_RaiseSseRelated>(pReNative, pCodeBuf, off, kIemNativeInstrCond_ne);
2752
2753#elif defined(RT_ARCH_ARM64)
2754 /*
2755 * We do a modified test here:
2756 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2757 * else { goto RaiseSseRelated; }
2758 */
2759 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2760 1+5 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2761 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2762 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2763 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2764 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2765 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2766 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2767 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2768 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2769 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_RaiseSseRelated>(pReNative, pCodeBuf, off,
2770 idxTmpReg, false /*f64Bit*/);
2771
2772#else
2773# error "Port me!"
2774#endif
2775
2776 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2777 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2778 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2779 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2780
2781 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2782 }
2783 else
2784 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2785
2786 return off;
2787}
2788
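/*
 * Worked example for the AMD64 sequence above (register values assumed
 * for illustration), with CR4.OSFXSR=1 (bit 9) and CR0.EM=0, CR0.TS=0:
 *      tmp  = 0x200                                (X86_CR4_OSFXSR)
 *      tmp &= cr4                               -> 0x200
 *      tmp |= cr0                               -> 0x200 plus unrelated CR0 bits
 *      tmp &= EM | TS | OSFXSR (0x20c)          -> 0x200
 *      tmp ^= 0x200                             -> 0   => likely path, no exception
 * With CR0.TS=1 the result is 0x008 (#NM) and with CR4.OSFXSR=0 it is
 * 0x200 (#UD); both cases take the RaiseSseRelated exit.
 */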
2789
2790#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2791 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2792
2793/**
2794  * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2795 *
2796 * @returns New code buffer offset, UINT32_MAX on failure.
2797 * @param pReNative The native recompile state.
2798 * @param off The code buffer offset.
2799 * @param idxInstr The current instruction.
2800 */
2801DECL_INLINE_THROW(uint32_t)
2802iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2803{
2804 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2805
2806 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2807 {
2808 /*
2809 * Make sure we don't have any outstanding guest register writes as we may
2810          * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2811 */
2812 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2813 off = iemNativeRegFlushPendingWrites(pReNative, off);
2814
2815#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2816 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2817#else
2818 RT_NOREF(idxInstr);
2819#endif
2820
2821 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2822 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2823 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2824 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2825 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2826
2827 /*
2828 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2829 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2830 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2831 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2832 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2833 * { likely }
2834 * else { goto RaiseAvxRelated; }
2835 */
2836#ifdef RT_ARCH_AMD64
2837 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2838                  | (((cr4 >> X86_CR4_OSXSAVE_BIT) & 1) << 1)
2839 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2840 ^ 0x1a) ) { likely }
2841 else { goto RaiseAvxRelated; } */
2842 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2843 1+6+3+5+3+5+3+7+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2844 //pCodeBuf[off++] = 0xcc;
2845 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2846 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2847 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2848 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2849 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2850 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2851 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2852 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2853 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2854 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2855 off = iemNativeEmitTbExitJccEx<kIemNativeLabelType_RaiseAvxRelated>(pReNative, pCodeBuf, off, kIemNativeInstrCond_ne);
2856
2857#elif defined(RT_ARCH_ARM64)
2858          /* if (!( ((((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSXSAVE_BIT) & 1)) ^ 7) << 1)
2859                   | ((cr0 >> X86_CR0_TS_BIT) & 1) )) { likely }
2860 else { goto RaiseAvxRelated; } */
2861 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2862 1+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2863 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2864 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2865 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2866 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2867 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2868 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2869 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2870 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2871 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2872 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2873 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2874 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_RaiseAvxRelated>(pReNative, pCodeBuf, off,
2875 idxTmpReg, false /*f64Bit*/);
2876
2877#else
2878# error "Port me!"
2879#endif
2880
2881 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2882 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2883 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2884 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2885
2886 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2887 }
2888 else
2889 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2890
2891 return off;
2892}
2893
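/*
 * Worked example for the AMD64 sequence above (register values assumed
 * for illustration): the two rotate-through-carry steps assemble
 *      tmp = CR0.TS | (CR4.OSXSAVE << 1) | (XCR0.SSE << 3) | (XCR0.YMM << 4)
 * With SSE+YMM enabled in XCR0, CR4.OSXSAVE set and CR0.TS clear this is
 * 2 + 8 + 16 = 0x1a, so tmp ^ 0x1a == 0 and the likely path is taken.
 * Any missing enable bit or a set TS bit leaves a non-zero result and the
 * code exits via RaiseAvxRelated.
 */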
2894
2895#define IEM_MC_RAISE_DIVIDE_ERROR_IF_LOCAL_IS_ZERO(a_uVar) \
2896 off = iemNativeEmitRaiseDivideErrorIfLocalIsZero(pReNative, off, a_uVar, pCallEntry->idxInstr)
2897
2898/**
2899 * Emits code to raise a \#DE if a local variable is zero.
2900 *
2901 * @returns New code buffer offset, UINT32_MAX on failure.
2902 * @param pReNative The native recompile state.
2903 * @param off The code buffer offset.
2904  * @param   idxVar          The variable to check. This must be 32-bit.
2905 * @param idxInstr The current instruction.
2906 */
2907DECL_INLINE_THROW(uint32_t)
2908iemNativeEmitRaiseDivideErrorIfLocalIsZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxInstr)
2909{
2910 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2911 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, sizeof(uint32_t));
2912
2913     /* Make sure we don't have any outstanding guest register writes as we may throw an exception. */
2914 off = iemNativeRegFlushPendingWrites(pReNative, off);
2915
2916 /* Set the instruction number if we're counting. */
2917#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2918 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2919#else
2920 RT_NOREF(idxInstr);
2921#endif
2922
2923 /* Do the job we're here for. */
2924 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off);
2925 off = iemNativeEmitTbExitIfGprIsZero<kIemNativeLabelType_RaiseDe>(pReNative, off, idxVarReg, false /*f64Bit*/);
2926 iemNativeVarRegisterRelease(pReNative, idxVar);
2927
2928 return off;
2929}
2930
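/*
 * Rough sketch (assumed emitter output, for illustration only) of what the
 * above produces on the happy path: the variable's host register is tested
 * for zero and a conditional TB exit to the RaiseDe path is emitted,
 * approximately
 *      test  r32, r32 ; jz  <RaiseDe exit>      on AMD64, resp.
 *      cbz   w<n>, <RaiseDe exit>               on ARM64,
 * so a non-zero value falls straight through without any call overhead.
 */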
2931
2932#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2933 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2934
2935/**
2936 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2937 *
2938 * @returns New code buffer offset, UINT32_MAX on failure.
2939 * @param pReNative The native recompile state.
2940 * @param off The code buffer offset.
2941 * @param idxInstr The current instruction.
2942 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2943 * @param cbAlign The alignment in bytes to check against.
2944 */
2945DECL_INLINE_THROW(uint32_t)
2946iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
2947 uint8_t idxVarEffAddr, uint8_t cbAlign)
2948{
2949 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2950 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2951
2952 /*
2953 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2954 */
2955 off = iemNativeRegFlushPendingWrites(pReNative, off);
2956
2957#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2958 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2959#else
2960 RT_NOREF(idxInstr);
2961#endif
2962
2963 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2964 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseGp0>(pReNative, off, idxVarReg, cbAlign - 1);
2965 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2966
2967 return off;
2968}
2969
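/*
 * Worked example (values assumed for illustration): for a 16 byte alignment
 * check cbAlign - 1 = 0xf is the test mask.  An effective address of 0x1008
 * gives 0x1008 & 0xf = 8, so a bit is set and the code exits through the
 * RaiseGp0 path, while 0x1010 & 0xf = 0 falls through.
 */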
2970
2971/*********************************************************************************************************************************
2972* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2973*********************************************************************************************************************************/
2974
2975/**
2976 * Pushes an IEM_MC_IF_XXX onto the condition stack.
2977 *
2978 * @returns Pointer to the condition stack entry on success, NULL on failure
2979 * (too many nestings)
2980 */
2981DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
2982{
2983 uint32_t const idxStack = pReNative->cCondDepth;
2984 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
2985
2986 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
2987 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
2988
2989 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
2990 pEntry->fInElse = false;
2991 pEntry->fIfExitTb = false;
2992 pEntry->fElseExitTb = false;
2993 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
2994 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
2995
2996 return pEntry;
2997}
2998
2999
3000/**
3001 * Start of the if-block, snapshotting the register and variable state.
3002 */
3003DECL_INLINE_THROW(void)
3004iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
3005{
3006 Assert(offIfBlock != UINT32_MAX);
3007 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3008 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3009 Assert(!pEntry->fInElse);
3010
3011     /* Define the start of the IF block if requested or for disassembly purposes. */
3012 if (idxLabelIf != UINT32_MAX)
3013 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
3014#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3015 else
3016 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
3017#else
3018 RT_NOREF(offIfBlock);
3019#endif
3020
3021 /* Copy the initial state so we can restore it in the 'else' block. */
3022 pEntry->InitialState = pReNative->Core;
3023}
3024
3025
3026#define IEM_MC_ELSE() } while (0); \
3027 off = iemNativeEmitElse(pReNative, off); \
3028 do {
3029
3030/** Emits code related to IEM_MC_ELSE. */
3031DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3032{
3033 /* Check sanity and get the conditional stack entry. */
3034 Assert(off != UINT32_MAX);
3035 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3036 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3037 Assert(!pEntry->fInElse);
3038
3039     /* We can skip the dirty register flushing and the jump to the endif label if
3040        the branch already jumped to a TB exit. */
3041 if (!pEntry->fIfExitTb)
3042 {
3043#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) && 0
3044 /* Writeback any dirty shadow registers. */
3045 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
3046 * in one of the branches and leave guest registers already dirty before the start of the if
3047 * block alone. */
3048 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3049#endif
3050
3051 /* Jump to the endif. */
3052 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
3053 }
3054# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3055 else
3056 Assert(pReNative->Core.offPc == 0);
3057# endif
3058
3059 /* Define the else label and enter the else part of the condition. */
3060 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3061 pEntry->fInElse = true;
3062
3063 /* Snapshot the core state so we can do a merge at the endif and restore
3064 the snapshot we took at the start of the if-block. */
3065 pEntry->IfFinalState = pReNative->Core;
3066 pReNative->Core = pEntry->InitialState;
3067
3068 return off;
3069}
3070
3071
3072#define IEM_MC_ENDIF() } while (0); \
3073 off = iemNativeEmitEndIf(pReNative, off)
3074
3075/** Emits code related to IEM_MC_ENDIF. */
3076DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3077{
3078 /* Check sanity and get the conditional stack entry. */
3079 Assert(off != UINT32_MAX);
3080 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3081 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3082
3083#if defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) && 0
3084 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3085#endif
3086
3087 /*
3088 * If either of the branches exited the TB, we can take the state from the
3089 * other branch and skip all the merging headache.
3090 */
3091 bool fDefinedLabels = false;
3092 if (pEntry->fElseExitTb || pEntry->fIfExitTb)
3093 {
3094#ifdef VBOX_STRICT
3095 Assert(pReNative->cCondDepth == 1); /* Assuming this only happens in simple conditional structures. */
3096         Assert(pEntry->fElseExitTb != pEntry->fIfExitTb); /* Assuming we don't have any code where both branches exit. */
3097 PCIEMNATIVECORESTATE const pExitCoreState = pEntry->fIfExitTb && pEntry->fInElse
3098 ? &pEntry->IfFinalState : &pReNative->Core;
3099# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3100 Assert(pExitCoreState->bmGstRegShadowDirty == 0);
3101# endif
3102# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3103 Assert(pExitCoreState->offPc == 0);
3104# endif
3105 RT_NOREF(pExitCoreState);
3106#endif
3107
3108 if (!pEntry->fIfExitTb)
3109 {
3110 Assert(pEntry->fInElse);
3111 pReNative->Core = pEntry->IfFinalState;
3112 }
3113 }
3114 else
3115 {
3116 /*
3117          * Now we have to find common ground with the core state of the other
3118          * branch. Use the smallest common denominator and just drop anything
3119 * that isn't the same in both states.
3120 */
3121 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
3122 * which is why we're doing this at the end of the else-block.
3123          *        But we'd need more info about the future for that to be worth the effort. */
3124 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
3125#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3126 AssertMsgStmt(pReNative->Core.offPc == pOther->offPc,
3127 ("Core.offPc=%#RX64 pOther->offPc=%#RX64\n", pReNative->Core.offPc, pOther->offPc),
3128 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3129#endif
3130
3131 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
3132 {
3133#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3134 /*
3135              * If the branches differ in dirty shadow registers, we flush the ones that
3136              * are only dirty in the current branch here and emit code further down to
3137              * flush the ones that are only dirty in the other branch.
3138 */
3139 uint64_t const fGstRegDirtyOther = pOther->bmGstRegShadowDirty;
3140 uint64_t const fGstRegDirtyThis = pReNative->Core.bmGstRegShadowDirty;
3141 uint64_t const fGstRegDirtyDiff = fGstRegDirtyOther ^ fGstRegDirtyThis;
3142 uint64_t const fGstRegDirtyHead = fGstRegDirtyThis & fGstRegDirtyDiff;
3143 uint64_t fGstRegDirtyTail = fGstRegDirtyOther & fGstRegDirtyDiff;
3144 if (!fGstRegDirtyDiff)
3145 { /* likely */ }
3146 else
3147 {
3148 //uint64_t const fGstRegDirtyHead = pReNative->Core.bmGstRegShadowDirty & fGstRegDirtyDiff;
3149 if (fGstRegDirtyHead)
3150 {
3151 Log12(("iemNativeEmitEndIf: flushing dirty guest registers in current branch: %RX64\n", fGstRegDirtyHead));
3152 off = iemNativeRegFlushDirtyGuest(pReNative, off, fGstRegDirtyHead);
3153 }
3154 }
3155#endif
3156
3157 /*
3158 * Shadowed guest registers.
3159 *
3160 * We drop any shadows where the two states disagree about where
3161 * things are kept. We may end up flushing dirty more registers
3162              * things are kept. We may end up flushing more dirty registers
3163              * here, if the two branches keep things in different registers.
3164 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
3165 if (fGstRegs)
3166 {
3167 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
3168 do
3169 {
3170 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3171 fGstRegs &= ~RT_BIT_64(idxGstReg);
3172
3173 uint8_t const idxCurHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3174 uint8_t const idxOtherHstReg = pOther->aidxGstRegShadows[idxGstReg];
3175 if ( idxCurHstReg != idxOtherHstReg
3176 || !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg)))
3177 {
3178#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3179 Log12(("iemNativeEmitEndIf: dropping gst %s (%d) from hst %s (other %d/%#RX64)\n",
3180 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3181 idxOtherHstReg, pOther->bmGstRegShadows));
3182#else
3183 Log12(("iemNativeEmitEndIf: dropping %s gst %s (%d) from hst %s (other %d/%#RX64/%s)\n",
3184 pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "_dirty_" : "clean",
3185 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3186 idxOtherHstReg, pOther->bmGstRegShadows,
3187 pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "dirty" : "clean"));
3188 if (pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3189 fGstRegDirtyTail |= RT_BIT_64(idxGstReg);
3190 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3191 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3192#endif
3193 iemNativeRegClearGstRegShadowingOne(pReNative, idxCurHstReg, (IEMNATIVEGSTREG)idxGstReg, off);
3194 }
3195 } while (fGstRegs);
3196 }
3197 else
3198 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
3199
3200#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3201 /*
3202 * Generate jumpy code for flushing dirty registers from the other
3203 * branch that aren't dirty in the current one.
3204 */
3205 if (!fGstRegDirtyTail)
3206 { /* likely */ }
3207 else
3208 {
3209 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEndIfOtherBranchDirty);
3210 Log12(("iemNativeEmitEndIf: Dirty register only in the other branch: %#RX64 - BAD!\n", fGstRegDirtyTail));
3211
3212 /* First the current branch has to jump over the dirty flushing from the other branch. */
3213 uint32_t const offFixup1 = off;
3214 off = iemNativeEmitJmpToFixed(pReNative, off, off + 10);
3215
3216 /* Put the endif and maybe else label here so the other branch ends up here. */
3217 if (!pEntry->fInElse)
3218 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3219 else
3220 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3221 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3222 fDefinedLabels = true;
3223
3224 /* Flush the dirty guest registers from the other branch. */
3225 while (fGstRegDirtyTail)
3226 {
3227 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegDirtyTail) - 1;
3228 fGstRegDirtyTail &= ~RT_BIT_64(idxGstReg);
3229 Log12(("iemNativeEmitEndIf: tail flushing %s (%d) from other branch %d (cur %d/%#RX64)\n",
3230 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, pOther->aidxGstRegShadows[idxGstReg],
3231 pReNative->Core.aidxGstRegShadows[idxGstReg], pReNative->Core.bmGstRegShadows));
3232
3233 off = iemNativeRegFlushPendingWriteEx(pReNative, off, (PIEMNATIVECORESTATE)pOther, (IEMNATIVEGSTREG)idxGstReg);
3234
3235 /* Mismatching shadowing should've been dropped in the previous step already. */
3236 Assert( !(pReNative->Core.bmGstRegShadows & RT_BIT_64(idxGstReg))
3237 || pReNative->Core.aidxGstRegShadows[idxGstReg] == pOther->aidxGstRegShadows[idxGstReg]);
3238 }
3239
3240 /* Here is the actual endif label, fixup the above jump to land here. */
3241 iemNativeFixupFixedJump(pReNative, offFixup1, off);
3242 }
3243#endif
3244
3245 /*
3246 * Check variables next. For now we must require them to be identical
3247 * or stuff we can recreate. (No code is emitted here.)
3248 */
3249 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
3250#ifdef VBOX_STRICT
3251 uint32_t const offAssert = off;
3252#endif
3253 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
3254 if (fVars)
3255 {
3256 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
3257 do
3258 {
3259 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
3260 fVars &= ~RT_BIT_32(idxVar);
3261
3262 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
3263 {
3264 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
3265 continue;
3266 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3267 {
3268 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3269 if (idxHstReg != UINT8_MAX)
3270 {
3271 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3272 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3273 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
3274 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3275 }
3276 continue;
3277 }
3278 }
3279 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
3280 continue;
3281
3282 /* Irreconcilable, so drop it. */
3283 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3284 if (idxHstReg != UINT8_MAX)
3285 {
3286 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3287 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3288 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
3289 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3290 }
3291 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3292 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
3293 } while (fVars);
3294 }
3295 Assert(off == offAssert);
3296
3297 /*
3298 * Finally, check that the host register allocations matches.
3299 */
3300 AssertMsgStmt((pReNative->Core.bmHstRegs & (pReNative->Core.bmHstRegs ^ pOther->bmHstRegs)) == 0,
3301 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
3302 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
3303 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3304 }
3305 }
3306
3307 /*
3308 * Define the endif label and maybe the else one if we're still in the 'if' part.
3309 */
3310 if (!fDefinedLabels)
3311 {
3312 if (!pEntry->fInElse)
3313 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3314 else
3315 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3316 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3317 }
3318
3319     /* Pop the conditional stack. */
3320 pReNative->cCondDepth -= 1;
3321
3322 return off;
3323}
3324
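/*
 * Illustration (assumed MC block shape, not taken from a specific
 * instruction) of how the IEM_MC_IF_XXX / IEM_MC_ELSE / IEM_MC_ENDIF
 * macros above pair up with the condition stack in a recompiled block:
 *
 *      IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
 *          ... emitters for the taken path ...
 *      } IEM_MC_ELSE() {
 *          ... emitters for the other path ...
 *      } IEM_MC_ENDIF();
 *
 * The IF macro calls the matching iemNativeEmitIfEflagsXxx emitter which
 * pushes a condition stack entry, IEM_MC_ELSE snapshots the if-block end
 * state and restores the initial state, and IEM_MC_ENDIF reconciles the
 * two states and defines the else/endif labels.
 */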
3325
3326/**
3327 * Helper function to convert X86_EFL_xxx masks to liveness masks.
3328 *
3329 * The compiler should be able to figure this out at compile time, so sprinkling
3330 * constexpr wherever possible here to nudge it along.
3331 */
3332template<uint32_t const a_fEfl>
3333RT_CONSTEXPR uint64_t iemNativeEflagsToLivenessMask(void)
3334{
3335 return (a_fEfl & ~X86_EFL_STATUS_BITS ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER) : 0)
3336 | (a_fEfl & X86_EFL_CF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_CF) : 0)
3337 | (a_fEfl & X86_EFL_PF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_PF) : 0)
3338 | (a_fEfl & X86_EFL_AF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_AF) : 0)
3339 | (a_fEfl & X86_EFL_ZF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_ZF) : 0)
3340 | (a_fEfl & X86_EFL_SF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_SF) : 0)
3341 | (a_fEfl & X86_EFL_OF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OF) : 0);
3342}
3343
3344
3345/**
3346 * Helper function to convert a single X86_EFL_xxxx value to bit number.
3347 *
3348 * The compiler should be able to figure this out at compile time, so sprinkling
3349 * constexpr wherever possible here to nudge it along.
3350 */
3351template<uint32_t const a_fEfl>
3352RT_CONSTEXPR unsigned iemNativeEflagsToSingleBitNo(void)
3353{
3354 AssertCompile( a_fEfl == X86_EFL_CF
3355 || a_fEfl == X86_EFL_PF
3356 || a_fEfl == X86_EFL_AF
3357 || a_fEfl == X86_EFL_ZF
3358 || a_fEfl == X86_EFL_SF
3359 || a_fEfl == X86_EFL_OF
3360 || a_fEfl == X86_EFL_DF);
3361 return a_fEfl == X86_EFL_CF ? X86_EFL_CF_BIT
3362 : a_fEfl == X86_EFL_PF ? X86_EFL_PF_BIT
3363 : a_fEfl == X86_EFL_AF ? X86_EFL_AF_BIT
3364 : a_fEfl == X86_EFL_ZF ? X86_EFL_ZF_BIT
3365 : a_fEfl == X86_EFL_SF ? X86_EFL_SF_BIT
3366 : a_fEfl == X86_EFL_OF ? X86_EFL_OF_BIT
3367 : X86_EFL_DF_BIT;
3368}
3369
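/*
 * Usage illustration (compile-time only): the two helpers above turn
 * X86_EFL_XXX masks into the representations the emitters below expect,
 * e.g.
 *      iemNativeEflagsToLivenessMask<X86_EFL_CF | X86_EFL_ZF>()
 *          == RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_CF) | RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_ZF)
 *      iemNativeEflagsToSingleBitNo<X86_EFL_ZF>() == X86_EFL_ZF_BIT
 * Both fold to constants since the inputs are template parameters.
 */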
3370
3371#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
3372 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits), iemNativeEflagsToLivenessMask<a_fBits>()); \
3373 do {
3374
3375/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
3376DECL_INLINE_THROW(uint32_t)
3377iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl, uint64_t fLivenessEflBits)
3378{
3379 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fBitsInEfl);
3380 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3381 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3382
3383 /* Get the eflags. */
3384 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3385
3386 /* Test and jump. */
3387 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3388
3389 /* Free but don't flush the EFlags register. */
3390 iemNativeRegFreeTmp(pReNative, idxEflReg);
3391
3392 /* Make a copy of the core state now as we start the if-block. */
3393 iemNativeCondStartIfBlock(pReNative, off);
3394
3395 return off;
3396}
3397
3398
3399#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
3400 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits), iemNativeEflagsToLivenessMask<a_fBits>()); \
3401 do {
3402
3403/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
3404DECL_INLINE_THROW(uint32_t)
3405iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl, uint64_t fLivenessEflBits)
3406{
3407 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fBitsInEfl);
3408 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3409 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3410
3411 /* Get the eflags. */
3412 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3413
3414 /* Test and jump. */
3415 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3416
3417 /* Free but don't flush the EFlags register. */
3418 iemNativeRegFreeTmp(pReNative, idxEflReg);
3419
3420 /* Make a copy of the core state now as we start the if-block. */
3421 iemNativeCondStartIfBlock(pReNative, off);
3422
3423 return off;
3424}
3425
3426
3427#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
3428 off = iemNativeEmitIfEflagsBitSet(pReNative, off, iemNativeEflagsToSingleBitNo<a_fBit>(), \
3429 iemNativeEflagsToLivenessMask<a_fBit>()); \
3430 do {
3431
3432/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
3433DECL_INLINE_THROW(uint32_t)
3434iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, unsigned iBitNo, uint64_t fLivenessEflBit)
3435{
3436 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3437 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3438 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3439
3440 /* Get the eflags. */
3441 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBit);
3442
3443 /* Test and jump. */
3444 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3445
3446 /* Free but don't flush the EFlags register. */
3447 iemNativeRegFreeTmp(pReNative, idxEflReg);
3448
3449 /* Make a copy of the core state now as we start the if-block. */
3450 iemNativeCondStartIfBlock(pReNative, off);
3451
3452 return off;
3453}
3454
3455
3456#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
3457 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, iemNativeEflagsToSingleBitNo<a_fBit>(), \
3458 iemNativeEflagsToLivenessMask<a_fBit>()); \
3459 do {
3460
3461/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
3462DECL_INLINE_THROW(uint32_t)
3463iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, unsigned iBitNo, uint64_t fLivenessEflBit)
3464{
3465 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3466 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3467 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3468
3469 /* Get the eflags. */
3470 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBit);
3471
3472 /* Test and jump. */
3473 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3474
3475 /* Free but don't flush the EFlags register. */
3476 iemNativeRegFreeTmp(pReNative, idxEflReg);
3477
3478 /* Make a copy of the core state now as we start the if-block. */
3479 iemNativeCondStartIfBlock(pReNative, off);
3480
3481 return off;
3482}
3483
3484
3485#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
3486 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, false /*fInverted*/, \
3487 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3488 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3489 iemNativeEflagsToLivenessMask<a_fBit1 | a_fBit2>()); \
3490 do {
3491
3492#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
3493 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, true /*fInverted*/, \
3494 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3495 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3496 iemNativeEflagsToLivenessMask<a_fBit1 | a_fBit2>()); \
3497 do {
3498
3499/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
3500DECL_INLINE_THROW(uint32_t)
3501iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3502 bool fInverted, unsigned iBitNo1, unsigned iBitNo2, uint64_t fLivenessEflBits)
3503{
3504 Assert(iBitNo1 != iBitNo2);
3505 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3506 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3507 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3508
3509 /* Get the eflags. */
3510 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3511
3512#ifdef RT_ARCH_AMD64
3513 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, RT_BIT_64(iBitNo1));
3514
3515 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3516 if (iBitNo1 > iBitNo2)
3517 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3518 else
3519 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3520 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3521
3522#elif defined(RT_ARCH_ARM64)
3523 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3524 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3525
3526 /* and tmpreg, eflreg, #1<<iBitNo1 */
3527 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3528
3529 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3530 if (iBitNo1 > iBitNo2)
3531 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3532 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3533 else
3534 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3535 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3536
3537 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3538
3539#else
3540# error "Port me"
3541#endif
3542
3543 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3544 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3545 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3546
3547 /* Free but don't flush the EFlags and tmp registers. */
3548 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3549 iemNativeRegFreeTmp(pReNative, idxEflReg);
3550
3551 /* Make a copy of the core state now as we start the if-block. */
3552 iemNativeCondStartIfBlock(pReNative, off);
3553
3554 return off;
3555}
3556
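/*
 * Worked example (standard EFLAGS bit numbers assumed): for
 * IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF) we get iBitNo1=7 and
 * iBitNo2=11, so the code isolates SF, shifts it left by 4 to line up
 * with OF and XORs the result with EFLAGS; bit 11 of the temporary then
 * equals SF ^ OF, i.e. it is set exactly when the two flags differ, and
 * the final bit test branches on that.
 */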
3557
3558#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
3559 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, false /*fInverted*/, \
3560 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3561 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3562 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3563 iemNativeEflagsToLivenessMask<a_fBit | a_fBit1 | a_fBit2>()); \
3564 do {
3565
3566#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
3567 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, true /*fInverted*/, \
3568 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3569 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3570 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3571 iemNativeEflagsToLivenessMask<a_fBit | a_fBit1 | a_fBit2>()); \
3572 do {
3573
3574/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
3575 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
3576DECL_INLINE_THROW(uint32_t)
3577iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fInverted,
3578 unsigned iBitNo, unsigned iBitNo1, unsigned iBitNo2, uint64_t fLivenessEflBits)
3579{
3580 Assert(iBitNo1 != iBitNo);
3581 Assert(iBitNo2 != iBitNo);
3582 Assert(iBitNo2 != iBitNo1);
3583 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo) | RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3584 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo) | RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3585 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3586
3587     /* We need an if-block label for the inverted variant. */
3588 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
3589 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
3590
3591 /* Get the eflags. */
3592 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3593
3594#ifdef RT_ARCH_AMD64
3595 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, RT_BIT_64(iBitNo1)); /* This must come before we jump anywhere! */
3596#elif defined(RT_ARCH_ARM64)
3597 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3598#endif
3599
3600 /* Check for the lone bit first. */
3601 if (!fInverted)
3602 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3603 else
3604 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
3605
3606 /* Then extract and compare the other two bits. */
3607#ifdef RT_ARCH_AMD64
3608 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3609 if (iBitNo1 > iBitNo2)
3610 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3611 else
3612 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3613 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3614
3615#elif defined(RT_ARCH_ARM64)
3616 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3617
3618 /* and tmpreg, eflreg, #1<<iBitNo1 */
3619 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3620
3621 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3622 if (iBitNo1 > iBitNo2)
3623 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3624 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3625 else
3626 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3627 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3628
3629 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3630
3631#else
3632# error "Port me"
3633#endif
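    /* Illustration of the extract-and-compare trick above, assuming iBitNo1 is
       X86_EFL_SF_BIT (7) and iBitNo2 is X86_EFL_OF_BIT (11), i.e. an "SF == OF"
       style condition (the bit numbers follow the x86 EFLAGS layout; the concrete
       flags are only an example):
            tmpreg  = eflreg & RT_BIT_32(7);   - isolate SF
            tmpreg <<= 11 - 7;                 - move SF up to bit 11
            tmpreg ^= eflreg;                  - bit 11 of tmpreg is now SF ^ OF
       So bit iBitNo2 of tmpreg ends up set exactly when the two flags differ,
       which is what the test below checks. */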
3634
3635 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3636 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3637 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3638
3639 /* Free but don't flush the EFlags and tmp registers. */
3640 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3641 iemNativeRegFreeTmp(pReNative, idxEflReg);
3642
3643 /* Make a copy of the core state now as we start the if-block. */
3644 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
3645
3646 return off;
3647}
3648
3649
3650#define IEM_MC_IF_CX_IS_NZ() \
3651 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
3652 do {
3653
3654/** Emits code for IEM_MC_IF_CX_IS_NZ. */
3655DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3656{
3657 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3658
3659 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3660 kIemNativeGstRegUse_ReadOnly);
3661 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
3662 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3663
3664 iemNativeCondStartIfBlock(pReNative, off);
3665 return off;
3666}
3667
3668
3669#define IEM_MC_IF_ECX_IS_NZ() \
3670 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
3671 do {
3672
3673#define IEM_MC_IF_RCX_IS_NZ() \
3674 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
3675 do {
3676
3677/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
3678DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3679{
3680 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3681
3682 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3683 kIemNativeGstRegUse_ReadOnly);
3684 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
3685 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3686
3687 iemNativeCondStartIfBlock(pReNative, off);
3688 return off;
3689}
3690
3691
3692#define IEM_MC_IF_CX_IS_NOT_ONE() \
3693 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
3694 do {
3695
3696/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
3697DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3698{
3699 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3700
3701 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3702 kIemNativeGstRegUse_ReadOnly);
3703#ifdef RT_ARCH_AMD64
3704 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3705#else
3706 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3707 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3708 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3709#endif
3710 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3711
3712 iemNativeCondStartIfBlock(pReNative, off);
3713 return off;
3714}
3715
3716
3717#define IEM_MC_IF_ECX_IS_NOT_ONE() \
3718 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
3719 do {
3720
3721#define IEM_MC_IF_RCX_IS_NOT_ONE() \
3722 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
3723 do {
3724
3725/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
3726DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3727{
3728 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3729
3730 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3731 kIemNativeGstRegUse_ReadOnly);
3732 if (f64Bit)
3733 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3734 else
3735 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3736 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3737
3738 iemNativeCondStartIfBlock(pReNative, off);
3739 return off;
3740}
3741
3742
3743#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3744 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, \
3745 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3746 iemNativeEflagsToLivenessMask<a_fBit>()); \
3747 do {
3748
3749#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3750 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, \
3751 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3752 iemNativeEflagsToLivenessMask<a_fBit>()); \
3753 do {
3754
3755/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
3756 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3757DECL_INLINE_THROW(uint32_t)
3758iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3759 bool fCheckIfSet, unsigned iBitNo, uint64_t fLivenessEflBit)
3760{
3761 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3762 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3763 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3764
3765 /* We have to load both RCX and EFLAGS before we can start branching,
3766 otherwise we'll end up in the else-block with an inconsistent
3767 register allocator state.
3768 Doing EFLAGS first as it's more likely to be loaded, right? */
3769 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBit);
3770 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3771 kIemNativeGstRegUse_ReadOnly);
3772
3773 /** @todo we could reduce this to a single branch instruction by spending a
3774 * temporary register and some setnz stuff. Not sure if loops are
3775 * worth it. */
3776 /* Check CX. */
3777#ifdef RT_ARCH_AMD64
3778 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3779#else
3780 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3781 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3782 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3783#endif
3784
3785 /* Check the EFlags bit. */
3786 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3787 !fCheckIfSet /*fJmpIfSet*/);
3788
3789 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3790 iemNativeRegFreeTmp(pReNative, idxEflReg);
3791
3792 iemNativeCondStartIfBlock(pReNative, off);
3793 return off;
3794}
3795
3796
3797#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3798 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, false /*f64Bit*/, \
3799 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3800 iemNativeEflagsToLivenessMask<a_fBit>()); \
3801 do {
3802
3803#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3804 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, false /*f64Bit*/, \
3805 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3806 iemNativeEflagsToLivenessMask<a_fBit>()); \
3807 do {
3808
3809#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3810 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, true /*f64Bit*/, \
3811 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3812 iemNativeEflagsToLivenessMask<a_fBit>()); \
3813 do {
3814
3815#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3816 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, true /*f64Bit*/, \
3817 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3818 iemNativeEflagsToLivenessMask<a_fBit>()); \
3819 do {
3820
3821/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3822 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3823 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3824 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3825DECL_INLINE_THROW(uint32_t)
3826iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fCheckIfSet, bool f64Bit,
3827 unsigned iBitNo, uint64_t fLivenessEFlBit)
3828
3829{
3830 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3831 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3832 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3833
3834 /* We have to load both RCX and EFLAGS before we can start branching,
3835 otherwise we'll end up in the else-block with an inconsistent
3836 register allocator state.
3837 Doing EFLAGS first as it's more likely to be loaded, right? */
3838 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEFlBit);
3839 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3840 kIemNativeGstRegUse_ReadOnly);
3841
3842 /** @todo we could reduce this to a single branch instruction by spending a
3843 * temporary register and some setnz stuff. Not sure if loops are
3844 * worth it. */
3845 /* Check RCX/ECX. */
3846 if (f64Bit)
3847 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3848 else
3849 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3850
3851 /* Check the EFlags bit. */
3852 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3853 !fCheckIfSet /*fJmpIfSet*/);
3854
3855 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3856 iemNativeRegFreeTmp(pReNative, idxEflReg);
3857
3858 iemNativeCondStartIfBlock(pReNative, off);
3859 return off;
3860}
3861
3862
3863#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3864 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3865 do {
3866
3867/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3868DECL_INLINE_THROW(uint32_t)
3869iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3870{
3871 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3872
3873 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3874 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3875 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3876 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3877
3878 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3879
3880 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3881
3882 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3883
3884 iemNativeCondStartIfBlock(pReNative, off);
3885 return off;
3886}
3887
3888
3889#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3890 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3891 do {
3892
3893/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3894DECL_INLINE_THROW(uint32_t)
3895iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3896{
3897 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3898 Assert(iGReg < 16);
3899
3900 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3901 kIemNativeGstRegUse_ReadOnly);
3902
3903 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3904
3905 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3906
3907 iemNativeCondStartIfBlock(pReNative, off);
3908 return off;
3909}
3910
3911
3912
3913/*********************************************************************************************************************************
3914* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3915*********************************************************************************************************************************/
3916
3917#define IEM_MC_NOREF(a_Name) \
3918 RT_NOREF_PV(a_Name)
3919
3920#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3921 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3922
3923#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3924 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3925
3926#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3927 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3928
3929#define IEM_MC_LOCAL(a_Type, a_Name) \
3930 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3931
3932#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3933 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3934
3935#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3936 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
3937
3938
3939/**
3940 * Sets the host register for @a idxVar to @a idxReg.
3941 *
3942 * Any guest register shadowing will be implicitly dropped by this call.
3943 *
3944 * The variable must not have any register associated with it (causes
3945 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3946 * implied.
3947 *
3948 * @returns idxReg
3949 * @param pReNative The recompiler state.
3950 * @param idxVar The variable.
3951 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3952 * @param off For recording in debug info.
3953 * @param fAllocated Set if the register is already allocated, false if not.
3954 *
3955 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
3956 */
3957DECL_INLINE_THROW(uint8_t)
3958iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off, bool fAllocated)
3959{
3960 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3961 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3962 Assert(!pVar->fRegAcquired);
3963 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3964 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
3965 AssertStmt(RT_BOOL(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)) == fAllocated,
3966 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3967
3968 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3969 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3970
3971 iemNativeVarSetKindToStack(pReNative, idxVar);
3972 pVar->idxReg = idxReg;
3973
3974 return idxReg;
3975}
3976
3977
3978/**
3979 * A convenient helper function.
3980 */
3981DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
3982 uint8_t idxReg, uint32_t *poff)
3983{
3984 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff, false /*fAllocated*/);
3985 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
3986 return idxReg;
3987}
3988
3989
3990/**
3991 * This is called by IEM_MC_END() to clean up all variables.
3992 */
3993DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
3994{
3995 uint32_t const bmVars = pReNative->Core.bmVars;
3996 if (bmVars != 0)
3997 iemNativeVarFreeAllSlow(pReNative, bmVars);
3998 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
3999 Assert(pReNative->Core.bmStack == 0);
4000}
4001
4002
4003#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
4004
4005/**
4006 * This is called by IEM_MC_FREE_LOCAL.
4007 */
4008DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4009{
4010 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4011 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
4012 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
4013}
4014
4015
4016#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
4017
4018/**
4019 * This is called by IEM_MC_FREE_ARG.
4020 */
4021DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4022{
4023 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4024 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
4025 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
4026}
4027
4028
4029#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
4030
4031/**
4032 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
4033 */
4034DECL_INLINE_THROW(uint32_t)
4035iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
4036{
4037 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
4038 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
4039 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4040 Assert( pVarDst->cbVar == sizeof(uint16_t)
4041 || pVarDst->cbVar == sizeof(uint32_t));
4042
4043 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
4044 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
4045 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
4046 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
4047 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4048
4049 Assert(pVarDst->cbVar < pVarSrc->cbVar);
4050
4051 /*
4052 * Special case for immediates.
4053 */
4054 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
4055 {
4056 switch (pVarDst->cbVar)
4057 {
4058 case sizeof(uint16_t):
4059 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
4060 break;
4061 case sizeof(uint32_t):
4062 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
4063 break;
4064 default: AssertFailed(); break;
4065 }
4066 }
4067 else
4068 {
4069 /*
4070 * The generic solution for now.
4071 */
4072 /** @todo optimize this by having the python script make sure the source
4073 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
4074 * statement. Then we could just transfer the register assignments. */
4075 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
4076 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
4077 switch (pVarDst->cbVar)
4078 {
4079 case sizeof(uint16_t):
4080 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
4081 break;
4082 case sizeof(uint32_t):
4083 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
4084 break;
4085 default: AssertFailed(); break;
4086 }
4087 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
4088 iemNativeVarRegisterRelease(pReNative, idxVarDst);
4089 }
4090 return off;
4091}
4092
4093
4094
4095/*********************************************************************************************************************************
4096* Emitters for IEM_MC_CALL_CIMPL_XXX *
4097*********************************************************************************************************************************/
4098
4099/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
4100DECL_INLINE_THROW(uint32_t)
4101iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
4102 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
4103
4104{
4105 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
4106 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
4107
4108 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
4109 when a call clobbers any of the relevant control registers. */
4110#if 1
4111 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
4112 {
4113 /* Likely as long as call+ret are done via cimpl. */
4114 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
4115 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
4116 }
4117 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
4118 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4119 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
4120 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4121 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
4122 else
4123 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4124 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4125 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4126
4127#else
4128 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
4129 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4130 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
4131 pReNative->fSimdRaiseXcptChecksEmitted = 0;
4132 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
4133 || pfnCImpl == (uintptr_t)iemCImpl_callf
4134 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
4135 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
4136 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4137 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4138 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4139#endif
4140
4141#ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
4142 /* Mark the host floating point control register as not synced if MXCSR is modified. */
4143 if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_MxCsr))
4144 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
4145#endif
4146
4147 /*
4148 * Do all the call setup and cleanup.
4149 */
4150 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
4151
4152 /*
4153 * Load the two or three hidden arguments.
4154 */
4155#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64))
4156 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict */
4157 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4158 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
4159#else
4160 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4161 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
4162#endif
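    /* In effect the call below ends up as pfnCImpl(pVCpu, cbInstr, <user args>); the
       Windows + VBOXSTRICTRC_STRICT_ENABLED configuration additionally passes the address
       of a stack slot for the VBOXSTRICTRC return value as the first (hidden) argument.
       This is just a sketch of the argument layout as set up by the register loads above. */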
4163
4164 /*
4165 * Make the call and check the return code.
4166 *
4167 * Shadow PC copies are always flushed here, other stuff depends on flags.
4168 * Segment and general purpose registers are explicitly flushed via the
4169 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
4170 * macros.
4171 */
4172 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
4173#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64))
4174 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict (see above) */
4175#endif
4176 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
4177 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
4178 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
4179 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
4180
4181#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
4182 pReNative->Core.fDebugPcInitialized = false;
4183 Log4(("fDebugPcInitialized=false cimpl off=%#x (v1)\n", off));
4184#endif
4185
4186 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
4187}
4188
4189
4190#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
4191 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
4192
4193/** Emits code for IEM_MC_CALL_CIMPL_1. */
4194DECL_INLINE_THROW(uint32_t)
4195iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4196 uintptr_t pfnCImpl, uint8_t idxArg0)
4197{
4198 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4199 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
4200}
4201
4202
4203#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
4204 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
4205
4206/** Emits code for IEM_MC_CALL_CIMPL_2. */
4207DECL_INLINE_THROW(uint32_t)
4208iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4209 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
4210{
4211 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4212 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4213 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
4214}
4215
4216
4217#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
4218 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4219 (uintptr_t)a_pfnCImpl, a0, a1, a2)
4220
4221/** Emits code for IEM_MC_CALL_CIMPL_3. */
4222DECL_INLINE_THROW(uint32_t)
4223iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4224 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4225{
4226 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4227 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4228 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4229 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
4230}
4231
4232
4233#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
4234 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4235 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
4236
4237/** Emits code for IEM_MC_CALL_CIMPL_4. */
4238DECL_INLINE_THROW(uint32_t)
4239iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4240 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4241{
4242 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4243 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4244 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4245 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4246 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
4247}
4248
4249
4250#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
4251 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4252 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
4253
4254/** Emits code for IEM_MC_CALL_CIMPL_5. */
4255DECL_INLINE_THROW(uint32_t)
4256iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4257 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
4258{
4259 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4260 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4261 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4262 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4263 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
4264 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
4265}
4266
4267
4268/** Recompiler debugging: Flush guest register shadow copies. */
4269#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
4270
4271
4272
4273/*********************************************************************************************************************************
4274* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
4275*********************************************************************************************************************************/
4276
4277/**
4278 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
4279 */
4280DECL_INLINE_THROW(uint32_t)
4281iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4282 uintptr_t pfnAImpl, uint8_t cArgs)
4283{
4284 if (idxVarRc != UINT8_MAX)
4285 {
4286 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
4287 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
4288 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
4289 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
4290 }
4291
4292 /*
4293 * Do all the call setup and cleanup.
4294 *
4295 * It is only required to flush pending guest register writes in call volatile registers as
4296 * assembly helpers can't throw and don't access anything living in CPUMCTX, they only
4297 * access parameters. The flushing of call volatile registers is always done in iemNativeEmitCallCommon()
4298 * no matter the fFlushPendingWrites parameter.
4299 */
4300 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
4301
4302 /*
4303 * Make the call and update the return code variable if we've got one.
4304 */
4305 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnAImpl);
4306 if (idxVarRc != UINT8_MAX)
4307 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off, false /*fAllocated*/);
4308
4309 return off;
4310}
4311
4312
4313
4314#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
4315 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
4316
4317#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
4318 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
4319
4320/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
4321DECL_INLINE_THROW(uint32_t)
4322iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
4323{
4324 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
4325}
4326
4327
4328#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
4329 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
4330
4331#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
4332 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
4333
4334/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
4335DECL_INLINE_THROW(uint32_t)
4336iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
4337{
4338 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4339 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
4340}
4341
4342
4343#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
4344 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
4345
4346#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
4347 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
4348
4349/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
4350DECL_INLINE_THROW(uint32_t)
4351iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4352 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
4353{
4354 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4355 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4356 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
4357}
4358
4359
4360#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
4361 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
4362
4363#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
4364 IEM_MC_LOCAL(a_rcType, a_rc); \
4365 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
4366
4367/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
4368DECL_INLINE_THROW(uint32_t)
4369iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4370 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4371{
4372 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4373 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4374 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4375 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
4376}
4377
4378
4379#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
4380 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4381
4382#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
4383 IEM_MC_LOCAL(a_rcType, a_rc); \
4384 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4385
4386/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
4387DECL_INLINE_THROW(uint32_t)
4388iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4389 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4390{
4391 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4392 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4393 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4394 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
4395 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
4396}
4397
4398
4399
4400/*********************************************************************************************************************************
4401* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
4402*********************************************************************************************************************************/
4403
4404#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
4405 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
4406
4407#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4408 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
4409
4410#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4411 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
4412
4413#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4414 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
4415
4416
4417/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
4418 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
4419DECL_INLINE_THROW(uint32_t)
4420iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
4421{
4422 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4423 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4424 Assert(iGRegEx < 20);
4425
4426 /* Same discussion as in iemNativeEmitFetchGregU16 */
4427 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4428 kIemNativeGstRegUse_ReadOnly);
4429
4430 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4431 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4432
4433 /* The value is zero-extended to the full 64-bit host register width. */
4434 if (iGRegEx < 16)
4435 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4436 else
4437 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4438
4439 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4440 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4441 return off;
4442}
4443
4444
4445#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4446 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
4447
4448#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4449 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
4450
4451#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4452 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
4453
4454/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
4455DECL_INLINE_THROW(uint32_t)
4456iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
4457{
4458 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4459 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4460 Assert(iGRegEx < 20);
4461
4462 /* Same discussion as in iemNativeEmitFetchGregU16 */
4463 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4464 kIemNativeGstRegUse_ReadOnly);
4465
4466 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4467 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4468
4469 if (iGRegEx < 16)
4470 {
4471 switch (cbSignExtended)
4472 {
4473 case sizeof(uint16_t):
4474 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4475 break;
4476 case sizeof(uint32_t):
4477 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4478 break;
4479 case sizeof(uint64_t):
4480 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4481 break;
4482 default: AssertFailed(); break;
4483 }
4484 }
4485 else
4486 {
4487 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4488 switch (cbSignExtended)
4489 {
4490 case sizeof(uint16_t):
4491 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4492 break;
4493 case sizeof(uint32_t):
4494 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4495 break;
4496 case sizeof(uint64_t):
4497 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4498 break;
4499 default: AssertFailed(); break;
4500 }
4501 }
4502
4503 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4504 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4505 return off;
4506}
4507
4508
4509
4510#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
4511 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
4512
4513#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
4514 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4515
4516#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
4517 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4518
4519/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
4520DECL_INLINE_THROW(uint32_t)
4521iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4522{
4523 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4524 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4525 Assert(iGReg < 16);
4526
4527 /*
4528 * We can either just load the low 16-bit of the GPR into a host register
4529 * for the variable, or we can do so via a shadow copy host register. The
4530 * latter will avoid having to reload it if it's being stored later, but
4531 * will waste a host register if it isn't touched again. Since we don't
4532 * know what's going to happen, we choose the latter for now.
4533 */
4534 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4535 kIemNativeGstRegUse_ReadOnly);
4536
4537 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4538 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4539 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4540 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4541
4542 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4543 return off;
4544}
4545
4546#define IEM_MC_FETCH_GREG_I16(a_i16Dst, a_iGReg) \
4547 off = iemNativeEmitFetchGregI16(pReNative, off, a_i16Dst, a_iGReg)
4548
4549/** Emits code for IEM_MC_FETCH_GREG_I16. */
4550DECL_INLINE_THROW(uint32_t)
4551iemNativeEmitFetchGregI16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4552{
4553 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4554 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(int16_t));
4555 Assert(iGReg < 16);
4556
4557 /*
4558 * We can either just load the low 16-bit of the GPR into a host register
4559 * for the variable, or we can do so via a shadow copy host register. The
4560 * latter will avoid having to reload it if it's being stored later, but
4561 * will waste a host register if it isn't touched again. Since we don't
4562 * know what's going to happen, we choose the latter for now.
4563 */
4564 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4565 kIemNativeGstRegUse_ReadOnly);
4566
4567 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4568 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4569#ifdef RT_ARCH_AMD64
4570 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4571#elif defined(RT_ARCH_ARM64) /* Note! There are no 16-bit registers on ARM, we emulate that through 32-bit registers which requires sign extension. */
4572 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4573#endif
4574 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4575
4576 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4577 return off;
4578}
4579
4580
4581#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
4582 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4583
4584#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
4585 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4586
4587/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
4588DECL_INLINE_THROW(uint32_t)
4589iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
4590{
4591 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4592 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4593 Assert(iGReg < 16);
4594
4595 /*
4596 * We can either just load the low 16-bit of the GPR into a host register
4597 * for the variable, or we can do so via a shadow copy host register. The
4598 * latter will avoid having to reload it if it's being stored later, but
4599 * will waste a host register if it isn't touched again. Since we don't
4600 * know what's going to happen, we choose the latter for now.
4601 */
4602 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4603 kIemNativeGstRegUse_ReadOnly);
4604
4605 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4606 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4607 if (cbSignExtended == sizeof(uint32_t))
4608 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4609 else
4610 {
4611 Assert(cbSignExtended == sizeof(uint64_t));
4612 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4613 }
4614 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4615
4616 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4617 return off;
4618}
4619
4620
4621#define IEM_MC_FETCH_GREG_I32(a_i32Dst, a_iGReg) \
4622 off = iemNativeEmitFetchGregU32(pReNative, off, a_i32Dst, a_iGReg, sizeof(uint32_t))
4623
4624#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
4625 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
4626
4627#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
4628 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
4629
4630/** Emits code for IEM_MC_FETCH_GREG_U32, IEM_MC_FETCH_GREG_I32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
4631DECL_INLINE_THROW(uint32_t)
4632iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4633{
4634 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4635 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4636 Assert(iGReg < 16);
4637
4638 /*
4639 * We can either just load the low 32-bit of the GPR into a host register
4640 * for the variable, or we can do so via a shadow copy host register. The
4641 * latter will avoid having to reload it if it's being stored later, but
4642 * will waste a host register if it isn't touched again. Since we don't
4643 * know what's going to happen, we choose the latter for now.
4644 */
4645 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4646 kIemNativeGstRegUse_ReadOnly);
4647
4648 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4649 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4650 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4651 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4652
4653 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4654 return off;
4655}
4656
4657
4658#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
4659 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
4660
4661/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
4662DECL_INLINE_THROW(uint32_t)
4663iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4664{
4665 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4666 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4667 Assert(iGReg < 16);
4668
4669 /*
4670 * We can either just load the low 32-bit of the GPR into a host register
4671 * for the variable, or we can do so via a shadow copy host register. The
4672 * latter will avoid having to reload it if it's being stored later, but
4673 * will waste a host register if it isn't touched again. Since we don't
4674 * know what's going to happen, we choose the latter for now.
4675 */
4676 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4677 kIemNativeGstRegUse_ReadOnly);
4678
4679 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4680 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4681 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4682 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4683
4684 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4685 return off;
4686}
4687
4688
4689#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
4690 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4691
4692#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
4693 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4694
4695/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
4696 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
4697DECL_INLINE_THROW(uint32_t)
4698iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4699{
4700 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4701 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4702 Assert(iGReg < 16);
4703
4704 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4705 kIemNativeGstRegUse_ReadOnly);
4706
4707 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4708 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4709 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
4710 /** @todo name the register a shadow one already? */
4711 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4712
4713 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4714 return off;
4715}
4716
4717
4718#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
4719 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
4720
4721/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
4722DECL_INLINE_THROW(uint32_t)
4723iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
4724{
4725 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4726 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4727 Assert(iGRegLo < 16 && iGRegHi < 16);
4728
4729 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4730 kIemNativeGstRegUse_ReadOnly);
4731 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4732 kIemNativeGstRegUse_ReadOnly);
4733
4734 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4735 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
4736 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
4737 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
4738
4739 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4740 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4741 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4742 return off;
4743}
4744
4745
4746/*********************************************************************************************************************************
4747* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
4748*********************************************************************************************************************************/
4749
4750#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
4751 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
4752
4753/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
4754DECL_INLINE_THROW(uint32_t)
4755iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
4756{
4757 Assert(iGRegEx < 20);
4758 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4759 kIemNativeGstRegUse_ForUpdate);
4760#ifdef RT_ARCH_AMD64
4761 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4762
4763 /* To the lowest byte of the register: mov r8, imm8 */
4764 if (iGRegEx < 16)
4765 {
4766 if (idxGstTmpReg >= 8)
4767 pbCodeBuf[off++] = X86_OP_REX_B;
4768 else if (idxGstTmpReg >= 4)
4769 pbCodeBuf[off++] = X86_OP_REX;
4770 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4771 pbCodeBuf[off++] = u8Value;
4772 }
4773 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
4774 else if (idxGstTmpReg < 4)
4775 {
4776 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
4777 pbCodeBuf[off++] = u8Value;
4778 }
4779 else
4780 {
4781 /* ror reg64, 8 */
4782 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4783 pbCodeBuf[off++] = 0xc1;
4784 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4785 pbCodeBuf[off++] = 8;
4786
4787 /* mov reg8, imm8 */
4788 if (idxGstTmpReg >= 8)
4789 pbCodeBuf[off++] = X86_OP_REX_B;
4790 else if (idxGstTmpReg >= 4)
4791 pbCodeBuf[off++] = X86_OP_REX;
4792 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4793 pbCodeBuf[off++] = u8Value;
4794
4795 /* rol reg64, 8 */
4796 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4797 pbCodeBuf[off++] = 0xc1;
4798 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4799 pbCodeBuf[off++] = 8;
4800 }
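    /* Worked example with hypothetical values: say the guest register is shadowed in
       host register r10, r10 = 0x1122334455667788, and u8Value is 0x5a:
            ror r10, 8       -> 0x8811223344556677  (guest high byte now in bits 7:0)
            mov r10b, 0x5a   -> 0x881122334455665a
            rol r10, 8       -> 0x1122334455665a88  (high byte updated, all other bytes intact)
     */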
4801
4802#elif defined(RT_ARCH_ARM64)
4803 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4804 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4805 if (iGRegEx < 16)
4806 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4807 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4808 else
4809 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4810 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4811 iemNativeRegFreeTmp(pReNative, idxImmReg);
4812
4813#else
4814# error "Port me!"
4815#endif
4816
4817 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4818
4819#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4820 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4821#endif
4822
4823 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4824 return off;
4825}
4826
4827
4828#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4829 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4830
4831/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4832DECL_INLINE_THROW(uint32_t)
4833iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4834{
4835 Assert(iGRegEx < 20);
4836 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4837
4838 /*
4839 * If it's a constant value (unlikely) we treat this as an
4840 * IEM_MC_STORE_GREG_U8_CONST statement.
4841 */
4842 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4843 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4844 { /* likely */ }
4845 else
4846 {
4847 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4848 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4849 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4850 }
4851
4852 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4853 kIemNativeGstRegUse_ForUpdate);
4854 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxValueVar, &off);
4855
4856#ifdef RT_ARCH_AMD64
4857 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4858 if (iGRegEx < 16)
4859 {
4860 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4861 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4862 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4863 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4864 pbCodeBuf[off++] = X86_OP_REX;
4865 pbCodeBuf[off++] = 0x8a;
4866 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4867 }
4868    /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
4869 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4870 {
4871 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4872 pbCodeBuf[off++] = 0x8a;
4873 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4874 }
4875 else
4876 {
4877 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4878
4879 /* ror reg64, 8 */
4880 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4881 pbCodeBuf[off++] = 0xc1;
4882 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4883 pbCodeBuf[off++] = 8;
4884
4885 /* mov reg8, reg8(r/m) */
4886 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4887 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4888 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4889 pbCodeBuf[off++] = X86_OP_REX;
4890 pbCodeBuf[off++] = 0x8a;
4891 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4892
4893 /* rol reg64, 8 */
4894 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4895 pbCodeBuf[off++] = 0xc1;
4896 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4897 pbCodeBuf[off++] = 8;
4898 }
4899
4900#elif defined(RT_ARCH_ARM64)
4901 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4902 or
4903 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4904 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4905 if (iGRegEx < 16)
4906 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4907 else
4908 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4909
4910#else
4911# error "Port me!"
4912#endif
4913 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4914
4915 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4916
4917#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4918 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4919#endif
4920 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4921 return off;
4922}
4923
4924
4925
4926#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4927 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4928
4929/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4930DECL_INLINE_THROW(uint32_t)
4931iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4932{
4933 Assert(iGReg < 16);
4934 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4935 kIemNativeGstRegUse_ForUpdate);
4936#ifdef RT_ARCH_AMD64
4937 /* mov reg16, imm16 */
4938 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4939 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4940 if (idxGstTmpReg >= 8)
4941 pbCodeBuf[off++] = X86_OP_REX_B;
4942 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4943 pbCodeBuf[off++] = RT_BYTE1(uValue);
4944 pbCodeBuf[off++] = RT_BYTE2(uValue);
4945
4946#elif defined(RT_ARCH_ARM64)
4947 /* movk xdst, #uValue, lsl #0 */
4948 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4949 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
4950
4951#else
4952# error "Port me!"
4953#endif
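    /* Note: a 16-bit store must leave bits 63:16 of the guest GPR untouched,
       which is why the register is allocated for update and merged into via
       mov reg16, imm16 (AMD64) respectively movk #imm16, lsl #0 (ARM64)
       instead of being rewritten wholesale. */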
4954
4955 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4956
4957#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4958 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4959#endif
4960 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4961 return off;
4962}
4963
4964
4965#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4966 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4967
4968/** Emits code for IEM_MC_STORE_GREG_U16. */
4969DECL_INLINE_THROW(uint32_t)
4970iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4971{
4972 Assert(iGReg < 16);
4973 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4974
4975 /*
4976 * If it's a constant value (unlikely) we treat this as a
4977 * IEM_MC_STORE_GREG_U16_CONST statement.
4978 */
4979 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4980 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4981 { /* likely */ }
4982 else
4983 {
4984 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4985 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4986 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
4987 }
4988
4989 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4990 kIemNativeGstRegUse_ForUpdate);
4991
4992#ifdef RT_ARCH_AMD64
4993 /* mov reg16, reg16 or [mem16] */
4994 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4995 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4996 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
4997 {
4998 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
4999 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
5000 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
5001 pbCodeBuf[off++] = 0x8b;
5002 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
5003 }
5004 else
5005 {
5006 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
5007 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
5008 if (idxGstTmpReg >= 8)
5009 pbCodeBuf[off++] = X86_OP_REX_R;
5010 pbCodeBuf[off++] = 0x8b;
5011 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
5012 }
5013
5014#elif defined(RT_ARCH_ARM64)
5015 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
5016 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxValueVar, &off);
5017 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5018 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
5019 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5020
5021#else
5022# error "Port me!"
5023#endif
5024
5025 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5026
5027#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5028 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5029#endif
5030 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5031 return off;
5032}
5033
5034
5035#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
5036 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
5037
5038/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
5039DECL_INLINE_THROW(uint32_t)
5040iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
5041{
5042 Assert(iGReg < 16);
5043 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5044 kIemNativeGstRegUse_ForFullWrite);
5045 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5046#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5047 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5048#endif
5049 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5050 return off;
5051}
5052
5053
5054#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
5055 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
5056
5057#define IEM_MC_STORE_GREG_I32(a_iGReg, a_i32Value) \
5058 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_i32Value)
5059
5060/** Emits code for IEM_MC_STORE_GREG_U32/IEM_MC_STORE_GREG_I32. */
5061DECL_INLINE_THROW(uint32_t)
5062iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5063{
5064 Assert(iGReg < 16);
5065 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5066
5067 /*
5068 * If it's a constant value (unlikely) we treat this as a
5069 * IEM_MC_STORE_GREG_U32_CONST statement.
5070 */
5071 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5072 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5073 { /* likely */ }
5074 else
5075 {
5076 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5077 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5078 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
5079 }
5080
5081 /*
5082     * For the rest we allocate a guest register for the variable and write
5083 * it to the CPUMCTX structure.
5084 */
5085 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5086#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5087 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5088#else
5089 RT_NOREF(idxVarReg);
5090#endif
5091#ifdef VBOX_STRICT
5092 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
5093#endif
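    /* Note: a 32-bit GPR write zero-extends into bits 63:32 on x86-64, which is
       why the variable's host register is taken over as a full-write shadow of
       the guest GPR and why strict builds verify above that its upper half is clear. */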
5094 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5095 return off;
5096}
5097
5098
5099#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
5100 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
5101
5102/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
5103DECL_INLINE_THROW(uint32_t)
5104iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
5105{
5106 Assert(iGReg < 16);
5107 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5108 kIemNativeGstRegUse_ForFullWrite);
5109 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5110#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5111 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5112#endif
5113 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5114 return off;
5115}
5116
5117
5118#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
5119 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
5120
5121#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
5122 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
5123
5124/** Emits code for IEM_MC_STORE_GREG_U64/IEM_MC_STORE_GREG_I64. */
5125DECL_INLINE_THROW(uint32_t)
5126iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5127{
5128 Assert(iGReg < 16);
5129 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5130
5131 /*
5132 * If it's a constant value (unlikely) we treat this as a
5133 * IEM_MC_STORE_GREG_U64_CONST statement.
5134 */
5135 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5136 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5137 { /* likely */ }
5138 else
5139 {
5140 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5141 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5142 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
5143 }
5144
5145 /*
5146     * For the rest we allocate a guest register for the variable and write
5147 * it to the CPUMCTX structure.
5148 */
5149 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5150#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5151 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5152#else
5153 RT_NOREF(idxVarReg);
5154#endif
5155 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5156 return off;
5157}
5158
5159
5160#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
5161 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
5162
5163/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
5164DECL_INLINE_THROW(uint32_t)
5165iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
5166{
5167 Assert(iGReg < 16);
5168 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5169 kIemNativeGstRegUse_ForUpdate);
5170 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
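    /* Note: a 32-bit register-to-itself move zero-extends the destination on
       both host architectures, which is exactly what clearing the high half of
       the guest GPR requires here. */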
5171#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5172 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5173#endif
5174 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5175 return off;
5176}
5177
5178
5179#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
5180 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
5181
5182/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
5183DECL_INLINE_THROW(uint32_t)
5184iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
5185{
5186 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5187 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
5188 Assert(iGRegLo < 16 && iGRegHi < 16);
5189
5190 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
5191 kIemNativeGstRegUse_ForFullWrite);
5192 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
5193 kIemNativeGstRegUse_ForFullWrite);
5194
5195 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5196 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
5197 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
5198 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
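    /* Note: the 128-bit source variable lives in a SIMD register; the two loads
       above copy lane 0 into the low guest GPR and lane 1 into the high one. */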
5199
5200 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
5201 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
5202 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
5203 return off;
5204}
5205
5206
5207/*********************************************************************************************************************************
5208* General purpose register manipulation (add, sub). *
5209*********************************************************************************************************************************/
5210
5211#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8AddendConst) \
5212    off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8AddendConst)
5213
5214/** Emits code for IEM_MC_ADD_GREG_U16. */
5215DECL_INLINE_THROW(uint32_t)
5216iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
5217{
5218 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5219 kIemNativeGstRegUse_ForUpdate);
5220
5221#ifdef RT_ARCH_AMD64
5222 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5223 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5224 if (idxGstTmpReg >= 8)
5225 pbCodeBuf[off++] = X86_OP_REX_B;
5226 if (uAddend == 1)
5227 {
5228 pbCodeBuf[off++] = 0xff; /* inc */
5229 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5230 }
5231 else
5232 {
5233 pbCodeBuf[off++] = 0x81;
5234 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5235 pbCodeBuf[off++] = uAddend;
5236 pbCodeBuf[off++] = 0;
5237 }
5238
5239#else
5240 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5241 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5242
5243    /* add tmp, gstgrp, uAddend */
5244 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
5245
5246    /* bfi w1, w2, 0, 16 - merges bits 15:0 of the temporary into the guest register. */
5247 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5248
5249 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5250#endif
5251
5252 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5253
5254#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5255 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5256#endif
5257
5258 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5259 return off;
5260}
5261
5262
5263#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
5264 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5265
5266#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
5267 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5268
5269/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
5270DECL_INLINE_THROW(uint32_t)
5271iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
5272{
5273 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5274 kIemNativeGstRegUse_ForUpdate);
5275
5276#ifdef RT_ARCH_AMD64
5277 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5278 if (f64Bit)
5279 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5280 else if (idxGstTmpReg >= 8)
5281 pbCodeBuf[off++] = X86_OP_REX_B;
5282 if (uAddend == 1)
5283 {
5284 pbCodeBuf[off++] = 0xff; /* inc */
5285 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5286 }
5287 else if (uAddend < 128)
5288 {
5289 pbCodeBuf[off++] = 0x83; /* add */
5290 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5291 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5292 }
5293 else
5294 {
5295 pbCodeBuf[off++] = 0x81; /* add */
5296 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5297 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5298 pbCodeBuf[off++] = 0;
5299 pbCodeBuf[off++] = 0;
5300 pbCodeBuf[off++] = 0;
5301 }
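    /* Note: the 0x83 form sign-extends its imm8, so it is only used while the
       addend fits in 0..127; larger byte addends fall back to the 0x81 form
       with a zero-extended imm32. */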
5302
5303#else
5304    /* add gstgrp, gstgrp, uAddend */
5305 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5306 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
5307
5308#endif
5309
5310 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5311
5312#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5313 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5314#endif
5315
5316 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5317 return off;
5318}
5319
5320
5321
5322#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5323 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5324
5325/** Emits code for IEM_MC_SUB_GREG_U16. */
5326DECL_INLINE_THROW(uint32_t)
5327iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
5328{
5329 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5330 kIemNativeGstRegUse_ForUpdate);
5331
5332#ifdef RT_ARCH_AMD64
5333 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5334 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5335 if (idxGstTmpReg >= 8)
5336 pbCodeBuf[off++] = X86_OP_REX_B;
5337 if (uSubtrahend == 1)
5338 {
5339 pbCodeBuf[off++] = 0xff; /* dec */
5340 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5341 }
5342 else
5343 {
5344 pbCodeBuf[off++] = 0x81;
5345 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5346 pbCodeBuf[off++] = uSubtrahend;
5347 pbCodeBuf[off++] = 0;
5348 }
5349
5350#else
5351 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5352 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5353
5354 /* sub tmp, gstgrp, uSubtrahend */
5355 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
5356
5357    /* bfi w1, w2, 0, 16 - merges bits 15:0 of the temporary into the guest register. */
5358 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5359
5360 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5361#endif
5362
5363 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5364
5365#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5366 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5367#endif
5368
5369 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5370 return off;
5371}
5372
5373
5374#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
5375 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5376
5377#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
5378 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5379
5380/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
5381DECL_INLINE_THROW(uint32_t)
5382iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
5383{
5384 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5385 kIemNativeGstRegUse_ForUpdate);
5386
5387#ifdef RT_ARCH_AMD64
5388 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5389 if (f64Bit)
5390 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5391 else if (idxGstTmpReg >= 8)
5392 pbCodeBuf[off++] = X86_OP_REX_B;
5393 if (uSubtrahend == 1)
5394 {
5395 pbCodeBuf[off++] = 0xff; /* dec */
5396 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5397 }
5398 else if (uSubtrahend < 128)
5399 {
5400 pbCodeBuf[off++] = 0x83; /* sub */
5401 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5402 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5403 }
5404 else
5405 {
5406 pbCodeBuf[off++] = 0x81; /* sub */
5407 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5408 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5409 pbCodeBuf[off++] = 0;
5410 pbCodeBuf[off++] = 0;
5411 pbCodeBuf[off++] = 0;
5412 }
5413
5414#else
5415    /* sub gstgrp, gstgrp, uSubtrahend */
5416 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5417 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
5418
5419#endif
5420
5421 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5422
5423#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5424 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5425#endif
5426
5427 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5428 return off;
5429}
5430
5431
5432#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
5433 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5434
5435#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
5436 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5437
5438#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
5439 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5440
5441#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
5442 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5443
5444/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
5445DECL_INLINE_THROW(uint32_t)
5446iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5447{
5448#ifdef VBOX_STRICT
5449 switch (cbMask)
5450 {
5451 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5452 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5453 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5454 case sizeof(uint64_t): break;
5455 default: AssertFailedBreak();
5456 }
5457#endif
5458
5459 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5460 kIemNativeGstRegUse_ForUpdate);
5461
5462 switch (cbMask)
5463 {
5464 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5465 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
5466 break;
5467 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
5468 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
5469 break;
5470 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5471 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5472 break;
5473 case sizeof(uint64_t):
5474 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
5475 break;
5476 default: AssertFailedBreak();
5477 }
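    /* Worked example (illustrative): IEM_MC_AND_GREG_U8(X86_GREG_xAX, 0x0f)
       ANDs the 64-bit shadow of RAX with 0xffffffffffffff0f, i.e. it can only
       clear bits 7:4 of AL while RAX bits 63:8 stay untouched, matching the
       partial-register semantics of an 8-bit AND. */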
5478
5479 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5480
5481#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5482 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5483#endif
5484
5485 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5486 return off;
5487}
5488
5489
5490#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
5491 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5492
5493#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
5494 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5495
5496#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
5497 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5498
5499#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
5500 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5501
5502/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
5503DECL_INLINE_THROW(uint32_t)
5504iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5505{
5506#ifdef VBOX_STRICT
5507 switch (cbMask)
5508 {
5509 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5510 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5511 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5512 case sizeof(uint64_t): break;
5513 default: AssertFailedBreak();
5514 }
5515#endif
5516
5517 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5518 kIemNativeGstRegUse_ForUpdate);
5519
5520 switch (cbMask)
5521 {
5522 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5523 case sizeof(uint16_t):
5524 case sizeof(uint64_t):
5525 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
5526 break;
5527 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5528 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5529 break;
5530 default: AssertFailedBreak();
5531 }
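    /* Note: for the 8-, 16- and 64-bit variants the immediate cannot set bits
       outside the addressed sub-register, so a plain 64-bit OR preserves the
       remaining bits; the 32-bit variant goes through the 32-bit OR emitter so
       the result is zero-extended like a real 32-bit register write. */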
5532
5533 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5534
5535#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5536 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5537#endif
5538
5539 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5540 return off;
5541}
5542
5543
5544/*********************************************************************************************************************************
5545* Local/Argument variable manipulation (add, sub, and, or). *
5546*********************************************************************************************************************************/
5547
5548#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
5549 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5550
5551#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
5552 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5553
5554#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
5555 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5556
5557#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
5558 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5559
5560
5561#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
5562 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
5563
5564#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
5565 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
5566
5567#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
5568 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
5569
5570/** Emits code for AND'ing a local and a constant value. */
5571DECL_INLINE_THROW(uint32_t)
5572iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5573{
5574#ifdef VBOX_STRICT
5575 switch (cbMask)
5576 {
5577 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5578 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5579 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5580 case sizeof(uint64_t): break;
5581 default: AssertFailedBreak();
5582 }
5583#endif
5584
5585 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5586 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5587
5588 if (cbMask <= sizeof(uint32_t))
5589 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
5590 else
5591 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
5592
5593 iemNativeVarRegisterRelease(pReNative, idxVar);
5594 return off;
5595}
5596
5597
5598#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
5599 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5600
5601#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
5602 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5603
5604#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
5605 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5606
5607#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
5608 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5609
5610/** Emits code for OR'ing a local and a constant value. */
5611DECL_INLINE_THROW(uint32_t)
5612iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5613{
5614#ifdef VBOX_STRICT
5615 switch (cbMask)
5616 {
5617 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5618 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5619 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5620 case sizeof(uint64_t): break;
5621 default: AssertFailedBreak();
5622 }
5623#endif
5624
5625 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5626 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5627
5628 if (cbMask <= sizeof(uint32_t))
5629 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
5630 else
5631 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
5632
5633 iemNativeVarRegisterRelease(pReNative, idxVar);
5634 return off;
5635}
5636
5637
5638#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
5639 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
5640
5641#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
5642 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
5643
5644#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
5645 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
5646
5647/** Emits code for reversing the byte order in a local value. */
5648DECL_INLINE_THROW(uint32_t)
5649iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
5650{
5651 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5652 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5653
5654 switch (cbLocal)
5655 {
5656 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
5657 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
5658 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
5659 default: AssertFailedBreak();
5660 }
5661
5662 iemNativeVarRegisterRelease(pReNative, idxVar);
5663 return off;
5664}
5665
5666
5667#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
5668 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5669
5670#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
5671 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5672
5673#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
5674 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5675
5676/** Emits code for shifting left a local value. */
5677DECL_INLINE_THROW(uint32_t)
5678iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5679{
5680#ifdef VBOX_STRICT
5681 switch (cbLocal)
5682 {
5683 case sizeof(uint8_t): Assert(cShift < 8); break;
5684 case sizeof(uint16_t): Assert(cShift < 16); break;
5685 case sizeof(uint32_t): Assert(cShift < 32); break;
5686 case sizeof(uint64_t): Assert(cShift < 64); break;
5687 default: AssertFailedBreak();
5688 }
5689#endif
5690
5691 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5692 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5693
5694 if (cbLocal <= sizeof(uint32_t))
5695 {
5696 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
5697 if (cbLocal < sizeof(uint32_t))
5698 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
5699 cbLocal == sizeof(uint16_t)
5700 ? UINT32_C(0xffff)
5701 : UINT32_C(0xff));
5702 }
5703 else
5704 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
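    /* Note: for 8- and 16-bit locals the post-shift AND above discards any bits
       shifted beyond the local's width, so the wider host register does not
       retain bits a genuine 8/16-bit shift would have dropped. */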
5705
5706 iemNativeVarRegisterRelease(pReNative, idxVar);
5707 return off;
5708}
5709
5710
5711#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
5712 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5713
5714#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
5715 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5716
5717#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
5718 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5719
5720/** Emits code for arithmetically shifting right a local value. */
5721DECL_INLINE_THROW(uint32_t)
5722iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5723{
5724#ifdef VBOX_STRICT
5725 switch (cbLocal)
5726 {
5727 case sizeof(int8_t): Assert(cShift < 8); break;
5728 case sizeof(int16_t): Assert(cShift < 16); break;
5729 case sizeof(int32_t): Assert(cShift < 32); break;
5730 case sizeof(int64_t): Assert(cShift < 64); break;
5731 default: AssertFailedBreak();
5732 }
5733#endif
5734
5735 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5736 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5737
5738 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
5739 if (cbLocal == sizeof(uint8_t))
5740 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
5741 else if (cbLocal == sizeof(uint16_t))
5742 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
5743
5744 if (cbLocal <= sizeof(uint32_t))
5745 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
5746 else
5747 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
5748
5749 iemNativeVarRegisterRelease(pReNative, idxVar);
5750 return off;
5751}
5752
5753
5754#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
5755 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
5756
5757#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
5758 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
5759
5760#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
5761 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
5762
5763/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
5764DECL_INLINE_THROW(uint32_t)
5765iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
5766{
5767 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
5768 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
5769 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5770 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5771
5772 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5773 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquireInited(pReNative, idxVarEffAddr, &off);
5774
5775 /* Need to sign extend the value. */
5776 if (cbLocal <= sizeof(uint32_t))
5777 {
5778/** @todo ARM64: In case of boredom, the extended add instruction can do the
5779 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
5780 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5781
5782 switch (cbLocal)
5783 {
5784 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
5785 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
5786 default: AssertFailed();
5787 }
5788
5789 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
5790 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5791 }
5792 else
5793 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
5794
5795 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
5796 iemNativeVarRegisterRelease(pReNative, idxVar);
5797 return off;
5798}
5799
5800
5801
5802/*********************************************************************************************************************************
5803* EFLAGS *
5804*********************************************************************************************************************************/
5805
5806#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
5807# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
5808#else
5809# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
5810 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
5811
5812DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
5813{
5814 if (fEflOutput)
5815 {
5816 PVMCPUCC const pVCpu = pReNative->pVCpu;
5817# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5818 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5819 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5820 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5821# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5822 if (fEflOutput & (a_fEfl)) \
5823 { \
5824 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5825 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5826 else \
5827 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5828 } else do { } while (0)
5829# else
5830 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
5831 IEMLIVENESSBIT const LivenessClobbered = { IEMLIVENESS_STATE_GET_WILL_BE_CLOBBERED_SET(pLivenessEntry) };
5832 IEMLIVENESSBIT const LivenessDelayable = { IEMLIVENESS_STATE_GET_CAN_BE_POSTPONED_SET(pLivenessEntry) };
5833# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5834 if (fEflOutput & (a_fEfl)) \
5835 { \
5836 if (LivenessClobbered.a_fLivenessMember) \
5837 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5838 else if (LivenessDelayable.a_fLivenessMember) \
5839 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5840 else \
5841 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5842 } else do { } while (0)
5843# endif
5844 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5845 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5846 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5847 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5848 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5849 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5850 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5851# undef CHECK_FLAG_AND_UPDATE_STATS
5852 }
5853 RT_NOREF(fEflInput);
5854}
5855#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5856
5857#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5858#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5859 off = iemNativeEmitFetchEFlags<a_fEflInput, iemNativeEflagsToLivenessMask<a_fEflInput>(),\
5860 a_fEflOutput, iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags)
5861
5862/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5863template<uint32_t const a_fEflInput, uint64_t const a_fLivenessEflInput,
5864 uint32_t const a_fEflOutput, uint64_t const a_fLivenessEflOutput>
5865DECL_INLINE_THROW(uint32_t)
5866iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
5867{
5868 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5869 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5870 /** @todo fix NOT AssertCompile(a_fEflInput != 0 || a_fEflOutput != 0); */
5871
5872#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5873# ifdef VBOX_STRICT
5874 if ( pReNative->idxCurCall != 0
5875 && (a_fEflInput != 0 || a_fEflOutput != 0) /* for NOT these are both zero for now. */)
5876 {
5877 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5878 RT_CONSTEXPR uint32_t const fBoth = a_fEflInput | a_fEflOutput;
5879# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
5880 AssertMsg( !(fBoth & (a_fElfConst)) \
5881 || (!(a_fEflInput & (a_fElfConst)) \
5882 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5883 : !(a_fEflOutput & (a_fElfConst)) \
5884 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5885 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5886 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5887 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5888 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5889 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5890 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5891 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5892 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5893 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5894# undef ASSERT_ONE_EFL
5895 }
5896# endif
5897#endif
5898
5899 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, a_fEflInput);
5900 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, a_fEflInput);
5901
5902 /** @todo This could be prettier...*/
5903 /** @todo Also, the shadowing+liveness handling of EFlags is currently
5904 * problematic, but I'll try tackle that soon (@bugref{10720}). */
5905 PCIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarEFlags)];
5906 Assert(pVar->enmKind == kIemNativeVarKind_Invalid || pVar->enmKind == kIemNativeVarKind_Stack);
5907 Assert(pVar->idxReg == UINT8_MAX);
5908 if (pVar->uArgNo >= IEMNATIVE_CALL_ARG_GREG_COUNT)
5909 {
5910 /** @todo We could use kIemNativeGstRegUse_ReadOnly here when fOutput is
5911 * zero, but since iemNativeVarRegisterSet clears the shadowing,
5912 * that's counter productive... */
5913 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
5914 a_fLivenessEflInput, a_fLivenessEflOutput);
5915 iemNativeVarRegisterSet(pReNative, idxVarEFlags, idxGstReg, off, true /*fAllocated*/);
5916 }
5917 else
5918 {
5919 /* Register argument variable: Avoid assertions in generic call code and load it the traditional way. */
5920 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off);
5921 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent(pReNative, &off,
5922 a_fLivenessEflInput, a_fLivenessEflOutput);
5923 if (idxGstReg != UINT8_MAX)
5924 {
5925 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstReg);
5926 iemNativeRegFreeTmp(pReNative, idxGstReg);
5927 }
5928 else
5929 off = iemNativeEmitLoadGprWithGstRegT<kIemNativeGstReg_EFlags>(pReNative, off, idxVarReg);
5930 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5931 }
5932 return off;
5933}
5934
5935
5936
5937/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5938 * start using it with custom native code emission (inlining assembly
5939 * instruction helpers). */
5940#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5941#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5942 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5943 off = iemNativeEmitCommitEFlags<true /*a_fUpdateSkippingAndPostponing*/, a_fEflOutput, \
5944 iemNativeEflagsToLivenessMask<a_fEflInput>(), \
5945 iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags, a_fEflInput)
5946
5947#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5948#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5949 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5950 off = iemNativeEmitCommitEFlags<false /*a_fUpdateSkippingAndPostponing*/, a_fEflOutput, \
5951 iemNativeEflagsToLivenessMask<a_fEflInput>(), \
5952 iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags, a_fEflInput)
5953
5954/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
5955template<bool const a_fUpdateSkippingAndPostponing, uint32_t const a_fEflOutput,
5956 uint64_t const a_fLivenessEflInputBits, uint64_t const a_fLivenessEflOutputBits>
5957DECL_INLINE_THROW(uint32_t)
5958iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflInput)
5959{
5960 uint8_t const idxReg = iemNativeVarRegisterAcquireInited(pReNative, idxVarEFlags, &off);
5961 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5962
5963#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5964# ifdef VBOX_STRICT
5965 if ( pReNative->idxCurCall != 0
5966 && (a_fLivenessEflInputBits != 0 || a_fLivenessEflOutputBits != 0) /* for NOT these are both zero for now. */)
5967 {
5968 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5969# define ASSERT_ONE_EFL(a_idxField) \
5970 if RT_CONSTEXPR_IF(((a_fLivenessEflInputBits | a_fLivenessEflOutputBits) & RT_BIT_64(a_idxField)) != 0) \
5971 AssertMsg(!(a_fLivenessEflInputBits & RT_BIT_64(a_idxField)) \
5972 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5973 : !(a_fLivenessEflOutputBits & RT_BIT_64(a_idxField)) \
5974 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5975 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)), \
5976 ("%s - %u\n", #a_idxField, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5977 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
5978 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
5979 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
5980 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
5981 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
5982 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
5983 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
5984# undef ASSERT_ONE_EFL
5985 }
5986# endif
5987#endif
5988
5989#ifdef VBOX_STRICT
5990 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
5991 uint32_t offFixup = off;
5992 off = iemNativeEmitJnzToFixed(pReNative, off, off);
5993 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
5994 iemNativeFixupFixedJump(pReNative, offFixup, off);
5995
5996 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
5997 offFixup = off;
5998 off = iemNativeEmitJzToFixed(pReNative, off, off);
5999 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
6000 iemNativeFixupFixedJump(pReNative, offFixup, off);
6001
6002 /** @todo validate that only bits in the a_fEflOutput mask changed. */
6003#endif
6004
6005#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6006 if RT_CONSTEXPR_IF(a_fUpdateSkippingAndPostponing)
6007 {
6008        Assert(!(pReNative->fSkippingEFlags & fEflInput)); RT_NOREF(fEflInput);
6009 if (pReNative->fSkippingEFlags)
6010 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitCommitEFlags)\n",
6011 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~(a_fEflOutput & X86_EFL_STATUS_BITS) ));
6012 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6013 pReNative->fSkippingEFlags = 0;
6014 else
6015 pReNative->fSkippingEFlags &= ~(a_fEflOutput & X86_EFL_STATUS_BITS);
6016# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6017 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6018 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6019 else
6020 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(a_fEflOutput & X86_EFL_STATUS_BITS),
6021 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6022# endif
6023 IEMNATIVE_CLEAR_POSTPONED_EFLAGS(pReNative, a_fEflOutput);
6024 }
6025#endif
6026
6027 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
6028 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_EFlags>(pReNative, off, idxReg);
6029 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
6030 return off;
6031}
6032
6033
6034typedef enum IEMNATIVEMITEFLOP
6035{
6036 kIemNativeEmitEflOp_Set,
6037 kIemNativeEmitEflOp_Clear,
6038 kIemNativeEmitEflOp_Flip
6039} IEMNATIVEMITEFLOP;
6040
6041#define IEM_MC_SET_EFL_BIT(a_fBit) \
6042 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Set, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6043
6044#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
6045 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Clear, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6046
6047#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
6048 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Flip, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6049
6050/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
6051template<IEMNATIVEMITEFLOP const a_enmOp, uint32_t const a_fEflBit, uint64_t const a_fLivenessEflBit>
6052DECL_INLINE_THROW(uint32_t) iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6053{
6054 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
6055 a_enmOp == kIemNativeEmitEflOp_Flip
6056 ? a_fLivenessEflBit : 0,
6057 a_fLivenessEflBit);
6058
6059 /* Using 'if constexpr' forces code elimination in debug builds with VC. */
6060 if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Set)
6061 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, a_fEflBit);
6062 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Clear)
6063 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~a_fEflBit);
6064 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Flip)
6065 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, a_fEflBit);
6066 else
6067 AssertCompile( a_enmOp == kIemNativeEmitEflOp_Set /* AssertCompile(false) works with VC 2019 but not clang 15. */
6068 || a_enmOp == kIemNativeEmitEflOp_Clear
6069 || a_enmOp == kIemNativeEmitEflOp_Flip);
6070
6071 /** @todo No delayed writeback for EFLAGS right now. */
6072 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_EFlags>(pReNative, off, idxEflReg);
6073
6074 /* Free but don't flush the EFLAGS register. */
6075 iemNativeRegFreeTmp(pReNative, idxEflReg);
6076
6077#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6078 /* Clear the bit in the skipped mask if we're clobbering and it's a status bit. */
6079 if RT_CONSTEXPR_IF( (a_enmOp == kIemNativeEmitEflOp_Set || a_enmOp == kIemNativeEmitEflOp_Clear)
6080 && (a_fEflBit & X86_EFL_STATUS_BITS))
6081 {
6082 if (pReNative->fSkippingEFlags)
6083 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitModifyEFlagsBit)\n",
6084 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~a_fEflBit ));
6085 pReNative->fSkippingEFlags &= ~a_fEflBit;
6086# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6087 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~a_fEflBit, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6088# endif
6089 }
6090#endif
6091
6092 return off;
6093}
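/* Usage sketch (illustrative): IEM_MC_FLIP_EFL_BIT(X86_EFL_CF), e.g. for a
   CMC-style operation, resolves to an XOR of the cached EFLAGS register with
   X86_EFL_CF followed by the store back to the guest context shown above. */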
6094
6095
6096/*********************************************************************************************************************************
6097* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
6098*********************************************************************************************************************************/
6099
6100#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
6101 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
6102
6103#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
6104 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
6105
6106#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
6107 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
6108
6109
6110/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
6111 * IEM_MC_FETCH_SREG_ZX_U64. */
6112DECL_INLINE_THROW(uint32_t)
6113iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
6114{
6115 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6116 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
6117 Assert(iSReg < X86_SREG_COUNT);
6118
6119 /*
6120     * For now, we will not create a shadow copy of a selector. The rationale
6121     * is that since we do not recompile the popping and loading of segment
6122     * registers and that the IEM_MC_FETCH_SREG_U* MCs are only used for
6123     * pushing and moving to registers, there is only a small chance that the
6124     * shadow copy will be accessed again before the register is reloaded. One
6125     * scenario would be nested calls in 16-bit code, but I doubt it's worth
6126     * the extra register pressure atm.
6127     *
6128     * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
6129     * and iemNativeVarRegisterAcquire for a load scenario. We only have the
6130     * store scenario covered at present (r160730).
6131 */
6132 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6133 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
6134 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
6135 iemNativeVarRegisterRelease(pReNative, idxDstVar);
6136 return off;
6137}
6138
6139
6140
6141/*********************************************************************************************************************************
6142* Register references. *
6143*********************************************************************************************************************************/
6144
6145#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
6146 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
6147
6148#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
6149 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
6150
6151/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
6152DECL_INLINE_THROW(uint32_t)
6153iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
6154{
6155 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
6156 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6157 Assert(iGRegEx < 20);
6158
6159 if (iGRegEx < 16)
6160 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6161 else
6162 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
6163
6164 /* If we've delayed writing back the register value, flush it now. */
6165 off = iemNativeRegFlushPendingSpecificWrite<kIemNativeGstRegRef_Gpr>(pReNative, off, iGRegEx & 15);
6166
6167 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6168 if (!fConst)
6169 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
6170
6171 return off;
6172}
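
/*
 * Side note on the iGRegEx encoding handled above: values 16..19 denote the
 * legacy high-byte registers AH/CH/DH/BH, i.e. bits 8..15 of rAX/rCX/rDX/rBX,
 * which is why a separate GprHighByte reference kind is needed.  A standalone
 * sketch of just that architectural mapping (hypothetical helper name, plain C):
 */
#if 0 /* illustrative sketch only, never compiled */
#include <stdint.h>
#include <assert.h>

/* Read an 8-bit register given the extended index convention:
   0..15  -> low byte of GPR0..GPR15 (AL, CL, ..., R15L),
   16..19 -> high byte of GPR0..GPR3 (AH, CH, DH, BH). */
static uint8_t ReadGReg8Ex(uint64_t const *pauGprs, unsigned iGRegEx)
{
    assert(iGRegEx < 20);
    if (iGRegEx < 16)
        return (uint8_t)pauGprs[iGRegEx];             /* bits 0..7 */
    return (uint8_t)(pauGprs[iGRegEx & 15] >> 8);     /* bits 8..15 (AH/CH/DH/BH) */
}

int main(void)
{
    uint64_t auGprs[16] = {0};
    auGprs[0] = UINT64_C(0x1122334455667788);         /* rAX */
    assert(ReadGReg8Ex(auGprs,  0) == 0x88);          /* AL */
    assert(ReadGReg8Ex(auGprs, 16) == 0x77);          /* AH */
    return 0;
}
#endif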
6173
6174#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
6175 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
6176
6177#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
6178 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
6179
6180#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
6181 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
6182
6183#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
6184 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
6185
6186#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
6187 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
6188
6189#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
6190 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
6191
6192#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
6193 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
6194
6195#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
6196 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
6197
6198#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
6199 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
6200
6201#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
6202 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
6203
6204/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
6205DECL_INLINE_THROW(uint32_t)
6206iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
6207{
6208 Assert(iGReg < 16);
6209 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
6210 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6211
6212 /* If we've delayed writing back the register value, flush it now. */
6213 off = iemNativeRegFlushPendingSpecificWrite<kIemNativeGstRegRef_Gpr>(pReNative, off, iGReg);
6214
6215 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6216 if (!fConst)
6217 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
6218
6219 return off;
6220}
6221
6222
6223#undef IEM_MC_REF_EFLAGS /* should not be used. */
6224#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
6225 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
6226 off = iemNativeEmitRefEFlags<a_fEflOutput>(pReNative, off, a_pEFlags, a_fEflInput)
6227
6228/** Handles IEM_MC_REF_EFLAGS. */
6229template<uint32_t const a_fEflOutput>
6230DECL_INLINE_THROW(uint32_t)
6231iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput)
6232{
6233 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
6234 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6235
6236#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6237 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fEflInput);
6238 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
6239 if (pReNative->fSkippingEFlags)
6240 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitRefEFlags)\n",
6241 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~a_fEflOutput ));
6242 pReNative->fSkippingEFlags &= ~a_fEflOutput;
6243# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6244
6245 /* Updating the skipping according to the outputs is a little early, but
6246 we don't have any other hooks for references atm. */
6247 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6248 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6249 else if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) != 0)
6250 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(a_fEflOutput & X86_EFL_STATUS_BITS),
6251 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6252# endif
6253
6254 /* This ASSUMES that EFLAGS references are not taken before use. */
6255 IEMNATIVE_CLEAR_POSTPONED_EFLAGS(pReNative, a_fEflOutput);
6256
6257#endif
6258 RT_NOREF(fEflInput);
6259
6260 /* If we've delayed writing back the register value, flush it now. */
6261 off = iemNativeRegFlushPendingSpecificWrite<kIemNativeGstRegRef_EFlags>(pReNative, off, 0);
6262
6263 /* If there is a shadow copy of guest EFLAGS, flush it now. */
6264 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
6265
6266 return off;
6267}
6268
6269
6270/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
6271 * different code from the threaded recompiler, maybe it would be helpful. For now
6272 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
6273#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
6274
6275
6276#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
6277 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
6278
6279#define IEM_MC_REF_XREG_XMM(a_puXmmDst, a_iXReg) \
6280 off = iemNativeEmitRefXregXxx(pReNative, off, a_puXmmDst, a_iXReg, false /*fConst*/)
6281
6282#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
6283 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
6284
6285#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
6286 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
6287
6288/* Just being paranoid here. */
6289#ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
6290AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
6291AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
6292AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
6293AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
6294#endif
6295AssertCompileMemberOffset(X86XMMREG, au64, 0);
6296AssertCompileMemberOffset(X86XMMREG, au32, 0);
6297AssertCompileMemberOffset(X86XMMREG, ar64, 0);
6298AssertCompileMemberOffset(X86XMMREG, ar32, 0);
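
/*
 * The AssertCompile* statements above pin down that every view of X86XMMREG
 * starts at offset zero, so the same VCpu offset can be used regardless of
 * which union member an MC refers to.  A minimal sketch of the same technique
 * using standard offsetof/static_assert (C11/C++11); the DEMOXMMREG type is
 * made up and only mirrors the idea, not the real CPUMCTX layout:
 */
#if 0 /* illustrative sketch only, never compiled */
#include <stddef.h> /* offsetof */
#include <stdint.h>

/* Hypothetical union: several views of the same 16 bytes. */
typedef union DEMOXMMREG
{
    uint64_t au64[2];
    uint32_t au32[4];
    double   ar64[2];
    float    ar32[4];
} DEMOXMMREG;

/* Compile-time layout checks, same purpose as AssertCompileMemberOffset. */
static_assert(offsetof(DEMOXMMREG, au64) == 0, "au64 view must start at offset 0");
static_assert(offsetof(DEMOXMMREG, au32) == 0, "au32 view must start at offset 0");
static_assert(offsetof(DEMOXMMREG, ar64) == 0, "ar64 view must start at offset 0");
static_assert(offsetof(DEMOXMMREG, ar32) == 0, "ar32 view must start at offset 0");
static_assert(sizeof(DEMOXMMREG) == 16, "an XMM register is 16 bytes");
#endif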
6299
6300#define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
6301 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
6302#define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
6303 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
6304#define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
6305 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
6306#define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
6307 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
6308
6309/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
6310DECL_INLINE_THROW(uint32_t)
6311iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
6312{
6313 Assert(iXReg < 16);
6314 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
6315 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6316
6317 /* If we've delayed writing back the register value, flush it now. */
6318 off = iemNativeRegFlushPendingSpecificWrite<kIemNativeGstRegRef_XReg>(pReNative, off, iXReg);
6319
6320 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6321 if (!fConst)
6322 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
6323
6324 return off;
6325}
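
/*
 * The reason only non-const references flush the guest shadow copies above is
 * that a writable pointer can change the in-memory guest value behind the back
 * of any host register shadowing it, while a read-only pointer cannot.  A tiny
 * generic sketch of that invalidation rule; the SHADOWCACHE structure and
 * helper are hypothetical and not the recompiler's own bookkeeping:
 */
#if 0 /* illustrative sketch only, never compiled */
#include <stdint.h>
#include <stdbool.h>

/* Hypothetical shadow cache: one guest value optionally mirrored in a host register. */
typedef struct SHADOWCACHE
{
    uint64_t uGuestMem;      /* authoritative guest value (in memory / context structure) */
    uint64_t uHostRegCopy;   /* cached copy */
    bool     fHostRegValid;  /* is the cached copy still trustworthy? */
} SHADOWCACHE;

/* Handing out a reference: only a writable reference invalidates the cached copy,
   because the caller may modify the memory value directly afterwards. */
static uint64_t *GetGuestValueRef(SHADOWCACHE *pCache, bool fConst)
{
    if (!fConst)
        pCache->fHostRegValid = false;   /* same idea as flushing the guest shadows */
    return &pCache->uGuestMem;           /* caller promises not to write when fConst */
}
#endif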
6326
6327
6328
6329/*********************************************************************************************************************************
6330* Effective Address Calculation *
6331*********************************************************************************************************************************/
6332#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
6333 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
6334
6335/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
6336 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
6337DECL_INLINE_THROW(uint32_t)
6338iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6339 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
6340{
6341 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6342
6343 /*
6344 * Handle the disp16 form with no registers first.
6345 *
6346 * Convert to an immediate value, as that'll delay the register allocation
6347 * and assignment till the memory access / call / whatever and we can use
6348 * a more appropriate register (or none at all).
6349 */
6350 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
6351 {
6352 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
6353 return off;
6354 }
6355
6356 /* Determine the displacement. */
6357 uint16_t u16EffAddr;
6358 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6359 {
6360 case 0: u16EffAddr = 0; break;
6361 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
6362 case 2: u16EffAddr = u16Disp; break;
6363 default: AssertFailedStmt(u16EffAddr = 0);
6364 }
6365
6366 /* Determine the registers involved. */
6367 uint8_t idxGstRegBase;
6368 uint8_t idxGstRegIndex;
6369 switch (bRm & X86_MODRM_RM_MASK)
6370 {
6371 case 0:
6372 idxGstRegBase = X86_GREG_xBX;
6373 idxGstRegIndex = X86_GREG_xSI;
6374 break;
6375 case 1:
6376 idxGstRegBase = X86_GREG_xBX;
6377 idxGstRegIndex = X86_GREG_xDI;
6378 break;
6379 case 2:
6380 idxGstRegBase = X86_GREG_xBP;
6381 idxGstRegIndex = X86_GREG_xSI;
6382 break;
6383 case 3:
6384 idxGstRegBase = X86_GREG_xBP;
6385 idxGstRegIndex = X86_GREG_xDI;
6386 break;
6387 case 4:
6388 idxGstRegBase = X86_GREG_xSI;
6389 idxGstRegIndex = UINT8_MAX;
6390 break;
6391 case 5:
6392 idxGstRegBase = X86_GREG_xDI;
6393 idxGstRegIndex = UINT8_MAX;
6394 break;
6395 case 6:
6396 idxGstRegBase = X86_GREG_xBP;
6397 idxGstRegIndex = UINT8_MAX;
6398 break;
6399#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
6400 default:
6401#endif
6402 case 7:
6403 idxGstRegBase = X86_GREG_xBX;
6404 idxGstRegIndex = UINT8_MAX;
6405 break;
6406 }
6407
6408 /*
6409 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
6410 */
6411 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6412 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6413 kIemNativeGstRegUse_ReadOnly);
6414 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
6415 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6416 kIemNativeGstRegUse_ReadOnly)
6417 : UINT8_MAX;
6418#ifdef RT_ARCH_AMD64
6419 if (idxRegIndex == UINT8_MAX)
6420 {
6421 if (u16EffAddr == 0)
6422 {
6423 /* movxz ret, base */
6424 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
6425 }
6426 else
6427 {
6428 /* lea ret32, [base64 + disp32] */
6429 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6430 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6431 if (idxRegRet >= 8 || idxRegBase >= 8)
6432 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6433 pbCodeBuf[off++] = 0x8d;
6434 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6435 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
6436 else
6437 {
6438 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
6439 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6440 }
6441 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6442 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6443 pbCodeBuf[off++] = 0;
6444 pbCodeBuf[off++] = 0;
6445 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6446
6447 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6448 }
6449 }
6450 else
6451 {
6452 /* lea ret32, [index64 + base64 (+ disp32)] */
6453 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6454 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6455 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6456 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6457 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6458 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6459 pbCodeBuf[off++] = 0x8d;
6460 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
6461 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6462 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
6463 if (bMod == X86_MOD_MEM4)
6464 {
6465 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6466 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6467 pbCodeBuf[off++] = 0;
6468 pbCodeBuf[off++] = 0;
6469 }
6470 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6471 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6472 }
6473
6474#elif defined(RT_ARCH_ARM64)
6475 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6476 if (u16EffAddr == 0)
6477 {
6478 if (idxRegIndex == UINT8_MAX)
6479 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
6480 else
6481 {
6482 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
6483 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6484 }
6485 }
6486 else
6487 {
6488 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
6489 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
6490 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
6491 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6492 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
6493 else
6494 {
6495 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
6496 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6497 }
6498 if (idxRegIndex != UINT8_MAX)
6499 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
6500 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6501 }
6502
6503#else
6504# error "port me"
6505#endif
6506
6507 if (idxRegIndex != UINT8_MAX)
6508 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6509 iemNativeRegFreeTmp(pReNative, idxRegBase);
6510 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6511 return off;
6512}
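
/*
 * For reference, what the emitted code computes is the standard 16-bit ModRM
 * effective address: a fixed base/index register pair selected by r/m, an
 * optional disp8/disp16, and a final truncation to 16 bits (mod=00 r/m=110
 * being the plain disp16 form).  The interpreter-style sketch below mirrors
 * those architectural rules only; the function name and layout are made up
 * and it is not part of the recompiler.
 */
#if 0 /* illustrative sketch only, never compiled */
#include <stdint.h>

/* Guest GPR indices for the registers used by 16-bit addressing. */
enum { GREG_xBX = 3, GREG_xBP = 5, GREG_xSI = 6, GREG_xDI = 7 };

/* Compute a 16-bit effective address from a memory-form ModRM byte and its displacement. */
static uint16_t CalcEffAddr16(uint8_t bRm, uint16_t u16Disp, uint16_t const *pauRegs16)
{
    unsigned const iMod = (bRm >> 6) & 3;
    unsigned const iRm  = bRm & 7;

    if (iMod == 0 && iRm == 6)          /* mod=00, r/m=110: disp16 only, no registers. */
        return u16Disp;

    uint16_t const uDisp = iMod == 1 ? (uint16_t)(int16_t)(int8_t)u16Disp /* disp8, sign-extended */
                         : iMod == 2 ? u16Disp                            /* disp16 */
                         : 0;                                             /* mod=00: no displacement */

    static uint8_t const s_aiBase[8]  = { GREG_xBX, GREG_xBX, GREG_xBP, GREG_xBP, GREG_xSI, GREG_xDI, GREG_xBP, GREG_xBX };
    static int8_t  const s_aiIndex[8] = { GREG_xSI, GREG_xDI, GREG_xSI, GREG_xDI, -1, -1, -1, -1 };

    uint32_t uEffAddr = uDisp + pauRegs16[s_aiBase[iRm]];
    if (s_aiIndex[iRm] >= 0)
        uEffAddr += pauRegs16[s_aiIndex[iRm]];
    return (uint16_t)uEffAddr;          /* wraps within the 64KiB segment */
}
#endif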
6513
6514
6515#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
6516 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
6517
6518/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
6519 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
6520DECL_INLINE_THROW(uint32_t)
6521iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6522 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
6523{
6524 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6525
6526 /*
6527 * Handle the disp32 form with no registers first.
6528 *
6529 * Convert to an immediate value, as that'll delay the register allocation
6530 * and assignment till the memory access / call / whatever and we can use
6531 * a more appropriate register (or none at all).
6532 */
6533 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6534 {
6535 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
6536 return off;
6537 }
6538
6539 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
6540 uint32_t u32EffAddr = 0;
6541 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6542 {
6543 case 0: break;
6544 case 1: u32EffAddr = (int8_t)u32Disp; break;
6545 case 2: u32EffAddr = u32Disp; break;
6546 default: AssertFailed();
6547 }
6548
6549 /* Get the register (or SIB) value. */
6550 uint8_t idxGstRegBase = UINT8_MAX;
6551 uint8_t idxGstRegIndex = UINT8_MAX;
6552 uint8_t cShiftIndex = 0;
6553 switch (bRm & X86_MODRM_RM_MASK)
6554 {
6555 case 0: idxGstRegBase = X86_GREG_xAX; break;
6556 case 1: idxGstRegBase = X86_GREG_xCX; break;
6557 case 2: idxGstRegBase = X86_GREG_xDX; break;
6558 case 3: idxGstRegBase = X86_GREG_xBX; break;
6559 case 4: /* SIB */
6560 {
6561 /* index w/ scaling. */
6562 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6563 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6564 {
6565 case 0: idxGstRegIndex = X86_GREG_xAX; break;
6566 case 1: idxGstRegIndex = X86_GREG_xCX; break;
6567 case 2: idxGstRegIndex = X86_GREG_xDX; break;
6568 case 3: idxGstRegIndex = X86_GREG_xBX; break;
6569 case 4: cShiftIndex = 0; /*no index*/ break;
6570 case 5: idxGstRegIndex = X86_GREG_xBP; break;
6571 case 6: idxGstRegIndex = X86_GREG_xSI; break;
6572 case 7: idxGstRegIndex = X86_GREG_xDI; break;
6573 }
6574
6575 /* base */
6576 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
6577 {
6578 case 0: idxGstRegBase = X86_GREG_xAX; break;
6579 case 1: idxGstRegBase = X86_GREG_xCX; break;
6580 case 2: idxGstRegBase = X86_GREG_xDX; break;
6581 case 3: idxGstRegBase = X86_GREG_xBX; break;
6582 case 4:
6583 idxGstRegBase = X86_GREG_xSP;
6584 u32EffAddr += uSibAndRspOffset >> 8;
6585 break;
6586 case 5:
6587 if ((bRm & X86_MODRM_MOD_MASK) != 0)
6588 idxGstRegBase = X86_GREG_xBP;
6589 else
6590 {
6591 Assert(u32EffAddr == 0);
6592 u32EffAddr = u32Disp;
6593 }
6594 break;
6595 case 6: idxGstRegBase = X86_GREG_xSI; break;
6596 case 7: idxGstRegBase = X86_GREG_xDI; break;
6597 }
6598 break;
6599 }
6600 case 5: idxGstRegBase = X86_GREG_xBP; break;
6601 case 6: idxGstRegBase = X86_GREG_xSI; break;
6602 case 7: idxGstRegBase = X86_GREG_xDI; break;
6603 }
6604
6605 /*
6606 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6607 * the start of the function.
6608 */
6609 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6610 {
6611 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
6612 return off;
6613 }
6614
6615 /*
6616 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6617 */
6618 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6619 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6620 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6621 kIemNativeGstRegUse_ReadOnly);
6622 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6623 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6624 kIemNativeGstRegUse_ReadOnly);
6625
6626 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6627 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6628 {
6629 idxRegBase = idxRegIndex;
6630 idxRegIndex = UINT8_MAX;
6631 }
6632
6633#ifdef RT_ARCH_AMD64
6634 if (idxRegIndex == UINT8_MAX)
6635 {
6636 if (u32EffAddr == 0)
6637 {
6638 /* mov ret, base */
6639 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6640 }
6641 else
6642 {
6643 /* lea ret32, [base64 + disp32] */
6644 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6645 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6646 if (idxRegRet >= 8 || idxRegBase >= 8)
6647 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6648 pbCodeBuf[off++] = 0x8d;
6649 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6650 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6651 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6652 else
6653 {
6654 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6655 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6656 }
6657 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6658 if (bMod == X86_MOD_MEM4)
6659 {
6660 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6661 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6662 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6663 }
6664 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6665 }
6666 }
6667 else
6668 {
6669 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6670 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6671 if (idxRegBase == UINT8_MAX)
6672 {
6673 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
6674 if (idxRegRet >= 8 || idxRegIndex >= 8)
6675 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6676 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6677 pbCodeBuf[off++] = 0x8d;
6678 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6679 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6680 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6681 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6682 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6683 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6684 }
6685 else
6686 {
6687 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6688 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6689 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6690 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6691 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6692 pbCodeBuf[off++] = 0x8d;
6693 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6694 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6695 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6696 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6697 if (bMod != X86_MOD_MEM0)
6698 {
6699 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6700 if (bMod == X86_MOD_MEM4)
6701 {
6702 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6703 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6704 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6705 }
6706 }
6707 }
6708 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6709 }
6710
6711#elif defined(RT_ARCH_ARM64)
6712 if (u32EffAddr == 0)
6713 {
6714 if (idxRegIndex == UINT8_MAX)
6715 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6716 else if (idxRegBase == UINT8_MAX)
6717 {
6718 if (cShiftIndex == 0)
6719 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
6720 else
6721 {
6722 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6723 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
6724 }
6725 }
6726 else
6727 {
6728 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6729 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6730 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6731 }
6732 }
6733 else
6734 {
6735 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
6736 {
6737 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6738 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
6739 }
6740 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
6741 {
6742 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6743 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6744 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
6745 }
6746 else
6747 {
6748 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
6749 if (idxRegBase != UINT8_MAX)
6750 {
6751 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6752 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6753 }
6754 }
6755 if (idxRegIndex != UINT8_MAX)
6756 {
6757 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6758 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6759 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6760 }
6761 }
6762
6763#else
6764# error "port me"
6765#endif
6766
6767 if (idxRegIndex != UINT8_MAX)
6768 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6769 if (idxRegBase != UINT8_MAX)
6770 iemNativeRegFreeTmp(pReNative, idxRegBase);
6771 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6772 return off;
6773}
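
/*
 * The AMD64 branches above hand-assemble lea instructions.  The encoding rules
 * they follow are the standard x86-64 ones: a REX prefix only when a register
 * number is 8 or higher, a SIB byte whenever the base is rSP/r12, mod=00/01/10
 * chosen by the displacement size, and rBP/r13 never usable with mod=00.  The
 * standalone sketch below emits "lea r32, [base64 + disp]" under exactly those
 * rules; the function name and buffer handling are made up for illustration.
 */
#if 0 /* illustrative sketch only, never compiled */
#include <stdint.h>
#include <stddef.h>

/* Assemble "lea dst32, [base64 + disp]" into pb, returning the number of bytes written.
   dst and base are 0..15 (rAX..r15). */
static size_t AsmLea32BaseDisp(uint8_t *pb, unsigned dst, unsigned base, int32_t disp)
{
    size_t  off  = 0;
    uint8_t bRex = 0x40 | (dst >= 8 ? 0x04 /*REX.R*/ : 0) | (base >= 8 ? 0x01 /*REX.B*/ : 0);
    if (bRex != 0x40)
        pb[off++] = bRex;
    pb[off++] = 0x8d;                                       /* LEA Gv, M */

    uint8_t const bMod = disp == 0 && (base & 7) != 5 ? 0   /* no disp (rBP/r13 excluded) */
                       : disp == (int8_t)disp         ? 1   /* disp8 */
                       :                                2;  /* disp32 */
    if ((base & 7) != 4)                                    /* rSP/r12 would require a SIB byte */
        pb[off++] = (uint8_t)((bMod << 6) | ((dst & 7) << 3) | (base & 7));
    else
    {
        pb[off++] = (uint8_t)((bMod << 6) | ((dst & 7) << 3) | 4 /*SIB follows*/);
        pb[off++] = (uint8_t)((0 << 6) | (4 /*no index*/ << 3) | (base & 7));
    }
    if (bMod == 1)
        pb[off++] = (uint8_t)disp;
    else if (bMod == 2)
    {
        pb[off++] = (uint8_t)disp;
        pb[off++] = (uint8_t)(disp >> 8);
        pb[off++] = (uint8_t)(disp >> 16);
        pb[off++] = (uint8_t)(disp >> 24);
    }
    return off;
}
#endif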
6774
6775
6776#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6777 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6778 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6779
6780#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6781 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6782 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6783
6784#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6785 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6786 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
6787
6788/**
6789 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
6790 *
6791 * @returns New off.
6792 * @param pReNative The native recompiler state.
6793 * @param off The current offset into the native instruction buffer.
6794 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
6795 * bit 4 to REX.X. The two bits are part of the
6796 * REG sub-field, which isn't needed in this
6797 * function.
6798 * @param uSibAndRspOffset Two parts:
6799 * - The first 8 bits make up the SIB byte.
6800 * - The next 8 bits are the fixed RSP/ESP offset
6801 * in case of a pop [xSP].
6802 * @param u32Disp The displacement byte/word/dword, if any.
6803 * @param cbInstr The size of the fully decoded instruction. Used
6804 * for RIP relative addressing.
6805 * @param idxVarRet The result variable number.
6806 * @param f64Bit Whether to use a 64-bit or 32-bit address size
6807 * when calculating the address.
6808 *
6809 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
6810 */
6811DECL_INLINE_THROW(uint32_t)
6812iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
6813 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
6814{
6815 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6816
6817 /*
6818 * Special case the rip + disp32 form first.
6819 */
6820 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6821 {
6822 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6823 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
6824 kIemNativeGstRegUse_ReadOnly);
6825 if (f64Bit)
6826 {
6827#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6828 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr + (int64_t)pReNative->Core.offPc;
6829#else
6830 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
6831#endif
6832#ifdef RT_ARCH_AMD64
6833 if ((int32_t)offFinalDisp == offFinalDisp)
6834 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
6835 else
6836 {
6837 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
6838 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
6839 }
6840#else
6841 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6842#endif
6843 }
6844 else
6845 {
6846# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6847 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr + (int32_t)pReNative->Core.offPc;
6848# else
6849 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr;
6850# endif
6851 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6852 }
6853 iemNativeRegFreeTmp(pReNative, idxRegPc);
6854 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6855 return off;
6856 }
6857
6858 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
6859 int64_t i64EffAddr = 0;
6860 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6861 {
6862 case 0: break;
6863 case 1: i64EffAddr = (int8_t)u32Disp; break;
6864 case 2: i64EffAddr = (int32_t)u32Disp; break;
6865 default: AssertFailed();
6866 }
6867
6868 /* Get the register (or SIB) value. */
6869 uint8_t idxGstRegBase = UINT8_MAX;
6870 uint8_t idxGstRegIndex = UINT8_MAX;
6871 uint8_t cShiftIndex = 0;
6872 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6873 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6874 else /* SIB: */
6875 {
6876 /* index w/ scaling. */
6877 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6878 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6879 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6880 if (idxGstRegIndex == 4)
6881 {
6882 /* no index */
6883 cShiftIndex = 0;
6884 idxGstRegIndex = UINT8_MAX;
6885 }
6886
6887 /* base */
6888 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6889 if (idxGstRegBase == 4)
6890 {
6891 /* pop [rsp] hack */
6892 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
6893 }
6894 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6895 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6896 {
6897 /* mod=0 and base=5 -> disp32, no base reg. */
6898 Assert(i64EffAddr == 0);
6899 i64EffAddr = (int32_t)u32Disp;
6900 idxGstRegBase = UINT8_MAX;
6901 }
6902 }
6903
6904 /*
6905 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6906 * the start of the function.
6907 */
6908 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6909 {
6910 if (f64Bit)
6911 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6912 else
6913 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6914 return off;
6915 }
6916
6917 /*
6918 * Now emit code that calculates:
6919 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6920 * or if !f64Bit:
6921 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6922 */
6923 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6924 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6925 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6926 kIemNativeGstRegUse_ReadOnly);
6927 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6928 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6929 kIemNativeGstRegUse_ReadOnly);
6930
6931 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6932 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6933 {
6934 idxRegBase = idxRegIndex;
6935 idxRegIndex = UINT8_MAX;
6936 }
6937
6938#ifdef RT_ARCH_AMD64
6939 uint8_t bFinalAdj;
6940 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6941 bFinalAdj = 0; /* likely */
6942 else
6943 {
6944 /* pop [rsp] with a problematic disp32 value. Split out the
6945 RSP offset and add it separately afterwards (bFinalAdj). */
6946 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
6947 Assert(idxGstRegBase == X86_GREG_xSP);
6948 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6949 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6950 Assert(bFinalAdj != 0);
6951 i64EffAddr -= bFinalAdj;
6952 Assert((int32_t)i64EffAddr == i64EffAddr);
6953 }
6954 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6955//pReNative->pInstrBuf[off++] = 0xcc;
6956
6957 if (idxRegIndex == UINT8_MAX)
6958 {
6959 if (u32EffAddr == 0)
6960 {
6961 /* mov ret, base */
6962 if (f64Bit)
6963 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6964 else
6965 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6966 }
6967 else
6968 {
6969 /* lea ret, [base + disp32] */
6970 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6971 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6972 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
6973 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6974 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6975 | (f64Bit ? X86_OP_REX_W : 0);
6976 pbCodeBuf[off++] = 0x8d;
6977 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6978 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6979 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6980 else
6981 {
6982 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6983 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6984 }
6985 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6986 if (bMod == X86_MOD_MEM4)
6987 {
6988 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6989 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6990 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6991 }
6992 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6993 }
6994 }
6995 else
6996 {
6997 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6998 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6999 if (idxRegBase == UINT8_MAX)
7000 {
7001 /* lea ret, [(index64 << cShiftIndex) + disp32] */
7002 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
7003 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7004 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
7005 | (f64Bit ? X86_OP_REX_W : 0);
7006 pbCodeBuf[off++] = 0x8d;
7007 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
7008 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
7009 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7010 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7011 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7012 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7013 }
7014 else
7015 {
7016 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
7017 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
7018 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7019 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
7020 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
7021 | (f64Bit ? X86_OP_REX_W : 0);
7022 pbCodeBuf[off++] = 0x8d;
7023 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
7024 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
7025 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
7026 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
7027 if (bMod != X86_MOD_MEM0)
7028 {
7029 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7030 if (bMod == X86_MOD_MEM4)
7031 {
7032 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7033 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7034 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7035 }
7036 }
7037 }
7038 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7039 }
7040
7041 if (!bFinalAdj)
7042 { /* likely */ }
7043 else
7044 {
7045 Assert(f64Bit);
7046 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
7047 }
7048
7049#elif defined(RT_ARCH_ARM64)
7050 if (i64EffAddr == 0)
7051 {
7052 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7053 if (idxRegIndex == UINT8_MAX)
7054 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
7055 else if (idxRegBase != UINT8_MAX)
7056 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
7057 f64Bit, false /*fSetFlags*/, cShiftIndex);
7058 else
7059 {
7060 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
7061 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
7062 }
7063 }
7064 else
7065 {
7066 if (f64Bit)
7067 { /* likely */ }
7068 else
7069 i64EffAddr = (int32_t)i64EffAddr;
7070
7071 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
7072 {
7073 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7074 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
7075 }
7076 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
7077 {
7078 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7079 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
7080 }
7081 else
7082 {
7083 if (f64Bit)
7084 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
7085 else
7086 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
7087 if (idxRegBase != UINT8_MAX)
7088 {
7089 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7090 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
7091 }
7092 }
7093 if (idxRegIndex != UINT8_MAX)
7094 {
7095 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7096 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
7097 f64Bit, false /*fSetFlags*/, cShiftIndex);
7098 }
7099 }
7100
7101#else
7102# error "port me"
7103#endif
7104
7105 if (idxRegIndex != UINT8_MAX)
7106 iemNativeRegFreeTmp(pReNative, idxRegIndex);
7107 if (idxRegBase != UINT8_MAX)
7108 iemNativeRegFreeTmp(pReNative, idxRegBase);
7109 iemNativeVarRegisterRelease(pReNative, idxVarRet);
7110 return off;
7111}
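
/*
 * The mod=00/r/m=101 special case handled first in the function above is
 * RIP-relative addressing: the disp32 is relative to the address of the *next*
 * instruction, which is why cbInstr enters the computation (and why any
 * delayed PC offset has to be folded in as well).  A minimal numeric sketch of
 * that architectural rule with made-up values:
 */
#if 0 /* illustrative sketch only, never compiled */
#include <stdint.h>
#include <assert.h>

/* RIP-relative effective address: disp32 is relative to the end of the instruction. */
static uint64_t CalcRipRelAddr(uint64_t uRipOfInstr, uint8_t cbInstr, int32_t i32Disp)
{
    return uRipOfInstr + cbInstr + (int64_t)i32Disp;
}

int main(void)
{
    /* e.g. a 7 byte "mov rax, [rip+0x10]" at 0x401000 references 0x401017. */
    assert(CalcRipRelAddr(UINT64_C(0x401000), 7, 0x10) == UINT64_C(0x401017));
    return 0;
}
#endif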
7112
7113
7114/*********************************************************************************************************************************
7115* Memory fetches and stores common *
7116*********************************************************************************************************************************/
7117
7118typedef enum IEMNATIVEMITMEMOP
7119{
7120 kIemNativeEmitMemOp_Store = 0,
7121 kIemNativeEmitMemOp_Fetch,
7122 kIemNativeEmitMemOp_Fetch_Zx_U16,
7123 kIemNativeEmitMemOp_Fetch_Zx_U32,
7124 kIemNativeEmitMemOp_Fetch_Zx_U64,
7125 kIemNativeEmitMemOp_Fetch_Sx_U16,
7126 kIemNativeEmitMemOp_Fetch_Sx_U32,
7127 kIemNativeEmitMemOp_Fetch_Sx_U64
7128} IEMNATIVEMITMEMOP;
7129
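
/*
 * The Fetch_Zx_* and Fetch_Sx_* members distinguish zero-extending and
 * sign-extending loads into a wider destination.  In plain C the difference is
 * just whether the fetched value passes through an unsigned or a signed
 * intermediate type, as the tiny sketch below shows (hypothetical helper
 * names, not the recompiler's fetch helpers):
 */
#if 0 /* illustrative sketch only, never compiled */
#include <stdint.h>
#include <assert.h>

/* Widen a 16-bit memory value to 64 bits, with and without sign extension. */
static uint64_t FetchU16ZxU64(uint16_t uVal) { return (uint64_t)uVal; }                    /* Zx */
static uint64_t FetchU16SxU64(uint16_t uVal) { return (uint64_t)(int64_t)(int16_t)uVal; }  /* Sx */

int main(void)
{
    assert(FetchU16ZxU64(0x8000) == UINT64_C(0x0000000000008000));
    assert(FetchU16SxU64(0x8000) == UINT64_C(0xffffffffffff8000));
    return 0;
}
#endif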
7130/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
7131 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
7132 * (with iSegReg = UINT8_MAX). */
7133template<uint8_t const a_cbMem, uint32_t const a_fAlignMaskAndCtl, IEMNATIVEMITMEMOP const a_enmOp, bool a_fFlat = false>
7134DECL_INLINE_THROW(uint32_t)
7135iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
7136 uint8_t idxVarGCPtrMem, uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
7137{
7138 /*
7139 * Assert sanity.
7140 */
7141 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7142 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7143 Assert( a_enmOp != kIemNativeEmitMemOp_Store
7144 || pVarValue->enmKind == kIemNativeVarKind_Immediate
7145 || pVarValue->enmKind == kIemNativeVarKind_Stack);
7146 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7147 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
7148 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
7149 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
7150 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7151 Assert(!a_fFlat ? iSegReg < 6 : iSegReg == UINT8_MAX);
7152 AssertCompile( a_cbMem == 1 || a_cbMem == 2 || a_cbMem == 4 || a_cbMem == 8
7153 || a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U));
7154 AssertCompile(!(a_fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)));
7155 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7156#ifdef VBOX_STRICT
7157 if (iSegReg == UINT8_MAX)
7158 {
7159 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7160 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7161 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7162 switch (a_cbMem)
7163 {
7164 case 1:
7165 Assert( pfnFunction
7166 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
7167 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7168 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7169 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7170 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7171 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
7172 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
7173 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
7174 : UINT64_C(0xc000b000a0009000) ));
7175 Assert(!a_fAlignMaskAndCtl);
7176 break;
7177 case 2:
7178 Assert( pfnFunction
7179 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
7180 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7181 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7182 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7183 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
7184 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
7185 : UINT64_C(0xc000b000a0009000) ));
7186 Assert(a_fAlignMaskAndCtl <= 1);
7187 break;
7188 case 4:
7189 Assert( pfnFunction
7190 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
7191 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7192 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7193 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
7194 : UINT64_C(0xc000b000a0009000) ));
7195 Assert(a_fAlignMaskAndCtl <= 3);
7196 break;
7197 case 8:
7198 Assert( pfnFunction
7199 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
7200 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
7201 : UINT64_C(0xc000b000a0009000) ));
7202 Assert(a_fAlignMaskAndCtl <= 7);
7203 break;
7204 case sizeof(RTUINT128U):
7205 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7206 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
7207 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7208 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
7209 || ( a_enmOp == kIemNativeEmitMemOp_Store
7210 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7211 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
7212 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7213 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7214 ? (a_fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (a_fAlignMaskAndCtl & 0xff) == 15
7215 : a_fAlignMaskAndCtl <= 15U);
7216 break;
7217 case sizeof(RTUINT256U):
7218 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7219 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
7220 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
7221 || ( a_enmOp == kIemNativeEmitMemOp_Store
7222 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
7223 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
7224 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx
7225 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx
7226 ? (a_fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (a_fAlignMaskAndCtl & 0xff) == 31
7227 : a_fAlignMaskAndCtl <= 31);
7228 break;
7229 }
7230 }
7231 else
7232 {
7233 Assert(iSegReg < 6);
7234 switch (a_cbMem)
7235 {
7236 case 1:
7237 Assert( pfnFunction
7238 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
7239 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
7240 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7241 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7242 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7243 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
7244 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
7245 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
7246 : UINT64_C(0xc000b000a0009000) ));
7247 Assert(!a_fAlignMaskAndCtl);
7248 break;
7249 case 2:
7250 Assert( pfnFunction
7251 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
7252 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
7253 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7254 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7255 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
7256 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
7257 : UINT64_C(0xc000b000a0009000) ));
7258 Assert(a_fAlignMaskAndCtl <= 1);
7259 break;
7260 case 4:
7261 Assert( pfnFunction
7262 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
7263 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
7264 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
7265 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
7266 : UINT64_C(0xc000b000a0009000) ));
7267 Assert(a_fAlignMaskAndCtl <= 3);
7268 break;
7269 case 8:
7270 Assert( pfnFunction
7271 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
7272 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
7273 : UINT64_C(0xc000b000a0009000) ));
7274 Assert(a_fAlignMaskAndCtl <= 7);
7275 break;
7276 case sizeof(RTUINT128U):
7277 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7278 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
7279 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7280 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
7281 || ( a_enmOp == kIemNativeEmitMemOp_Store
7282 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7283 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
7284 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7285 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7286 ? (a_fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (a_fAlignMaskAndCtl & 0xff) == 15
7287 : a_fAlignMaskAndCtl <= 15);
7288 break;
7289 case sizeof(RTUINT256U):
7290 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7291 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
7292 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
7293 || ( a_enmOp == kIemNativeEmitMemOp_Store
7294 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
7295 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
7296 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx
7297 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx
7298 ? (a_fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (a_fAlignMaskAndCtl & 0xff) == 31
7299 : a_fAlignMaskAndCtl <= 31);
7300 break;
7301 }
7302 }
7303#endif
7304
7305#ifdef VBOX_STRICT
7306 /*
7307 * Check that the fExec flags we've got make sense.
7308 */
7309 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7310#endif
7311
7312 /*
7313 * To keep things simple we have to commit any pending writes first as we
7314 * may end up making calls.
7315 */
7316 /** @todo we could postpone this till we make the call and reload the
7317 * registers after returning from the call. Not sure if that's sensible or
7318 * not, though. */
7319#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7320 off = iemNativeRegFlushPendingWrites(pReNative, off);
7321#else
7322 /* The program counter is treated differently for now. */
7323 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
7324#endif
7325
7326#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7327 /*
7328 * Move/spill/flush stuff out of call-volatile registers.
7329 * This is the easy way out. We could contain this to the tlb-miss branch
7330 * by saving and restoring active stuff here.
7331 */
7332 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7333#endif
7334
7335 /*
7336 * Define labels and allocate the result register (trying for the return
7337 * register if we can).
7338 */
7339 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7340 RT_CONSTEXPR
7341 bool const fSimdRegValues = a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U);
7342 uint8_t const idxRegValueFetch = a_enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7343 : fSimdRegValues
7344 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off)
7345 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7346 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7347 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7348 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, a_fFlat, a_cbMem, offDisp);
7349 uint8_t const idxRegValueStore = a_enmOp != kIemNativeEmitMemOp_Store
7350 || TlbState.fSkip
7351 || pVarValue->enmKind == kIemNativeVarKind_Immediate
7352 ? UINT8_MAX
7353 : fSimdRegValues
7354 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7355 : iemNativeVarRegisterAcquireInited(pReNative, idxVarValue, &off);
7356 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7357 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7358 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7359 : UINT32_MAX;
7360
7361 /*
7362 * Jump to the TLB lookup code.
7363 */
7364 if (!TlbState.fSkip)
7365 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7366
7367 /*
7368 * TlbMiss:
7369 *
7370 * Call helper to do the fetching.
7371 * We flush all guest register shadow copies here.
7372 */
7373 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
7374
7375#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7376 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7377#else
7378 RT_NOREF(idxInstr);
7379#endif
7380
7381#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7382 if (pReNative->Core.offPc)
7383 {
7384 /*
7385 * Update the program counter but restore it at the end of the TlbMiss branch.
7386 * This should allow delaying more program counter updates for the TlbLookup and hit paths
7387 * which are hopefully much more frequent, reducing the number of memory accesses.
7388 */
7389 /* Allocate a temporary PC register. */
7390/** @todo r=bird: This would technically need to be done up front as it's a register allocation. */
7391 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7392 kIemNativeGstRegUse_ForUpdate);
7393
7394 /* Perform the addition and store the result. */
7395 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7396 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
7397# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
7398 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
7399# endif
7400
7401 /* Free and flush the PC register. */
7402 iemNativeRegFreeTmp(pReNative, idxPcReg);
7403 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7404 }
7405#endif
7406
7407#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7408 /* Save variables in volatile registers. */
7409 uint32_t const fHstGprsNotToSave = TlbState.getRegsNotToSave()
7410 | (idxRegMemResult < 32 ? RT_BIT_32(idxRegMemResult) : 0)
7411#ifdef _MSC_VER /* Workaround for stupid compiler (2019). */
7412 | (idxRegValueFetch < 32 && !fSimdRegValues ? RT_BIT_32(idxRegValueFetch & 0x1f) : 0);
7413#else
7414 | (idxRegValueFetch < 32 && !fSimdRegValues ? RT_BIT_32(idxRegValueFetch) : 0);
7415#endif
7416 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstGprsNotToSave);
7417#endif
7418
7419 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
7420 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
7421 if RT_CONSTEXPR_IF(fSimdRegValues)
7422 {
7423 /*
7424 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
7425 *
7426 * Note! A host register was assigned to the variable for the TlbLookup case above
7427 * and it must not be freed here, or the value will not be synced into that register
7428 * further down the road because the variable no longer knows it has a register assigned.
7429 *
7430 * Note! For loads it is not required to sync what is in the assigned register with the stack slot
7431 * as it will be overwritten anyway.
7432 */
7433 uint8_t const idxRegArgValue = a_fFlat ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7434 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
7435 a_enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
7436 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7437 }
7438 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitMemOp_Store)
7439 {
7440 uint8_t const idxRegArgValue = a_fFlat ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7441 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
7442#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7443 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7444#else
7445 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
7446 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7447#endif
7448 }
7449
7450 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
7451 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
7452#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7453 fVolGregMask);
7454#else
7455                                                     fVolGregMask, true /*fSpilledVarsInVolatileRegs*/);
7456#endif
7457
7458 if RT_CONSTEXPR_IF(!a_fFlat)
7459 {
7460 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
7461 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
7462 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
7463 }
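    /*
     * Call layout being assembled here, as a sketch inferred from the argument setup above and
     * below (not from the helper prototypes themselves):
     *
     *      flat:       pfnFunction(pVCpu, GCPtrMem[, uValue / &SimdValue])
     *      segmented:  pfnFunction(pVCpu, GCPtrMem, iSegReg[, uValue / &SimdValue])
     *
     * i.e. ARG0 = pVCpu, ARG1 = GCPtrMem (+ offDisp), and for stores the value (or the SIMD value
     * reference) goes into ARG2 when flat and ARG3 when segmented.
     */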
7464
7465#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
7466 /* Do delayed EFLAGS calculations. */
7467 if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitMemOp_Store || fSimdRegValues)
7468 {
7469 if RT_CONSTEXPR_IF(a_fFlat)
7470 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7471 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
7472 fHstGprsNotToSave);
7473 else
7474 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7475 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
7476 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG)>(pReNative, off, &TlbState,
7477 fHstGprsNotToSave);
7478 }
7479 else if RT_CONSTEXPR_IF(a_fFlat)
7480 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState,
7481 fHstGprsNotToSave);
7482 else
7483 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7484 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
7485 fHstGprsNotToSave);
7486#endif
7487
7488 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7489 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7490
7491 /* Done setting up parameters, make the call. */
7492 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
7493
7494 /*
7495 * Put the result in the right register if this is a fetch.
7496 */
7497 if RT_CONSTEXPR_IF(a_enmOp != kIemNativeEmitMemOp_Store)
7498 {
7499 if RT_CONSTEXPR_IF(fSimdRegValues)
7500 {
7501 Assert(a_enmOp == kIemNativeEmitMemOp_Fetch);
7502
7503 /* Sync the value on the stack with the host register assigned to the variable. */
7504 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
7505 }
7506 else
7507 {
7508 Assert(idxRegValueFetch == pVarValue->idxReg);
7509 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
7510 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
7511 }
7512 }
7513
7514#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7515 /* Restore variables and guest shadow registers to volatile registers. */
7516 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstGprsNotToSave);
7517 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7518#endif
7519
7520#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7521 if (pReNative->Core.offPc)
7522 {
7523 /*
7524 * Time to restore the program counter to its original value.
7525 */
7526 /* Allocate a temporary PC register. */
7527 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7528 kIemNativeGstRegUse_ForUpdate);
7529
7530 /* Restore the original value. */
7531 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7532 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
7533
7534 /* Free and flush the PC register. */
7535 iemNativeRegFreeTmp(pReNative, idxPcReg);
7536 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7537 }
7538#endif
7539
7540#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7541 if (!TlbState.fSkip)
7542 {
7543 /* end of TlbMiss - Jump to the done label. */
7544 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7545 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7546
7547 /*
7548 * TlbLookup:
7549 */
7550 off = iemNativeEmitTlbLookup<true, a_cbMem, a_fAlignMaskAndCtl,
7551 a_enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ
7552 >(pReNative, off, &TlbState, iSegReg, idxLabelTlbLookup, idxLabelTlbMiss,
7553 idxRegMemResult, offDisp);
7554
7555 /*
7556 * Emit code to do the actual storing / fetching.
7557 */
7558 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7559# ifdef IEM_WITH_TLB_STATISTICS
7560 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7561 a_enmOp == kIemNativeEmitMemOp_Store
7562                                                   ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
7563                                                   : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
7564# endif
7565 switch (a_enmOp)
7566 {
7567 case kIemNativeEmitMemOp_Store:
7568 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
7569 {
7570 switch (a_cbMem)
7571 {
7572 case 1:
7573 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7574 break;
7575 case 2:
7576 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7577 break;
7578 case 4:
7579 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7580 break;
7581 case 8:
7582 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7583 break;
7584 case sizeof(RTUINT128U):
7585 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7586 break;
7587 case sizeof(RTUINT256U):
7588 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7589 break;
7590 default:
7591 AssertFailed();
7592 }
7593 }
7594 else
7595 {
7596 switch (a_cbMem)
7597 {
7598 case 1:
7599 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
7600 idxRegMemResult, TlbState.idxReg1);
7601 break;
7602 case 2:
7603 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7604 idxRegMemResult, TlbState.idxReg1);
7605 break;
7606 case 4:
7607 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7608 idxRegMemResult, TlbState.idxReg1);
7609 break;
7610 case 8:
7611 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
7612 idxRegMemResult, TlbState.idxReg1);
7613 break;
7614 default:
7615 AssertFailed();
7616 }
7617 }
7618 break;
7619
7620 case kIemNativeEmitMemOp_Fetch:
7621 case kIemNativeEmitMemOp_Fetch_Zx_U16:
7622 case kIemNativeEmitMemOp_Fetch_Zx_U32:
7623 case kIemNativeEmitMemOp_Fetch_Zx_U64:
7624 switch (a_cbMem)
7625 {
7626 case 1:
7627 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7628 break;
7629 case 2:
7630 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7631 break;
7632 case 4:
7633 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7634 break;
7635 case 8:
7636 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7637 break;
7638 case sizeof(RTUINT128U):
7639 /*
7640 * No need to sync back the register with the stack, this is done by the generic variable handling
7641 * code if there is a register assigned to a variable and the stack must be accessed.
7642 */
7643 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7644 break;
7645 case sizeof(RTUINT256U):
7646 /*
7647 * No need to sync back the register with the stack, this is done by the generic variable handling
7648 * code if there is a register assigned to a variable and the stack must be accessed.
7649 */
7650 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7651 break;
7652 default:
7653 AssertFailed();
7654 }
7655 break;
7656
7657 case kIemNativeEmitMemOp_Fetch_Sx_U16:
7658 Assert(a_cbMem == 1);
7659 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7660 break;
7661
7662 case kIemNativeEmitMemOp_Fetch_Sx_U32:
7663 Assert(a_cbMem == 1 || a_cbMem == 2);
7664 if (a_cbMem == 1)
7665 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7666 else
7667 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7668 break;
7669
7670 case kIemNativeEmitMemOp_Fetch_Sx_U64:
7671 switch (a_cbMem)
7672 {
7673 case 1:
7674 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7675 break;
7676 case 2:
7677 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7678 break;
7679 case 4:
7680 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7681 break;
7682 default:
7683 AssertFailed();
7684 }
7685 break;
7686
7687 default:
7688 AssertFailed();
7689 }
7690
7691 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7692
7693 /*
7694 * TlbDone:
7695 */
7696 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7697
7698 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7699
7700# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7701 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7702 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7703# endif
7704 }
7705#else
7706 RT_NOREF(idxLabelTlbMiss);
7707#endif
7708
7709 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
7710 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7711 return off;
7712}
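/*
 * Shape of the code emitted by the worker above, as a rough sketch of the resulting control flow
 * (not literal output):
 *
 *      inline TLB lookup --- hit  --> inline load/store via idxRegMemResult
 *                       \--- miss --> save volatiles, call pfnFunction, restore volatiles/shadows
 *      both paths then meet at the TlbDone label.
 */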
7713
7714
7715
7716/*********************************************************************************************************************************
7717* Memory fetches (IEM_MEM_FETCH_XXX). *
7718*********************************************************************************************************************************/
7719
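/*
 * Usage illustration (hedged; the exact MC block syntax lives in the decoder templates, not in
 * this file): these wrappers get expanded inside the generated MC blocks, along the lines of
 *
 *      IEM_MC_LOCAL(uint16_t, u16Value);
 *      IEM_MC_FETCH_MEM_U16(u16Value, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
 *
 * which, when recompiling natively, becomes a call to the common worker above with
 * a_cbMem = sizeof(uint16_t) and a_enmOp = kIemNativeEmitMemOp_Fetch.
 */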
7720/* 8-bit segmented: */
7721#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
7722 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch>( \
7723 pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7724
7725#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7726 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16>( \
7727 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7728
7729#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7730 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32>( \
7731 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7732
7733#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7734 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64>( \
7735 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7736
7737#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7738 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16>(\
7739 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7740
7741#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7742 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7743 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7744
7745#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7746 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64>(\
7747 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7748
7749/* 16-bit segmented: */
7750#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7751 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7752 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7753
7754#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7755 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7756 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7757
7758#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7759 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32>(\
7760 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7761
7762#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7763 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64>(\
7764 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7765
7766#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7767 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7768 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7769
7770#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7771 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64>(\
7772 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7773
7774
7775/* 32-bit segmented: */
7776#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7777 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7778 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7779
7780#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7781 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7782 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7783
7784#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7785 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64>(\
7786 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7787
7788#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7789 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64>(\
7790 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7791
7792#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
7793 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7794 pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7795
7796#define IEM_MC_FETCH_MEM_I16_DISP(a_i16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7797 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7798 pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr, \
7799 a_offDisp)
7800
7801#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
7802 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7803 pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7804
7805#define IEM_MC_FETCH_MEM_I32_DISP(a_i32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7806 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7807 pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7808
7809#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
7810 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7811 pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7812
7813AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
7814#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
7815 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch>(\
7816 pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7817
7818
7819/* 64-bit segmented: */
7820#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7821 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7822 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7823
7824AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
7825#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
7826 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch>(\
7827 pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7828
7829
7830/* 8-bit flat: */
7831#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
7832 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, true>(\
7833 pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7834
7835#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
7836 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, true>(\
7837 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7838
7839#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
7840 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, true>(\
7841 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7842
7843#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
7844 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, true>(\
7845 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7846
7847#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
7848 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, true>(\
7849 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7850
7851#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
7852 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7853 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7854
7855#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7856 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, true>(\
7857 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7858
7859
7860/* 16-bit flat: */
7861#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7862 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7863 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7864
7865#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7866 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7867 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7868
7869#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
7870 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, true>(\
7871 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7872
7873#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
7874 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, true>(\
7875 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7876
7877#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
7878 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7879 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7880
7881#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
7882 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, true>(\
7883 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7884
7885/* 32-bit flat: */
7886#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7887 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7888 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7889
7890#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
7891 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7892 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7893
7894#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
7895 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, true>(\
7896 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7897
7898#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
7899 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, true>(\
7900 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7901
7902#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
7903 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7904 pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7905
7906#define IEM_MC_FETCH_MEM_FLAT_I16_DISP(a_i16Dst, a_GCPtrMem, a_offDisp) \
7907 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7908 pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7909
7910#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
7911 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7912 pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7913
7914#define IEM_MC_FETCH_MEM_FLAT_I32_DISP(a_i32Dst, a_GCPtrMem, a_offDisp) \
7915 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7916 pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7917
7918#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
7919 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7920 pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7921
7922#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
7923 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7924 pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7925
7926
7927/* 64-bit flat: */
7928#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7929 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7930 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7931
7932#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
7933 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7934 pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7935
7936
7937/* 128-bit segmented: */
7938#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
7939 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch>(\
7940 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
7941
7942#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
7943 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
7944 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7945 kIemNativeEmitMemOp_Fetch>(\
7946 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7947
7948AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7949#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
7950 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), \
7951 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7952 kIemNativeEmitMemOp_Fetch>(\
7953 pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7954
7955#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7956 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch>(\
7957 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7958
7959#define IEM_MC_FETCH_MEM_XMM_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7960 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch>(\
7961 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7962
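/*
 * Note on the fAlignMaskAndCtl template argument (going by the names; the authoritative handling
 * is in the TLB lookup emitter): the low bits carry the alignment mask (sizeof(type) - 1), while
 * IEM_MEMMAP_F_ALIGN_GP and IEM_MEMMAP_F_ALIGN_SSE request the stricter #GP-style alignment
 * checking used by the aligned SSE/AVX forms; the *_NO_AC variants pass only the plain mask.
 */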
7963
7964/* 128-bit flat: */
7965#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
7966 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7967 pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
7968
7969#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
7970 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
7971 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7972 kIemNativeEmitMemOp_Fetch, true>(\
7973 pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7974
7975#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
7976 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), \
7977 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7978 kIemNativeEmitMemOp_Fetch, true>(\
7979 pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7980
7981#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
7982 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7983 pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7984
7985#define IEM_MC_FETCH_MEM_FLAT_XMM_NO_AC(a_uXmmDst, a_GCPtrMem) \
7986 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7987 pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7988
7989/* 256-bit segmented: */
7990#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
7991 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch>(\
7992 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7993
7994#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7995 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch>(\
7996 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7997
7998#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
7999 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8000 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, \
8001 kIemNativeEmitMemOp_Fetch>(\
8002 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
8003
8004#define IEM_MC_FETCH_MEM_YMM_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
8005 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch>(\
8006 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8007
8008
8009/* 256-bit flat: */
8010#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
8011 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8012 pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8013
8014#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
8015 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8016 pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8017
8018#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
8019 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8020 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, \
8021 kIemNativeEmitMemOp_Fetch, true>(\
8022 pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
8023
8024#define IEM_MC_FETCH_MEM_FLAT_YMM_NO_AC(a_uYmmDst, a_GCPtrMem) \
8025 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8026 pReNative, off, a_uYmmDst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8027
8028
8029
8030/*********************************************************************************************************************************
8031* Memory stores (IEM_MEM_STORE_XXX). *
8032*********************************************************************************************************************************/
8033
8034#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
8035 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store>(\
8036 pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8037
8038#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
8039 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store>(\
8040 pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8041
8042#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
8043 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store>(\
8044 pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8045
8046#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
8047 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store>(\
8048 pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8049
8050
8051#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
8052 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, true>(\
8053 pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8054
8055#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
8056 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, true>(\
8057 pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8058
8059#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
8060 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, true>(\
8061 pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8062
8063#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
8064 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, true>(\
8065 pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8066
8067
8068#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
8069 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint8_t)>(\
8070 pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8071
8072#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
8073 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint16_t)>(\
8074 pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8075
8076#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
8077 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint32_t)>(\
8078 pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8079
8080#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
8081 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint64_t)>(\
8082 pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8083
8084
8085#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
8086 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint8_t), true>(\
8087 pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8088
8089#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
8090 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint16_t), true>(\
8091 pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8092
8093#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
8094 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint32_t), true>(\
8095 pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8096
8097#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
8098 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint64_t), true>(\
8099 pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8100
8101/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
8102 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
8103template<uint8_t const a_cbMem, bool a_fFlat = false>
8104DECL_INLINE_THROW(uint32_t)
8105iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
8106 uint8_t idxVarGCPtrMem, uintptr_t pfnFunction, uint8_t idxInstr)
8107{
8108 /*
8109 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
8110 * to do the grunt work.
8111 */
8112 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, a_cbMem, uValueConst);
8113 off = iemNativeEmitMemFetchStoreDataCommon<a_cbMem, a_cbMem - 1,
8114 kIemNativeEmitMemOp_Store,
8115 a_fFlat>(pReNative, off, idxVarConstValue, iSegReg,
8116 idxVarGCPtrMem, pfnFunction, idxInstr);
8117 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
8118 return off;
8119}
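/*
 * Expansion example, derived mechanically from the macros above: the statement
 * IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, 0x1234) becomes
 *
 *      off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint16_t)>(
 *                pReNative, off, 0x1234, a_iSeg, a_GCPtrMem,
 *                (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr);
 *
 * i.e. the constant is wrapped in a temporary immediate variable and fed through the generic
 * fetch/store worker as an ordinary store.
 */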
8120
8121
8122#define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
8123 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
8124 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8125 kIemNativeEmitMemOp_Store>(\
8126 pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
8127
8128#define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
8129 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store>(\
8130 pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
8131
8132#define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
8133 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store>(\
8134 pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
8135
8136#define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
8137 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8138 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, \
8139 kIemNativeEmitMemOp_Store>(\
8140 pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8141
8142
8143#define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
8144 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
8145 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8146 kIemNativeEmitMemOp_Store, true>(\
8147 pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, \
8148 pCallEntry->idxInstr)
8149
8150#define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
8151 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, true>(\
8152 pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
8153
8154#define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
8155 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, true>(\
8156 pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
8157
8158#define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
8159 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8160 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
8161 true>(\
8162 pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8163
8164
8165
8166/*********************************************************************************************************************************
8167* Stack Accesses. *
8168*********************************************************************************************************************************/
8169#define IEM_MC_PUSH_U16(a_u16Value) \
8170 off = iemNativeEmitStackPush<16, 0, 0>(pReNative, off, a_u16Value, (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
8171#define IEM_MC_PUSH_U32(a_u32Value) \
8172 off = iemNativeEmitStackPush<32, 0, 0>(pReNative, off, a_u32Value, (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
8173#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
8174 off = iemNativeEmitStackPush<32, 0, 1>(pReNative, off, a_uSegVal, (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
8175#define IEM_MC_PUSH_U64(a_u64Value) \
8176 off = iemNativeEmitStackPush<64, 0, 0>(pReNative, off, a_u64Value, (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
8177
8178#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
8179 off = iemNativeEmitStackPush<16, 32, 0>(pReNative, off, a_u16Value, (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8180#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
8181 off = iemNativeEmitStackPush<32, 32, 0>(pReNative, off, a_u32Value, (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
8182#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
8183 off = iemNativeEmitStackPush<32, 32, 1>(pReNative, off, a_u32Value, (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
8184
8185#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
8186 off = iemNativeEmitStackPush<16, 64, 0>(pReNative, off, a_u16Value, (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8187#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
8188 off = iemNativeEmitStackPush<64, 64, 0>(pReNative, off, a_u64Value, (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
8189
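/*
 * Template argument cheat sheet for the push emitter below (read straight off the macros above):
 * <a_cBitsVar, a_cBitsFlat, a_fIsSegReg>, e.g. IEM_MC_FLAT64_PUSH_U16 instantiates <16, 64, 0>,
 * i.e. a 16-bit value pushed onto a flat 64-bit stack, while IEM_MC_PUSH_U32_SREG instantiates
 * <32, 0, 1>, i.e. a segment register pushed as a 32-bit item onto a non-flat stack.
 */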
8190
8191/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
8192template<uint8_t const a_cBitsVar, uint8_t const a_cBitsFlat, bool a_fIsSegReg = false>
8193DECL_INLINE_THROW(uint32_t)
8194iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uintptr_t pfnFunction, uint8_t idxInstr)
8195{
8196 /*
8197 * Assert sanity.
8198 */
8199 AssertCompile(a_cBitsVar == 16 || a_cBitsVar == 32 || a_cBitsVar == 64);
8200 AssertCompile(a_cBitsFlat == 0 || a_cBitsFlat == 32 || a_cBitsFlat == 64);
8201 AssertCompile(!a_fIsSegReg || a_cBitsVar < 64);
8202 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
8203 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
8204#ifdef VBOX_STRICT
8205 uint32_t const cTmplArgs = RT_MAKE_U32_FROM_U8(a_cBitsVar, a_cBitsFlat, a_fIsSegReg, 0);
8206 if (a_cBitsFlat != 0)
8207 {
8208 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8209 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8210 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8211 Assert( pfnFunction
8212 == ( cTmplArgs == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8213 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
8214 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
8215 : cTmplArgs == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8216 : cTmplArgs == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
8217 : UINT64_C(0xc000b000a0009000) ));
8218 }
8219 else
8220 Assert( pfnFunction
8221 == ( cTmplArgs == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
8222 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
8223 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
8224 : cTmplArgs == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
8225 : UINT64_C(0xc000b000a0009000) ));
8226#endif
8227
8228#ifdef VBOX_STRICT
8229 /*
8230 * Check that the fExec flags we've got make sense.
8231 */
8232 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8233#endif
8234
8235 /*
8236 * To keep things simple we have to commit any pending writes first as we
8237 * may end up making calls.
8238 */
8239 /** @todo we could postpone this till we make the call and reload the
8240 * registers after returning from the call. Not sure if that's sensible or
8241 * not, though. */
8242 off = iemNativeRegFlushPendingWrites(pReNative, off);
8243
8244 /*
8245 * First we calculate the new RSP and the effective stack pointer value.
8246 * For 64-bit mode and flat 32-bit these two are the same.
8247      * (Code structure is very similar to that of POP.)
8248 */
8249 RT_CONSTEXPR
8250 uint8_t const cbMem = a_cBitsVar / 8;
8251 bool const fIsIntelSeg = a_fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
8252 uint8_t const cbMemAccess = !a_fIsSegReg || !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
8253 ? cbMem : sizeof(uint16_t);
8254 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8255 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8256 uint8_t const idxRegEffSp = a_cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8257 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8258 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
8259 {
8260 Assert(idxRegEffSp == idxRegRsp);
8261 Assert(a_cBitsFlat == 32 || a_cBitsFlat == 64);
8262 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8263 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
8264 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
8265 else
8266 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
8267 }
8268 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8269 {
8270 Assert(idxRegEffSp != idxRegRsp);
8271 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8272 kIemNativeGstRegUse_ReadOnly);
8273#ifdef RT_ARCH_AMD64
8274 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8275#else
8276 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8277#endif
8278 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8279 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8280 offFixupJumpToUseOtherBitSp = off;
8281 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8282 {
8283 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8284 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8285 }
8286 else
8287 {
8288 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8289 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8290 }
8291 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8292 }
8293 /* SpUpdateEnd: */
8294 uint32_t const offLabelSpUpdateEnd = off;
8295
8296 /*
8297      * Okay, now prepare for the TLB lookup and jump to the TlbLookup code (or to the
8298      * TlbMiss label if we're skipping the lookup).
8299 */
8300 uint8_t const iSegReg = a_cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8301 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
8302 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8303 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8304 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8305 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8306 : UINT32_MAX;
8307 uint8_t const idxRegValue = !TlbState.fSkip
8308 && pVarValue->enmKind != kIemNativeVarKind_Immediate
8309 ? iemNativeVarRegisterAcquireInitedWithPref(pReNative, idxVarValue, &off,
8310 IEMNATIVE_CALL_ARG2_GREG)
8311 : UINT8_MAX;
8312 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
8313
8314
8315 if (!TlbState.fSkip)
8316 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8317 else
8318 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8319
8320 /*
8321 * Use16BitSp:
8322 */
8323 if RT_CONSTEXPR_IF(a_cBitsFlat == 0)
8324 {
8325#ifdef RT_ARCH_AMD64
8326 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8327#else
8328 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8329#endif
8330 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8331 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8332 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8333 else
8334 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8335 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8336 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8337 }
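    /*
     * What the Use16BitSp path computes at runtime, as a standalone sketch (a host-independent
     * pseudo-equivalent of the emitted instructions, not the emitter code itself):
     *
     *      uint16_t const uNewSp = (uint16_t)(uRsp - cbMem);            // 16-bit wrap-around
     *      uint64_t const uEffSp = uNewSp;                              // zero-extended SS-relative address
     *      uRsp = (uRsp & ~(uint64_t)UINT16_MAX) | uNewSp;              // only SP, the low word, is updated
     */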
8338
8339 /*
8340 * TlbMiss:
8341 *
8342 * Call helper to do the pushing.
8343 */
8344 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8345
8346#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8347 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8348#else
8349 RT_NOREF(idxInstr);
8350#endif
8351
8352 /* Save variables in volatile registers. */
8353 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8354 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8355 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
8356 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
8357 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8358
8359 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
8360 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
8361 {
8362 /* Swap them using ARG0 as temp register: */
8363 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
8364 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
8365 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
8366 }
8367 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
8368 {
8369 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
8370 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
8371 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8372
8373 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
8374 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8375 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8376 }
8377 else
8378 {
8379 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
8380 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8381
8382 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
8383 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
8384 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
8385 }
8386
8387#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8388 /* Do delayed EFLAGS calculations. */
8389 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
8390 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
8391#endif
8392
8393 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8394 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8395
8396 /* Done setting up parameters, make the call. */
8397 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
8398
8399 /* Restore variables and guest shadow registers to volatile registers. */
8400 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8401 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8402
8403#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8404 if (!TlbState.fSkip)
8405 {
8406 /* end of TlbMiss - Jump to the done label. */
8407 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8408 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8409
8410 /*
8411 * TlbLookup:
8412 */
8413 if (!a_fIsSegReg || cbMemAccess == cbMem)
8414 {
8415 Assert(cbMemAccess == cbMem);
8416 off = iemNativeEmitTlbLookup<true, cbMem, cbMem - 1, IEM_ACCESS_TYPE_WRITE>(pReNative, off, &TlbState,
8417 iSegReg, idxLabelTlbLookup,
8418 idxLabelTlbMiss, idxRegMemResult);
8419 }
8420 else
8421 {
8422 Assert(cbMemAccess == sizeof(uint16_t));
8423 off = iemNativeEmitTlbLookup<true, sizeof(uint16_t), sizeof(uint16_t) - 1,
8424 IEM_ACCESS_TYPE_WRITE>(pReNative, off, &TlbState, iSegReg,
8425 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8426 }
8427
8428 /*
8429 * Emit code to do the actual storing / fetching.
8430 */
8431 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
8432# ifdef IEM_WITH_TLB_STATISTICS
8433 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8434 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8435# endif
8436 if (idxRegValue != UINT8_MAX)
8437 {
8438 switch (cbMemAccess)
8439 {
8440 case 2:
8441 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8442 break;
8443 case 4:
8444 if (!a_fIsSegReg || !fIsIntelSeg)
8445 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8446 else
8447 {
8448                         /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
8449                            PUSH FS in real mode, so we have to try to emulate that here.
8450 We borrow the now unused idxReg1 from the TLB lookup code here. */
8451 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
8452 kIemNativeGstReg_EFlags);
8453 if (idxRegEfl != UINT8_MAX)
8454 {
8455# ifdef RT_ARCH_AMD64
8456 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
8457 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8458 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8459# else
8460 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
8461 off, TlbState.idxReg1, idxRegEfl,
8462 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8463# endif
8464 iemNativeRegFreeTmp(pReNative, idxRegEfl);
8465 }
8466 else
8467 {
8468 off = iemNativeEmitLoadGprWithGstRegT<kIemNativeGstReg_EFlags>(pReNative, off, TlbState.idxReg1);
8469 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8470 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8471 }
8472 /* ASSUMES the upper half of idxRegValue is ZERO. */
8473 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
8474 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
8475 }
8476 break;
8477 case 8:
8478 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8479 break;
8480 default:
8481 AssertFailed();
8482 }
8483 }
8484 else
8485 {
8486 switch (cbMemAccess)
8487 {
8488 case 2:
8489 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
8490 idxRegMemResult, TlbState.idxReg1);
8491 break;
8492 case 4:
8493 Assert(!a_fIsSegReg);
8494 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
8495 idxRegMemResult, TlbState.idxReg1);
8496 break;
8497 case 8:
8498 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
8499 break;
8500 default:
8501 AssertFailed();
8502 }
8503 }
8504
8505 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8506 TlbState.freeRegsAndReleaseVars(pReNative);
8507
8508 /*
8509 * TlbDone:
8510 *
8511 * Commit the new RSP value.
8512 */
8513 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8514 }
8515#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8516
8517#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8518 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Rsp>(pReNative, off, idxRegRsp);
8519#endif
8520 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8521 if (idxRegEffSp != idxRegRsp)
8522 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8523
8524    /* The value variable is implicitly flushed. */
8525 if (idxRegValue != UINT8_MAX)
8526 iemNativeVarRegisterRelease(pReNative, idxVarValue);
8527 iemNativeVarFreeLocal(pReNative, idxVarValue);
8528
8529 return off;
8530}
8531
8532
8533
8534#define IEM_MC_POP_GREG_U16(a_iGReg) \
8535 off = iemNativeEmitStackPopGReg<16, 0>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
8536#define IEM_MC_POP_GREG_U32(a_iGReg) \
8537 off = iemNativeEmitStackPopGReg<32, 0>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
8538#define IEM_MC_POP_GREG_U64(a_iGReg) \
8539 off = iemNativeEmitStackPopGReg<64, 0>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
8540
8541#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
8542 off = iemNativeEmitStackPopGReg<16, 32>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8543#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
8544 off = iemNativeEmitStackPopGReg<32, 32>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
8545
8546#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
8547 off = iemNativeEmitStackPopGReg<16, 64>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8548#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
8549 off = iemNativeEmitStackPopGReg<64, 64>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
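/* The template arguments to iemNativeEmitStackPopGReg are <operand width, flat width>: a_cBitsVar is
   the size of the value being popped, while a_cBitsFlat is 0 for segmented stacks and 32 or 64 for
   flat ones, which also determines the (flat) stack-fetch helper these macros pass along. */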
8550
8551
8552DECL_FORCE_INLINE_THROW(uint32_t)
8553iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
8554 uint8_t idxRegTmp)
8555{
8556 /* Use16BitSp: */
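    /* SP is a 16-bit quantity here: the effective address is the zero extended old SP and the
       increment wraps within bits 15:0, leaving RSP bits 63:16 untouched (e.g. RSP=0x0001fffe,
       cbMem=4 gives EffSp=0xfffe and a new RSP of 0x00010002). */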
8557#ifdef RT_ARCH_AMD64
8558 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8559 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
8560 RT_NOREF(idxRegTmp);
8561#else
8562 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
8563 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
8564 /* add tmp, regrsp, #cbMem */
8565 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
8566 /* and tmp, tmp, #0xffff */
8567 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
8568 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
8569    /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
8570 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
8571#endif
8572 return off;
8573}
8574
8575
8576DECL_FORCE_INLINE(uint32_t)
8577iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
8578{
8579 /* Use32BitSp: */
8580 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8581 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
8582 return off;
8583}
8584
8585
8586/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
8587template<uint8_t const a_cBitsVar, uint8_t const a_cBitsFlat>
8588DECL_INLINE_THROW(uint32_t)
8589iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg, uintptr_t pfnFunction, uint8_t idxInstr)
8590{
8591 /*
8592 * Assert sanity.
8593 */
8594 Assert(idxGReg < 16);
8595#ifdef VBOX_STRICT
8596 if (a_cBitsFlat != 0)
8597 {
8598 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8599 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8600 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8601 Assert( pfnFunction
8602 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 32) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8603 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 32) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
8604 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 64) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8605 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 64) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
8606 : UINT64_C(0xc000b000a0009000) ));
8607 }
8608 else
8609 Assert( pfnFunction
8610 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
8611 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
8612 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
8613 : UINT64_C(0xc000b000a0009000) ));
8614#endif
8615
8616#ifdef VBOX_STRICT
8617 /*
8618 * Check that the fExec flags we've got make sense.
8619 */
8620 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8621#endif
8622
8623 /*
8624 * To keep things simple we have to commit any pending writes first as we
8625 * may end up making calls.
8626 */
8627 off = iemNativeRegFlushPendingWrites(pReNative, off);
8628
8629 /*
8630     * Determine the effective stack pointer; for non-FLAT modes we also update RSP.
8631 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
8632 * directly as the effective stack pointer.
8633 * (Code structure is very similar to that of PUSH)
8634 */
8635 uint8_t const cbMem = a_cBitsVar / 8;
8636 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8637 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8638 uint8_t const idxRegEffSp = a_cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8639 /** @todo can do a better job picking the register here. For cbMem >= 4 this
8640 * will be the resulting register value. */
8641 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
8642
8643 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8644 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
8645 {
8646 Assert(idxRegEffSp == idxRegRsp);
8647 Assert(a_cBitsFlat == 32 || a_cBitsFlat == 64);
8648 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8649 }
8650 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8651 {
8652 Assert(idxRegEffSp != idxRegRsp);
8653 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8654 kIemNativeGstRegUse_ReadOnly);
8655#ifdef RT_ARCH_AMD64
8656 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8657#else
8658 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8659#endif
8660 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8661 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8662 offFixupJumpToUseOtherBitSp = off;
8663 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8664 {
8665/** @todo can skip idxRegRsp updating when popping ESP. */
8666 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8667 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8668 }
8669 else
8670 {
8671 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8672 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8673 }
8674 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8675 }
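    /* Note! The Jcc above was emitted with a dummy displacement; offFixupJumpToUseOtherBitSp is
       patched further down (iemNativeFixupFixedJump) so the not-taken SP width is handled by the
       alternate update code emitted after the TLB dispatch jump. */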
8676 /* SpUpdateEnd: */
8677 uint32_t const offLabelSpUpdateEnd = off;
8678
8679 /*
8680 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
8681 * we're skipping lookup).
8682 */
8683 uint8_t const iSegReg = a_cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8684 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
8685 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8686 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8687 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8688 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8689 : UINT32_MAX;
8690
8691 if (!TlbState.fSkip)
8692 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8693 else
8694 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
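    /* Emitted code layout from here: the out-of-line Use16BitSp/Use32BitSp tail (jumping back to
       SpUpdateEnd), then the TlbMiss call path, then the TlbLookup code, and finally the TlbDone
       label where both paths meet again. */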
8695
8696 /*
8697 * Use16BitSp:
8698 */
8699 if RT_CONSTEXPR_IF(a_cBitsFlat == 0)
8700 {
8701#ifdef RT_ARCH_AMD64
8702 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8703#else
8704 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8705#endif
8706 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8707 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8708 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8709 else
8710 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8711 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8712 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8713 }
8714
8715 /*
8716 * TlbMiss:
8717 *
8718     * Call helper to do the popping.
8719 */
8720 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8721
8722#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8723 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8724#else
8725 RT_NOREF(idxInstr);
8726#endif
8727
8728 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8729 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8730 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
8731 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8732
8733
8734 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
8735 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8736 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8737
8738#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8739 /* Do delayed EFLAGS calculations. */
8740 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
8741#endif
8742
8743 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8744 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8745
8746 /* Done setting up parameters, make the call. */
8747 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
8748
8749 /* Move the return register content to idxRegMemResult. */
8750 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8751 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8752
8753 /* Restore variables and guest shadow registers to volatile registers. */
8754 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8755 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8756
8757#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8758 if (!TlbState.fSkip)
8759 {
8760 /* end of TlbMiss - Jump to the done label. */
8761 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8762 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8763
8764 /*
8765 * TlbLookup:
8766 */
8767 off = iemNativeEmitTlbLookup<true, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ>(pReNative, off, &TlbState, iSegReg,
8768 idxLabelTlbLookup, idxLabelTlbMiss,
8769 idxRegMemResult);
8770
8771 /*
8772 * Emit code to load the value (from idxRegMemResult into idxRegMemResult).
8773 */
8774 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8775# ifdef IEM_WITH_TLB_STATISTICS
8776 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8777 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8778# endif
8779 switch (cbMem)
8780 {
8781 case 2:
8782 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8783 break;
8784 case 4:
8785 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8786 break;
8787 case 8:
8788 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8789 break;
8790 default:
8791 AssertFailed();
8792 }
8793
8794 TlbState.freeRegsAndReleaseVars(pReNative);
8795
8796 /*
8797 * TlbDone:
8798 *
8799 * Set the new RSP value (FLAT accesses needs to calculate it first) and
8800 * commit the popped register value.
8801 */
8802 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8803 }
8804#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8805
8806 if (idxGReg != X86_GREG_xSP)
8807 {
8808 /* Set the register. */
8809 if (cbMem >= sizeof(uint32_t))
8810 {
8811#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8812 AssertMsg( pReNative->idxCurCall == 0
8813 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
8814 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName,
8815 iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
8816#endif
8817 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
8818#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8819 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
8820#endif
8821#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8822 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
8823 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8824#endif
8825 }
8826 else
8827 {
8828 Assert(cbMem == sizeof(uint16_t));
8829 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
8830 kIemNativeGstRegUse_ForUpdate);
8831 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
8832#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8833 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8834#endif
8835 iemNativeRegFreeTmp(pReNative, idxRegDst);
8836 }
8837
8838 /* Complete RSP calculation for FLAT mode. */
8839 if (idxRegEffSp == idxRegRsp)
8840 {
8841 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
8842 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8843 else
8844 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8845 }
8846 }
8847 else
8848 {
8849        /* We're popping RSP, ESP or SP. Only the 16-bit SP case needs a bit of extra work, of course. */
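        /* For the 64-bit and 32-bit cases the popped value simply becomes the new RSP/ESP; for the
           16-bit case the popped word is merged into the low 16 bits of the already stepped RSP. */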
8850 if (cbMem == sizeof(uint64_t))
8851 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
8852 else if (cbMem == sizeof(uint32_t))
8853 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
8854 else
8855 {
8856 if (idxRegEffSp == idxRegRsp)
8857 {
8858 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
8859 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8860 else
8861 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8862 }
8863 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
8864 }
8865 }
8866
8867#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8868 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Rsp>(pReNative, off, idxRegRsp);
8869#endif
8870
8871 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8872 if (idxRegEffSp != idxRegRsp)
8873 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8874 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8875
8876 return off;
8877}
8878
8879
8880
8881/*********************************************************************************************************************************
8882* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
8883*********************************************************************************************************************************/
8884
8885#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8886 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/>(\
8887 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
8888
8889#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8890 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/>(\
8891 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
8892
8893#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8894 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/>(\
8895 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
8896
8897#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8898 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/>(\
8899 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
8900
8901
8902#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8903 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8904 pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
8905
8906#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8907 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8908 pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
8909
8910#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8911 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8912 pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8913
8914#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8915 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8916 pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
8917
8918#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8919 off = iemNativeEmitMemMapCommon<sizeof(int16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8920 pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8921
8922
8923#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8924 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8925 pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
8926
8927#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8928 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8929 pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
8930
8931#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8932 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8933 pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8934
8935#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8936 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8937 pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
8938
8939#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8940 off = iemNativeEmitMemMapCommon<sizeof(int32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8941 pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8942
8943
8944#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8945 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
8946 pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
8947
8948#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8949 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
8950 pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
8951#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8952 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
8953 pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8954
8955#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8956 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
8957 pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
8958
8959#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8960 off = iemNativeEmitMemMapCommon<sizeof(int64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
8961 pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8962
8963
8964#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8965 off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
8966 pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
8967
8968#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8969 off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, \
8970 sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/ /** @todo check BCD align */>(\
8971 pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
8972
8973
8974#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8975 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
8976 pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
8977
8978#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8979 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
8980 pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
8981
8982#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8983 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
8984 pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
8985
8986#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8987 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
8988 pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
8989
8990
8991
8992#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8993 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, true>(\
8994 pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
8995
8996#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8997 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, true>(\
8998 pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
8999
9000#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9001 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, true>(\
9002 pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
9003
9004#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9005 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, true>(\
9006 pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
9007
9008
9009#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9010 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9011 pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
9012
9013#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9014 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9015 pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
9016
9017#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9018 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9019 pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9020
9021#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9022 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9023 pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
9024
9025#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
9026 off = iemNativeEmitMemMapCommon<sizeof(int16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9027 pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9028
9029
9030#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9031 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9032 pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
9033
9034#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9035 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9036 pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
9037
9038#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9039 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9040 pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9041
9042#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9043 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9044 pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
9045
9046#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
9047 off = iemNativeEmitMemMapCommon<sizeof(int32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9048 pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9049
9050
9051#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9052 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9053 pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
9054
9055#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9056 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9057 pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
9058
9059#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9060 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9061 pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9062
9063#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9064 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9065 pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
9066
9067#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
9068 off = iemNativeEmitMemMapCommon<sizeof(int64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9069 pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9070
9071
9072#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
9073 off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9074 pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
9075
9076#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
9077 off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, \
9078 sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/ /** @todo check BCD align */, true>(\
9079 pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
9080
9081
9082#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9083 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
9084 pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
9085
9086#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9087 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
9088 pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
9089
9090#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9091 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
9092 pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
9093
9094#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9095 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
9096 pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
9097
9098
9099template<uint8_t const a_cbMem, uint32_t const a_fAccess, uint32_t const a_fAlignMaskAndCtl, bool a_fFlat = false>
9100DECL_INLINE_THROW(uint32_t)
9101iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
9102 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uintptr_t pfnFunction, uint8_t idxInstr)
9103{
9104 /*
9105 * Assert sanity.
9106 */
9107 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
9108 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
9109 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
9110 && pVarMem->cbVar == sizeof(void *),
9111 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9112
9113 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9114 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9115 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
9116 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
9117 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9118
9119 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
9120 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
9121 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
9122 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
9123 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9124
9125 Assert(!a_fFlat ? iSegReg < 6 : iSegReg == UINT8_MAX);
9126
9127 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
9128
9129#ifdef VBOX_STRICT
9130# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
9131 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
9132 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
9133 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
9134 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
9135# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
9136 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
9137 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
9138 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
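    /* E.g. IEM_MAP_HLP_FN(IEM_ACCESS_DATA_RW, iemNativeHlpMemMapDataU32) resolves to
       (uintptr_t)iemNativeHlpMemMapDataU32Rw, the atomic access variants pick the ...Atomic helper,
       and read-only / write-only accesses map to the ...Ro / ...Wo helpers. */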
9139
9140 if RT_CONSTEXPR_IF(a_fFlat)
9141 {
9142 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
9143 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
9144 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
9145 switch (a_cbMem)
9146 {
9147 case 1:
9148 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU8));
9149 Assert(!a_fAlignMaskAndCtl);
9150 break;
9151 case 2:
9152 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU16));
9153 Assert(a_fAlignMaskAndCtl < 2);
9154 break;
9155 case 4:
9156 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU32));
9157 Assert(a_fAlignMaskAndCtl < 4);
9158 break;
9159 case 8:
9160 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU64));
9161 Assert(a_fAlignMaskAndCtl < 8);
9162 break;
9163 case 10:
9164 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
9165 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
9166 Assert((a_fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9167 Assert(a_fAlignMaskAndCtl < 8);
9168 break;
9169 case 16:
9170 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU128));
9171 Assert(a_fAlignMaskAndCtl < 16);
9172 break;
9173# if 0
9174 case 32:
9175 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemFlatMapDataU256));
9176 Assert(a_fAlignMaskAndCtl < 32);
9177 break;
9178 case 64:
9179 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemFlatMapDataU512));
9180 Assert(a_fAlignMaskAndCtl < 64);
9181 break;
9182# endif
9183 default: AssertFailed(); break;
9184 }
9185 }
9186 else
9187 {
9188 Assert(iSegReg < 6);
9189 switch (a_cbMem)
9190 {
9191 case 1:
9192 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU8));
9193 Assert(!a_fAlignMaskAndCtl);
9194 break;
9195 case 2:
9196 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU16));
9197 Assert(a_fAlignMaskAndCtl < 2);
9198 break;
9199 case 4:
9200 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU32));
9201 Assert(a_fAlignMaskAndCtl < 4);
9202 break;
9203 case 8:
9204 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU64));
9205 Assert(a_fAlignMaskAndCtl < 8);
9206 break;
9207 case 10:
9208 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
9209 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
9210 Assert((a_fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9211 Assert(a_fAlignMaskAndCtl < 8);
9212 break;
9213 case 16:
9214 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU128));
9215 Assert(a_fAlignMaskAndCtl < 16);
9216 break;
9217# if 0
9218 case 32:
9219 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemMapDataU256));
9220 Assert(a_fAlignMaskAndCtl < 32);
9221 break;
9222 case 64:
9223 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemMapDataU512));
9224 Assert(a_fAlignMaskAndCtl < 64);
9225 break;
9226# endif
9227 default: AssertFailed(); break;
9228 }
9229 }
9230# undef IEM_MAP_HLP_FN
9231# undef IEM_MAP_HLP_FN_NO_AT
9232#endif
9233
9234#ifdef VBOX_STRICT
9235 /*
9236 * Check that the fExec flags we've got make sense.
9237 */
9238 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
9239#endif
9240
9241 /*
9242 * To keep things simple we have to commit any pending writes first as we
9243 * may end up making calls.
9244 */
9245 off = iemNativeRegFlushPendingWrites(pReNative, off);
9246
9247#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9248 /*
9249 * Move/spill/flush stuff out of call-volatile registers.
9250 * This is the easy way out. We could contain this to the tlb-miss branch
9251 * by saving and restoring active stuff here.
9252 */
9253 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
9254 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
9255#endif
9256
9257 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
9258 while the tlb-miss codepath will temporarily put it on the stack.
9259       Set the type to stack here so we don't need to do it twice below. */
9260 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
9261 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
9262 /** @todo use a tmp register from TlbState, since they'll be free after tlb
9263 * lookup is done. */
9264
9265 /*
9266 * Define labels and allocate the result register (trying for the return
9267 * register if we can).
9268 */
9269 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
9270 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
9271 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
9272 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
9273 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, a_fFlat, a_cbMem);
9274 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
9275 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
9276 : UINT32_MAX;
9277
9278 /*
9279 * Jump to the TLB lookup code.
9280 */
9281 if (!TlbState.fSkip)
9282 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
9283
9284 /*
9285 * TlbMiss:
9286 *
9287 * Call helper to do the fetching.
9288 * We flush all guest register shadow copies here.
9289 */
9290 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
9291
9292#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
9293 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9294#else
9295 RT_NOREF(idxInstr);
9296#endif
9297
9298#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9299 /* Save variables in volatile registers. */
9300 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
9301 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
9302#endif
9303
9304 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
9305 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
9306#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9307 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
9308#else
9309 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9310#endif
9311
9312 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
9313 if RT_CONSTEXPR_IF(!a_fFlat)
9314 {
9315 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
9316 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
9317 }
9318
9319#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
9320 /* Do delayed EFLAGS calculations. */
9321 if RT_CONSTEXPR_IF(a_fFlat)
9322 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
9323 fHstRegsNotToSave);
9324 else
9325 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
9326 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG)>(pReNative, off, &TlbState,
9327 fHstRegsNotToSave);
9328#endif
9329
9330 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
9331 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
9332 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
9333
9334 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9335 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9336
9337 /* Done setting up parameters, make the call. */
9338 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
9339
9340 /*
9341 * Put the output in the right registers.
9342 */
9343 Assert(idxRegMemResult == pVarMem->idxReg);
9344 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
9345 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
9346
9347#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9348 /* Restore variables and guest shadow registers to volatile registers. */
9349 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
9350 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
9351#endif
9352
9353 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
9354 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
9355
9356#ifdef IEMNATIVE_WITH_TLB_LOOKUP
9357 if (!TlbState.fSkip)
9358 {
9359        /* end of TlbMiss - Jump to the done label. */
9360 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
9361 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
9362
9363 /*
9364 * TlbLookup:
9365 */
9366 off = iemNativeEmitTlbLookup<true, a_cbMem, a_fAlignMaskAndCtl, a_fAccess>(pReNative, off, &TlbState, iSegReg,
9367 idxLabelTlbLookup, idxLabelTlbMiss,
9368 idxRegMemResult);
9369# ifdef IEM_WITH_TLB_STATISTICS
9370 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
9371 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
9372# endif
9373
9374 /* [idxVarUnmapInfo] = 0; */
9375 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
9376
9377 /*
9378 * TlbDone:
9379 */
9380 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
9381
9382 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
9383
9384# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9385 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
9386 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9387# endif
9388 }
9389#else
9390 RT_NOREF(idxLabelTlbMiss);
9391#endif
9392
9393 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9394 iemNativeVarRegisterRelease(pReNative, idxVarMem);
9395
9396 return off;
9397}
9398
9399
9400#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
9401 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, \
9402 pCallEntry->idxInstr, IEM_ACCESS_DATA_ATOMIC)
9403
9404#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
9405 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, \
9406 pCallEntry->idxInstr, IEM_ACCESS_DATA_RW)
9407
9408#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
9409 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, \
9410 pCallEntry->idxInstr, IEM_ACCESS_DATA_W)
9411
9412#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
9413 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, \
9414 pCallEntry->idxInstr, IEM_ACCESS_DATA_R)
9415
9416DECL_INLINE_THROW(uint32_t)
9417iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
9418 uintptr_t pfnFunction, uint8_t idxInstr, uint32_t fAccess)
9419{
9420 /*
9421 * Assert sanity.
9422 */
9423 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9424#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
9425 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9426#endif
9427 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
9428 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
9429 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
9430#ifdef VBOX_STRICT
9431 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
9432 {
9433 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
9434 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
9435 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
9436 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
9437 case IEM_ACCESS_TYPE_WRITE:
9438 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
9439 case IEM_ACCESS_TYPE_READ:
9440 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
9441 default: AssertFailed();
9442 }
9443#else
9444 RT_NOREF(fAccess);
9445#endif
9446
9447 /*
9448 * To keep things simple we have to commit any pending writes first as we
9449 * may end up making calls (there shouldn't be any at this point, so this
9450 * is just for consistency).
9451 */
9452 /** @todo we could postpone this till we make the call and reload the
9453 * registers after returning from the call. Not sure if that's sensible or
9454 * not, though. */
9455 off = iemNativeRegFlushPendingWrites(pReNative, off);
9456
9457 /*
9458 * Move/spill/flush stuff out of call-volatile registers.
9459 *
9460 * We exclude any register holding the bUnmapInfo variable, as we'll be
9461 * checking it after returning from the call and will free it afterwards.
9462 */
9463 /** @todo save+restore active registers and maybe guest shadows in miss
9464 * scenario. */
9465 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
9466 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
9467
9468 /*
9469 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
9470 * to call the unmap helper function.
9471 *
9472     * The likelihood of it being zero is higher than for the TLB hit when doing
9473     * the mapping, as a TLB miss for a well aligned and unproblematic memory
9474 * access should also end up with a mapping that won't need special unmapping.
9475 */
9476 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
9477 * should speed up things for the pure interpreter as well when TLBs
9478 * are enabled. */
9479#ifdef RT_ARCH_AMD64
9480 if (pVarUnmapInfo->idxReg == UINT8_MAX)
9481 {
9482 /* test byte [rbp - xxx], 0ffh */
9483 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9484 pbCodeBuf[off++] = 0xf6;
9485 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
9486 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9487 pbCodeBuf[off++] = 0xff;
9488 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9489 }
9490 else
9491#endif
9492 {
9493 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInitedWithPref(pReNative, idxVarUnmapInfo, &off,
9494 IEMNATIVE_CALL_ARG1_GREG);
9495 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
9496 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9497 }
9498 uint32_t const offJmpFixup = off;
9499    off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
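    /* A bUnmapInfo value of zero means the TLB-hit path mapped guest memory directly and there is
       nothing to commit or unmap, so the jz above skips the whole helper call. */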
9500
9501 /*
9502 * Call the unmap helper function.
9503 */
9504#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
9505 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9506#else
9507 RT_NOREF(idxInstr);
9508#endif
9509
9510 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
9511 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
9512 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9513
9514 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9515 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9516
9517 /* Done setting up parameters, make the call.
9518 Note! Since we can only end up here if we took a TLB miss, any postponed EFLAGS
9519             calculations have been done there already. Thus, a_fSkipEflChecks = true. */
9520 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
9521
9522    /* The bUnmapInfo variable is implicitly freed by these MCs. */
9523 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
9524
9525 /*
9526 * Done, just fixup the jump for the non-call case.
9527 */
9528 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
9529
9530 return off;
9531}
9532
9533
9534
9535/*********************************************************************************************************************************
9536* State and Exceptions *
9537*********************************************************************************************************************************/
9538
9539#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9540#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9541
9542#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9543#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9544#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9545
9546#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9547#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9548#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9549
9550
9551DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
9552{
9553#ifndef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
9554 RT_NOREF(pReNative, fForChange);
9555#else
9556 if ( !(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED)
9557 && fForChange)
9558 {
9559# ifdef RT_ARCH_AMD64
9560
9561 /* Need to save the host MXCSR the first time, and clear the exception flags. */
9562 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9563 {
9564 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9565
9566 /* stmxcsr */
9567 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9568 pbCodeBuf[off++] = X86_OP_REX_B;
9569 pbCodeBuf[off++] = 0x0f;
9570 pbCodeBuf[off++] = 0xae;
9571 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 3, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9572 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9573 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9574 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9575 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9576 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9577
9578 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9579 }
9580
9581 uint8_t const idxRegTmp = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
9582 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
9583 kIemNativeGstRegUse_ReadOnly);
9584
9585 /*
9586 * Mask any exceptions and clear the exception status and save into MXCSR,
9587 * taking a detour through memory here because ldmxcsr/stmxcsr don't support
9588 * a register source/target (sigh).
9589 */
9590 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr);
9591 off = iemNativeEmitOrGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, X86_MXCSR_XCPT_MASK);
9592 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, ~X86_MXCSR_XCPT_FLAGS);
9593 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
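        /* iem.s.uRegMxcsrTmp now holds the guest MXCSR with every exception mask bit set and all
           exception status flags cleared; the ldmxcsr below loads that value into the host MXCSR. */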
9594
9595 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9596
9597 /* ldmxcsr */
9598 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9599 pbCodeBuf[off++] = X86_OP_REX_B;
9600 pbCodeBuf[off++] = 0x0f;
9601 pbCodeBuf[off++] = 0xae;
9602 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 2, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9603 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9604 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9605 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9606 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9607 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9608
9609 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9610 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9611
9612# elif defined(RT_ARCH_ARM64)
9613 uint8_t const idxRegTmp = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
9614
9615        /* Need to save the host floating point control register (FPCR) the first time and clear FPSR. */
9616 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9617 {
9618 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9619 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(ARMV8_A64_REG_XZR, ARMV8_AARCH64_SYSREG_FPSR);
9620 pu32CodeBuf[off++] = Armv8A64MkInstrMrs(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9621 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9622 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9623 }
9624
9625 /*
9626 * Translate MXCSR to FPCR.
9627 *
9628 * Unfortunately we can't emulate the exact behavior of MXCSR as we can't take
9629         * FEAT_AFP on arm64 for granted (my M2 MacBook doesn't have it). So we can't map
9630 * MXCSR.DAZ to FPCR.FIZ and MXCSR.FZ to FPCR.FZ with FPCR.AH being set.
9631         * We can only use FPCR.FZ, which flushes both input _and_ output denormals to zero.
9632 */
9633 /** @todo Check the host supported flags (needs additional work to get the host features from CPUM)
9634 * and implement alternate handling if FEAT_AFP is present. */
9635 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
9636 kIemNativeGstRegUse_ReadOnly);
9637
9638 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
9639
9640        /* First make sure nothing is set in the upper 16 bits (X86_MXCSR_MM, which we don't emulate right now). */
9641 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegTmp, idxRegMxCsr);
9642
9643 /* If either MXCSR.FZ or MXCSR.DAZ is set FPCR.FZ will be set. */
9644 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(IEMNATIVE_REG_FIXED_TMP0, idxRegTmp, X86_MXCSR_DAZ_BIT, 1);
9645 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegTmp, X86_MXCSR_FZ_BIT);
9646 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(idxRegTmp, idxRegTmp, IEMNATIVE_REG_FIXED_TMP0);
9647 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegTmp, idxRegTmp, ARMV8_FPCR_FZ_BIT);
9648
9649 /*
9650         * Set the rounding mode; the layout differs between MXCSR.RC[14:13] and FPCR.RMode[23:22]:
9651 *
9652 * Value MXCSR FPCR
9653 * 0 RN RN
9654 * 1 R- R+
9655 * 2 R+ R-
9656 * 3 RZ RZ
9657 *
9658         * Conversion can be achieved by swapping the two bit positions.
9659 */
9660 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT);
9661        pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 23, 1); /* FPCR.RMode[23] = MXCSR.RC[13] */
9662 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT + 1);
9663        pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 22, 1); /* FPCR.RMode[22] = MXCSR.RC[14] */
9664
9665 /* Write the value to FPCR. */
9666 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9667
9668 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9669 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9670 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9671# else
9672# error "Port me"
9673# endif
9674 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
9675 }
9676#endif
9677 return off;
9678}
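
/*
 * Illustrative sketch only (kept out of the build): plain C versions of the
 * value transformations the emitter above generates host code for. The
 * function names are made up for this example; only the architectural MXCSR
 * and FPCR bit layouts are assumed.
 */
#if 0
/* AMD64 path: mask all exceptions and clear the pending exception flags
   before loading the value into the host MXCSR. */
static uint32_t exampleMxCsrForHost(uint32_t uGuestMxCsr)
{
    return (uGuestMxCsr | X86_MXCSR_XCPT_MASK) & ~X86_MXCSR_XCPT_FLAGS;
}

/* ARM64 path: translate the guest MXCSR into a host FPCR value. */
static uint32_t exampleMxCsrToFpcr(uint32_t uGuestMxCsr)
{
    uint32_t uFpcr = 0;
    if (uGuestMxCsr & (RT_BIT_32(15) | RT_BIT_32(6)))  /* MXCSR.FZ or MXCSR.DAZ ... */
        uFpcr |= RT_BIT_32(24);                        /* ... -> FPCR.FZ */
    uint32_t const uRc = (uGuestMxCsr >> 13) & 3;      /* MXCSR.RC[14:13] ... */
    uFpcr |= ((uRc & 1) << 23) | ((uRc & 2) << 21);    /* ... -> FPCR.RMode[23:22], bits swapped */
    return uFpcr;
}
#endif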
9679
9680
9681
9682/*********************************************************************************************************************************
9683* Emitters for FPU related operations. *
9684*********************************************************************************************************************************/
9685
9686#define IEM_MC_FETCH_FCW(a_u16Fcw) \
9687 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
9688
9689/** Emits code for IEM_MC_FETCH_FCW. */
9690DECL_INLINE_THROW(uint32_t)
9691iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9692{
9693 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9694 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9695
9696 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9697
9698 /* Allocate a temporary FCW register. */
9699 /** @todo eliminate extra register */
9700 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
9701 kIemNativeGstRegUse_ReadOnly);
9702
9703 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
9704
9705 /* Free but don't flush the FCW register. */
9706 iemNativeRegFreeTmp(pReNative, idxFcwReg);
9707 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9708
9709 return off;
9710}
9711
9712
9713#define IEM_MC_FETCH_FSW(a_u16Fsw) \
9714 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
9715
9716/** Emits code for IEM_MC_FETCH_FSW. */
9717DECL_INLINE_THROW(uint32_t)
9718iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9719{
9720 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9721 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9722
9723 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9724 /* Allocate a temporary FSW register. */
9725 /** @todo eliminate extra register */
9726 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
9727 kIemNativeGstRegUse_ReadOnly);
9728
9729 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
9730
9731 /* Free but don't flush the FSW register. */
9732 iemNativeRegFreeTmp(pReNative, idxFswReg);
9733 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9734
9735 return off;
9736}
9737
9738
9739
9740/*********************************************************************************************************************************
9741* Emitters for SSE/AVX specific operations. *
9742*********************************************************************************************************************************/
9743
9744#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
9745 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
9746
9747/** Emits code for IEM_MC_COPY_XREG_U128. */
9748DECL_INLINE_THROW(uint32_t)
9749iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
9750{
9751    /* This is a nop if the source and destination registers are the same. */
9752 if (iXRegDst != iXRegSrc)
9753 {
9754 /* Allocate destination and source register. */
9755 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
9756 kIemNativeGstSimdRegLdStSz_Low128,
9757 kIemNativeGstRegUse_ForFullWrite);
9758 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
9759 kIemNativeGstSimdRegLdStSz_Low128,
9760 kIemNativeGstRegUse_ReadOnly);
9761
9762 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9763
9764 /* Free but don't flush the source and destination register. */
9765 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9766 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9767 }
9768
9769 return off;
9770}
9771
9772
9773#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
9774 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
9775
9776/** Emits code for IEM_MC_FETCH_XREG_U128. */
9777DECL_INLINE_THROW(uint32_t)
9778iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
9779{
9780 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9781 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9782
9783 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9784 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9785
9786 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9787
9788 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9789
9790 /* Free but don't flush the source register. */
9791 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9792 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9793
9794 return off;
9795}
9796
9797
9798#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
9799 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
9800
9801#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
9802 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
9803
9804/** Emits code for IEM_MC_FETCH_XREG_U64/IEM_MC_FETCH_XREG_R64. */
9805DECL_INLINE_THROW(uint32_t)
9806iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
9807{
9808 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9809 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9810
9811 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9812 kIemNativeGstSimdRegLdStSz_Low128,
9813 kIemNativeGstRegUse_ReadOnly);
9814
9815 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9816 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9817
9818 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9819
9820 /* Free but don't flush the source register. */
9821 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9822 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9823
9824 return off;
9825}
9826
9827
9828#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
9829 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
9830
9831#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
9832 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
9833
9834/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
9835DECL_INLINE_THROW(uint32_t)
9836iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
9837{
9838 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9839 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9840
9841 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9842 kIemNativeGstSimdRegLdStSz_Low128,
9843 kIemNativeGstRegUse_ReadOnly);
9844
9845 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9846 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9847
9848 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9849
9850 /* Free but don't flush the source register. */
9851 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9852 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9853
9854 return off;
9855}
9856
9857
9858#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
9859 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
9860
9861/** Emits code for IEM_MC_FETCH_XREG_U16. */
9862DECL_INLINE_THROW(uint32_t)
9863iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
9864{
9865 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9866 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9867
9868 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9869 kIemNativeGstSimdRegLdStSz_Low128,
9870 kIemNativeGstRegUse_ReadOnly);
9871
9872 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9873 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9874
9875 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
9876
9877 /* Free but don't flush the source register. */
9878 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9879 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9880
9881 return off;
9882}
9883
9884
9885#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
9886 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
9887
9888/** Emits code for IEM_MC_FETCH_XREG_U8. */
9889DECL_INLINE_THROW(uint32_t)
9890iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
9891{
9892 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9893 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
9894
9895 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9896 kIemNativeGstSimdRegLdStSz_Low128,
9897 kIemNativeGstRegUse_ReadOnly);
9898
9899 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9900 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9901
9902 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
9903
9904 /* Free but don't flush the source register. */
9905 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9906 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9907
9908 return off;
9909}
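
/*
 * Illustrative sketch only (kept out of the build): the guest-visible
 * semantics of the four element-fetch emitters above, using a plain union
 * as a stand-in for an XMM register rather than the recompiler's actual
 * register allocation machinery.
 */
#if 0
typedef union EXAMPLEXMMREG
{
    uint64_t au64[2];
    uint32_t au32[4];
    uint16_t au16[8];
    uint8_t  au8[16];
} EXAMPLEXMMREG;

static uint64_t exampleFetchXregU64(EXAMPLEXMMREG const *pXmm, unsigned iQWord) { return pXmm->au64[iQWord & 1]; }
static uint32_t exampleFetchXregU32(EXAMPLEXMMREG const *pXmm, unsigned iDWord) { return pXmm->au32[iDWord & 3]; }
static uint16_t exampleFetchXregU16(EXAMPLEXMMREG const *pXmm, unsigned iWord)  { return pXmm->au16[iWord & 7]; }
static uint8_t  exampleFetchXregU8( EXAMPLEXMMREG const *pXmm, unsigned iByte)  { return pXmm->au8[iByte & 15]; }
#endif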
9910
9911
9912#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
9913 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
9914
9915AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
9916#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
9917 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
9918
9919
9920/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
9921DECL_INLINE_THROW(uint32_t)
9922iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9923{
9924 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9925 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9926
9927 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9928 kIemNativeGstSimdRegLdStSz_Low128,
9929 kIemNativeGstRegUse_ForFullWrite);
9930 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9931
9932 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9933
9934 /* Free but don't flush the source register. */
9935 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9936 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9937
9938 return off;
9939}
9940
9941
9942#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
9943 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
9944
9945#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
9946 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
9947
9948#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
9949 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
9950
9951#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
9952 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
9953
9954#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
9955 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
9956
9957#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
9958 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
9959
9960/** Emits code for IEM_MC_STORE_XREG_U64/U32/U16/U8 and IEM_MC_STORE_XREG_R64/R32. */
9961DECL_INLINE_THROW(uint32_t)
9962iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
9963 uint8_t cbLocal, uint8_t iElem)
9964{
9965 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9966 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
9967
9968#ifdef VBOX_STRICT
9969 switch (cbLocal)
9970 {
9971 case sizeof(uint64_t): Assert(iElem < 2); break;
9972 case sizeof(uint32_t): Assert(iElem < 4); break;
9973 case sizeof(uint16_t): Assert(iElem < 8); break;
9974 case sizeof(uint8_t): Assert(iElem < 16); break;
9975 default: AssertFailed();
9976 }
9977#endif
9978
9979 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9980 kIemNativeGstSimdRegLdStSz_Low128,
9981 kIemNativeGstRegUse_ForUpdate);
9982 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxDstVar, &off);
9983
9984 switch (cbLocal)
9985 {
9986 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9987 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9988 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9989 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9990 default: AssertFailed();
9991 }
9992
9993 /* Free but don't flush the source register. */
9994 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9995 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9996
9997 return off;
9998}
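
/*
 * Illustrative sketch only (kept out of the build): the element store the
 * emitter above generates code for, shown for the 32-bit case over a plain
 * dword array standing in for an XMM register. Only the addressed element
 * changes; the rest of the register is preserved (hence ForUpdate above).
 */
#if 0
static void exampleStoreXregU32(uint32_t au32Xmm[4], unsigned iDWord, uint32_t u32Value)
{
    au32Xmm[iDWord & 3] = u32Value; /* iDWord is asserted to be < 4 above */
}
#endif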
9999
10000
10001#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
10002 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
10003
10004/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
10005DECL_INLINE_THROW(uint32_t)
10006iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10007{
10008 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10009 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10010
10011 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10012 kIemNativeGstSimdRegLdStSz_Low128,
10013 kIemNativeGstRegUse_ForUpdate);
10014 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxDstVar, &off);
10015
10016    /* Zero the vector register first, then store the 64-bit value into the low qword. */
10017 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10018 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10019
10020 /* Free but don't flush the source register. */
10021 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10022 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10023
10024 return off;
10025}
10026
10027
10028#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
10029 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
10030
10031/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
10032DECL_INLINE_THROW(uint32_t)
10033iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10034{
10035 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10036 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10037
10038 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10039 kIemNativeGstSimdRegLdStSz_Low128,
10040 kIemNativeGstRegUse_ForUpdate);
10041 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxDstVar, &off);
10042
10043 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
10044 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10045 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10046
10047 /* Free but don't flush the source register. */
10048 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10049 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10050
10051 return off;
10052}
10053
10054
10055#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
10056 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
10057
10058/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
10059DECL_INLINE_THROW(uint32_t)
10060iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
10061 uint8_t idxSrcVar, uint8_t iDwSrc)
10062{
10063 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10064 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10065
10066 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10067 kIemNativeGstSimdRegLdStSz_Low128,
10068 kIemNativeGstRegUse_ForUpdate);
10069 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10070
10071 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
10072 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
10073
10074 /* Free but don't flush the destination register. */
10075 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10076 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10077
10078 return off;
10079}
10080
10081
10082#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10083 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10084
10085/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
10086DECL_INLINE_THROW(uint32_t)
10087iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10088{
10089 /*
10090 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10091 * if iYRegDst gets allocated first for the full write it won't load the
10092 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
10093 * duplicated from the already allocated host register for iYRegDst containing
10094     * garbage. This will be caught by the guest register value checking in debug
10095 * builds.
10096 */
10097 if (iYRegDst != iYRegSrc)
10098 {
10099 /* Allocate destination and source register. */
10100 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10101 kIemNativeGstSimdRegLdStSz_256,
10102 kIemNativeGstRegUse_ForFullWrite);
10103 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10104 kIemNativeGstSimdRegLdStSz_Low128,
10105 kIemNativeGstRegUse_ReadOnly);
10106
10107 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10108 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10109
10110 /* Free but don't flush the source and destination register. */
10111 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10112 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10113 }
10114 else
10115 {
10116 /* This effectively only clears the upper 128-bits of the register. */
10117 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10118 kIemNativeGstSimdRegLdStSz_High128,
10119 kIemNativeGstRegUse_ForFullWrite);
10120
10121 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10122
10123 /* Free but don't flush the destination register. */
10124 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10125 }
10126
10127 return off;
10128}
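
/*
 * Illustrative sketch only (kept out of the build): the ZX_VLMAX copy the
 * emitter above implements, with the YMM registers modelled as plain qword
 * arrays. The sketch also covers the iYRegDst == iYRegSrc case, which the
 * emitter handles separately purely for register allocation reasons.
 */
#if 0
static void exampleCopyYregU128ZxVlmax(uint64_t aau64Ymm[][4], unsigned iYRegDst, unsigned iYRegSrc)
{
    aau64Ymm[iYRegDst][0] = aau64Ymm[iYRegSrc][0]; /* the low 128 bits are copied ... */
    aau64Ymm[iYRegDst][1] = aau64Ymm[iYRegSrc][1];
    aau64Ymm[iYRegDst][2] = 0;                     /* ... and the upper 128 bits are zeroed. */
    aau64Ymm[iYRegDst][3] = 0;
}
#endif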
10129
10130
10131#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10132 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10133
10134/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
10135DECL_INLINE_THROW(uint32_t)
10136iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10137{
10138 /*
10139 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10140 * if iYRegDst gets allocated first for the full write it won't load the
10141 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
10142 * duplicated from the already allocated host register for iYRegDst containing
10143     * garbage. This will be caught by the guest register value checking in debug
10144     * builds. The iYRegSrc == iYRegDst case would effectively only clear the upper
10145     * 256 bits of a ZMM register, which we don't support yet, so it is just a nop.
10146 */
10147 if (iYRegDst != iYRegSrc)
10148 {
10149 /* Allocate destination and source register. */
10150 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10151 kIemNativeGstSimdRegLdStSz_256,
10152 kIemNativeGstRegUse_ReadOnly);
10153 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10154 kIemNativeGstSimdRegLdStSz_256,
10155 kIemNativeGstRegUse_ForFullWrite);
10156
10157 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10158
10159 /* Free but don't flush the source and destination register. */
10160 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10161 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10162 }
10163
10164 return off;
10165}
10166
10167
10168#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
10169 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
10170
10171/** Emits code for IEM_MC_FETCH_YREG_U128. */
10172DECL_INLINE_THROW(uint32_t)
10173iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
10174{
10175 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10176 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
10177
10178 Assert(iDQWord <= 1);
10179 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10180 iDQWord == 1
10181 ? kIemNativeGstSimdRegLdStSz_High128
10182 : kIemNativeGstSimdRegLdStSz_Low128,
10183 kIemNativeGstRegUse_ReadOnly);
10184
10185 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10186 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10187
10188 if (iDQWord == 1)
10189 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10190 else
10191 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10192
10193 /* Free but don't flush the source register. */
10194 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10195 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10196
10197 return off;
10198}
10199
10200
10201#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
10202 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
10203
10204/** Emits code for IEM_MC_FETCH_YREG_U64. */
10205DECL_INLINE_THROW(uint32_t)
10206iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
10207{
10208 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10209 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10210
10211 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10212 iQWord >= 2
10213 ? kIemNativeGstSimdRegLdStSz_High128
10214 : kIemNativeGstSimdRegLdStSz_Low128,
10215 kIemNativeGstRegUse_ReadOnly);
10216
10217 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10218 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10219
10220 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
10221
10222 /* Free but don't flush the source register. */
10223 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10224 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10225
10226 return off;
10227}
10228
10229
10230#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
10231 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
10232
10233/** Emits code for IEM_MC_FETCH_YREG_U32. */
10234DECL_INLINE_THROW(uint32_t)
10235iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
10236{
10237 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10238 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10239
10240 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10241 iDWord >= 4
10242 ? kIemNativeGstSimdRegLdStSz_High128
10243 : kIemNativeGstSimdRegLdStSz_Low128,
10244 kIemNativeGstRegUse_ReadOnly);
10245
10246 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10247 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10248
10249 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
10250
10251 /* Free but don't flush the source register. */
10252 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10253 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10254
10255 return off;
10256}
10257
10258
10259#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
10260 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
10261
10262/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
10263DECL_INLINE_THROW(uint32_t)
10264iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10265{
10266 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10267 kIemNativeGstSimdRegLdStSz_High128,
10268 kIemNativeGstRegUse_ForFullWrite);
10269
10270 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10271
10272 /* Free but don't flush the register. */
10273 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10274
10275 return off;
10276}
10277
10278
10279#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
10280 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
10281
10282/** Emits code for IEM_MC_STORE_YREG_U128. */
10283DECL_INLINE_THROW(uint32_t)
10284iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
10285{
10286 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10287 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10288
10289 Assert(iDQword <= 1);
10290 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10291 iDQword == 0
10292 ? kIemNativeGstSimdRegLdStSz_Low128
10293 : kIemNativeGstSimdRegLdStSz_High128,
10294 kIemNativeGstRegUse_ForFullWrite);
10295
10296 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10297
10298 if (iDQword == 0)
10299 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10300 else
10301 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
10302
10303 /* Free but don't flush the source register. */
10304 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10305 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10306
10307 return off;
10308}
10309
10310
10311#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10312 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10313
10314/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
10315DECL_INLINE_THROW(uint32_t)
10316iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10317{
10318 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10319 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10320
10321 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10322 kIemNativeGstSimdRegLdStSz_256,
10323 kIemNativeGstRegUse_ForFullWrite);
10324
10325 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10326
10327 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10328 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10329
10330 /* Free but don't flush the source register. */
10331 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10332 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10333
10334 return off;
10335}
10336
10337
10338#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
10339 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
10340
10341/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
10342DECL_INLINE_THROW(uint32_t)
10343iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10344{
10345 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10346 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10347
10348 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10349 kIemNativeGstSimdRegLdStSz_256,
10350 kIemNativeGstRegUse_ForFullWrite);
10351
10352 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10353
10354 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10355 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10356
10357 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10358 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10359
10360 return off;
10361}
10362
10363
10364#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
10365 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
10366
10367/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
10368DECL_INLINE_THROW(uint32_t)
10369iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10370{
10371 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10372 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10373
10374 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10375 kIemNativeGstSimdRegLdStSz_256,
10376 kIemNativeGstRegUse_ForFullWrite);
10377
10378 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10379
10380 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10381 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10382
10383 /* Free but don't flush the source register. */
10384 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10385 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10386
10387 return off;
10388}
10389
10390
10391#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
10392 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
10393
10394/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
10395DECL_INLINE_THROW(uint32_t)
10396iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10397{
10398 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10399 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10400
10401 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10402 kIemNativeGstSimdRegLdStSz_256,
10403 kIemNativeGstRegUse_ForFullWrite);
10404
10405 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10406
10407 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10408 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10409
10410 /* Free but don't flush the source register. */
10411 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10412 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10413
10414 return off;
10415}
10416
10417
10418#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
10419 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
10420
10421/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
10422DECL_INLINE_THROW(uint32_t)
10423iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10424{
10425 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10426 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10427
10428 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10429 kIemNativeGstSimdRegLdStSz_256,
10430 kIemNativeGstRegUse_ForFullWrite);
10431
10432 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10433
10434 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10435 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10436
10437 /* Free but don't flush the source register. */
10438 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10439 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10440
10441 return off;
10442}
10443
10444
10445#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
10446 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
10447
10448/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
10449DECL_INLINE_THROW(uint32_t)
10450iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10451{
10452 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10453 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10454
10455 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10456 kIemNativeGstSimdRegLdStSz_256,
10457 kIemNativeGstRegUse_ForFullWrite);
10458
10459 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10460
10461 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10462
10463 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10464 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10465
10466 return off;
10467}
10468
10469
10470#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
10471 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
10472
10473/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
10474DECL_INLINE_THROW(uint32_t)
10475iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10476{
10477 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10478 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10479
10480 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10481 kIemNativeGstSimdRegLdStSz_256,
10482 kIemNativeGstRegUse_ForFullWrite);
10483
10484 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10485
10486 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10487
10488 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10489 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10490
10491 return off;
10492}
10493
10494
10495#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10496 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10497
10498/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
10499DECL_INLINE_THROW(uint32_t)
10500iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10501{
10502 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10503 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10504
10505 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10506 kIemNativeGstSimdRegLdStSz_256,
10507 kIemNativeGstRegUse_ForFullWrite);
10508
10509 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10510
10511 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10512
10513 /* Free but don't flush the source register. */
10514 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10515 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10516
10517 return off;
10518}
10519
10520
10521#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10522 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10523
10524/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
10525DECL_INLINE_THROW(uint32_t)
10526iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10527{
10528 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10529 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10530
10531 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10532 kIemNativeGstSimdRegLdStSz_256,
10533 kIemNativeGstRegUse_ForFullWrite);
10534
10535 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10536
10537 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10538
10539 /* Free but don't flush the source register. */
10540 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10541 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10542
10543 return off;
10544}
10545
10546
10547#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10548 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10549
10550/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
10551DECL_INLINE_THROW(uint32_t)
10552iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10553{
10554 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10555 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10556
10557 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10558 kIemNativeGstSimdRegLdStSz_256,
10559 kIemNativeGstRegUse_ForFullWrite);
10560
10561 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10562
10563 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
10564
10565 /* Free but don't flush the source register. */
10566 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10567 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10568
10569 return off;
10570}
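
/*
 * Illustrative sketch only (kept out of the build): the broadcast semantics
 * the emitters above implement, shown for the 32-bit element size over a
 * plain dword array standing in for a YMM register. The XREG variants
 * replicate into the low 128 bits and zero the upper half (f256Bit=false),
 * the YREG variants replicate across all 256 bits (f256Bit=true).
 */
#if 0
static void exampleBroadcastU32ZxVlmax(uint32_t au32Ymm[8], uint32_t u32Src, bool f256Bit)
{
    for (unsigned i = 0; i < 8; i++)
        au32Ymm[i] = (f256Bit || i < 4) ? u32Src : 0;
}
#endif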
10571
10572
10573#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10574 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10575
10576/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
10577DECL_INLINE_THROW(uint32_t)
10578iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10579{
10580 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10581 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10582
10583 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10584 kIemNativeGstSimdRegLdStSz_256,
10585 kIemNativeGstRegUse_ForFullWrite);
10586
10587 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10588
10589 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10590 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
10591
10592 /* Free but don't flush the source register. */
10593 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10594 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10595
10596 return off;
10597}
10598
10599
10600#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10601 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10602
10603/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
10604DECL_INLINE_THROW(uint32_t)
10605iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10606{
10607 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10608 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10609
10610 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10611 kIemNativeGstSimdRegLdStSz_256,
10612 kIemNativeGstRegUse_ForFullWrite);
10613
10614 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10615
10616 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10617 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10618
10619 /* Free but don't flush the source register. */
10620 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10621 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10622
10623 return off;
10624}
10625
10626
10627#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
10628 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
10629
10630/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
10631DECL_INLINE_THROW(uint32_t)
10632iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
10633{
10634 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10635 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10636
10637 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10638 kIemNativeGstSimdRegLdStSz_256,
10639 kIemNativeGstRegUse_ForFullWrite);
10640 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10641 kIemNativeGstSimdRegLdStSz_Low128,
10642 kIemNativeGstRegUse_ReadOnly);
10643 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10644
10645 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10646 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10647 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10648
10649 /* Free but don't flush the source and destination registers. */
10650 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10651 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10652 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10653
10654 return off;
10655}
10656
10657
10658#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
10659 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
10660
10661/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
10662DECL_INLINE_THROW(uint32_t)
10663iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
10664{
10665 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10666 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10667
10668 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10669 kIemNativeGstSimdRegLdStSz_256,
10670 kIemNativeGstRegUse_ForFullWrite);
10671 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10672 kIemNativeGstSimdRegLdStSz_Low128,
10673 kIemNativeGstRegUse_ReadOnly);
10674 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10675
10676 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10677 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
10678 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10679
10680 /* Free but don't flush the source and destination registers. */
10681 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10682 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10683 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10684
10685 return off;
10686}
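
/*
 * Illustrative sketch only (kept out of the build): what the two merge
 * emitters above leave in the destination register, with the YMM registers
 * modelled as plain qword arrays.
 */
#if 0
static void exampleMergeU64LocalU64Hi(uint64_t au64Dst[4], uint64_t u64Local, uint64_t const au64SrcHx[2])
{
    au64Dst[0] = u64Local;       /* the local value goes into qword 0 */
    au64Dst[1] = au64SrcHx[1];   /* qword 1 is taken from the Hx source */
    au64Dst[2] = au64Dst[3] = 0; /* ZX_VLMAX: the upper 128 bits are cleared */
}

static void exampleMergeU64LoU64Local(uint64_t au64Dst[4], uint64_t const au64SrcHx[2], uint64_t u64Local)
{
    au64Dst[0] = au64SrcHx[0];   /* qword 0 is taken from the Hx source */
    au64Dst[1] = u64Local;       /* the local value goes into qword 1 */
    au64Dst[2] = au64Dst[3] = 0; /* ZX_VLMAX: the upper 128 bits are cleared */
}
#endif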
10687
10688
10689#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
10690 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
10691
10692
10693/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
10694DECL_INLINE_THROW(uint32_t)
10695iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
10696{
10697 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10698 kIemNativeGstSimdRegLdStSz_Low128,
10699 kIemNativeGstRegUse_ForUpdate);
10700
10701 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
10702 if (bImm8Mask & RT_BIT(0))
10703 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
10704 if (bImm8Mask & RT_BIT(1))
10705 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
10706 if (bImm8Mask & RT_BIT(2))
10707 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
10708 if (bImm8Mask & RT_BIT(3))
10709 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
10710
10711 /* Free but don't flush the destination register. */
10712 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10713
10714 return off;
10715}
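
/*
 * Illustrative sketch only (kept out of the build): the mask driven dword
 * clearing the emitter above generates, over a plain dword array standing
 * in for an XMM register.
 */
#if 0
static void exampleClearXregU32Mask(uint32_t au32Xmm[4], uint8_t bMask)
{
    for (unsigned iDWord = 0; iDWord < 4; iDWord++)
        if (bMask & RT_BIT(iDWord))
            au32Xmm[iDWord] = 0;
}
#endif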
10716
10717
10718#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
10719 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
10720
10721#define IEM_MC_FETCH_YREG_YMM(a_uYmmDst, a_iYRegSrc) \
10722 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_uYmmDst, a_iYRegSrc)
10723
10724/** Emits code for IEM_MC_FETCH_YREG_U256/IEM_MC_FETCH_YREG_YMM. */
10725DECL_INLINE_THROW(uint32_t)
10726iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
10727{
10728 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10729 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
10730
10731 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10732 kIemNativeGstSimdRegLdStSz_256,
10733 kIemNativeGstRegUse_ReadOnly);
10734 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10735
10736 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
10737
10738 /* Free but don't flush the source register. */
10739 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10740 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10741
10742 return off;
10743}
10744
10745
10746#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
10747 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
10748
10749#define IEM_MC_STORE_YREG_YMM_ZX_VLMAX(a_iYRegDst, a_uYmmSrc) \
10750 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_uYmmSrc)
10751
10752/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX/IEM_MC_STORE_YREG_YMM_ZX_VLMAX. */
10753DECL_INLINE_THROW(uint32_t)
10754iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
10755{
10756 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10757 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10758
10759 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10760 kIemNativeGstSimdRegLdStSz_256,
10761 kIemNativeGstRegUse_ForFullWrite);
10762    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10763
10764 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
10765
10766 /* Free but don't flush the source register. */
10767 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10768 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10769
10770 return off;
10771}
10772
10773
10774#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
10775 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
10776
10777
10778/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
10779DECL_INLINE_THROW(uint32_t)
10780iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
10781 uint8_t idxSrcVar, uint8_t iDwSrc)
10782{
10783 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10784 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10785
10786 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10787 iDwDst < 4
10788 ? kIemNativeGstSimdRegLdStSz_Low128
10789 : kIemNativeGstSimdRegLdStSz_High128,
10790 kIemNativeGstRegUse_ForUpdate);
10791    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10792 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10793
10794 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
10795 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
10796
10797 /* Free but don't flush the source register. */
10798 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10799 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10800 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10801
10802 return off;
10803}
10804
10805
10806#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
10807 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
10808
10809
10810/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
10811DECL_INLINE_THROW(uint32_t)
10812iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
10813 uint8_t idxSrcVar, uint8_t iQwSrc)
10814{
10815 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10816 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10817
10818 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10819 iQwDst < 2
10820 ? kIemNativeGstSimdRegLdStSz_Low128
10821 : kIemNativeGstSimdRegLdStSz_High128,
10822 kIemNativeGstRegUse_ForUpdate);
10823    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10824 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10825
10826 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
10827 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
10828
10829 /* Free but don't flush the source register. */
10830 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10831 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10832 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10833
10834 return off;
10835}
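
/*
 * Scalar sketch of the emitter above (illustrative only, not recompiler API);
 * same idea as the U32 variant, just at qword granularity:
 *
 *     puDst->au64[iQwDst] = puSrc->au64[iQwSrc]; // the other qwords of the YMM register are left untouched
 */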
10836
10837
10838#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
10839 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
10840
10841
10842/** Emits code for IEM_MC_STORE_YREG_U64. */
10843DECL_INLINE_THROW(uint32_t)
10844iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
10845{
10846 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10847 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10848
10849 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10850 iQwDst < 2
10851 ? kIemNativeGstSimdRegLdStSz_Low128
10852 : kIemNativeGstSimdRegLdStSz_High128,
10853 kIemNativeGstRegUse_ForUpdate);
10854
10855 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10856
10857 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
10858
10859 /* Free but don't flush the source register. */
10860 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10861 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10862
10863 return off;
10864}
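
/*
 * Scalar sketch of the emitter above (illustrative only, not recompiler API),
 * with puDst being the guest YMM register viewed as an RTUINT256U:
 *
 *     puDst->au64[iQwDst] = u64Value; // no zero extension, the remaining qwords are preserved
 */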
10865
10866
10867#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
10868 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
10869
10870/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
10871DECL_INLINE_THROW(uint32_t)
10872iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10873{
10874 RT_NOREF(pReNative, iYReg);
10875 /** @todo Needs to be implemented when support for AVX-512 is added. */
10876 return off;
10877}
10878
10879
10880
10881/*********************************************************************************************************************************
10882* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
10883*********************************************************************************************************************************/
10884
10885/**
10886 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
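 *
 * The emitted code loads the guest MXCSR (with the exception flag bits masked
 * off) into the first call argument, invokes the helper, ORs the returned
 * MXCSR back into the guest copy, and finally checks whether the helper left
 * any unmasked exception flags set, taking the RaiseSseAvxFpRelated TB exit
 * if so.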
10887 */
10888DECL_INLINE_THROW(uint32_t)
10889iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs, uint8_t idxInstr)
10890{
10891 /* Grab the MXCSR register; it must not be call-volatile or we would end up freeing it when setting up the call below. */
10892 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
10893 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
10894 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
10895
10896#if 0 /* This is not required right now as the called helper will set up the SSE/AVX state if it is an assembly one. */
10897 /*
10898 * Need to do the FPU preparation.
10899 */
10900 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
10901#endif
10902
10903 /*
10904 * Do all the call setup and cleanup.
10905 */
10906 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS,
10907 false /*fFlushPendingWrites*/);
10908
10909 /*
10910 * Load the MXCSR register into the first argument and mask out the current exception flags.
10911 */
10912 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
10913 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
10914
10915 /*
10916 * Make the call.
10917 */
10918 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnAImpl);
10919
10920 /*
10921 * The updated MXCSR is in the return register; update the exception status flags.
10922 *
10923 * The return register is marked allocated as a temporary because it is required for the
10924 * exception generation check below.
10925 */
10926 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG)));
10927 uint8_t const idxRegTmp = iemNativeRegMarkAllocated(pReNative, IEMNATIVE_CALL_RET_GREG, kIemNativeWhat_Tmp);
10928 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegTmp);
10929
10930#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
10931 /* Write back the MXCSR register value (there is no delayed writeback for such registers at the moment). */
10932 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_MxCsr>(pReNative, off, idxRegMxCsr);
10933#endif
10934
10935 /*
10936 * Make sure we don't have any outstanding guest register writes as we may
10937 * raise a \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
10938 */
10939 off = iemNativeRegFlushPendingWrites(pReNative, off);
10940
10941#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10942 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10943#else
10944 RT_NOREF(idxInstr);
10945#endif
10946
10947 /** @todo r=aeichner ANDN from BMI1 would save us a temporary and an additional instruction here, but I don't
10948 * want to assume the existence of this instruction at the moment. */
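/*
 * As a scalar C sketch, the check emitted below amounts to (fMxCsrRet being
 * the MXCSR value returned by the helper; names illustrative only):
 *
 *     uint32_t const fXcptMask = (fMxCsrRet & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT;
 *     if (fMxCsrRet & ~fXcptMask & X86_MXCSR_XCPT_FLAGS)
 *         ... take the RaiseSseAvxFpRelated exit ...
 */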
10949 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
10950
10951 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp2, idxRegTmp);
10952 /* tmp &= X86_MXCSR_XCPT_MASK */
10953 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
10954 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
10955 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
10956 /* tmp = ~tmp */
10957 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
10958 /* tmp &= mxcsr */
10959 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegTmp2);
10960 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseSseAvxFpRelated>(pReNative, off, idxRegTmp,
10961 X86_MXCSR_XCPT_FLAGS);
10962
10963 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
10964 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10965 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
10966
10967 return off;
10968}
10969
10970
10971#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
10972 off = iemNativeEmitCallSseAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
10973
10974/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
10975DECL_INLINE_THROW(uint32_t)
10976iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10977{
10978 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10979 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10980 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
10981}
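
/*
 * A hypothetical MC-block usage of IEM_MC_CALL_SSE_AIMPL_2, purely for
 * orientation -- the helper name and the argument types are assumptions and
 * not taken from the instruction emitters:
 *
 *     IEM_MC_ARG(PX86XMMREG,  pXmmDst, 0);
 *     IEM_MC_ARG(PCX86XMMREG, pXmmSrc, 1);
 *     ...
 *     IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_someSseWorker_u128, pXmmDst, pXmmSrc);
 */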
10982
10983
10984#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
10985 off = iemNativeEmitCallSseAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
10986
10987/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
10988DECL_INLINE_THROW(uint32_t)
10989iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
10990 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10991{
10992 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10993 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10994 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10995 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
10996}
10997
10998
10999/*********************************************************************************************************************************
11000* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
11001*********************************************************************************************************************************/
11002
11003#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
11004 off = iemNativeEmitCallAvxAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
11005
11006/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
11007DECL_INLINE_THROW(uint32_t)
11008iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
11009{
11010 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11011 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11012 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
11013}
11014
11015
11016#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
11017 off = iemNativeEmitCallAvxAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
11018
11019/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
11020DECL_INLINE_THROW(uint32_t)
11021iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
11022 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
11023{
11024 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11025 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11026 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11027 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
11028}
11029
11030
11031
11032/*********************************************************************************************************************************
11033* Include instruction emitters. *
11034*********************************************************************************************************************************/
11035#include "VMMAll/target-x86/IEMAllN8veEmit-x86.h"
11036