VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@105462

Last change on this file since 105462 was 105445, checked in by vboxsync, 7 months ago

VMM/IEM: Fold IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() into IEM_MC_CALL_SSE_AIMPL_X()/IEM_MC_CALL_AVX_AIMPL_X(), bugref:10652

The current way of raising exceptions doesn't work, as the IEM would raise an #XF/#UD whenever an exception is unmasked and the corresponding
exception status flag is set, even if the current instruction wouldn't generate that exception.
The Intel Architecture manual states that exception flags are sticky and need manual clearing through ldmxcsr/xrstor, but an exception
is only generated from an internal set of flags for the current operation. To avoid introducing temporary MXCSR values, which would increase
the overhead for native emitters later on, exception status calculation and raising is now done in the IEM_MC_CALL_SSE_AIMPL_X() and
IEM_MC_CALL_AVX_AIMPL_X() IEM microcode statements.
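
In effect, the MXCSR value produced by the individual operation is checked directly against the mask bits before anything is merged into the sticky guest MXCSR, along the lines of the sketch below (illustrative only; the helper name and the plain bit masks are assumptions, not the actual VMM code):

    /* MXCSR keeps the status flags in bits 0..5 and the matching exception masks in bits 7..12. */
    static bool iemSketchSimdFpXcptPending(uint32_t fMxcsrFromOp)
    {
        uint32_t const fXcpts = fMxcsrFromOp & 0x3f;        /* exceptions raised by this operation only */
        uint32_t const fMasks = (fMxcsrFromOp >> 7) & 0x3f; /* the corresponding exception mask bits */
        return (fXcpts & ~fMasks) != 0;                     /* unmasked -> #XF (or #UD if CR4.OSXMMEXCPT is clear) */
    }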

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 491.6 KB
1/* $Id: IEMAllN8veRecompFuncs.h 105445 2024-07-23 12:17:44Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down configs here to avoid wasting time on unused configs.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81
82
83/*********************************************************************************************************************************
84* Code emitters for flushing pending guest register writes and sanity checks *
85*********************************************************************************************************************************/
86
87#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
88# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
89DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
90{
91 /* Compare the shadow with the context value; they should match. */
92 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
93 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
94 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
95 return off;
96}
97# endif
98#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
99
100/**
101 * Flushes delayed write of a specific guest register.
102 *
103 * This must be called prior to calling CImpl functions and any helpers that use
104 * the guest state (like raising exceptions) and such.
105 *
106 * This optimization has not yet been implemented. The first target would be
107 * RIP updates, since these are the most common ones.
108 */
109DECL_INLINE_THROW(uint32_t)
110iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
111{
112#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
113 /* If for whatever reason it is possible to reference the PC register at some point we need to do the writeback here first. */
114#endif
115
116#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
117#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
118 if ( enmClass == kIemNativeGstRegRef_EFlags
119 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
120 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
121#else
122 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
123#endif
124
125 if ( enmClass == kIemNativeGstRegRef_Gpr
126 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
127 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
128#endif
129
130#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
131 if ( enmClass == kIemNativeGstRegRef_XReg
132 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
133 {
134 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
135 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
136 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
137
138 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
139 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
140 }
141#endif
142 RT_NOREF(pReNative, enmClass, idxReg);
143 return off;
144}
145
146
147
148/*********************************************************************************************************************************
149* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
150*********************************************************************************************************************************/
151
152#undef IEM_MC_BEGIN /* unused */
153#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
154 { \
155 Assert(pReNative->Core.bmVars == 0); \
156 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
157 Assert(pReNative->Core.bmStack == 0); \
158 pReNative->fMc = (a_fMcFlags); \
159 pReNative->fCImpl = (a_fCImplFlags); \
160 pReNative->cArgsX = (a_cArgsIncludingHidden)
161
162/** We have to get to the end in recompilation mode, as otherwise we won't
163 * generate code for all the IEM_MC_IF_XXX branches. */
164#define IEM_MC_END() \
165 iemNativeVarFreeAll(pReNative); \
166 } return off
167
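/*
 * Illustrative sketch: these two macros are expanded inside the recompiler
 * emitter functions generated from the instruction specifications, so a
 * generated body looks roughly like the following (the function name is an
 * assumption, not a real generated symbol):
 */
#if 0 /* illustrative sketch, not part of the file */
static uint32_t iemSketchNativeReCompFunc_xxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
{
    RT_NOREF(pCallEntry);
    IEM_MC_BEGIN_EX(IEM_MC_F_64BIT, 0, 0);  /* opens the block scope and asserts a clean variable state */
    /* ... IEM_MC_XXX statements emitting native code and advancing 'off' ... */
    IEM_MC_END();                           /* frees all variables, closes the scope and returns 'off' */
}
#endif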
168
169
170/*********************************************************************************************************************************
171* Native Emitter Support. *
172*********************************************************************************************************************************/
173
174#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
175
176#define IEM_MC_NATIVE_ELSE() } else {
177
178#define IEM_MC_NATIVE_ENDIF() } ((void)0)
179
180
181#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
182 off = a_fnEmitter(pReNative, off)
183
184#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
185 off = a_fnEmitter(pReNative, off, (a0))
186
187#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
188 off = a_fnEmitter(pReNative, off, (a0), (a1))
189
190#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
191 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
192
193#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
194 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
195
196#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
197 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
198
199#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
200 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
201
202#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
203 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
204
205#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
206 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
207
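/*
 * Illustrative sketch: together, IEM_MC_NATIVE_IF/ELSE/ENDIF and the
 * IEM_MC_NATIVE_EMIT_N macros let an instruction body use a hand-written native
 * emitter on supported hosts and fall back to the generic path elsewhere. The
 * emitter callback name below is an assumption, not a real symbol:
 */
#if 0 /* illustrative sketch, not part of the file */
    IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
        IEM_MC_NATIVE_EMIT_2(iemSketchEmitWorker, idxVarDst, idxVarSrc); /* expands to: off = iemSketchEmitWorker(pReNative, off, idxVarDst, idxVarSrc); */
    IEM_MC_NATIVE_ELSE()
        /* ... the generic IEM_MC_CALL_XXX fallback goes here ... */;
    IEM_MC_NATIVE_ENDIF();
#endif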
208
209#ifndef RT_ARCH_AMD64
210# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
211#else
212/** @note This is a naive approach that ASSUMES that the register isn't
213 * allocated, so it only works safely for the first allocation(s) in
214 * an MC block. */
215# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
216 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
217
218DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off);
219
220DECL_INLINE_THROW(uint32_t)
221iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
222{
223 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
224 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
225 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
226
227# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
228 /* Must flush the register if it holds pending writes. */
229 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
230 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
231 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
232# endif
233
234 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off);
235 return off;
236}
237
238#endif /* RT_ARCH_AMD64 */
239
240
241
242/*********************************************************************************************************************************
243* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
244*********************************************************************************************************************************/
245
246#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
247 pReNative->fMc = 0; \
248 pReNative->fCImpl = (a_fFlags); \
249 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, \
250 a_cbInstr) /** @todo not used ... */
251
252
253#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
254 pReNative->fMc = 0; \
255 pReNative->fCImpl = (a_fFlags); \
256 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
257
258DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
259 uint8_t idxInstr, uint64_t a_fGstShwFlush,
260 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
261{
262 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
263}
264
265
266#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
267 pReNative->fMc = 0; \
268 pReNative->fCImpl = (a_fFlags); \
269 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
270 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
271
272DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
273 uint8_t idxInstr, uint64_t a_fGstShwFlush,
274 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
275{
276 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
277}
278
279
280#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
281 pReNative->fMc = 0; \
282 pReNative->fCImpl = (a_fFlags); \
283 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
284 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
285
286DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
287 uint8_t idxInstr, uint64_t a_fGstShwFlush,
288 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
289 uint64_t uArg2)
290{
291 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
292}
293
294
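/*
 * Illustrative sketch: an instruction that is handled entirely by a C
 * implementation expands to one of the IEM_MC_DEFER_TO_CIMPL_N_RET_THREADED
 * macros above; the generated emitter then looks roughly like this (the
 * function and C-impl names are assumptions, not real symbols):
 */
#if 0 /* illustrative sketch, not part of the file */
static uint32_t iemSketchNativeReCompFunc_deferred(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
{
    /* Emits the call to the C implementation, flushing the requested guest register shadows first. */
    IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(pCallEntry->auParams[0] /*cbInstr*/, IEM_CIMPL_F_STATUS_FLAGS,
                                         RT_BIT_64(kIemNativeGstReg_EFlags), iemCImplSketch_SomeInstruction,
                                         pCallEntry->auParams[1]);
}
#endif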
295
296/*********************************************************************************************************************************
297* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
298*********************************************************************************************************************************/
299
300/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
301 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
302DECL_INLINE_THROW(uint32_t)
303iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
304{
305 /*
306 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
307 * return with a special status code and make the execution loop deal with
308 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
309 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
310 * could continue w/o interruption, it probably will drop into the
311 * debugger, so it is not worth the effort of trying to service it here and
312 * we just lump it in with the handling of the others.
313 *
314 * To simplify the code and the register state management even more (wrt
315 * the immediate in the AND operation), we always update the flags and skip
316 * the extra check and associated conditional jump.
317 */
318 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
319 <= UINT32_MAX);
320#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
321 AssertMsg( pReNative->idxCurCall == 0
322 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
323 IEMLIVENESSBIT_IDX_EFL_OTHER)),
324 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
325 IEMLIVENESSBIT_IDX_EFL_OTHER)));
326#endif
327
328 /*
329 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
330 * any pending register writes must be flushed.
331 */
332 off = iemNativeRegFlushPendingWrites(pReNative, off);
333
334 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
335 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
336 true /*fSkipLivenessAssert*/);
337 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxEflReg,
338 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
339 kIemNativeLabelType_ReturnWithFlags);
340 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
341 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
342
343 /* Free but don't flush the EFLAGS register. */
344 iemNativeRegFreeTmp(pReNative, idxEflReg);
345
346 return off;
347}
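/*
 * Illustrative sketch: the guest-visible effect of the sequence emitted above
 * corresponds roughly to the following C logic (names and signature are
 * assumptions, not recompiler code):
 */
#if 0 /* illustrative sketch, not part of the file */
DECLINLINE(int) iemSketchFinishInstructionFlagsCheck(uint32_t *pfEFlags)
{
    if (*pfEFlags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
        return VINF_IEM_REEXEC_BREAK;   /* leave the TB via the ReturnWithFlags path */
    *pfEFlags &= ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW); /* always cleared, no extra branch */
    return VINF_SUCCESS;
}
#endif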
348
349
350/** Helper for iemNativeEmitFinishInstructionWithStatus. */
351DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
352{
353 unsigned const offOpcodes = pCallEntry->offOpcode;
354 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
355 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
356 {
357 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
358 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
359 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
360 }
361 AssertFailedReturn(NIL_RTGCPHYS);
362}
363
364
365/** The VINF_SUCCESS dummy. */
366template<int const a_rcNormal, bool const a_fIsJump>
367DECL_FORCE_INLINE_THROW(uint32_t)
368iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
369 int32_t const offJump)
370{
371 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
372 if (a_rcNormal != VINF_SUCCESS)
373 {
374#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
375 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
376#else
377 RT_NOREF_PV(pCallEntry);
378#endif
379
380 /* As this code returns from the TB any pending register writes must be flushed. */
381 off = iemNativeRegFlushPendingWrites(pReNative, off);
382
383 /*
384 * Use the lookup table for getting to the next TB quickly.
385 * Note! In this code path there can only be one entry at present.
386 */
387 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
388 PCIEMTB const pTbOrg = pReNative->pTbOrg;
389 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
390 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
391
392#if 0
393 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
394 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
395 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
396 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
397 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
398
399 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreak);
400
401#else
402 /* Load the index as argument #1 for the helper call at the given label. */
403 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
404
405 /*
406 * Figure out the physical address of the current instruction and see
407 * whether the next instruction we're about to execute is in the same
408 * page, so we can optimistically skip TLB loading.
409 *
410 * - This is safe for all cases in FLAT mode.
411 * - In segmented modes it is complicated, given that a negative
412 * jump may underflow EIP and a forward jump may overflow or run into
413 * CS.LIM, triggering a #GP. The only thing we can get away with
414 * now at compile time is forward jumps w/o CS.LIM checks, since the
415 * lack of CS.LIM checks means we're good for the entire physical page
416 * we're executing on and another 15 bytes before we run into CS.LIM.
417 */
418 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
419# if 0 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. See also iemNativeHlpReturnBreakViaLookup. */
420 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS)
421# endif
422 )
423 {
424 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
425 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
426 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
427 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
428
429 {
430 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
431 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
432
433 /* Load the key lookup flags into the 2nd argument for the helper call.
434 - This is safe wrt CS limit checking since we're only here for FLAT modes.
435 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
436 interrupt shadow.
437 - The NMI inhibiting is more questionable, though... */
438 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
439 * Should we copy it into fExec to simplify this? OTOH, it's just a
440 * couple of extra instructions if EFLAGS are already in a register. */
441 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
442 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
443
444 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
445 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookup);
446 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithIrq);
447 }
448 }
449 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
450 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlb);
451 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq);
452#endif
453 }
454 return off;
455}
456
457
458#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
459 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
460 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
461
462#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
463 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
464 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
465 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
466
467/** Same as iemRegAddToRip64AndFinishingNoFlags. */
468DECL_INLINE_THROW(uint32_t)
469iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
470{
471#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
472# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
473 if (!pReNative->Core.offPc)
474 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
475# endif
476
477 /* Allocate a temporary PC register. */
478 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
479
480 /* Perform the addition and store the result. */
481 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
482 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
483
484 /* Free but don't flush the PC register. */
485 iemNativeRegFreeTmp(pReNative, idxPcReg);
486#endif
487
488#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
489 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
490
491 pReNative->Core.offPc += cbInstr;
492# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
493 off = iemNativePcAdjustCheck(pReNative, off);
494# endif
495 if (pReNative->cCondDepth)
496 off = iemNativeEmitPcWriteback(pReNative, off);
497 else
498 pReNative->Core.cInstrPcUpdateSkipped++;
499#endif
500
501 return off;
502}
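/*
 * Illustrative sketch: with IEMNATIVE_WITH_DELAYED_PC_UPDATING the emitter above
 * does not store RIP for every instruction; the advance is accumulated in
 * pReNative->Core.offPc and a later writeback folds it into the guest RIP in one
 * go, roughly like this (names are assumptions, not recompiler code):
 */
#if 0 /* illustrative sketch, not part of the file */
static void iemSketchDelayedPcWriteback(uint64_t *puGstRip, uint64_t *poffPc)
{
    *puGstRip += *poffPc;   /* one addition replaces all the skipped per-instruction updates */
    *poffPc    = 0;         /* nothing pending any longer */
}
#endif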
503
504
505#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
506 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
507 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
508
509#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
510 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
511 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
512 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
513
514/** Same as iemRegAddToEip32AndFinishingNoFlags. */
515DECL_INLINE_THROW(uint32_t)
516iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
517{
518#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
519# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
520 if (!pReNative->Core.offPc)
521 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
522# endif
523
524 /* Allocate a temporary PC register. */
525 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
526
527 /* Perform the addition and store the result. */
528 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
529 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
530
531 /* Free but don't flush the PC register. */
532 iemNativeRegFreeTmp(pReNative, idxPcReg);
533#endif
534
535#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
536 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
537
538 pReNative->Core.offPc += cbInstr;
539# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
540 off = iemNativePcAdjustCheck(pReNative, off);
541# endif
542 if (pReNative->cCondDepth)
543 off = iemNativeEmitPcWriteback(pReNative, off);
544 else
545 pReNative->Core.cInstrPcUpdateSkipped++;
546#endif
547
548 return off;
549}
550
551
552#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
553 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
554 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
555
556#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
557 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
558 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
559 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
560
561/** Same as iemRegAddToIp16AndFinishingNoFlags. */
562DECL_INLINE_THROW(uint32_t)
563iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
564{
565#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
566# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
567 if (!pReNative->Core.offPc)
568 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
569# endif
570
571 /* Allocate a temporary PC register. */
572 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
573
574 /* Perform the addition and store the result. */
575 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
576 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
577 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
578
579 /* Free but don't flush the PC register. */
580 iemNativeRegFreeTmp(pReNative, idxPcReg);
581#endif
582
583#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
584 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
585
586 pReNative->Core.offPc += cbInstr;
587# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
588 off = iemNativePcAdjustCheck(pReNative, off);
589# endif
590 if (pReNative->cCondDepth)
591 off = iemNativeEmitPcWriteback(pReNative, off);
592 else
593 pReNative->Core.cInstrPcUpdateSkipped++;
594#endif
595
596 return off;
597}
598
599
600
601/*********************************************************************************************************************************
602* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
603*********************************************************************************************************************************/
604
605#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
606 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
607 (a_enmEffOpSize), pCallEntry->idxInstr); \
608 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
609
610#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
611 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
612 (a_enmEffOpSize), pCallEntry->idxInstr); \
613 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
614 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
615
616#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
617 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
618 IEMMODE_16BIT, pCallEntry->idxInstr); \
619 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
620
621#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
622 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
623 IEMMODE_16BIT, pCallEntry->idxInstr); \
624 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
625 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
626
627#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
628 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
629 IEMMODE_64BIT, pCallEntry->idxInstr); \
630 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
631
632#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
633 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
634 IEMMODE_64BIT, pCallEntry->idxInstr); \
635 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
636 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
637
638/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
639 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
640 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
641DECL_INLINE_THROW(uint32_t)
642iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
643 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
644{
645 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
646
647 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
648 off = iemNativeRegFlushPendingWrites(pReNative, off);
649
650#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
651 Assert(pReNative->Core.offPc == 0);
652
653 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
654#endif
655
656 /* Allocate a temporary PC register. */
657 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
658
659 /* Perform the addition. */
660 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
661
662 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
663 {
664 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
665 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
666 }
667 else
668 {
669 /* Just truncate the result to 16-bit IP. */
670 Assert(enmEffOpSize == IEMMODE_16BIT);
671 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
672 }
673 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
674
675 /* Free but don't flush the PC register. */
676 iemNativeRegFreeTmp(pReNative, idxPcReg);
677
678 return off;
679}
680
681
682#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
683 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
684 (a_enmEffOpSize), pCallEntry->idxInstr); \
685 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
686
687#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
688 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
689 (a_enmEffOpSize), pCallEntry->idxInstr); \
690 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
691 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
692
693#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
694 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
695 IEMMODE_16BIT, pCallEntry->idxInstr); \
696 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
697
698#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
699 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
700 IEMMODE_16BIT, pCallEntry->idxInstr); \
701 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
702 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
703
704#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
705 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
706 IEMMODE_32BIT, pCallEntry->idxInstr); \
707 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
708
709#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
710 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
711 IEMMODE_32BIT, pCallEntry->idxInstr); \
712 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
713 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
714
715/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
716 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
717 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
718DECL_INLINE_THROW(uint32_t)
719iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
720 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
721{
722 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
723
724 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
725 off = iemNativeRegFlushPendingWrites(pReNative, off);
726
727#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
728 Assert(pReNative->Core.offPc == 0);
729
730 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
731#endif
732
733 /* Allocate a temporary PC register. */
734 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
735
736 /* Perform the addition. */
737 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
738
739 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
740 if (enmEffOpSize == IEMMODE_16BIT)
741 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
742
743 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
744/** @todo we can skip this in 32-bit FLAT mode. */
745 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
746
747 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
748
749 /* Free but don't flush the PC register. */
750 iemNativeRegFreeTmp(pReNative, idxPcReg);
751
752 return off;
753}
754
755
756#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
757 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
758 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
759
760#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
761 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
762 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
763 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
764
765#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
766 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
767 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
768
769#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
770 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
771 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
772 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
773
774#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
775 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
776 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
777
778#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
779 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
780 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
781 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
782
783/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
784DECL_INLINE_THROW(uint32_t)
785iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
786 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
787{
788 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
789 off = iemNativeRegFlushPendingWrites(pReNative, off);
790
791#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
792 Assert(pReNative->Core.offPc == 0);
793
794 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
795#endif
796
797 /* Allocate a temporary PC register. */
798 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
799
800 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
801 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
802 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
803 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
804 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
805
806 /* Free but don't flush the PC register. */
807 iemNativeRegFreeTmp(pReNative, idxPcReg);
808
809 return off;
810}
811
812
813
814/*********************************************************************************************************************************
815* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
816*********************************************************************************************************************************/
817
818/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
819#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
820 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
821
822/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
823#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
824 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
825
826/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
827#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
828 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
829
830/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
831 * clears flags. */
832#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
833 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
834 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
835
836/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
837 * clears flags. */
838#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
839 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
840 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
841
842/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
843 * clears flags. */
844#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
845 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
846 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
847
848#undef IEM_MC_SET_RIP_U16_AND_FINISH
849
850
851/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
852#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
853 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
854
855/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
856#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
857 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
858
859/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
860 * clears flags. */
861#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
862 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
863 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
864
865/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
866 * and clears flags. */
867#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
868 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
869 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
870
871#undef IEM_MC_SET_RIP_U32_AND_FINISH
872
873
874/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
875#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
876 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
877
878/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
879 * and clears flags. */
880#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
881 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
882 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
883
884#undef IEM_MC_SET_RIP_U64_AND_FINISH
885
886
887/** Same as iemRegRipJumpU16AndFinishNoFlags,
888 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
889DECL_INLINE_THROW(uint32_t)
890iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
891 uint8_t idxInstr, uint8_t cbVar)
892{
893 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
894 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
895
896 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
897 off = iemNativeRegFlushPendingWrites(pReNative, off);
898
899#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
900 Assert(pReNative->Core.offPc == 0);
901
902 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
903#endif
904
905 /* Get a register with the new PC loaded from idxVarPc.
906 Note! This ASSUMES that the high bits of the GPR are zeroed. */
907 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
908
909 /* Check limit (may #GP(0) + exit TB). */
910 if (!f64Bit)
911/** @todo we can skip this test in FLAT 32-bit mode. */
912 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
913 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
914 else if (cbVar > sizeof(uint32_t))
915 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
916
917 /* Store the result. */
918 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
919
920 iemNativeVarRegisterRelease(pReNative, idxVarPc);
921 /** @todo implicitly free the variable? */
922
923 return off;
924}
925
926
927
928/*********************************************************************************************************************************
929* Emitters for changing PC/RIP/EIP/IP with a relative call jump (IEM_MC_IND_CALL_UXX_AND_FINISH) (requires stack emitters). *
930*********************************************************************************************************************************/
931
932/** @todo These helpers belong to the stack push API naturally but we already need them up here (we could of course move
933 * this below the stack emitters but then this is not close to the rest of the PC/RIP handling...). */
934DECL_FORCE_INLINE_THROW(uint32_t)
935iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
936{
937 /* Use16BitSp: */
938#ifdef RT_ARCH_AMD64
939 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
940 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
941#else
942 /* sub regeff, regrsp, #cbMem */
943 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
944 /* and regeff, regeff, #0xffff */
945 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
946 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
947 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
948 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
949#endif
950 return off;
951}
952
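/*
 * Illustrative sketch: the instructions emitted above implement the 16-bit stack
 * pointer flavour of a push, i.e. roughly the following computation (variable
 * names are assumptions):
 */
#if 0 /* illustrative sketch, not part of the file */
static void iemSketchStackPushUse16Sp(uint64_t *puRsp, uint64_t *puEffSp, uint8_t cbMem)
{
    uint16_t const uNewSp = (uint16_t)(*puRsp - cbMem);   /* only SP (bits 15:0) is decremented */
    *puEffSp = uNewSp;                                    /* the effective address uses the 16-bit SP */
    *puRsp   = (*puRsp & ~(uint64_t)0xffff) | uNewSp;     /* bits 63:16 of RSP are left untouched */
}
#endif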
953
954DECL_FORCE_INLINE(uint32_t)
955iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
956{
957 /* Use32BitSp: */
958 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
959 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
960 return off;
961}
962
963
964DECL_INLINE_THROW(uint32_t)
965iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
966 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
967{
968 /*
969 * Assert sanity.
970 */
971#ifdef VBOX_STRICT
972 if (RT_BYTE2(cBitsVarAndFlat) != 0)
973 {
974 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
975 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
976 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
977 Assert( pfnFunction
978 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
979 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
980 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
981 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
982 : UINT64_C(0xc000b000a0009000) ));
983 }
984 else
985 Assert( pfnFunction
986 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
987 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
988 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
989 : UINT64_C(0xc000b000a0009000) ));
990#endif
991
992#ifdef VBOX_STRICT
993 /*
994 * Check that the fExec flags we've got make sense.
995 */
996 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
997#endif
998
999 /*
1000 * To keep things simple we have to commit any pending writes first as we
1001 * may end up making calls.
1002 */
1003 /** @todo we could postpone this till we make the call and reload the
1004 * registers after returning from the call. Not sure if that's sensible or
1005 * not, though. */
1006 off = iemNativeRegFlushPendingWrites(pReNative, off);
1007
1008 /*
1009 * First we calculate the new RSP and the effective stack pointer value.
1010 * For 64-bit mode and flat 32-bit these two are the same.
1011 * (Code structure is very similar to that of PUSH)
1012 */
1013 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
1014 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
1015 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
1016 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
1017 ? cbMem : sizeof(uint16_t);
1018 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
1019 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1020 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1021 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1022 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1023 if (cBitsFlat != 0)
1024 {
1025 Assert(idxRegEffSp == idxRegRsp);
1026 Assert(cBitsFlat == 32 || cBitsFlat == 64);
1027 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1028 if (cBitsFlat == 64)
1029 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1030 else
1031 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1032 }
1033 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1034 {
1035 Assert(idxRegEffSp != idxRegRsp);
1036 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1037 kIemNativeGstRegUse_ReadOnly);
1038#ifdef RT_ARCH_AMD64
1039 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1040#else
1041 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1042#endif
1043 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1044 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1045 offFixupJumpToUseOtherBitSp = off;
1046 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1047 {
1048 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1049 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1050 }
1051 else
1052 {
1053 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1054 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1055 }
1056 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1057 }
1058 /* SpUpdateEnd: */
1059 uint32_t const offLabelSpUpdateEnd = off;
1060
1061 /*
1062 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1063 * we're skipping lookup).
1064 */
1065 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1066 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
1067 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1068 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1069 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1070 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1071 : UINT32_MAX;
1072 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1073
1074
1075 if (!TlbState.fSkip)
1076 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1077 else
1078 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1079
1080 /*
1081 * Use16BitSp:
1082 */
1083 if (cBitsFlat == 0)
1084 {
1085#ifdef RT_ARCH_AMD64
1086 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1087#else
1088 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1089#endif
1090 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1091 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1092 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1093 else
1094 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1095 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1096 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1097 }
1098
1099 /*
1100 * TlbMiss:
1101 *
1102 * Call helper to do the pushing.
1103 */
1104 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1105
1106#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1107 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1108#else
1109 RT_NOREF(idxInstr);
1110#endif
1111
1112 /* Save variables in volatile registers. */
1113 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1114 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1115 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1116 | (RT_BIT_32(idxRegPc));
1117 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1118
1119 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1120 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1121 {
1122 /* Swap them using ARG0 as temp register: */
1123 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1124 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1125 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1126 }
1127 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1128 {
1129 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1130 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1131
1132 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1133 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1134 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1135 }
1136 else
1137 {
1138 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1139 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1140
1141 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1142 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1143 }
1144
1145 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1146 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1147
1148 /* Done setting up parameters, make the call. */
1149 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1150
1151 /* Restore variables and guest shadow registers to volatile registers. */
1152 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1153 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1154
1155#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1156 if (!TlbState.fSkip)
1157 {
1158 /* end of TlbMiss - Jump to the done label. */
1159 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1160 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1161
1162 /*
1163 * TlbLookup:
1164 */
1165 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
1166 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1167
1168 /*
1169 * Emit code to do the actual storing / fetching.
1170 */
1171 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1172# ifdef IEM_WITH_TLB_STATISTICS
1173 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1174 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1175# endif
1176 switch (cbMemAccess)
1177 {
1178 case 2:
1179 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1180 break;
1181 case 4:
1182 if (!fIsIntelSeg)
1183 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1184 else
1185 {
1186 /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
1187 PUSH FS in real mode, so we have to try to emulate that here.
1188 We borrow the now unused idxReg1 from the TLB lookup code here. */
1189 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
1190 kIemNativeGstReg_EFlags);
1191 if (idxRegEfl != UINT8_MAX)
1192 {
1193#ifdef RT_ARCH_AMD64
1194 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
1195 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1196 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1197#else
1198 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
1199 off, TlbState.idxReg1, idxRegEfl,
1200 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1201#endif
1202 iemNativeRegFreeTmp(pReNative, idxRegEfl);
1203 }
1204 else
1205 {
1206 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
1207 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
1208 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1209 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1210 }
1211 /* ASSUMES the upper half of idxRegPc is ZERO. */
1212 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegPc);
1213 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
1214 }
1215 break;
1216 case 8:
1217 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1218 break;
1219 default:
1220 AssertFailed();
1221 }
1222
1223 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1224 TlbState.freeRegsAndReleaseVars(pReNative);
1225
1226 /*
1227 * TlbDone:
1228 *
1229 * Commit the new RSP value.
1230 */
1231 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1232 }
1233#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1234
1235#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1236 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
1237#endif
1238 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1239 if (idxRegEffSp != idxRegRsp)
1240 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1241
1242 return off;
1243}
1244
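/*
 * Illustrative sketch (not part of the recompiler): the 32-bit value composed above for
 * the Intel real-mode segment-push quirk, written out as plain C.  The helper name and
 * standalone form are hypothetical; the emitted code builds the same value in host
 * registers (TlbState.idxReg1 holding the masked EFLAGS, idxRegPc the value being
 * pushed with its upper bits assumed zero).
 */
#if 0 /* documentation-only example */
static uint32_t iemSketchRealModeSegPushValue(uint32_t fEflags, uint16_t uValue)
{
    /* Upper half: EFLAGS bits 31:16 with the read-as-zero bits cleared. */
    uint32_t const uHi = fEflags & UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK;
    /* Lower half: the 16-bit value being pushed. */
    return uHi | uValue;
}
#endif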
1245
1246/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1247#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1248 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1249
1250/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1251 * clears flags. */
1252#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1253 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1254 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1255
1256/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1257#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1258 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1259
1260/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1261 * clears flags. */
1262#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1263 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1264 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1265
1266#undef IEM_MC_IND_CALL_U16_AND_FINISH
1267
1268
1269/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1270#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1271 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1272
1273/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1274 * clears flags. */
1275#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1276 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1277 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1278
1279#undef IEM_MC_IND_CALL_U32_AND_FINISH
1280
1281
1282/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1283 * an extra parameter, for use in 64-bit code. */
1284#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1285 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1286
1287
1288/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1289 * an extra parameter, for use in 64-bit code and we need to check and clear
1290 * flags. */
1291#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1292 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1293 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1294
1295#undef IEM_MC_IND_CALL_U64_AND_FINISH
1296
1297/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1298 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1299DECL_INLINE_THROW(uint32_t)
1300iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1301 uint8_t idxInstr, uint8_t cbVar)
1302{
1303 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1304 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1305
1306 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1307 off = iemNativeRegFlushPendingWrites(pReNative, off);
1308
1309#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1310 Assert(pReNative->Core.offPc == 0);
1311
1312 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1313#endif
1314
1315 /* Get a register with the new PC loaded from idxVarPc.
1316 Note! This ASSUMES that the high bits of the GPR is zeroed. */
1317 uint8_t const idxNewPcReg = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1318
1319 /* Check limit (may #GP(0) + exit TB). */
1320 if (!f64Bit)
1321/** @todo we can skip this test in FLAT 32-bit mode. */
1322 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1323 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1324 else if (cbVar > sizeof(uint32_t))
1325 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1326
1327#if 1
1328 /* Allocate a temporary PC register, we don't want it shadowed. */
1329 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1330 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1331#else
1332 /* Allocate a temporary PC register. */
1333 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1334 true /*fNoVolatileRegs*/);
1335#endif
1336
1337 /* Perform the addition and push the variable to the guest stack. */
1338 /** @todo Flat variants for PC32 variants. */
1339 switch (cbVar)
1340 {
1341 case sizeof(uint16_t):
1342 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1343 /* Truncate the result to 16-bit IP. */
1344 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1345 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1346 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1347 break;
1348 case sizeof(uint32_t):
1349 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1350 /** @todo In FLAT mode we can use the flat variant. */
1351 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1352 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1353 break;
1354 case sizeof(uint64_t):
1355 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1356 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1357 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1358 break;
1359 default:
1360 AssertFailed();
1361 }
1362
1363 /* RSP got changed, so do this again. */
1364 off = iemNativeRegFlushPendingWrites(pReNative, off);
1365
1366 /* Store the result. */
1367 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1368
1369#if 1
1370 /* Need to transfer the shadow information to the new RIP register. */
1371 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxNewPcReg, kIemNativeGstReg_Pc, off);
1372#else
1373 /* Sync the new PC. */
1374 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxNewPcReg);
1375#endif
1376 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1377 iemNativeRegFreeTmp(pReNative, idxPcReg);
1378 /** @todo implicitly free the variable? */
1379
1380 return off;
1381}
1382
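/*
 * Illustrative sketch (not part of the recompiler): the guest-visible effect of the
 * indirect-call emitter above, expressed over plain values.  The function and parameter
 * names are hypothetical; the real code validates the new PC (CS limit for 16/32-bit,
 * canonical check for 64-bit) before touching any state and performs the stack store
 * through the push helpers referenced above.
 */
#if 0 /* documentation-only example */
static void iemSketchIndirectCall(uint64_t *puRip, uint64_t uNewPc, uint8_t cbInstr, uint8_t cbOp)
{
    /* The return address is the current PC advanced past this instruction, truncated
       to the operand size for the 16-bit variant. */
    uint64_t uRetAddr = *puRip + cbInstr;
    if (cbOp == sizeof(uint16_t))
        uRetAddr &= UINT16_MAX;
    /* ... uRetAddr is pushed as a cbOp sized value (may fault) ... */
    *puRip = uNewPc; /* Committed only if no exception was raised. */
    RT_NOREF(uRetAddr);
}
#endif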
1383
1384/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1385 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1386#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1387 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1388
1389/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1390 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
1391 * flags. */
1392#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
1393 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
1394 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1395
1396/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1397 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1398#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
1399 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1400
1401/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1402 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1403 * flags. */
1404#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
1405 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
1406 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1407
1408/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1409 * an extra parameter, for use in 64-bit code. */
1410#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
1411 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1412
1413/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1414 * an extra parameter, for use in 64-bit code and we need to check and clear
1415 * flags. */
1416#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
1417 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
1418 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1419
1420#undef IEM_MC_REL_CALL_S16_AND_FINISH
1421
1422/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1423 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1424DECL_INLINE_THROW(uint32_t)
1425iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
1426 uint8_t idxInstr)
1427{
1428 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1429 off = iemNativeRegFlushPendingWrites(pReNative, off);
1430
1431#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1432 Assert(pReNative->Core.offPc == 0);
1433
1434 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1435#endif
1436
1437 /* Allocate a temporary PC register. */
1438 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1439 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1440 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1441
1442 /* Calculate the return address (old IP + cbInstr) in idxPcRegOld and the new IP in idxPcRegNew. */
1443 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
1444 /* Truncate the result to 16-bit IP. */
1445 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
1446 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
1447 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
1448
1449 /* Truncate the result to 16-bit IP. */
1450 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
1451
1452 /* Check limit (may #GP(0) + exit TB). */
1453 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1454
1455 /* Push the return address to the guest stack. */
1456 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1457 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1458
1459 /* RSP got changed, so flush again. */
1460 off = iemNativeRegFlushPendingWrites(pReNative, off);
1461
1462 /* Store the result. */
1463 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1464
1465 /* Need to transfer the shadow information to the new RIP register. */
1466 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1467 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1468 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1469
1470 return off;
1471}
1472
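/*
 * Illustrative sketch (not part of the recompiler): the IP arithmetic performed by the
 * 16-bit relative-call emitter above.  Note the double truncation: both the return
 * address and the call target wrap at 64K.  Names are hypothetical.
 */
#if 0 /* documentation-only example */
static void iemSketchRelCall16(uint16_t *puIp, int16_t offDisp, uint8_t cbInstr)
{
    uint16_t const uRetIp = (uint16_t)(*puIp + cbInstr);  /* pushed on the stack as a 16-bit value */
    uint16_t const uNewIp = (uint16_t)(uRetIp + offDisp); /* checked against the CS limit before committing */
    /* ... push uRetIp ... */
    *puIp = uNewIp;
}
#endif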
1473
1474/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
1475 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1476#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
1477 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
1478
1479/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
1480 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1481 * flags. */
1482#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
1483 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
1484 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1485
1486#undef IEM_MC_REL_CALL_S32_AND_FINISH
1487
1488/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1489 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1490DECL_INLINE_THROW(uint32_t)
1491iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
1492 uint8_t idxInstr)
1493{
1494 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1495 off = iemNativeRegFlushPendingWrites(pReNative, off);
1496
1497#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1498 Assert(pReNative->Core.offPc == 0);
1499
1500 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1501#endif
1502
1503 /* Allocate a temporary PC register. */
1504 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1505 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1506 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1507
1508 /* Update the EIP to get the return address. */
1509 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
1510
1511 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1512 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
1513 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
1514 /** @todo we can skip this test in FLAT 32-bit mode. */
1515 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1516
1517 /* Push the return address to the guest stack. */
1518 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
1519 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1520 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1521
1522 /* RSP got changed, so do this again. */
1523 off = iemNativeRegFlushPendingWrites(pReNative, off);
1524
1525 /* Store the result. */
1526 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1527
1528 /* Need to transfer the shadow information to the new RIP register. */
1529 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1530 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1531 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1532
1533 return off;
1534}
1535
1536
1537/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
1538 * an extra parameter, for use in 64-bit code. */
1539#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
1540 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
1541
1542/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
1543 * an extra parameter, for use in 64-bit code and we need to check and clear
1544 * flags. */
1545#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
1546 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
1547 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1548
1549#undef IEM_MC_REL_CALL_S64_AND_FINISH
1550
1551/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1552 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1553DECL_INLINE_THROW(uint32_t)
1554iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
1555 uint8_t idxInstr)
1556{
1557 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1558 off = iemNativeRegFlushPendingWrites(pReNative, off);
1559
1560#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1561 Assert(pReNative->Core.offPc == 0);
1562
1563 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1564#endif
1565
1566 /* Allocate a temporary PC register. */
1567 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1568 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1569 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1570
1571 /* Update the RIP to get the return address. */
1572 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
1573
1574 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1575 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
1576 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
1577 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1578
1580 /* Push the return address to the guest stack. */
1580 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1581 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1582
1583 /* RSP got changed, so do this again. */
1584 off = iemNativeRegFlushPendingWrites(pReNative, off);
1585
1586 /* Store the result. */
1587 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1588
1589 /* Need to transfer the shadow information to the new RIP register. */
1590 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1591 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1592 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1593
1594 return off;
1595}
1596
1597
1598/*********************************************************************************************************************************
1599* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters). *
1600*********************************************************************************************************************************/
1601
1602DECL_FORCE_INLINE_THROW(uint32_t)
1603iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
1604 uint16_t cbPopAdd, uint8_t idxRegTmp)
1605{
1606 /* Use16BitSp: */
1607#ifdef RT_ARCH_AMD64
1608 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1609 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1610 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
1611 RT_NOREF(idxRegTmp);
1612#elif defined(RT_ARCH_ARM64)
1613 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
1614 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
1615 /* add tmp, regrsp, #(cbMem + cbPopAdd) (in up to two adds if the immediate doesn't fit in 12 bits) */
1616 uint16_t const cbCombined = cbMem + cbPopAdd;
1617 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
1618 if (cbCombined >= RT_BIT_32(12))
1619 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
1620 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
1621 /* and tmp, tmp, #0xffff */
1622 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1623 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
1624 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
1625 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
1626#else
1627# error "Port me"
1628#endif
1629 return off;
1630}
1631
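/*
 * Illustrative sketch (not part of the recompiler): what the 16-bit SP variant above
 * computes, in plain C.  The effective stack address comes from the old SP, while only
 * the low 16 bits of RSP are advanced (wrapping at 64K) and the upper bits are
 * preserved.  Names are hypothetical.
 */
#if 0 /* documentation-only example */
static uint64_t iemSketchRetnUpdateSp16(uint64_t *puRsp, uint8_t cbMem, uint16_t cbPopAdd)
{
    uint16_t const uOldSp = (uint16_t)*puRsp;                      /* effective stack address */
    uint16_t const uNewSp = (uint16_t)(uOldSp + cbMem + cbPopAdd); /* return-address size + RETN imm16 */
    *puRsp = (*puRsp & ~(uint64_t)UINT16_MAX) | uNewSp;            /* keep RSP bits 63:16 unchanged */
    return uOldSp;                                                 /* where the return address is read from */
}
#endif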
1632
1633DECL_FORCE_INLINE_THROW(uint32_t)
1634iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
1635 uint16_t cbPopAdd)
1636{
1637 /* Use32BitSp: */
1638 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1639 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
1640 return off;
1641}
1642
1643
1644/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
1645#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr) \
1646 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, IEMMODE_16BIT, pCallEntry->idxInstr)
1647
1648/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
1649#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1650 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
1651
1652/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
1653#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1654 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), true /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
1655
1656/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
1657 * clears flags. */
1658#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16Pop, a_cbInstr) \
1659 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr); \
1660 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1661
1662/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
1663 * clears flags. */
1664#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1665 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
1666 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1667
1668/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
1669 * clears flags. */
1670#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1671 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
1672 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1673
1674/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
1675DECL_INLINE_THROW(uint32_t)
1676iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPop, bool f64Bit,
1677 IEMMODE enmEffOpSize, uint8_t idxInstr)
1678{
1679 RT_NOREF(cbInstr);
1680
1681#ifdef VBOX_STRICT
1682 /*
1683 * Check that the fExec flags we've got make sense.
1684 */
1685 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1686#endif
1687
1688 /*
1689 * To keep things simple we have to commit any pending writes first as we
1690 * may end up making calls.
1691 */
1692 off = iemNativeRegFlushPendingWrites(pReNative, off);
1693
1694 /*
1695 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
1696 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
1697 * directly as the effective stack pointer.
1698 * (Code structure is very similar to that of PUSH)
1699 *
1700 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
1701 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
1702 * aren't commonly used (or useful) and thus not in need of optimizing.
1703 *
1704 * Note! For non-flat modes the guest RSP is allocated for calculation rather than for update,
1705 * because otherwise the shadowed register would remain modified even if the return address
1706 * raises a \#GP(0) for being outside the CS limit, leaving a wrong stack pointer value in the
1707 * guest (see the near return testcase in bs3-cpu-basic-2). If no exception is raised, the
1708 * shadowing is transferred to the new register returned by iemNativeRegAllocTmpForGuestReg() at the end.
1709 */
1710 uint8_t const cbMem = enmEffOpSize == IEMMODE_64BIT
1711 ? sizeof(uint64_t)
1712 : enmEffOpSize == IEMMODE_32BIT
1713 ? sizeof(uint32_t)
1714 : sizeof(uint16_t);
1715 bool const fFlat = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) && enmEffOpSize != IEMMODE_16BIT; /* see note */
1716 uintptr_t const pfnFunction = fFlat
1717 ? enmEffOpSize == IEMMODE_64BIT
1718 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
1719 : (uintptr_t)iemNativeHlpStackFlatFetchU32
1720 : enmEffOpSize == IEMMODE_32BIT
1721 ? (uintptr_t)iemNativeHlpStackFetchU32
1722 : (uintptr_t)iemNativeHlpStackFetchU16;
1723 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1724 fFlat ? kIemNativeGstRegUse_ForUpdate
1725 : kIemNativeGstRegUse_Calculation,
1726 true /*fNoVolatileRegs*/);
1727 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1728 /** @todo can do a better job picking the register here. For cbMem >= 4 this
1729 * will be the resulting register value. */
1730 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
1731
1732 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1733 if (fFlat)
1734 Assert(idxRegEffSp == idxRegRsp);
1735 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1736 {
1737 Assert(idxRegEffSp != idxRegRsp);
1738 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1739 kIemNativeGstRegUse_ReadOnly);
1740#ifdef RT_ARCH_AMD64
1741 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1742#else
1743 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1744#endif
1745 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1746 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1747 offFixupJumpToUseOtherBitSp = off;
1748 if (enmEffOpSize == IEMMODE_32BIT)
1749 {
1750 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1751 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
1752 }
1753 else
1754 {
1755 Assert(enmEffOpSize == IEMMODE_16BIT);
1756 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1757 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
1758 idxRegMemResult);
1759 }
1760 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1761 }
1762 /* SpUpdateEnd: */
1763 uint32_t const offLabelSpUpdateEnd = off;
1764
1765 /*
1766 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1767 * we're skipping lookup).
1768 */
1769 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
1770 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
1771 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1772 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1773 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1774 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1775 : UINT32_MAX;
1776
1777 if (!TlbState.fSkip)
1778 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1779 else
1780 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1781
1782 /*
1783 * Use16BitSp:
1784 */
1785 if (!fFlat)
1786 {
1787#ifdef RT_ARCH_AMD64
1788 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1789#else
1790 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1791#endif
1792 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1793 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1794 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
1795 idxRegMemResult);
1796 else
1797 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
1798 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1799 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1800 }
1801
1802 /*
1803 * TlbMiss:
1804 *
1805 * Call helper to do the popping.
1806 */
1807 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1808
1809#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1810 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1811#else
1812 RT_NOREF(idxInstr);
1813#endif
1814
1815 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1816 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1817 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
1818 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1819
1820
1821 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
1822 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1823 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1824
1825 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1826 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1827
1828 /* Done setting up parameters, make the call. */
1829 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1830
1831 /* Move the return register content to idxRegMemResult. */
1832 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
1833 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
1834
1835 /* Restore variables and guest shadow registers to volatile registers. */
1836 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1837 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1838
1839#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1840 if (!TlbState.fSkip)
1841 {
1842 /* end of TlbMiss - Jump to the done label. */
1843 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1844 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1845
1846 /*
1847 * TlbLookup:
1848 */
1849 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
1850 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1851
1852 /*
1853 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
1854 */
1855 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1856# ifdef IEM_WITH_TLB_STATISTICS
1857 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1858 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1859# endif
1860 switch (cbMem)
1861 {
1862 case 2:
1863 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1864 break;
1865 case 4:
1866 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1867 break;
1868 case 8:
1869 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1870 break;
1871 default:
1872 AssertFailed();
1873 }
1874
1875 TlbState.freeRegsAndReleaseVars(pReNative);
1876
1877 /*
1878 * TlbDone:
1879 *
1880 * Set the new RSP value (FLAT accesses need to calculate it first) and
1881 * commit the popped register value.
1882 */
1883 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1884 }
1885#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1886
1887 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
1888 if (!f64Bit)
1889/** @todo we can skip this test in FLAT 32-bit mode. */
1890 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
1891 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1892 else if (enmEffOpSize == IEMMODE_64BIT)
1893 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
1894
1895 /* Complete RSP calculation for FLAT mode. */
1896 if (idxRegEffSp == idxRegRsp)
1897 {
1898 if (enmEffOpSize == IEMMODE_64BIT)
1899 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPop);
1900 else
1901 {
1902 Assert(enmEffOpSize == IEMMODE_32BIT);
1903 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPop);
1904 }
1905 }
1906
1907 /* Commit the result and clear any current guest shadows for RIP. */
1908 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
1909 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1910 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
1911
1912 /* Need to transfer the shadowing information to the host register containing the updated value now. */
1913 if (!fFlat)
1914 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
1915
1916 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1917 if (idxRegEffSp != idxRegRsp)
1918 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1919 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1920 return off;
1921}
1922
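/*
 * Illustrative sketch (not part of the recompiler): the guest-visible effect of the
 * near-return emitter above for the FLAT 64-bit case, over plain values.  Names are
 * hypothetical; the real code validates the return address (canonical check or CS
 * limit) before committing RSP and RIP, and does the memory read via the TLB lookup
 * path or the helper call.
 */
#if 0 /* documentation-only example */
static void iemSketchRetn64(uint64_t *puRip, uint64_t *puRsp, uint16_t cbPop,
                            uint64_t (*pfnReadU64)(uint64_t uAddr))
{
    uint64_t const uRetAddr = pfnReadU64(*puRsp); /* pop the return address */
    /* ... #GP(0) if uRetAddr is not canonical ... */
    *puRsp += sizeof(uint64_t) + cbPop;           /* release the slot plus the RETN imm16 */
    *puRip  = uRetAddr;
}
#endif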
1923
1924/*********************************************************************************************************************************
1925* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
1926*********************************************************************************************************************************/
1927
1928#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
1929 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
1930
1931/**
1932 * Emits code to check if a \#NM exception should be raised.
1933 *
1934 * @returns New code buffer offset, UINT32_MAX on failure.
1935 * @param pReNative The native recompile state.
1936 * @param off The code buffer offset.
1937 * @param idxInstr The current instruction.
1938 */
1939DECL_INLINE_THROW(uint32_t)
1940iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1941{
1942#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1943 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
1944
1945 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
1946 {
1947#endif
1948 /*
1949 * Make sure we don't have any outstanding guest register writes as we may
1950 * raise an #NM and all guest registers must be up to date in CPUMCTX.
1951 */
1952 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
1953 off = iemNativeRegFlushPendingWrites(pReNative, off);
1954
1955#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1956 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1957#else
1958 RT_NOREF(idxInstr);
1959#endif
1960
1961 /* Allocate a temporary CR0 register. */
1962 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
1963 kIemNativeGstRegUse_ReadOnly);
1964
1965 /*
1966 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
1967 * return raisexcpt();
1968 */
1969 /* Test and jump. */
1970 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS,
1971 kIemNativeLabelType_RaiseNm);
1972
1973 /* Free but don't flush the CR0 register. */
1974 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1975
1976#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1977 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
1978 }
1979 else
1980 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
1981#endif
1982
1983 return off;
1984}
1985
1986
1987#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
1988 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
1989
1990/**
1991 * Emits code to check if a \#NM exception should be raised (WAIT/FWAIT variant: CR0.MP and CR0.TS both set).
1992 *
1993 * @returns New code buffer offset, UINT32_MAX on failure.
1994 * @param pReNative The native recompile state.
1995 * @param off The code buffer offset.
1996 * @param idxInstr The current instruction.
1997 */
1998DECL_INLINE_THROW(uint32_t)
1999iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2000{
2001#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2002 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
2003
2004 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
2005 {
2006#endif
2007 /*
2008 * Make sure we don't have any outstanding guest register writes as we may
2009 * raise an #NM and all guest registers must be up to date in CPUMCTX.
2010 */
2011 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2012 off = iemNativeRegFlushPendingWrites(pReNative, off);
2013
2014#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2015 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2016#else
2017 RT_NOREF(idxInstr);
2018#endif
2019
2020 /* Allocate a temporary CR0 register. */
2021 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2022 kIemNativeGstRegUse_Calculation);
2023
2024 /*
2025 * if ((cr0 & (X86_CR0_MP | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))
2026 * return raisexcpt();
2027 */
2028 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2029 /* Test and jump. */
2030 off = iemNativeEmitTestIfGpr32EqualsImmAndTbExit(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS,
2031 kIemNativeLabelType_RaiseNm);
2032
2033 /* Free the CR0 register. */
2034 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2035
2036#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2037 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2038 }
2039 else
2040 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2041#endif
2042
2043 return off;
2044}
2045
2046
2047#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2048 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2049
2050/**
2051 * Emits code to check if a \#MF exception should be raised.
2052 *
2053 * @returns New code buffer offset, UINT32_MAX on failure.
2054 * @param pReNative The native recompile state.
2055 * @param off The code buffer offset.
2056 * @param idxInstr The current instruction.
2057 */
2058DECL_INLINE_THROW(uint32_t)
2059iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2060{
2061 /*
2062 * Make sure we don't have any outstanding guest register writes as we may
2063 * raise an #MF and all guest registers must be up to date in CPUMCTX.
2064 */
2065 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2066 off = iemNativeRegFlushPendingWrites(pReNative, off);
2067
2068#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2069 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2070#else
2071 RT_NOREF(idxInstr);
2072#endif
2073
2074 /* Allocate a temporary FSW register. */
2075 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
2076 kIemNativeGstRegUse_ReadOnly);
2077
2078 /*
2079 * if ((FSW & X86_FSW_ES) != 0)
2080 * return raisexcpt();
2081 */
2082 /* Test and jump. */
2083 off = iemNativeEmitTestBitInGprAndTbExitIfSet(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT, kIemNativeLabelType_RaiseMf);
2084
2085 /* Free but don't flush the FSW register. */
2086 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2087
2088 return off;
2089}
2090
2091
2092#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2093 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2094
2095/**
2096 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2097 *
2098 * @returns New code buffer offset, UINT32_MAX on failure.
2099 * @param pReNative The native recompile state.
2100 * @param off The code buffer offset.
2101 * @param idxInstr The current instruction.
2102 */
2103DECL_INLINE_THROW(uint32_t)
2104iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2105{
2106#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2107 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2108
2109 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2110 {
2111#endif
2112 /*
2113 * Make sure we don't have any outstanding guest register writes as we may
2114 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2115 */
2116 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2117 off = iemNativeRegFlushPendingWrites(pReNative, off);
2118
2119#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2120 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2121#else
2122 RT_NOREF(idxInstr);
2123#endif
2124
2125 /* Allocate a temporary CR0 and CR4 register. */
2126 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2127 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2128 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2129
2130 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2131#ifdef RT_ARCH_AMD64
2132 /*
2133 * We do a modified test here:
2134 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
2135 * else { goto RaiseSseRelated; }
2136 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
2137 * all targets except the 386, which doesn't support SSE anyway, so
2138 * this should be a safe assumption.
2139 */
2140 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
2141 //pCodeBuf[off++] = 0xcc;
2142 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2143 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2144 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2145 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2146 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2147 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseSseRelated, kIemNativeInstrCond_ne);
2148
2149#elif defined(RT_ARCH_ARM64)
2150 /*
2151 * We do a modified test here:
2152 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2153 * else { goto RaiseSseRelated; }
2154 */
2155 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
2156 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2157 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2158 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2159 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2160 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2161 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2162 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2163 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2164 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2165 kIemNativeLabelType_RaiseSseRelated);
2166
2167#else
2168# error "Port me!"
2169#endif
2170
2171 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2172 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2173 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2174 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2175
2176#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2177 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2178 }
2179 else
2180 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2181#endif
2182
2183 return off;
2184}
2185
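/*
 * Illustrative sketch (not part of the recompiler): the folded SSE check emitted above
 * is, assuming CR0 bit 9 reads as zero, equivalent to the straightforward predicate
 * below; the emitted code merely evaluates it with one temporary register and a single
 * conditional branch.  The function and parameter names are hypothetical.
 */
#if 0 /* documentation-only example */
static bool iemSketchMustRaiseSseRelatedXcpt(uint32_t uCr0, uint32_t uCr4)
{
    /* Straightforward form: #UD if CR4.OSFXSR is clear, #NM if CR0.EM or CR0.TS is set.
       Folded form used by the emitter: the result is non-zero exactly when one of
       those conditions holds. */
    uint32_t uFolded = ((uCr4 & X86_CR4_OSFXSR) | uCr0) & (X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
    uFolded ^= X86_CR4_OSFXSR;
    return uFolded != 0;
}
#endif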
2186
2187#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2188 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2189
2190/**
2191 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2192 *
2193 * @returns New code buffer offset, UINT32_MAX on failure.
2194 * @param pReNative The native recompile state.
2195 * @param off The code buffer offset.
2196 * @param idxInstr The current instruction.
2197 */
2198DECL_INLINE_THROW(uint32_t)
2199iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2200{
2201#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2202 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2203
2204 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2205 {
2206#endif
2207 /*
2208 * Make sure we don't have any outstanding guest register writes as we may
2209 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2210 */
2211 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2212 off = iemNativeRegFlushPendingWrites(pReNative, off);
2213
2214#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2215 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2216#else
2217 RT_NOREF(idxInstr);
2218#endif
2219
2220 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2221 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2222 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2223 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2224 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2225
2226 /*
2227 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2228 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2229 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2230 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2231 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2232 * { likely }
2233 * else { goto RaiseAvxRelated; }
2234 */
2235#ifdef RT_ARCH_AMD64
2236 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2237 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
2238 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2239 ^ 0x1a) ) { likely }
2240 else { goto RaiseAvxRelated; } */
2241 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
2242 //pCodeBuf[off++] = 0xcc;
2243 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2244 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2245 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2246 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2247 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2248 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2249 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2250 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2251 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2252 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2253 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseAvxRelated, kIemNativeInstrCond_ne);
2254
2255#elif defined(RT_ARCH_ARM64)
2256 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
2257 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
2258 else { goto RaiseAvxRelated; } */
2259 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
2260 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2261 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2262 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2263 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2264 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2265 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2266 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2267 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2268 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2269 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2270 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2271 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2272 kIemNativeLabelType_RaiseAvxRelated);
2273
2274#else
2275# error "Port me!"
2276#endif
2277
2278 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2279 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2280 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2281 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2282#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2283 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2284 }
2285 else
2286 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2287#endif
2288
2289 return off;
2290}
2291
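/*
 * Illustrative sketch (not part of the recompiler): the condition quoted from
 * IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT above, written as a plain predicate; the emitted
 * code folds it into a handful of bit operations and a single branch.  Names are
 * hypothetical.
 */
#if 0 /* documentation-only example */
static bool iemSketchMustRaiseAvxRelatedXcpt(uint64_t uXcr0, uint32_t uCr4, uint32_t uCr0)
{
    /* No exception when XCR0 enables both YMM and SSE state, CR4.OSXSAVE is set and CR0.TS is clear. */
    bool const fOkay =    (uXcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) == (XSAVE_C_YMM | XSAVE_C_SSE)
                       && (uCr4 & X86_CR4_OSXSAVE)
                       && !(uCr0 & X86_CR0_TS);
    return !fOkay;
}
#endif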
2292
2293#define IEM_MC_RAISE_DIVIDE_ERROR() \
2294 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
2295
2296/**
2297 * Emits code to raise a \#DE.
2298 *
2299 * @returns New code buffer offset, UINT32_MAX on failure.
2300 * @param pReNative The native recompile state.
2301 * @param off The code buffer offset.
2302 * @param idxInstr The current instruction.
2303 */
2304DECL_INLINE_THROW(uint32_t)
2305iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2306{
2307 /*
2308 * Make sure we don't have any outstanding guest register writes as we raise a \#DE and all guest registers must be up to date in CPUMCTX.
2309 */
2310 off = iemNativeRegFlushPendingWrites(pReNative, off);
2311
2312#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2313 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2314#else
2315 RT_NOREF(idxInstr);
2316#endif
2317
2318 /* raise \#DE exception unconditionally. */
2319 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseDe);
2320}
2321
2322
2323#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2324 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2325
2326/**
2327 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2328 *
2329 * @returns New code buffer offset, UINT32_MAX on failure.
2330 * @param pReNative The native recompile state.
2331 * @param off The code buffer offset.
2332 * @param idxInstr The current instruction.
2333 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2334 * @param cbAlign The alignment in bytes to check against.
2335 */
2336DECL_INLINE_THROW(uint32_t)
2337iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
2338 uint8_t idxVarEffAddr, uint8_t cbAlign)
2339{
2340 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2341 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2342
2343 /*
2344 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2345 */
2346 off = iemNativeRegFlushPendingWrites(pReNative, off);
2347
2348#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2349 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2350#else
2351 RT_NOREF(idxInstr);
2352#endif
2353
2354 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2355
2356 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxVarReg, cbAlign - 1,
2357 kIemNativeLabelType_RaiseGp0);
2358
2359 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2360 return off;
2361}
2362
2363
2364/*********************************************************************************************************************************
2365* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2366*********************************************************************************************************************************/
2367
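/*
 * Informal overview: each IEM_MC_IF_XXX below pushes an entry onto
 * pReNative->aCondStack with freshly created 'else' and 'endif' labels,
 * emits its test as a conditional jump to the 'else' label, and snapshots
 * the register/variable state (iemNativeCondStartIfBlock).  IEM_MC_ELSE
 * jumps to 'endif', defines the 'else' label and restores the snapshot,
 * while IEM_MC_ENDIF defines the remaining labels and reconciles the two
 * core states by dropping whatever differs between them.
 */
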
2368/**
2369 * Pushes an IEM_MC_IF_XXX onto the condition stack.
2370 *
2371 * @returns Pointer to the condition stack entry on success, NULL on failure
2372 * (too many nestings)
2373 */
2374DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
2375{
2376#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2377 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
2378#endif
2379
2380 uint32_t const idxStack = pReNative->cCondDepth;
2381 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
2382
2383 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
2384 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
2385
2386 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
2387 pEntry->fInElse = false;
2388 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
2389 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
2390
2391 return pEntry;
2392}
2393
2394
2395/**
2396 * Start of the if-block, snapshotting the register and variable state.
2397 */
2398DECL_INLINE_THROW(void)
2399iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
2400{
2401 Assert(offIfBlock != UINT32_MAX);
2402 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2403 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2404 Assert(!pEntry->fInElse);
2405
2406 /* Define the start of the IF block if requested or for disassembly purposes. */
2407 if (idxLabelIf != UINT32_MAX)
2408 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
2409#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2410 else
2411 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
2412#else
2413 RT_NOREF(offIfBlock);
2414#endif
2415
2416#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2417 Assert(pReNative->Core.offPc == 0);
2418#endif
2419
2420 /* Copy the initial state so we can restore it in the 'else' block. */
2421 pEntry->InitialState = pReNative->Core;
2422}
2423
2424
2425#define IEM_MC_ELSE() } while (0); \
2426 off = iemNativeEmitElse(pReNative, off); \
2427 do {
2428
2429/** Emits code related to IEM_MC_ELSE. */
2430DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2431{
2432 /* Check sanity and get the conditional stack entry. */
2433 Assert(off != UINT32_MAX);
2434 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2435 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2436 Assert(!pEntry->fInElse);
2437
2438#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2439 /* Writeback any dirty shadow registers. */
2440 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
2441 * in one of the branches and leave guest registers already dirty before the start of the if
2442 * block alone. */
2443 off = iemNativeRegFlushDirtyGuest(pReNative, off);
2444#endif
2445
2446 /* Jump to the endif */
2447 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
2448
2449 /* Define the else label and enter the else part of the condition. */
2450 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2451 pEntry->fInElse = true;
2452
2453#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2454 Assert(pReNative->Core.offPc == 0);
2455#endif
2456
2457 /* Snapshot the core state so we can do a merge at the endif and restore
2458 the snapshot we took at the start of the if-block. */
2459 pEntry->IfFinalState = pReNative->Core;
2460 pReNative->Core = pEntry->InitialState;
2461
2462 return off;
2463}
2464
2465
2466#define IEM_MC_ENDIF() } while (0); \
2467 off = iemNativeEmitEndIf(pReNative, off)
2468
2469/** Emits code related to IEM_MC_ENDIF. */
2470DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2471{
2472 /* Check sanity and get the conditional stack entry. */
2473 Assert(off != UINT32_MAX);
2474 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2475 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2476
2477#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2478 Assert(pReNative->Core.offPc == 0);
2479#endif
2480#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2481 /* Writeback any dirty shadow registers (else branch). */
2482 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
2483 * in one of the branches and leave guest registers already dirty before the start of the if
2484 * block alone. */
2485 off = iemNativeRegFlushDirtyGuest(pReNative, off);
2486#endif
2487
2488 /*
2489 * Now we have to find common ground with the core state of the other
2490 * branch. Use the smallest common denominator and just drop anything
2491 * that isn't the same in both states.
2492 */
2493 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
2494 * which is why we're doing this at the end of the else-block.
2495 * But we'd need more info about the future for that to be worth the effort. */
2496 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
2497#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2498 Assert( pOther->bmGstRegShadowDirty == 0
2499 && pReNative->Core.bmGstRegShadowDirty == 0);
2500#endif
2501
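    /* Concretely: (1) guest shadows that map to a different host register
       (or none) in the other branch are written back if dirty and dropped;
       (2) variables that don't end up with matching register assignments
       either lose their register or get dropped entirely; (3) the final set
       of allocated host registers must be identical in both branches, or we
       bail out with VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED. */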
2502 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
2503 {
2504 /* shadow guest stuff first. */
2505 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
2506 if (fGstRegs)
2507 {
2508 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
2509 do
2510 {
2511 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2512 fGstRegs &= ~RT_BIT_64(idxGstReg);
2513
2514 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
2515 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
2516 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
2517 {
2518 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
2519 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
2520
2521#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2522 /* Writeback any dirty shadow registers we are about to unshadow. */
2523 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxHstReg);
2524#endif
2525 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
2526 }
2527 } while (fGstRegs);
2528 }
2529 else
2530 {
2531 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
2532#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2533 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
2534#endif
2535 }
2536
2537 /* Check variables next. For now we must require them to be identical
2538 or stuff we can recreate. */
2539 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
2540 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
2541 if (fVars)
2542 {
2543 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
2544 do
2545 {
2546 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
2547 fVars &= ~RT_BIT_32(idxVar);
2548
2549 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
2550 {
2551 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
2552 continue;
2553 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2554 {
2555 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
2556 if (idxHstReg != UINT8_MAX)
2557 {
2558 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2559 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2560 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
2561 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2562 }
2563 continue;
2564 }
2565 }
2566 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
2567 continue;
2568
2569 /* Irreconcilable, so drop it. */
2570 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
2571 if (idxHstReg != UINT8_MAX)
2572 {
2573 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2574 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2575 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
2576 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2577 }
2578 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2579 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
2580 } while (fVars);
2581 }
2582
2583 /* Finally, check that the host register allocations match. */
2584 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
2585 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
2586 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
2587 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
2588 }
2589
2590 /*
2591 * Define the endif label and maybe the else one if we're still in the 'if' part.
2592 */
2593 if (!pEntry->fInElse)
2594 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2595 else
2596 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
2597 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
2598
2599 /* Pop the conditional stack. */
2600 pReNative->cCondDepth -= 1;
2601
2602 return off;
2603}
2604
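/* For reference, a recompiled MC block along the lines of
       IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) { ... } IEM_MC_ELSE() { ... } IEM_MC_ENDIF();
   expands via the macros in this section roughly to (illustrative sketch only):
       off = iemNativeEmitIfEflagsBitSet(pReNative, off, X86_EFL_ZF);
       do { ... } while (0);
       off = iemNativeEmitElse(pReNative, off);
       do { ... } while (0);
       off = iemNativeEmitEndIf(pReNative, off);
 */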
2605
2606#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
2607 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
2608 do {
2609
2610/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
2611DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
2612{
2613 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
2614 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2615
2616 /* Get the eflags. */
2617 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2618 kIemNativeGstRegUse_ReadOnly);
2619
2620 /* Test and jump. */
2621 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
2622
2623 /* Free but don't flush the EFlags register. */
2624 iemNativeRegFreeTmp(pReNative, idxEflReg);
2625
2626 /* Make a copy of the core state now as we start the if-block. */
2627 iemNativeCondStartIfBlock(pReNative, off);
2628
2629 return off;
2630}
2631
2632
2633#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
2634 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
2635 do {
2636
2637/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
2638DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
2639{
2640 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
2641 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2642
2643 /* Get the eflags. */
2644 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2645 kIemNativeGstRegUse_ReadOnly);
2646
2647 /* Test and jump. */
2648 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
2649
2650 /* Free but don't flush the EFlags register. */
2651 iemNativeRegFreeTmp(pReNative, idxEflReg);
2652
2653 /* Make a copy of the core state now as we start the if-block. */
2654 iemNativeCondStartIfBlock(pReNative, off);
2655
2656 return off;
2657}
2658
2659
2660#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
2661 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
2662 do {
2663
2664/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
2665DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
2666{
2667 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2668 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2669
2670 /* Get the eflags. */
2671 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2672 kIemNativeGstRegUse_ReadOnly);
2673
2674 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2675 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2676
2677 /* Test and jump. */
2678 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2679
2680 /* Free but don't flush the EFlags register. */
2681 iemNativeRegFreeTmp(pReNative, idxEflReg);
2682
2683 /* Make a copy of the core state now as we start the if-block. */
2684 iemNativeCondStartIfBlock(pReNative, off);
2685
2686 return off;
2687}
2688
2689
2690#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
2691 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
2692 do {
2693
2694/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
2695DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
2696{
2697 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2698 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2699
2700 /* Get the eflags. */
2701 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2702 kIemNativeGstRegUse_ReadOnly);
2703
2704 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2705 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2706
2707 /* Test and jump. */
2708 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2709
2710 /* Free but don't flush the EFlags register. */
2711 iemNativeRegFreeTmp(pReNative, idxEflReg);
2712
2713 /* Make a copy of the core state now as we start the if-block. */
2714 iemNativeCondStartIfBlock(pReNative, off);
2715
2716 return off;
2717}
2718
2719
2720#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
2721 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
2722 do {
2723
2724#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
2725 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
2726 do {
2727
2728/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
2729DECL_INLINE_THROW(uint32_t)
2730iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2731 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
2732{
2733 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
2734 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2735
2736 /* Get the eflags. */
2737 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2738 kIemNativeGstRegUse_ReadOnly);
2739
2740 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
2741 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
2742
2743 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
2744 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
2745 Assert(iBitNo1 != iBitNo2);
2746
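    /* Strategy (same idea on both arches): isolate fBit1, shift the isolated
       bit into fBit2's position and XOR it with EFLAGS; bit iBitNo2 of the
       result is then set exactly when the two flags differ, which is what
       the bit-test-and-jump below checks. */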
2747#ifdef RT_ARCH_AMD64
2748 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
2749
2750 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2751 if (iBitNo1 > iBitNo2)
2752 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
2753 else
2754 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
2755 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2756
2757#elif defined(RT_ARCH_ARM64)
2758 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2759 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2760
2761 /* and tmpreg, eflreg, #1<<iBitNo1 */
2762 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
2763
2764 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
2765 if (iBitNo1 > iBitNo2)
2766 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2767 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
2768 else
2769 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2770 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
2771
2772 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2773
2774#else
2775# error "Port me"
2776#endif
2777
2778 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
2779 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
2780 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
2781
2782 /* Free but don't flush the EFlags and tmp registers. */
2783 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2784 iemNativeRegFreeTmp(pReNative, idxEflReg);
2785
2786 /* Make a copy of the core state now as we start the if-block. */
2787 iemNativeCondStartIfBlock(pReNative, off);
2788
2789 return off;
2790}
2791
2792
2793#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
2794 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
2795 do {
2796
2797#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
2798 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
2799 do {
2800
2801/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
2802 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
2803DECL_INLINE_THROW(uint32_t)
2804iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
2805 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
2806{
2807 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
2808 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2809
2810 /* We need an if-block label for the non-inverted variant. */
2811 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
2812 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
2813
2814 /* Get the eflags. */
2815 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2816 kIemNativeGstRegUse_ReadOnly);
2817
2818 /* Translate the flag masks to bit numbers. */
2819 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2820 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2821
2822 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
2823 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
2824 Assert(iBitNo1 != iBitNo);
2825
2826 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
2827 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
2828 Assert(iBitNo2 != iBitNo);
2829 Assert(iBitNo2 != iBitNo1);
2830
2831#ifdef RT_ARCH_AMD64
2832 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
2833#elif defined(RT_ARCH_ARM64)
2834 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2835#endif
2836
2837 /* Check for the lone bit first. */
2838 if (!fInverted)
2839 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2840 else
2841 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
2842
2843 /* Then extract and compare the other two bits. */
2844#ifdef RT_ARCH_AMD64
2845 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2846 if (iBitNo1 > iBitNo2)
2847 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
2848 else
2849 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
2850 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2851
2852#elif defined(RT_ARCH_ARM64)
2853 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2854
2855 /* and tmpreg, eflreg, #1<<iBitNo1 */
2856 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
2857
2858 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
2859 if (iBitNo1 > iBitNo2)
2860 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2861 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
2862 else
2863 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2864 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
2865
2866 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2867
2868#else
2869# error "Port me"
2870#endif
2871
2872 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
2873 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
2874 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
2875
2876 /* Free but don't flush the EFlags and tmp registers. */
2877 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2878 iemNativeRegFreeTmp(pReNative, idxEflReg);
2879
2880 /* Make a copy of the core state now as we start the if-block. */
2881 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
2882
2883 return off;
2884}
2885
2886
2887#define IEM_MC_IF_CX_IS_NZ() \
2888 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
2889 do {
2890
2891/** Emits code for IEM_MC_IF_CX_IS_NZ. */
2892DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2893{
2894 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2895
2896 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2897 kIemNativeGstRegUse_ReadOnly);
2898 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
2899 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2900
2901 iemNativeCondStartIfBlock(pReNative, off);
2902 return off;
2903}
2904
2905
2906#define IEM_MC_IF_ECX_IS_NZ() \
2907 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
2908 do {
2909
2910#define IEM_MC_IF_RCX_IS_NZ() \
2911 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
2912 do {
2913
2914/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
2915DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
2916{
2917 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2918
2919 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2920 kIemNativeGstRegUse_ReadOnly);
2921 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
2922 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2923
2924 iemNativeCondStartIfBlock(pReNative, off);
2925 return off;
2926}
2927
2928
2929#define IEM_MC_IF_CX_IS_NOT_ONE() \
2930 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
2931 do {
2932
2933/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
2934DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2935{
2936 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2937
2938 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2939 kIemNativeGstRegUse_ReadOnly);
2940#ifdef RT_ARCH_AMD64
2941 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2942#else
2943 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2944 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
2945 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2946#endif
2947 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2948
2949 iemNativeCondStartIfBlock(pReNative, off);
2950 return off;
2951}
2952
2953
2954#define IEM_MC_IF_ECX_IS_NOT_ONE() \
2955 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
2956 do {
2957
2958#define IEM_MC_IF_RCX_IS_NOT_ONE() \
2959 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
2960 do {
2961
2962/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
2963DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
2964{
2965 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2966
2967 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2968 kIemNativeGstRegUse_ReadOnly);
2969 if (f64Bit)
2970 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2971 else
2972 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2973 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2974
2975 iemNativeCondStartIfBlock(pReNative, off);
2976 return off;
2977}
2978
2979
2980#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
2981 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
2982 do {
2983
2984#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
2985 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
2986 do {
2987
2988/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
2989 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
2990DECL_INLINE_THROW(uint32_t)
2991iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
2992{
2993 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2994 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2995
2996 /* We have to load both RCX and EFLAGS before we can start branching,
2997 otherwise we'll end up in the else-block with an inconsistent
2998 register allocator state.
2999 Doing EFLAGS first as it's more likely to be loaded, right? */
3000 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3001 kIemNativeGstRegUse_ReadOnly);
3002 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3003 kIemNativeGstRegUse_ReadOnly);
3004
3005 /** @todo we could reduce this to a single branch instruction by spending a
3006 * temporary register and some setnz stuff. Not sure if loops are
3007 * worth it. */
3008 /* Check CX. */
3009#ifdef RT_ARCH_AMD64
3010 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3011#else
3012 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3013 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3014 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3015#endif
3016
3017 /* Check the EFlags bit. */
3018 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3019 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3020 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3021 !fCheckIfSet /*fJmpIfSet*/);
3022
3023 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3024 iemNativeRegFreeTmp(pReNative, idxEflReg);
3025
3026 iemNativeCondStartIfBlock(pReNative, off);
3027 return off;
3028}
3029
3030
3031#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3032 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
3033 do {
3034
3035#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3036 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
3037 do {
3038
3039#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3040 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
3041 do {
3042
3043#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3044 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
3045 do {
3046
3047/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3048 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3049 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3050 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3051DECL_INLINE_THROW(uint32_t)
3052iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3053 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
3054{
3055 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3056 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3057
3058 /* We have to load both RCX and EFLAGS before we can start branching,
3059 otherwise we'll end up in the else-block with an inconsistent
3060 register allocator state.
3061 Doing EFLAGS first as it's more likely to be loaded, right? */
3062 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3063 kIemNativeGstRegUse_ReadOnly);
3064 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3065 kIemNativeGstRegUse_ReadOnly);
3066
3067 /** @todo we could reduce this to a single branch instruction by spending a
3068 * temporary register and some setnz stuff. Not sure if loops are
3069 * worth it. */
3070 /* Check RCX/ECX. */
3071 if (f64Bit)
3072 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3073 else
3074 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3075
3076 /* Check the EFlags bit. */
3077 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3078 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3079 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3080 !fCheckIfSet /*fJmpIfSet*/);
3081
3082 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3083 iemNativeRegFreeTmp(pReNative, idxEflReg);
3084
3085 iemNativeCondStartIfBlock(pReNative, off);
3086 return off;
3087}
3088
3089
3090#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3091 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3092 do {
3093
3094/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3095DECL_INLINE_THROW(uint32_t)
3096iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3097{
3098 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3099
3100 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3101 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3102 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3103 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3104
3105 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3106
3107 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3108
3109 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3110
3111 iemNativeCondStartIfBlock(pReNative, off);
3112 return off;
3113}
3114
3115
3116#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3117 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3118 do {
3119
3120/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3121DECL_INLINE_THROW(uint32_t)
3122iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3123{
3124 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3125 Assert(iGReg < 16);
3126
3127 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3128 kIemNativeGstRegUse_ReadOnly);
3129
3130 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3131
3132 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3133
3134 iemNativeCondStartIfBlock(pReNative, off);
3135 return off;
3136}
3137
3138
3139
3140/*********************************************************************************************************************************
3141* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3142*********************************************************************************************************************************/
3143
3144#define IEM_MC_NOREF(a_Name) \
3145 RT_NOREF_PV(a_Name)
3146
3147#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3148 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3149
3150#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3151 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3152
3153#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3154 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3155
3156#define IEM_MC_LOCAL(a_Type, a_Name) \
3157 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3158
3159#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3160 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3161
3162#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3163 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
3164
3165
3166/**
3167 * Sets the host register for @a idxVarRc to @a idxReg.
3168 *
3169 * The register must not be allocated. Any guest register shadowing will be
3170 * implicitly dropped by this call.
3171 *
3172 * The variable must not have any register associated with it (causes
3173 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3174 * implied.
3175 *
3176 * @returns idxReg
3177 * @param pReNative The recompiler state.
3178 * @param idxVar The variable.
3179 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3180 * @param off For recording in debug info.
3181 *
3182 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
3183 */
3184DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
3185{
3186 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3187 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3188 Assert(!pVar->fRegAcquired);
3189 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3190 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
3191 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3192
3193 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3194 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3195
3196 iemNativeVarSetKindToStack(pReNative, idxVar);
3197 pVar->idxReg = idxReg;
3198
3199 return idxReg;
3200}
3201
3202
3203/**
3204 * A convenient helper function.
3205 */
3206DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
3207 uint8_t idxReg, uint32_t *poff)
3208{
3209 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
3210 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
3211 return idxReg;
3212}
3213
3214
3215/**
3216 * This is called by IEM_MC_END() to clean up all variables.
3217 */
3218DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
3219{
3220 uint32_t const bmVars = pReNative->Core.bmVars;
3221 if (bmVars != 0)
3222 iemNativeVarFreeAllSlow(pReNative, bmVars);
3223 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
3224 Assert(pReNative->Core.bmStack == 0);
3225}
3226
3227
3228#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
3229
3230/**
3231 * This is called by IEM_MC_FREE_LOCAL.
3232 */
3233DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3234{
3235 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3236 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
3237 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3238}
3239
3240
3241#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
3242
3243/**
3244 * This is called by IEM_MC_FREE_ARG.
3245 */
3246DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3247{
3248 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3249 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
3250 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3251}
3252
3253
3254#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
3255
3256/**
3257 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
3258 */
3259DECL_INLINE_THROW(uint32_t)
3260iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
3261{
3262 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
3263 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
3264 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3265 Assert( pVarDst->cbVar == sizeof(uint16_t)
3266 || pVarDst->cbVar == sizeof(uint32_t));
3267
3268 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
3269 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
3270 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
3271 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
3272 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3273
3274 Assert(pVarDst->cbVar < pVarSrc->cbVar);
3275
3276 /*
3277 * Special case for immediates.
3278 */
3279 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
3280 {
3281 switch (pVarDst->cbVar)
3282 {
3283 case sizeof(uint16_t):
3284 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
3285 break;
3286 case sizeof(uint32_t):
3287 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
3288 break;
3289 default: AssertFailed(); break;
3290 }
3291 }
3292 else
3293 {
3294 /*
3295 * The generic solution for now.
3296 */
3297 /** @todo optimize this by having the python script make sure the source
3298 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
3299 * statement. Then we could just transfer the register assignments. */
3300 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
3301 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
3302 switch (pVarDst->cbVar)
3303 {
3304 case sizeof(uint16_t):
3305 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
3306 break;
3307 case sizeof(uint32_t):
3308 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
3309 break;
3310 default: AssertFailed(); break;
3311 }
3312 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
3313 iemNativeVarRegisterRelease(pReNative, idxVarDst);
3314 }
3315 return off;
3316}
3317
3318
3319
3320/*********************************************************************************************************************************
3321* Emitters for IEM_MC_CALL_CIMPL_XXX *
3322*********************************************************************************************************************************/
3323
3324/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
3325DECL_INLINE_THROW(uint32_t)
3326iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
3327 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
3328
3329{
3330 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
3331
3332#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3333 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
3334 when a call clobbers any of the relevant control registers. */
3335# if 1
3336 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
3337 {
3338 /* Likely as long as call+ret are done via cimpl. */
3339 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
3340 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
3341 }
3342 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
3343 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
3344 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
3345 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3346 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
3347 else
3348 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3349 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
3350 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
3351
3352# else
3353 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
3354 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
3355 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
3356 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3357 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
3358 || pfnCImpl == (uintptr_t)iemCImpl_callf
3359 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
3360 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
3361 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3362 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
3363 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
3364# endif
3365#endif
3366
3367 /*
3368 * Do all the call setup and cleanup.
3369 */
3370 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
3371
3372 /*
3373 * Load the two or three hidden arguments.
3374 */
3375#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
3376 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3377 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3378 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
3379#else
3380 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3381 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
3382#endif
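    /* I.e. the hidden arguments are pVCpu and cbInstr, plus, in the strict
       Windows/AMD64 build above, a pointer to a stack slot receiving the
       VBOXSTRICTRC return value. */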
3383
3384 /*
3385 * Make the call and check the return code.
3386 *
3387 * Shadow PC copies are always flushed here, other stuff depends on flags.
3388 * Segment and general purpose registers are explicitly flushed via the
3389 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
3390 * macros.
3391 */
3392 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
3393#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
3394 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3395#endif
3396 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
3397 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
3398 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
3399 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
3400
3401 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3402}
3403
3404
3405#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
3406 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
3407
3408/** Emits code for IEM_MC_CALL_CIMPL_1. */
3409DECL_INLINE_THROW(uint32_t)
3410iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3411 uintptr_t pfnCImpl, uint8_t idxArg0)
3412{
3413 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3414 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
3415}
3416
3417
3418#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
3419 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
3420
3421/** Emits code for IEM_MC_CALL_CIMPL_2. */
3422DECL_INLINE_THROW(uint32_t)
3423iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3424 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
3425{
3426 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3427 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3428 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
3429}
3430
3431
3432#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
3433 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3434 (uintptr_t)a_pfnCImpl, a0, a1, a2)
3435
3436/** Emits code for IEM_MC_CALL_CIMPL_3. */
3437DECL_INLINE_THROW(uint32_t)
3438iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3439 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
3440{
3441 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3442 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3443 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3444 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
3445}
3446
3447
3448#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
3449 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3450 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
3451
3452/** Emits code for IEM_MC_CALL_CIMPL_4. */
3453DECL_INLINE_THROW(uint32_t)
3454iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3455 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
3456{
3457 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3458 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3459 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3460 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
3461 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
3462}
3463
3464
3465#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
3466 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3467 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
3468
3469/** Emits code for IEM_MC_CALL_CIMPL_5. */
3470DECL_INLINE_THROW(uint32_t)
3471iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3472 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
3473{
3474 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3475 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3476 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3477 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
3478 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
3479 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
3480}
3481
3482
3483/** Recompiler debugging: Flush guest register shadow copies. */
3484#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
3485
3486
3487
3488/*********************************************************************************************************************************
3489* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
3490*********************************************************************************************************************************/
3491
3492/**
3493 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
3494 */
3495DECL_INLINE_THROW(uint32_t)
3496iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3497 uintptr_t pfnAImpl, uint8_t cArgs)
3498{
3499 if (idxVarRc != UINT8_MAX)
3500 {
3501 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
3502 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
3503 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3504 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3505 }
3506
3507 /*
3508 * Do all the call setup and cleanup.
3509 *
3510 * It is only required to flush pending guest register writes in call volatile registers as
3511 * assembly helpers can't throw and don't access anything living in CPUMCTX; they only
3512 * access their parameters. The flushing of call volatile registers is always done in iemNativeEmitCallCommon()
3513 * no matter the fFlushPendingWrites parameter.
3514 */
3515 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
3516
3517 /*
3518 * Make the call and update the return code variable if we've got one.
3519 */
3520 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
3521 if (idxVarRc != UINT8_MAX)
3522 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
3523
3524 return off;
3525}
3526
3527
3528
3529#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
3530 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
3531
3532#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
3533 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
3534
3535/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
3536DECL_INLINE_THROW(uint32_t)
3537iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
3538{
3539 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
3540}
3541
3542
3543#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
3544 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
3545
3546#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
3547 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
3548
3549/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
3550DECL_INLINE_THROW(uint32_t)
3551iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
3552{
3553 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3554 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
3555}
3556
3557
3558#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
3559 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
3560
3561#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
3562 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
3563
3564/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
3565DECL_INLINE_THROW(uint32_t)
3566iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3567 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
3568{
3569 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3570 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3571 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
3572}
3573
3574
3575#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
3576 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
3577
3578#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
3579 IEM_MC_LOCAL(a_rcType, a_rc); \
3580 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
3581
3582/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
3583DECL_INLINE_THROW(uint32_t)
3584iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3585 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
3586{
3587 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3588 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3589 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
3590 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
3591}
3592
3593
3594#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
3595 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
3596
3597#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
3598 IEM_MC_LOCAL(a_rcType, a_rc); \
3599 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
3600
3601/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
3602DECL_INLINE_THROW(uint32_t)
3603iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3604 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
3605{
3606 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3607 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3608 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
3609 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
3610 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
3611}
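
/* Note on the macro shapes above: IEM_MC_CALL_AIMPL_0/1/2 take an already
   declared result variable, whereas IEM_MC_CALL_AIMPL_3 and _4 declare it
   themselves via IEM_MC_LOCAL(a_rcType, a_rc) as part of the macro. */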
3612
3613
3614
3615/*********************************************************************************************************************************
3616* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
3617*********************************************************************************************************************************/
3618
3619#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
3620 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
3621
3622#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
3623 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
3624
3625#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
3626 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
3627
3628#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
3629 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
3630
3631
3632/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
3633 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
3634DECL_INLINE_THROW(uint32_t)
3635iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
3636{
3637 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3638 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3639 Assert(iGRegEx < 20);
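    /* Note: iGRegEx encodes the byte register: 0..15 select the low byte of GPR0..15,
       while 16..19 select the high byte (AH, CH, DH, BH) of the first four GPRs. */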
3640
3641 /* Same discussion as in iemNativeEmitFetchGregU16 */
3642 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3643 kIemNativeGstRegUse_ReadOnly);
3644
3645 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3646 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3647
3648 /* The value is zero-extended to the full 64-bit host register width. */
3649 if (iGRegEx < 16)
3650 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3651 else
3652 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
3653
3654 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3655 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3656 return off;
3657}
3658
3659
3660#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
3661 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
3662
3663#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
3664 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
3665
3666#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
3667 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
3668
3669/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
3670DECL_INLINE_THROW(uint32_t)
3671iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
3672{
3673 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3674 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
3675 Assert(iGRegEx < 20);
3676
3677 /* Same discussion as in iemNativeEmitFetchGregU16 */
3678 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3679 kIemNativeGstRegUse_ReadOnly);
3680
3681 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3682 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3683
3684 if (iGRegEx < 16)
3685 {
3686 switch (cbSignExtended)
3687 {
3688 case sizeof(uint16_t):
3689 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3690 break;
3691 case sizeof(uint32_t):
3692 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3693 break;
3694 case sizeof(uint64_t):
3695 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3696 break;
3697 default: AssertFailed(); break;
3698 }
3699 }
3700 else
3701 {
3702 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
3703 switch (cbSignExtended)
3704 {
3705 case sizeof(uint16_t):
3706 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3707 break;
3708 case sizeof(uint32_t):
3709 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3710 break;
3711 case sizeof(uint64_t):
3712 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3713 break;
3714 default: AssertFailed(); break;
3715 }
3716 }
3717
3718 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3719 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3720 return off;
3721}
3722
3723
3724
3725#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
3726 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
3727
3728#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
3729 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
3730
3731#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
3732 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
3733
3734/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
3735DECL_INLINE_THROW(uint32_t)
3736iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
3737{
3738 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3739 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3740 Assert(iGReg < 16);
3741
3742 /*
3743 * We can either just load the low 16-bit of the GPR into a host register
3744 * for the variable, or we can do so via a shadow copy host register. The
3745 * latter will avoid having to reload it if it's being stored later, but
3746 * will waste a host register if it isn't touched again. Since we don't
3747 * know what's going to happen, we choose the latter for now.
3748 */
3749 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3750 kIemNativeGstRegUse_ReadOnly);
3751
3752 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3753 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3754 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3755 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3756
3757 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3758 return off;
3759}
3760
3761
3762#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
3763 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
3764
3765#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
3766 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
3767
3768/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
3769DECL_INLINE_THROW(uint32_t)
3770iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
3771{
3772 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3773 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
3774 Assert(iGReg < 16);
3775
3776 /*
3777 * We can either just load the low 16-bit of the GPR into a host register
3778 * for the variable, or we can do so via a shadow copy host register. The
3779 * latter will avoid having to reload it if it's being stored later, but
3780 * will waste a host register if it isn't touched again. Since we don't
3781 * know what's going to happen, we choose the latter for now.
3782 */
3783 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3784 kIemNativeGstRegUse_ReadOnly);
3785
3786 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3787 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3788 if (cbSignExtended == sizeof(uint32_t))
3789 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3790 else
3791 {
3792 Assert(cbSignExtended == sizeof(uint64_t));
3793 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3794 }
3795 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3796
3797 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3798 return off;
3799}
3800
3801
3802#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
3803 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
3804
3805#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
3806 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
3807
3808/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
3809DECL_INLINE_THROW(uint32_t)
3810iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
3811{
3812 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3813 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3814 Assert(iGReg < 16);
3815
3816 /*
3817 * We can either just load the low 32-bit of the GPR into a host register
3818 * for the variable, or we can do so via a shadow copy host register. The
3819 * latter will avoid having to reload it if it's being stored later, but
3820 * will waste a host register if it isn't touched again. Since we don't
3821 * know what's going to happen, we choose the latter for now.
3822 */
3823 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3824 kIemNativeGstRegUse_ReadOnly);
3825
3826 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3827 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3828 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
3829 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3830
3831 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3832 return off;
3833}
3834
3835
3836#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
3837 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
3838
3839/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
3840DECL_INLINE_THROW(uint32_t)
3841iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
3842{
3843 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3844 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
3845 Assert(iGReg < 16);
3846
3847 /*
3848 * We can either just load the low 32-bit of the GPR into a host register
3849 * for the variable, or we can do so via a shadow copy host register. The
3850 * latter will avoid having to reload it if it's being stored later, but
3851 * will waste a host register if it isn't touched again. Since we don't
3852 * know what's going to happen, we choose the latter for now.
3853 */
3854 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3855 kIemNativeGstRegUse_ReadOnly);
3856
3857 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3858 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3859 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
3860 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3861
3862 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3863 return off;
3864}
3865
3866
3867#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
3868 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
3869
3870#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
3871 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
3872
3873/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
3874 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
3875DECL_INLINE_THROW(uint32_t)
3876iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
3877{
3878 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3879 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
3880 Assert(iGReg < 16);
3881
3882 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3883 kIemNativeGstRegUse_ReadOnly);
3884
3885 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3886 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3887 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
3888 /** @todo name the register a shadow one already? */
3889 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3890
3891 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3892 return off;
3893}
3894
3895
3896#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3897#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
3898 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
3899
3900/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
3901DECL_INLINE_THROW(uint32_t)
3902iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
3903{
3904 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3905 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
3906 Assert(iGRegLo < 16 && iGRegHi < 16);
3907
3908 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
3909 kIemNativeGstRegUse_ReadOnly);
3910 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
3911 kIemNativeGstRegUse_ReadOnly);
3912
3913 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3914 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
3915 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
3916 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
3917
3918 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
3919 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
3920 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
3921 return off;
3922}
3923#endif
3924
3925
3926/*********************************************************************************************************************************
3927* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
3928*********************************************************************************************************************************/
3929
3930#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
3931 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
3932
3933/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
3934DECL_INLINE_THROW(uint32_t)
3935iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
3936{
3937 Assert(iGRegEx < 20);
3938 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3939 kIemNativeGstRegUse_ForUpdate);
3940#ifdef RT_ARCH_AMD64
3941 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
3942
3943 /* To the lowest byte of the register: mov r8, imm8 */
3944 if (iGRegEx < 16)
3945 {
3946 if (idxGstTmpReg >= 8)
3947 pbCodeBuf[off++] = X86_OP_REX_B;
3948 else if (idxGstTmpReg >= 4)
3949 pbCodeBuf[off++] = X86_OP_REX;
3950 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
3951 pbCodeBuf[off++] = u8Value;
3952 }
3953 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
3954 else if (idxGstTmpReg < 4)
3955 {
3956 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
3957 pbCodeBuf[off++] = u8Value;
3958 }
3959 else
3960 {
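        /* The target is the high byte (AH/CH/DH/BH) of the guest register, but the
           shadow host register is not one of the first four GPRs, so its bits 15:8
           cannot be addressed directly (the legacy high-byte registers exist only for
           the first four encodings and cannot be combined with a REX prefix). Rotate
           the byte into the low position, store the immediate, and rotate back. */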
3961 /* ror reg64, 8 */
3962 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
3963 pbCodeBuf[off++] = 0xc1;
3964 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3965 pbCodeBuf[off++] = 8;
3966
3967 /* mov reg8, imm8 */
3968 if (idxGstTmpReg >= 8)
3969 pbCodeBuf[off++] = X86_OP_REX_B;
3970 else if (idxGstTmpReg >= 4)
3971 pbCodeBuf[off++] = X86_OP_REX;
3972 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
3973 pbCodeBuf[off++] = u8Value;
3974
3975 /* rol reg64, 8 */
3976 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
3977 pbCodeBuf[off++] = 0xc1;
3978 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3979 pbCodeBuf[off++] = 8;
3980 }
3981
3982#elif defined(RT_ARCH_ARM64)
3983 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
3984 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3985 if (iGRegEx < 16)
3986 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
3987 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
3988 else
3989 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
3990 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
3991 iemNativeRegFreeTmp(pReNative, idxImmReg);
3992
3993#else
3994# error "Port me!"
3995#endif
3996
3997 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3998
3999#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4000 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4001#endif
4002
4003 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4004 return off;
4005}
4006
4007
4008#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4009 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4010
4011/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4012DECL_INLINE_THROW(uint32_t)
4013iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4014{
4015 Assert(iGRegEx < 20);
4016 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4017
4018 /*
4019 * If it's a constant value (unlikely) we treat this as an
4020 * IEM_MC_STORE_GREG_U8_CONST statement.
4021 */
4022 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4023 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4024 { /* likely */ }
4025 else
4026 {
4027 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4028 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4029 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4030 }
4031
4032 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4033 kIemNativeGstRegUse_ForUpdate);
4034 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4035
4036#ifdef RT_ARCH_AMD64
4037 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4038 if (iGRegEx < 16)
4039 {
4040 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4041 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4042 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4043 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4044 pbCodeBuf[off++] = X86_OP_REX;
4045 pbCodeBuf[off++] = 0x8a;
4046 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4047 }
4048 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise, we rotate. */
4049 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4050 {
4051 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4052 pbCodeBuf[off++] = 0x8a;
4053 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4054 }
4055 else
4056 {
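        /* Same rotate trick as in iemNativeEmitStoreGregU8Const: bits 15:8 of the
           shadow register cannot be addressed directly here (high-byte encodings are
           limited to the first four registers and exclude REX), so rotate the byte
           into the low position, move it, and rotate back. */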
4057 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4058
4059 /* ror reg64, 8 */
4060 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4061 pbCodeBuf[off++] = 0xc1;
4062 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4063 pbCodeBuf[off++] = 8;
4064
4065 /* mov reg8, reg8(r/m) */
4066 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4067 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4068 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4069 pbCodeBuf[off++] = X86_OP_REX;
4070 pbCodeBuf[off++] = 0x8a;
4071 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4072
4073 /* rol reg64, 8 */
4074 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4075 pbCodeBuf[off++] = 0xc1;
4076 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4077 pbCodeBuf[off++] = 8;
4078 }
4079
4080#elif defined(RT_ARCH_ARM64)
4081 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4082 or
4083 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4084 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4085 if (iGRegEx < 16)
4086 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4087 else
4088 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4089
4090#else
4091# error "Port me!"
4092#endif
4093 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4094
4095 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4096
4097#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4098 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4099#endif
4100 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4101 return off;
4102}
4103
4104
4105
4106#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4107 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4108
4109/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4110DECL_INLINE_THROW(uint32_t)
4111iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4112{
4113 Assert(iGReg < 16);
4114 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4115 kIemNativeGstRegUse_ForUpdate);
4116#ifdef RT_ARCH_AMD64
4117 /* mov reg16, imm16 */
4118 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4119 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4120 if (idxGstTmpReg >= 8)
4121 pbCodeBuf[off++] = X86_OP_REX_B;
4122 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4123 pbCodeBuf[off++] = RT_BYTE1(uValue);
4124 pbCodeBuf[off++] = RT_BYTE2(uValue);
4125
4126#elif defined(RT_ARCH_ARM64)
4127 /* movk xdst, #uValue, lsl #0 */
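    /* movk only replaces the 16 bits selected by the shift (bits 15:0 here) and
       leaves the rest of the register untouched, matching the 16-bit GREG store. */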
4128 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4129 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
4130
4131#else
4132# error "Port me!"
4133#endif
4134
4135 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4136
4137#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4138 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4139#endif
4140 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4141 return off;
4142}
4143
4144
4145#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4146 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4147
4148/** Emits code for IEM_MC_STORE_GREG_U16. */
4149DECL_INLINE_THROW(uint32_t)
4150iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4151{
4152 Assert(iGReg < 16);
4153 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4154
4155 /*
4156 * If it's a constant value (unlikely) we treat this as an
4157 * IEM_MC_STORE_GREG_U16_CONST statement.
4158 */
4159 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4160 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4161 { /* likely */ }
4162 else
4163 {
4164 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4165 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4166 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
4167 }
4168
4169 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4170 kIemNativeGstRegUse_ForUpdate);
4171
4172#ifdef RT_ARCH_AMD64
4173 /* mov reg16, reg16 or [mem16] */
4174 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4175 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4176 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
4177 {
4178 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
4179 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
4180 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
4181 pbCodeBuf[off++] = 0x8b;
4182 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
4183 }
4184 else
4185 {
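        /* The value variable has no host register at the moment, so fetch the
           16 bits straight from its stack slot via a BP-relative operand. */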
4186 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
4187 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
4188 if (idxGstTmpReg >= 8)
4189 pbCodeBuf[off++] = X86_OP_REX_R;
4190 pbCodeBuf[off++] = 0x8b;
4191 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
4192 }
4193
4194#elif defined(RT_ARCH_ARM64)
4195 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
4196 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4197 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4198 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
4199 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4200
4201#else
4202# error "Port me!"
4203#endif
4204
4205 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4206
4207#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4208 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4209#endif
4210 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4211 return off;
4212}
4213
4214
4215#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
4216 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
4217
4218/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
4219DECL_INLINE_THROW(uint32_t)
4220iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
4221{
4222 Assert(iGReg < 16);
4223 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4224 kIemNativeGstRegUse_ForFullWrite);
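    /* ForFullWrite: the old guest value is irrelevant since the whole register is
       about to be overwritten, so the allocator need not load it first. */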
4225 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4226#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4227 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4228#endif
4229 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4230 return off;
4231}
4232
4233
4234#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
4235 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
4236
4237#define IEM_MC_STORE_GREG_I32(a_iGReg, a_i32Value) \
4238 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_i32Value)
4239
4240/** Emits code for IEM_MC_STORE_GREG_U32/IEM_MC_STORE_GREG_I32. */
4241DECL_INLINE_THROW(uint32_t)
4242iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4243{
4244 Assert(iGReg < 16);
4245 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4246
4247 /*
4248 * If it's a constant value (unlikely) we treat this as an
4249 * IEM_MC_STORE_GREG_U32_CONST statement.
4250 */
4251 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4252 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4253 { /* likely */ }
4254 else
4255 {
4256 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4257 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4258 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
4259 }
4260
4261 /*
4262 * For the rest we allocate a guest register for the variable and write
4263 * it to the CPUMCTX structure.
4264 */
4265 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
4266#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4267 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4268#else
4269 RT_NOREF(idxVarReg);
4270#endif
4271#ifdef VBOX_STRICT
4272 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
4273#endif
4274 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4275 return off;
4276}
4277
4278
4279#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
4280 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
4281
4282/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
4283DECL_INLINE_THROW(uint32_t)
4284iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
4285{
4286 Assert(iGReg < 16);
4287 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4288 kIemNativeGstRegUse_ForFullWrite);
4289 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4290#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4291 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4292#endif
4293 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4294 return off;
4295}
4296
4297
4298#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
4299 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
4300
4301#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
4302 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
4303
4304/** Emits code for IEM_MC_STORE_GREG_U64/IEM_MC_STORE_GREG_I64. */
4305DECL_INLINE_THROW(uint32_t)
4306iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4307{
4308 Assert(iGReg < 16);
4309 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4310
4311 /*
4312 * If it's a constant value (unlikely) we treat this as an
4313 * IEM_MC_STORE_GREG_U64_CONST statement.
4314 */
4315 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4316 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4317 { /* likely */ }
4318 else
4319 {
4320 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4321 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4322 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
4323 }
4324
4325 /*
4326 * For the rest we allocate a guest register for the variable and write
4327 * it to the CPUMCTX structure.
4328 */
4329 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
4330#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4331 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4332#else
4333 RT_NOREF(idxVarReg);
4334#endif
4335 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4336 return off;
4337}
4338
4339
4340#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
4341 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
4342
4343/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
4344DECL_INLINE_THROW(uint32_t)
4345iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
4346{
4347 Assert(iGReg < 16);
4348 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4349 kIemNativeGstRegUse_ForUpdate);
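    /* Moving the low 32 bits onto themselves zero-extends into bits 63:32 on both
       AMD64 and ARM64, which is exactly the clearing required here. */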
4350 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
4351#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4352 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4353#endif
4354 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4355 return off;
4356}
4357
4358
4359#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4360#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
4361 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
4362
4363/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
4364DECL_INLINE_THROW(uint32_t)
4365iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
4366{
4367 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4368 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4369 Assert(iGRegLo < 16 && iGRegHi < 16);
4370
4371 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4372 kIemNativeGstRegUse_ForFullWrite);
4373 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4374 kIemNativeGstRegUse_ForFullWrite);
4375
4376 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4377 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
4378 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
4379 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
4380
4381 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4382 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4383 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4384 return off;
4385}
4386#endif
4387
4388
4389/*********************************************************************************************************************************
4390* General purpose register manipulation (add, sub). *
4391*********************************************************************************************************************************/
4392
4393#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
4394 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
4395
4396/** Emits code for IEM_MC_ADD_GREG_U16. */
4397DECL_INLINE_THROW(uint32_t)
4398iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
4399{
4400 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4401 kIemNativeGstRegUse_ForUpdate);
4402
4403#ifdef RT_ARCH_AMD64
4404 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4405 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4406 if (idxGstTmpReg >= 8)
4407 pbCodeBuf[off++] = X86_OP_REX_B;
4408 if (uAddend == 1)
4409 {
4410 pbCodeBuf[off++] = 0xff; /* inc */
4411 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4412 }
4413 else
4414 {
4415 pbCodeBuf[off++] = 0x81;
4416 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4417 pbCodeBuf[off++] = uAddend;
4418 pbCodeBuf[off++] = 0;
4419 }
4420
4421#else
4422 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4423 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4424
4425 /* add tmp, gstgrp, uAddend */
4426 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
4427
4428 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
4429 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4430
4431 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4432#endif
4433
4434 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4435
4436#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4437 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4438#endif
4439
4440 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4441 return off;
4442}
4443
4444
4445#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
4446 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4447
4448#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
4449 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4450
4451/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
4452DECL_INLINE_THROW(uint32_t)
4453iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
4454{
4455 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4456 kIemNativeGstRegUse_ForUpdate);
4457
4458#ifdef RT_ARCH_AMD64
4459 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4460 if (f64Bit)
4461 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4462 else if (idxGstTmpReg >= 8)
4463 pbCodeBuf[off++] = X86_OP_REX_B;
4464 if (uAddend == 1)
4465 {
4466 pbCodeBuf[off++] = 0xff; /* inc */
4467 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4468 }
4469 else if (uAddend < 128)
4470 {
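        /* 0x83 takes a sign-extended imm8, hence the uAddend < 128 restriction;
           larger addends fall through to the imm32 form below. */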
4471 pbCodeBuf[off++] = 0x83; /* add */
4472 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4473 pbCodeBuf[off++] = RT_BYTE1(uAddend);
4474 }
4475 else
4476 {
4477 pbCodeBuf[off++] = 0x81; /* add */
4478 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4479 pbCodeBuf[off++] = RT_BYTE1(uAddend);
4480 pbCodeBuf[off++] = 0;
4481 pbCodeBuf[off++] = 0;
4482 pbCodeBuf[off++] = 0;
4483 }
4484
4485#else
4486 /* add gstgrp, gstgrp, uAddend */
4487 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4488 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
4489
4490#endif
4491
4492 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4493
4494#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4495 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4496#endif
4497
4498 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4499 return off;
4500}
4501
4502
4503
4504#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
4505 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
4506
4507/** Emits code for IEM_MC_SUB_GREG_U16. */
4508DECL_INLINE_THROW(uint32_t)
4509iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
4510{
4511 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4512 kIemNativeGstRegUse_ForUpdate);
4513
4514#ifdef RT_ARCH_AMD64
4515 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4516 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4517 if (idxGstTmpReg >= 8)
4518 pbCodeBuf[off++] = X86_OP_REX_B;
4519 if (uSubtrahend == 1)
4520 {
4521 pbCodeBuf[off++] = 0xff; /* dec */
4522 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4523 }
4524 else
4525 {
4526 pbCodeBuf[off++] = 0x81;
4527 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4528 pbCodeBuf[off++] = uSubtrahend;
4529 pbCodeBuf[off++] = 0;
4530 }
4531
4532#else
4533 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4534 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4535
4536 /* sub tmp, gstgrp, uSubtrahend */
4537 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
4538
4539 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
4540 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4541
4542 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4543#endif
4544
4545 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4546
4547#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4548 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4549#endif
4550
4551 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4552 return off;
4553}
4554
4555
4556#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
4557 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4558
4559#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
4560 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4561
4562/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
4563DECL_INLINE_THROW(uint32_t)
4564iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
4565{
4566 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4567 kIemNativeGstRegUse_ForUpdate);
4568
4569#ifdef RT_ARCH_AMD64
4570 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4571 if (f64Bit)
4572 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4573 else if (idxGstTmpReg >= 8)
4574 pbCodeBuf[off++] = X86_OP_REX_B;
4575 if (uSubtrahend == 1)
4576 {
4577 pbCodeBuf[off++] = 0xff; /* dec */
4578 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4579 }
4580 else if (uSubtrahend < 128)
4581 {
4582 pbCodeBuf[off++] = 0x83; /* sub */
4583 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4584 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4585 }
4586 else
4587 {
4588 pbCodeBuf[off++] = 0x81; /* sub */
4589 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4590 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4591 pbCodeBuf[off++] = 0;
4592 pbCodeBuf[off++] = 0;
4593 pbCodeBuf[off++] = 0;
4594 }
4595
4596#else
4597 /* sub gstgrp, gstgrp, uSubtrahend */
4598 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4599 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
4600
4601#endif
4602
4603 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4604
4605#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4606 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4607#endif
4608
4609 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4610 return off;
4611}
4612
4613
4614#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
4615 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
4616
4617#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
4618 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
4619
4620#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
4621 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
4622
4623#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
4624 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
4625
4626/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
4627DECL_INLINE_THROW(uint32_t)
4628iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
4629{
4630#ifdef VBOX_STRICT
4631 switch (cbMask)
4632 {
4633 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4634 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4635 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4636 case sizeof(uint64_t): break;
4637 default: AssertFailedBreak();
4638 }
4639#endif
4640
4641 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4642 kIemNativeGstRegUse_ForUpdate);
4643
4644 switch (cbMask)
4645 {
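        /* For sub-32-bit operands the untouched upper bits must survive, so the mask
           is widened with all-ones; e.g. an 8-bit AND with 0x0f becomes a 64-bit AND
           with 0xffffffffffffff0f. */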
4646 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
4647 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
4648 break;
4649 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
4650 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
4651 break;
4652 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
4653 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
4654 break;
4655 case sizeof(uint64_t):
4656 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
4657 break;
4658 default: AssertFailedBreak();
4659 }
4660
4661 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4662
4663#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4664 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4665#endif
4666
4667 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4668 return off;
4669}
4670
4671
4672#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
4673 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
4674
4675#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
4676 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
4677
4678#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
4679 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
4680
4681#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
4682 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
4683
4684/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
4685DECL_INLINE_THROW(uint32_t)
4686iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
4687{
4688#ifdef VBOX_STRICT
4689 switch (cbMask)
4690 {
4691 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4692 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4693 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4694 case sizeof(uint64_t): break;
4695 default: AssertFailedBreak();
4696 }
4697#endif
4698
4699 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4700 kIemNativeGstRegUse_ForUpdate);
4701
4702 switch (cbMask)
4703 {
4704 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
4705 case sizeof(uint16_t):
4706 case sizeof(uint64_t):
4707 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
4708 break;
4709 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
4710 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
4711 break;
4712 default: AssertFailedBreak();
4713 }
4714
4715 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4716
4717#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4718 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4719#endif
4720
4721 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4722 return off;
4723}
4724
4725
4726/*********************************************************************************************************************************
4727* Local/Argument variable manipulation (add, sub, and, or). *
4728*********************************************************************************************************************************/
4729
4730#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
4731 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
4732
4733#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
4734 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
4735
4736#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
4737 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
4738
4739#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
4740 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
4741
4742
4743#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
4744 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
4745
4746#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
4747 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
4748
4749#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
4750 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
4751
4752/** Emits code for AND'ing a local and a constant value. */
4753DECL_INLINE_THROW(uint32_t)
4754iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
4755{
4756#ifdef VBOX_STRICT
4757 switch (cbMask)
4758 {
4759 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4760 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4761 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4762 case sizeof(uint64_t): break;
4763 default: AssertFailedBreak();
4764 }
4765#endif
4766
4767 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4768 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
4769
4770 if (cbMask <= sizeof(uint32_t))
4771 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
4772 else
4773 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
4774
4775 iemNativeVarRegisterRelease(pReNative, idxVar);
4776 return off;
4777}
4778
4779
4780#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
4781 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
4782
4783#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
4784 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
4785
4786#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
4787 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
4788
4789#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
4790 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
4791
4792/** Emits code for OR'ing a local and a constant value. */
4793DECL_INLINE_THROW(uint32_t)
4794iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
4795{
4796#ifdef VBOX_STRICT
4797 switch (cbMask)
4798 {
4799 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4800 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4801 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4802 case sizeof(uint64_t): break;
4803 default: AssertFailedBreak();
4804 }
4805#endif
4806
4807 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4808 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
4809
4810 if (cbMask <= sizeof(uint32_t))
4811 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
4812 else
4813 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
4814
4815 iemNativeVarRegisterRelease(pReNative, idxVar);
4816 return off;
4817}
4818
4819
4820#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
4821 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
4822
4823#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
4824 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
4825
4826#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
4827 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
4828
4829/** Emits code for reversing the byte order in a local value. */
4830DECL_INLINE_THROW(uint32_t)
4831iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
4832{
4833 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4834 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4835
4836 switch (cbLocal)
4837 {
4838 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
4839 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
4840 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
4841 default: AssertFailedBreak();
4842 }
4843
4844 iemNativeVarRegisterRelease(pReNative, idxVar);
4845 return off;
4846}
4847
4848
4849#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
4850 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
4851
4852#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
4853 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
4854
4855#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
4856 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
4857
4858/** Emits code for shifting left a local value. */
4859DECL_INLINE_THROW(uint32_t)
4860iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
4861{
4862#ifdef VBOX_STRICT
4863 switch (cbLocal)
4864 {
4865 case sizeof(uint8_t): Assert(cShift < 8); break;
4866 case sizeof(uint16_t): Assert(cShift < 16); break;
4867 case sizeof(uint32_t): Assert(cShift < 32); break;
4868 case sizeof(uint64_t): Assert(cShift < 64); break;
4869 default: AssertFailedBreak();
4870 }
4871#endif
4872
4873 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4874 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4875
4876 if (cbLocal <= sizeof(uint32_t))
4877 {
4878 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
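        /* For 8-bit and 16-bit locals the 32-bit shift can carry bits beyond the
           local's width, so mask the result back down to keep it zero-extended. */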
4879 if (cbLocal < sizeof(uint32_t))
4880 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
4881 cbLocal == sizeof(uint16_t)
4882 ? UINT32_C(0xffff)
4883 : UINT32_C(0xff));
4884 }
4885 else
4886 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
4887
4888 iemNativeVarRegisterRelease(pReNative, idxVar);
4889 return off;
4890}
4891
4892
4893#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
4894 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
4895
4896#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
4897 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
4898
4899#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
4900 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
4901
4902/** Emits code for arithmetically shifting a local value right. */
4903DECL_INLINE_THROW(uint32_t)
4904iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
4905{
4906#ifdef VBOX_STRICT
4907 switch (cbLocal)
4908 {
4909 case sizeof(int8_t): Assert(cShift < 8); break;
4910 case sizeof(int16_t): Assert(cShift < 16); break;
4911 case sizeof(int32_t): Assert(cShift < 32); break;
4912 case sizeof(int64_t): Assert(cShift < 64); break;
4913 default: AssertFailedBreak();
4914 }
4915#endif
4916
4917 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4918 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4919
4920 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
4921 if (cbLocal == sizeof(uint8_t))
4922 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4923 else if (cbLocal == sizeof(uint16_t))
4924 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
4925
4926 if (cbLocal <= sizeof(uint32_t))
4927 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
4928 else
4929 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
4930
4931 iemNativeVarRegisterRelease(pReNative, idxVar);
4932 return off;
4933}
4934
4935
4936#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
4937 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
4938
4939#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
4940 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
4941
4942#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
4943 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
4944
4945/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
4946DECL_INLINE_THROW(uint32_t)
4947iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
4948{
4949 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
4950 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
4951 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4952 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4953
4954 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4955 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
4956
4957 /* Need to sign extend the value. */
4958 if (cbLocal <= sizeof(uint32_t))
4959 {
4960/** @todo ARM64: In case of boredom, the extended add instruction can do the
4961 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
4962 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
4963
4964 switch (cbLocal)
4965 {
4966 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
4967 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
4968 default: AssertFailed();
4969 }
4970
4971 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
4972 iemNativeRegFreeTmp(pReNative, idxRegTmp);
4973 }
4974 else
4975 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
4976
4977 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
4978 iemNativeVarRegisterRelease(pReNative, idxVar);
4979 return off;
4980}
4981
4982
4983
4984/*********************************************************************************************************************************
4985* EFLAGS *
4986*********************************************************************************************************************************/
4987
4988#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
4989# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
4990#else
4991# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
4992 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
4993
4994DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
4995{
4996 if (fEflOutput)
4997 {
4998 PVMCPUCC const pVCpu = pReNative->pVCpu;
4999# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5000 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5001 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5002 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5003# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5004 if (fEflOutput & (a_fEfl)) \
5005 { \
5006 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5007 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5008 else \
5009 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5010 } else do { } while (0)
5011# else
5012 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
5013 IEMLIVENESSBIT const LivenessClobbered =
5014 {
5015 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5016 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5017 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5018 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5019 };
5020 IEMLIVENESSBIT const LivenessDelayable =
5021 {
5022 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5023 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5024 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5025 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5026 };
5027# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5028 if (fEflOutput & (a_fEfl)) \
5029 { \
5030 if (LivenessClobbered.a_fLivenessMember) \
5031 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5032 else if (LivenessDelayable.a_fLivenessMember) \
5033 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5034 else \
5035 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5036 } else do { } while (0)
5037# endif
5038 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5039 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5040 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5041 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5042 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5043 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5044 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5045# undef CHECK_FLAG_AND_UPDATE_STATS
5046 }
5047 RT_NOREF(fEflInput);
5048}
5049#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
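
/*
 * For reference, the extended-layout classification above is plain bit
 * arithmetic on the liveness bitmaps, roughly as follows (illustration only,
 * not compiled in, made-up names): a flag write is skippable when no later
 * access needs the value at all, and merely delayable when only a potential
 * exception/helper-call path might still need it.
 */
#if 0
static void iemExampleClassifyFlagWrites(uint64_t bmRead, uint64_t bmWrite, uint64_t bmPotXcptOrCall,
                                         uint64_t bmOther, uint64_t *pbmSkippable, uint64_t *pbmDelayable)
{
    *pbmSkippable = bmWrite & ~(bmRead | bmPotXcptOrCall | bmOther);   /* written, never needed again */
    *pbmDelayable = bmWrite &  bmPotXcptOrCall & ~(bmRead | bmOther);  /* only xcpt/call paths may need it */
}
#endif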
5050
5051#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5052#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5053 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
5054
5055/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5056DECL_INLINE_THROW(uint32_t)
5057iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
5058 uint32_t fEflInput, uint32_t fEflOutput)
5059{
5060 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5061 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5062 RT_NOREF(fEflInput, fEflOutput);
5063
5064#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5065# ifdef VBOX_STRICT
5066 if ( pReNative->idxCurCall != 0
5067 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
5068 {
5069 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5070 uint32_t const fBoth = fEflInput | fEflOutput;
5071# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
5072 AssertMsg( !(fBoth & (a_fElfConst)) \
5073 || (!(fEflInput & (a_fElfConst)) \
5074 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5075 : !(fEflOutput & (a_fElfConst)) \
5076 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5077 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5078 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5079 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5080 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5081 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5082 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5083 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5084 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5085 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5086# undef ASSERT_ONE_EFL
5087 }
5088# endif
5089#endif
5090
5091 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5092
5093 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
5094 * the existing shadow copy. */
5095 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
5096 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5097 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5098 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5099 return off;
5100}
5101
5102
5103
5104/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5105 * start using it with custom native code emission (inlining assembly
5106 * instruction helpers). */
5107#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5108#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5109 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5110 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
5111
5112#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5113#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5114 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5115 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
5116
5117/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
5118DECL_INLINE_THROW(uint32_t)
5119iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
5120 bool fUpdateSkipping)
5121{
5122 RT_NOREF(fEflOutput);
5123 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
5124 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5125
5126#ifdef VBOX_STRICT
5127 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
5128 uint32_t offFixup = off;
5129 off = iemNativeEmitJnzToFixed(pReNative, off, off);
5130 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
5131 iemNativeFixupFixedJump(pReNative, offFixup, off);
5132
5133 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
5134 offFixup = off;
5135 off = iemNativeEmitJzToFixed(pReNative, off, off);
5136 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
5137 iemNativeFixupFixedJump(pReNative, offFixup, off);
5138
5139 /** @todo validate that only bits in the fEflOutput mask changed. */
5140#endif
5141
5142#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5143 if (fUpdateSkipping)
5144 {
5145 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5146 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5147 else
5148 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5149 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5150 }
5151#else
5152 RT_NOREF_PV(fUpdateSkipping);
5153#endif
5154
5155 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5156 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
5157 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5158 return off;
5159}
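
/*
 * For reference, the strict-build checks above assert these two invariants on
 * the value being committed (illustration only, not compiled in, made-up name):
 */
#if 0
static bool iemExampleIsCommittedEflagsSane(uint32_t fEfl)
{
    return (fEfl & X86_EFL_RA1_MASK) != 0                               /* reserved-as-one bit must be set */
        && !(fEfl & X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);       /* reserved-as-zero bits must be clear */
}
#endif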
5160
5161
5162typedef enum IEMNATIVEMITEFLOP
5163{
5164 kIemNativeEmitEflOp_Invalid = 0,
5165 kIemNativeEmitEflOp_Set,
5166 kIemNativeEmitEflOp_Clear,
5167 kIemNativeEmitEflOp_Flip
5168} IEMNATIVEMITEFLOP;
5169
5170#define IEM_MC_SET_EFL_BIT(a_fBit) \
5171 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Set);
5172
5173#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
5174 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Clear);
5175
5176#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
5177 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Flip);
5178
5179/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
5180DECL_INLINE_THROW(uint32_t)
5181iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflBit, IEMNATIVEMITEFLOP enmOp)
5182{
5183 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5184 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/);
5185
5186 switch (enmOp)
5187 {
5188 case kIemNativeEmitEflOp_Set:
5189 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5190 break;
5191 case kIemNativeEmitEflOp_Clear:
5192 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~fEflBit);
5193 break;
5194 case kIemNativeEmitEflOp_Flip:
5195 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5196 break;
5197 default:
5198 AssertFailed();
5199 break;
5200 }
5201
5202 /** @todo No delayed writeback for EFLAGS right now. */
5203 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
5204
5205 /* Free but don't flush the EFLAGS register. */
5206 iemNativeRegFreeTmp(pReNative, idxEflReg);
5207
5208 return off;
5209}
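
/*
 * For reference, the three operations above compute the following on the
 * guest EFLAGS value (illustration only, not compiled in, made-up name):
 */
#if 0
static uint32_t iemExampleModifyEflBit(uint32_t fEfl, uint32_t fBit, IEMNATIVEMITEFLOP enmOp)
{
    switch (enmOp)
    {
        case kIemNativeEmitEflOp_Set:   return fEfl | fBit;     /* OR sets the bit(s) */
        case kIemNativeEmitEflOp_Clear: return fEfl & ~fBit;    /* AND with the inverted mask clears */
        case kIemNativeEmitEflOp_Flip:  return fEfl ^ fBit;     /* XOR toggles */
        default:                        return fEfl;
    }
}
#endif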
5210
5211
5212/*********************************************************************************************************************************
5213* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
5214*********************************************************************************************************************************/
5215
5216#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
5217 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
5218
5219#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
5220 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
5221
5222#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
5223 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
5224
5225
5226/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
5227 * IEM_MC_FETCH_SREG_ZX_U64. */
5228DECL_INLINE_THROW(uint32_t)
5229iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
5230{
5231 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5232 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
5233 Assert(iSReg < X86_SREG_COUNT);
5234
5235 /*
5236 * For now, we will not create a shadow copy of a selector. The rationale
5237 * is that since we do not recompile the popping and loading of segment
5238 * registers, and the IEM_MC_FETCH_SREG_U* MCs are only used for
5239 * pushing and moving to registers, there is only a small chance that the
5240 * shadow copy will be accessed again before the register is reloaded. One
5241 * scenario would be nested calls in 16-bit code, but I doubt it's worth
5242 * the extra register pressure atm.
5243 *
5244 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
5245 * and iemNativeVarRegisterAcquire for a load scenario. We only have the
5246 * store scenario covered at present (r160730).
5247 */
5248 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5249 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
5250 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
5251 iemNativeVarRegisterRelease(pReNative, idxDstVar);
5252 return off;
5253}
5254
5255
5256
5257/*********************************************************************************************************************************
5258* Register references. *
5259*********************************************************************************************************************************/
5260
5261#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
5262 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
5263
5264#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
5265 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
5266
5267/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
5268DECL_INLINE_THROW(uint32_t)
5269iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
5270{
5271 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
5272 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5273 Assert(iGRegEx < 20);
5274
5275 if (iGRegEx < 16)
5276 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
5277 else
5278 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
5279
5280 /* If we've delayed writing back the register value, flush it now. */
5281 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
5282
5283 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5284 if (!fConst)
5285 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
5286
5287 return off;
5288}
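
/*
 * For reference, the extended 8-bit register index used above maps as follows
 * (illustration only, not compiled in; made-up helper taking the 16 GPR values
 * as a plain array): 0..15 address the low byte of the 16 GPRs, while 16..19
 * address the high bytes AH, CH, DH and BH of the first four.
 */
#if 0
static uint8_t iemExampleFetchGReg8Ex(uint64_t const auGRegs[16], uint8_t iGRegEx)
{
    if (iGRegEx < 16)
        return (uint8_t)auGRegs[iGRegEx];                   /* AL, CL, ..., R15B */
    return (uint8_t)(auGRegs[iGRegEx & 15] >> 8);           /* AH, CH, DH, BH */
}
#endif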
5289
5290#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
5291 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
5292
5293#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
5294 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
5295
5296#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
5297 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
5298
5299#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
5300 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
5301
5302#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
5303 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
5304
5305#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
5306 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
5307
5308#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
5309 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
5310
5311#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
5312 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
5313
5314#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
5315 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
5316
5317#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
5318 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
5319
5320/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
5321DECL_INLINE_THROW(uint32_t)
5322iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
5323{
5324 Assert(iGReg < 16);
5325 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
5326 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5327
5328 /* If we've delayed writing back the register value, flush it now. */
5329 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
5330
5331 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5332 if (!fConst)
5333 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
5334
5335 return off;
5336}
5337
5338
5339#undef IEM_MC_REF_EFLAGS /* should not be used. */
5340#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
5341 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5342 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
5343
5344/** Handles IEM_MC_REF_EFLAGS. */
5345DECL_INLINE_THROW(uint32_t)
5346iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
5347{
5348 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
5349 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5350
5351#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5352 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5353
5354 /* Updating the skipping according to the outputs is a little early, but
5355 we don't have any other hooks for references atm. */
5356 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5357 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5358 else if (fEflOutput & X86_EFL_STATUS_BITS)
5359 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5360 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5361#else
5362 RT_NOREF(fEflInput, fEflOutput);
5363#endif
5364
5365 /* If we've delayed writing back the register value, flush it now. */
5366 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
5367
5368 /* If there is a shadow copy of guest EFLAGS, flush it now. */
5369 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
5370
5371 return off;
5372}
5373
5374
5375/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
5376 * different code from the threaded recompiler, maybe it would be helpful. For now
5377 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
5378#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
5379
5380
5381#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
5382 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
5383
5384#define IEM_MC_REF_XREG_XMM(a_puXmmDst, a_iXReg) \
5385 off = iemNativeEmitRefXregXxx(pReNative, off, a_puXmmDst, a_iXReg, false /*fConst*/)
5386
5387#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
5388 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
5389
5390#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
5391 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
5392
5393#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5394/* Just being paranoid here. */
5395# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
5396AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
5397AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
5398AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
5399AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
5400# endif
5401AssertCompileMemberOffset(X86XMMREG, au64, 0);
5402AssertCompileMemberOffset(X86XMMREG, au32, 0);
5403AssertCompileMemberOffset(X86XMMREG, ar64, 0);
5404AssertCompileMemberOffset(X86XMMREG, ar32, 0);
5405
5406# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
5407 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
5408# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
5409 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
5410# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
5411 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
5412# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
5413 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
5414#endif
5415
5416/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
5417DECL_INLINE_THROW(uint32_t)
5418iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
5419{
5420 Assert(iXReg < 16);
5421 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
5422 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5423
5424 /* If we've delayed writing back the register value, flush it now. */
5425 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
5426
5427#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5428 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5429 if (!fConst)
5430 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
5431#else
5432 RT_NOREF(fConst);
5433#endif
5434
5435 return off;
5436}
5437
5438
5439
5440/*********************************************************************************************************************************
5441* Effective Address Calculation *
5442*********************************************************************************************************************************/
5443#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
5444 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
5445
5446/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
5447 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
5448DECL_INLINE_THROW(uint32_t)
5449iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5450 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
5451{
5452 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5453
5454 /*
5455 * Handle the disp16 form with no registers first.
5456 *
5457 * Convert to an immediate value, as that'll delay the register allocation
5458 * and assignment till the memory access / call / whatever and we can use
5459 * a more appropriate register (or none at all).
5460 */
5461 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
5462 {
5463 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
5464 return off;
5465 }
5466
5467 /* Determine the displacement. */
5468 uint16_t u16EffAddr;
5469 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5470 {
5471 case 0: u16EffAddr = 0; break;
5472 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
5473 case 2: u16EffAddr = u16Disp; break;
5474 default: AssertFailedStmt(u16EffAddr = 0);
5475 }
5476
5477 /* Determine the registers involved. */
5478 uint8_t idxGstRegBase;
5479 uint8_t idxGstRegIndex;
5480 switch (bRm & X86_MODRM_RM_MASK)
5481 {
5482 case 0:
5483 idxGstRegBase = X86_GREG_xBX;
5484 idxGstRegIndex = X86_GREG_xSI;
5485 break;
5486 case 1:
5487 idxGstRegBase = X86_GREG_xBX;
5488 idxGstRegIndex = X86_GREG_xDI;
5489 break;
5490 case 2:
5491 idxGstRegBase = X86_GREG_xBP;
5492 idxGstRegIndex = X86_GREG_xSI;
5493 break;
5494 case 3:
5495 idxGstRegBase = X86_GREG_xBP;
5496 idxGstRegIndex = X86_GREG_xDI;
5497 break;
5498 case 4:
5499 idxGstRegBase = X86_GREG_xSI;
5500 idxGstRegIndex = UINT8_MAX;
5501 break;
5502 case 5:
5503 idxGstRegBase = X86_GREG_xDI;
5504 idxGstRegIndex = UINT8_MAX;
5505 break;
5506 case 6:
5507 idxGstRegBase = X86_GREG_xBP;
5508 idxGstRegIndex = UINT8_MAX;
5509 break;
5510#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
5511 default:
5512#endif
5513 case 7:
5514 idxGstRegBase = X86_GREG_xBX;
5515 idxGstRegIndex = UINT8_MAX;
5516 break;
5517 }
5518
5519 /*
5520 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
5521 */
5522 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5523 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5524 kIemNativeGstRegUse_ReadOnly);
5525 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
5526 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5527 kIemNativeGstRegUse_ReadOnly)
5528 : UINT8_MAX;
5529#ifdef RT_ARCH_AMD64
5530 if (idxRegIndex == UINT8_MAX)
5531 {
5532 if (u16EffAddr == 0)
5533 {
5534 /* movzx ret, base */
5535 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
5536 }
5537 else
5538 {
5539 /* lea ret32, [base64 + disp32] */
5540 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5541 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5542 if (idxRegRet >= 8 || idxRegBase >= 8)
5543 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
5544 pbCodeBuf[off++] = 0x8d;
5545 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5546 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
5547 else
5548 {
5549 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
5550 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5551 }
5552 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
5553 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
5554 pbCodeBuf[off++] = 0;
5555 pbCodeBuf[off++] = 0;
5556 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5557
5558 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
5559 }
5560 }
5561 else
5562 {
5563 /* lea ret32, [index64 + base64 (+ disp32)] */
5564 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5565 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5566 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5567 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5568 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5569 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5570 pbCodeBuf[off++] = 0x8d;
5571 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
5572 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5573 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
5574 if (bMod == X86_MOD_MEM4)
5575 {
5576 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
5577 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
5578 pbCodeBuf[off++] = 0;
5579 pbCodeBuf[off++] = 0;
5580 }
5581 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5582 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
5583 }
5584
5585#elif defined(RT_ARCH_ARM64)
5586 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5587 if (u16EffAddr == 0)
5588 {
5589 if (idxRegIndex == UINT8_MAX)
5590 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
5591 else
5592 {
5593 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
5594 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
5595 }
5596 }
5597 else
5598 {
5599 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
5600 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
5601 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
5602 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
5603 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
5604 else
5605 {
5606 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
5607 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
5608 }
5609 if (idxRegIndex != UINT8_MAX)
5610 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
5611 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
5612 }
5613
5614#else
5615# error "port me"
5616#endif
5617
5618 if (idxRegIndex != UINT8_MAX)
5619 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5620 iemNativeRegFreeTmp(pReNative, idxRegBase);
5621 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5622 return off;
5623}
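
/*
 * For reference, the 16-bit effective address computed by the code above
 * follows the classic 16-bit ModR/M table; a C model (illustration only, not
 * compiled in; made-up helper taking the legacy GPR values as a plain array
 * indexed by X86_GREG_xXX):
 */
#if 0
static uint16_t iemExampleCalcEffAddr16(uint8_t bRm, uint16_t u16Disp, uint16_t const auGRegs16[8])
{
    static uint8_t const s_aiBase[8]  = { X86_GREG_xBX, X86_GREG_xBX, X86_GREG_xBP, X86_GREG_xBP,
                                          X86_GREG_xSI, X86_GREG_xDI, X86_GREG_xBP, X86_GREG_xBX };
    static uint8_t const s_aiIndex[8] = { X86_GREG_xSI, X86_GREG_xDI, X86_GREG_xSI, X86_GREG_xDI,
                                          UINT8_MAX,    UINT8_MAX,    UINT8_MAX,    UINT8_MAX };
    uint8_t const iRm  = bRm & X86_MODRM_RM_MASK;
    uint8_t const iMod = (bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK;
    if (iMod == 0 && iRm == 6)
        return u16Disp;                                                 /* disp16 only, no registers */
    uint16_t uEff = iMod == 1 ? (uint16_t)(int16_t)(int8_t)u16Disp      /* disp8, sign extended */
                  : iMod == 2 ? u16Disp : 0;                            /* disp16 or no displacement */
    uEff += auGRegs16[s_aiBase[iRm]];
    if (s_aiIndex[iRm] != UINT8_MAX)
        uEff += auGRegs16[s_aiIndex[iRm]];
    return uEff;                                                        /* wraps at 16 bits */
}
#endif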
5624
5625
5626#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
5627 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
5628
5629/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
5630 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
5631DECL_INLINE_THROW(uint32_t)
5632iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5633 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
5634{
5635 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5636
5637 /*
5638 * Handle the disp32 form with no registers first.
5639 *
5640 * Convert to an immediate value, as that'll delay the register allocation
5641 * and assignment till the memory access / call / whatever and we can use
5642 * a more appropriate register (or none at all).
5643 */
5644 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
5645 {
5646 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
5647 return off;
5648 }
5649
5650 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
5651 uint32_t u32EffAddr = 0;
5652 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5653 {
5654 case 0: break;
5655 case 1: u32EffAddr = (int8_t)u32Disp; break;
5656 case 2: u32EffAddr = u32Disp; break;
5657 default: AssertFailed();
5658 }
5659
5660 /* Get the register (or SIB) value. */
5661 uint8_t idxGstRegBase = UINT8_MAX;
5662 uint8_t idxGstRegIndex = UINT8_MAX;
5663 uint8_t cShiftIndex = 0;
5664 switch (bRm & X86_MODRM_RM_MASK)
5665 {
5666 case 0: idxGstRegBase = X86_GREG_xAX; break;
5667 case 1: idxGstRegBase = X86_GREG_xCX; break;
5668 case 2: idxGstRegBase = X86_GREG_xDX; break;
5669 case 3: idxGstRegBase = X86_GREG_xBX; break;
5670 case 4: /* SIB */
5671 {
5672 /* index w/ scaling. */
5673 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
5674 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
5675 {
5676 case 0: idxGstRegIndex = X86_GREG_xAX; break;
5677 case 1: idxGstRegIndex = X86_GREG_xCX; break;
5678 case 2: idxGstRegIndex = X86_GREG_xDX; break;
5679 case 3: idxGstRegIndex = X86_GREG_xBX; break;
5680 case 4: cShiftIndex = 0; /*no index*/ break;
5681 case 5: idxGstRegIndex = X86_GREG_xBP; break;
5682 case 6: idxGstRegIndex = X86_GREG_xSI; break;
5683 case 7: idxGstRegIndex = X86_GREG_xDI; break;
5684 }
5685
5686 /* base */
5687 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
5688 {
5689 case 0: idxGstRegBase = X86_GREG_xAX; break;
5690 case 1: idxGstRegBase = X86_GREG_xCX; break;
5691 case 2: idxGstRegBase = X86_GREG_xDX; break;
5692 case 3: idxGstRegBase = X86_GREG_xBX; break;
5693 case 4:
5694 idxGstRegBase = X86_GREG_xSP;
5695 u32EffAddr += uSibAndRspOffset >> 8;
5696 break;
5697 case 5:
5698 if ((bRm & X86_MODRM_MOD_MASK) != 0)
5699 idxGstRegBase = X86_GREG_xBP;
5700 else
5701 {
5702 Assert(u32EffAddr == 0);
5703 u32EffAddr = u32Disp;
5704 }
5705 break;
5706 case 6: idxGstRegBase = X86_GREG_xSI; break;
5707 case 7: idxGstRegBase = X86_GREG_xDI; break;
5708 }
5709 break;
5710 }
5711 case 5: idxGstRegBase = X86_GREG_xBP; break;
5712 case 6: idxGstRegBase = X86_GREG_xSI; break;
5713 case 7: idxGstRegBase = X86_GREG_xDI; break;
5714 }
5715
5716 /*
5717 * If no registers are involved (SIB.B=5, SIB.X=4), repeat what we did at
5718 * the start of the function.
5719 */
5720 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
5721 {
5722 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
5723 return off;
5724 }
5725
5726 /*
5727 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
5728 */
5729 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5730 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
5731 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5732 kIemNativeGstRegUse_ReadOnly);
5733 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
5734 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5735 kIemNativeGstRegUse_ReadOnly);
5736
5737 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
5738 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
5739 {
5740 idxRegBase = idxRegIndex;
5741 idxRegIndex = UINT8_MAX;
5742 }
5743
5744#ifdef RT_ARCH_AMD64
5745 if (idxRegIndex == UINT8_MAX)
5746 {
5747 if (u32EffAddr == 0)
5748 {
5749 /* mov ret, base */
5750 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5751 }
5752 else
5753 {
5754 /* lea ret32, [base64 + disp32] */
5755 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5756 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5757 if (idxRegRet >= 8 || idxRegBase >= 8)
5758 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
5759 pbCodeBuf[off++] = 0x8d;
5760 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5761 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5762 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
5763 else
5764 {
5765 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5766 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5767 }
5768 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5769 if (bMod == X86_MOD_MEM4)
5770 {
5771 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5772 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5773 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5774 }
5775 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5776 }
5777 }
5778 else
5779 {
5780 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5781 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5782 if (idxRegBase == UINT8_MAX)
5783 {
5784 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
5785 if (idxRegRet >= 8 || idxRegIndex >= 8)
5786 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5787 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5788 pbCodeBuf[off++] = 0x8d;
5789 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
5790 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
5791 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5792 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5793 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5794 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5795 }
5796 else
5797 {
5798 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
5799 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5800 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5801 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5802 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5803 pbCodeBuf[off++] = 0x8d;
5804 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
5805 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5806 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5807 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
5808 if (bMod != X86_MOD_MEM0)
5809 {
5810 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5811 if (bMod == X86_MOD_MEM4)
5812 {
5813 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5814 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5815 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5816 }
5817 }
5818 }
5819 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5820 }
5821
5822#elif defined(RT_ARCH_ARM64)
5823 if (u32EffAddr == 0)
5824 {
5825 if (idxRegIndex == UINT8_MAX)
5826 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5827 else if (idxRegBase == UINT8_MAX)
5828 {
5829 if (cShiftIndex == 0)
5830 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
5831 else
5832 {
5833 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5834 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
5835 }
5836 }
5837 else
5838 {
5839 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5840 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
5841 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
5842 }
5843 }
5844 else
5845 {
5846 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
5847 {
5848 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5849 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
5850 }
5851 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
5852 {
5853 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5854 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
5855 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
5856 }
5857 else
5858 {
5859 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
5860 if (idxRegBase != UINT8_MAX)
5861 {
5862 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5863 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
5864 }
5865 }
5866 if (idxRegIndex != UINT8_MAX)
5867 {
5868 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5869 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
5870 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
5871 }
5872 }
5873
5874#else
5875# error "port me"
5876#endif
5877
5878 if (idxRegIndex != UINT8_MAX)
5879 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5880 if (idxRegBase != UINT8_MAX)
5881 iemNativeRegFreeTmp(pReNative, idxRegBase);
5882 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5883 return off;
5884}
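
/*
 * For reference, once base/index/scale and the fixed displacement have been
 * picked apart above, the emitted code computes no more than this
 * (illustration only, not compiled in, made-up name):
 */
#if 0
static uint32_t iemExampleCalcEffAddr32(uint32_t u32Disp, uint32_t uBase, uint32_t uIndex, uint8_t cShiftIndex)
{
    return u32Disp + uBase + (uIndex << cShiftIndex);   /* truncated to 32 bits by the operand width */
}
#endif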
5885
5886
5887#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5888 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5889 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
5890
5891#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5892 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5893 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
5894
5895#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5896 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5897 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
5898
5899/**
5900 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
5901 *
5902 * @returns New off.
5903 * @param pReNative The native recompiler state.
5904 * @param off The current code buffer offset.
5905 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
5906 * bit 4 to REX.X. The two bits are part of the
5907 * REG sub-field, which isn't needed in this
5908 * function.
5909 * @param uSibAndRspOffset Two parts:
5910 * - The first 8 bits make up the SIB byte.
5911 * - The next 8 bits are the fixed RSP/ESP offset
5912 * in case of a pop [xSP].
5913 * @param u32Disp The displacement byte/word/dword, if any.
5914 * @param cbInstr The size of the fully decoded instruction. Used
5915 * for RIP relative addressing.
5916 * @param idxVarRet The result variable number.
5917 * @param f64Bit Whether to use a 64-bit or 32-bit address size
5918 * when calculating the address.
5919 *
5920 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
5921 */
5922DECL_INLINE_THROW(uint32_t)
5923iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
5924 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
5925{
5926 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5927
5928 /*
5929 * Special case the rip + disp32 form first.
5930 */
5931 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
5932 {
5933#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5934 /* Need to take the current PC offset into account for the displacement, no need to flush here
5935 * as the PC is only accessed read-only and there are no branches or helper calls involved. */
5936 u32Disp += pReNative->Core.offPc;
5937#endif
5938
5939 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5940 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
5941 kIemNativeGstRegUse_ReadOnly);
5942#ifdef RT_ARCH_AMD64
5943 if (f64Bit)
5944 {
5945 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
5946 if ((int32_t)offFinalDisp == offFinalDisp)
5947 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
5948 else
5949 {
5950 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
5951 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
5952 }
5953 }
5954 else
5955 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
5956
5957#elif defined(RT_ARCH_ARM64)
5958 if (f64Bit)
5959 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
5960 (int64_t)(int32_t)u32Disp + cbInstr);
5961 else
5962 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
5963 (int32_t)u32Disp + cbInstr);
5964
5965#else
5966# error "Port me!"
5967#endif
5968 iemNativeRegFreeTmp(pReNative, idxRegPc);
5969 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5970 return off;
5971 }
5972
5973 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
5974 int64_t i64EffAddr = 0;
5975 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5976 {
5977 case 0: break;
5978 case 1: i64EffAddr = (int8_t)u32Disp; break;
5979 case 2: i64EffAddr = (int32_t)u32Disp; break;
5980 default: AssertFailed();
5981 }
5982
5983 /* Get the register (or SIB) value. */
5984 uint8_t idxGstRegBase = UINT8_MAX;
5985 uint8_t idxGstRegIndex = UINT8_MAX;
5986 uint8_t cShiftIndex = 0;
5987 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
5988 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
5989 else /* SIB: */
5990 {
5991 /* index w/ scaling. */
5992 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
5993 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
5994 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
5995 if (idxGstRegIndex == 4)
5996 {
5997 /* no index */
5998 cShiftIndex = 0;
5999 idxGstRegIndex = UINT8_MAX;
6000 }
6001
6002 /* base */
6003 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6004 if (idxGstRegBase == 4)
6005 {
6006 /* pop [rsp] hack */
6007 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
6008 }
6009 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6010 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6011 {
6012 /* mod=0 and base=5 -> disp32, no base reg. */
6013 Assert(i64EffAddr == 0);
6014 i64EffAddr = (int32_t)u32Disp;
6015 idxGstRegBase = UINT8_MAX;
6016 }
6017 }
6018
6019 /*
6020 * If no registers are involved (SIB.B=5, SIB.X=4), repeat what we did at
6021 * the start of the function.
6022 */
6023 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6024 {
6025 if (f64Bit)
6026 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6027 else
6028 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6029 return off;
6030 }
6031
6032 /*
6033 * Now emit code that calculates:
6034 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6035 * or if !f64Bit:
6036 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6037 */
6038 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6039 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6040 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6041 kIemNativeGstRegUse_ReadOnly);
6042 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6043 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6044 kIemNativeGstRegUse_ReadOnly);
6045
6046 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6047 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6048 {
6049 idxRegBase = idxRegIndex;
6050 idxRegIndex = UINT8_MAX;
6051 }
6052
6053#ifdef RT_ARCH_AMD64
6054 uint8_t bFinalAdj;
6055 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6056 bFinalAdj = 0; /* likely */
6057 else
6058 {
6059 /* pop [rsp] with a problematic disp32 value. Split out the
6060 RSP offset and add it separately afterwards (bFinalAdj). */
6061 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
6062 Assert(idxGstRegBase == X86_GREG_xSP);
6063 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6064 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6065 Assert(bFinalAdj != 0);
6066 i64EffAddr -= bFinalAdj;
6067 Assert((int32_t)i64EffAddr == i64EffAddr);
6068 }
6069 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6070//pReNative->pInstrBuf[off++] = 0xcc;
6071
6072 if (idxRegIndex == UINT8_MAX)
6073 {
6074 if (u32EffAddr == 0)
6075 {
6076 /* mov ret, base */
6077 if (f64Bit)
6078 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6079 else
6080 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6081 }
6082 else
6083 {
6084 /* lea ret, [base + disp32] */
6085 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6086 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6087 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
6088 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6089 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6090 | (f64Bit ? X86_OP_REX_W : 0);
6091 pbCodeBuf[off++] = 0x8d;
6092 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6093 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6094 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6095 else
6096 {
6097 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6098 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6099 }
6100 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6101 if (bMod == X86_MOD_MEM4)
6102 {
6103 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6104 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6105 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6106 }
6107 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6108 }
6109 }
6110 else
6111 {
6112 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6113 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6114 if (idxRegBase == UINT8_MAX)
6115 {
6116 /* lea ret, [(index64 << cShiftIndex) + disp32] */
6117 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
6118 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6119 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6120 | (f64Bit ? X86_OP_REX_W : 0);
6121 pbCodeBuf[off++] = 0x8d;
6122 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6123 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6124 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6125 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6126 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6127 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6128 }
6129 else
6130 {
6131 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6132 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6133 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6134 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6135 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6136 | (f64Bit ? X86_OP_REX_W : 0);
6137 pbCodeBuf[off++] = 0x8d;
6138 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6139 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6140 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6141 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6142 if (bMod != X86_MOD_MEM0)
6143 {
6144 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6145 if (bMod == X86_MOD_MEM4)
6146 {
6147 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6148 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6149 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6150 }
6151 }
6152 }
6153 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6154 }
6155
6156 if (!bFinalAdj)
6157 { /* likely */ }
6158 else
6159 {
6160 Assert(f64Bit);
6161 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
6162 }
6163
6164#elif defined(RT_ARCH_ARM64)
6165 if (i64EffAddr == 0)
6166 {
6167 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6168 if (idxRegIndex == UINT8_MAX)
6169 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
6170 else if (idxRegBase != UINT8_MAX)
6171 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6172 f64Bit, false /*fSetFlags*/, cShiftIndex);
6173 else
6174 {
6175 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
6176 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
6177 }
6178 }
6179 else
6180 {
6181 if (f64Bit)
6182 { /* likely */ }
6183 else
6184 i64EffAddr = (int32_t)i64EffAddr;
6185
6186 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
6187 {
6188 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6189 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
6190 }
6191 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
6192 {
6193 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6194 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
6195 }
6196 else
6197 {
6198 if (f64Bit)
6199 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
6200 else
6201 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
6202 if (idxRegBase != UINT8_MAX)
6203 {
6204 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6205 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
6206 }
6207 }
6208 if (idxRegIndex != UINT8_MAX)
6209 {
6210 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6211 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6212 f64Bit, false /*fSetFlags*/, cShiftIndex);
6213 }
6214 }
6215
6216#else
6217# error "port me"
6218#endif
6219
6220 if (idxRegIndex != UINT8_MAX)
6221 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6222 if (idxRegBase != UINT8_MAX)
6223 iemNativeRegFreeTmp(pReNative, idxRegBase);
6224 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6225 return off;
6226}
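
/*
 * For reference, the RIP relative special case handled first above computes
 * the following (illustration only, not compiled in, made-up name): the
 * displacement is relative to the address of the *next* instruction, hence
 * the cbInstr addend.
 */
#if 0
static uint64_t iemExampleCalcRipRelAddr(uint64_t uPcOfInstr, uint8_t cbInstr, uint32_t u32Disp, bool f64Bit)
{
    uint64_t const uEff = uPcOfInstr + cbInstr + (uint64_t)(int64_t)(int32_t)u32Disp;
    return f64Bit ? uEff : (uint32_t)uEff;      /* 32-bit address size truncates the result */
}
#endif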
6227
6228
6229/*********************************************************************************************************************************
6230* Memory fetches and stores common *
6231*********************************************************************************************************************************/
6232
6233typedef enum IEMNATIVEMITMEMOP
6234{
6235 kIemNativeEmitMemOp_Store = 0,
6236 kIemNativeEmitMemOp_Fetch,
6237 kIemNativeEmitMemOp_Fetch_Zx_U16,
6238 kIemNativeEmitMemOp_Fetch_Zx_U32,
6239 kIemNativeEmitMemOp_Fetch_Zx_U64,
6240 kIemNativeEmitMemOp_Fetch_Sx_U16,
6241 kIemNativeEmitMemOp_Fetch_Sx_U32,
6242 kIemNativeEmitMemOp_Fetch_Sx_U64
6243} IEMNATIVEMITMEMOP;
6244
6245/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
6246 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
6247 * (with iSegReg = UINT8_MAX). */
6248DECL_INLINE_THROW(uint32_t)
6249iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
6250 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAlignMaskAndCtl, IEMNATIVEMITMEMOP enmOp,
6251 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
6252{
6253 /*
6254 * Assert sanity.
6255 */
6256 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
6257 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
6258 Assert( enmOp != kIemNativeEmitMemOp_Store
6259 || pVarValue->enmKind == kIemNativeVarKind_Immediate
6260 || pVarValue->enmKind == kIemNativeVarKind_Stack);
6261 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
6262 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
6263 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
6264 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
6265 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6266 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
6267#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6268 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
6269 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
6270#else
6271 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
6272#endif
6273 Assert(!(fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)));
6274 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6275#ifdef VBOX_STRICT
6276 if (iSegReg == UINT8_MAX)
6277 {
6278 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6279 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6280 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6281 switch (cbMem)
6282 {
6283 case 1:
6284 Assert( pfnFunction
6285 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
6286 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6287 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6288 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6289 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6290 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
6291 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
6292 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
6293 : UINT64_C(0xc000b000a0009000) ));
6294 Assert(!fAlignMaskAndCtl);
6295 break;
6296 case 2:
6297 Assert( pfnFunction
6298 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
6299 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6300 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6301 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6302 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
6303 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
6304 : UINT64_C(0xc000b000a0009000) ));
6305 Assert(fAlignMaskAndCtl <= 1);
6306 break;
6307 case 4:
6308 Assert( pfnFunction
6309 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
6310 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
6311 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
6312 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
6313 : UINT64_C(0xc000b000a0009000) ));
6314 Assert(fAlignMaskAndCtl <= 3);
6315 break;
6316 case 8:
6317 Assert( pfnFunction
6318 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
6319 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
6320 : UINT64_C(0xc000b000a0009000) ));
6321 Assert(fAlignMaskAndCtl <= 7);
6322 break;
6323#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6324 case sizeof(RTUINT128U):
6325 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6326 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
6327 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
6328 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
6329 || ( enmOp == kIemNativeEmitMemOp_Store
6330 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
6331 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
6332 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
6333 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
6334 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
6335 : fAlignMaskAndCtl <= 15);
6336 break;
6337 case sizeof(RTUINT256U):
6338 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6339 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
6340 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
6341 || ( enmOp == kIemNativeEmitMemOp_Store
6342 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
6343 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
6344 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx
6345 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx
6346 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
6347 : fAlignMaskAndCtl <= 31);
6348 break;
6349#endif
6350 }
6351 }
6352 else
6353 {
6354 Assert(iSegReg < 6);
6355 switch (cbMem)
6356 {
6357 case 1:
6358 Assert( pfnFunction
6359 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
6360 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
6361 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6362 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6363 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6364 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
6365 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
6366 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
6367 : UINT64_C(0xc000b000a0009000) ));
6368 Assert(!fAlignMaskAndCtl);
6369 break;
6370 case 2:
6371 Assert( pfnFunction
6372 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
6373 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
6374 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
6375 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
6376 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
6377 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
6378 : UINT64_C(0xc000b000a0009000) ));
6379 Assert(fAlignMaskAndCtl <= 1);
6380 break;
6381 case 4:
6382 Assert( pfnFunction
6383 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
6384 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
6385 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
6386 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
6387 : UINT64_C(0xc000b000a0009000) ));
6388 Assert(fAlignMaskAndCtl <= 3);
6389 break;
6390 case 8:
6391 Assert( pfnFunction
6392 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
6393 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
6394 : UINT64_C(0xc000b000a0009000) ));
6395 Assert(fAlignMaskAndCtl <= 7);
6396 break;
6397#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6398 case sizeof(RTUINT128U):
6399 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6400 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
6401 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
6402 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
6403 || ( enmOp == kIemNativeEmitMemOp_Store
6404 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
6405 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
6406 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
6407 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
6408 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
6409 : fAlignMaskAndCtl <= 15);
6410 break;
6411 case sizeof(RTUINT256U):
6412 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6413 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
6414 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
6415 || ( enmOp == kIemNativeEmitMemOp_Store
6416 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
6417 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
6418 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx
6419 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx
6420 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
6421 : fAlignMaskAndCtl <= 31);
6422 break;
6423#endif
6424 }
6425 }
6426#endif
6427
6428#ifdef VBOX_STRICT
6429 /*
6430 * Check that the fExec flags we've got make sense.
6431 */
6432 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6433#endif
6434
6435 /*
6436 * To keep things simple we have to commit any pending writes first as we
6437 * may end up making calls.
6438 */
6439 /** @todo we could postpone this till we make the call and reload the
6440 * registers after returning from the call. Not sure if that's sensible or
6441 * not, though. */
6442#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6443 off = iemNativeRegFlushPendingWrites(pReNative, off);
6444#else
6445 /* The program counter is treated differently for now. */
6446 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
6447#endif
6448
6449#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6450 /*
6451 * Move/spill/flush stuff out of call-volatile registers.
6452 * This is the easy way out. We could contain this to the tlb-miss branch
6453 * by saving and restoring active stuff here.
6454 */
6455 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
6456#endif
6457
6458 /*
6459 * Define labels and allocate the result register (trying for the return
6460 * register if we can).
6461 */
6462 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6463#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6464 uint8_t idxRegValueFetch = UINT8_MAX;
6465
6466 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6467 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6468 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
6469 else
6470 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6471 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6472 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
6473 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
6474#else
6475 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6476 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6477 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
6478 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
6479#endif
6480 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
6481
6482#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6483 uint8_t idxRegValueStore = UINT8_MAX;
6484
6485 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6486 idxRegValueStore = !TlbState.fSkip
6487 && enmOp == kIemNativeEmitMemOp_Store
6488 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6489 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6490 : UINT8_MAX;
6491 else
6492 idxRegValueStore = !TlbState.fSkip
6493 && enmOp == kIemNativeEmitMemOp_Store
6494 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6495 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6496 : UINT8_MAX;
6497
6498#else
6499 uint8_t const idxRegValueStore = !TlbState.fSkip
6500 && enmOp == kIemNativeEmitMemOp_Store
6501 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6502 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6503 : UINT8_MAX;
6504#endif
6505 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
6506 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6507 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6508 : UINT32_MAX;
6509
6510 /*
6511 * Jump to the TLB lookup code.
6512 */
6513 if (!TlbState.fSkip)
6514 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6515
6516 /*
6517 * TlbMiss:
6518 *
6519 * Call the helper to do the fetching / storing.
6520 * We flush all guest register shadow copies here.
6521 */
6522 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
6523
6524#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6525 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6526#else
6527 RT_NOREF(idxInstr);
6528#endif
6529
6530#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6531 if (pReNative->Core.offPc)
6532 {
6533 /*
6534 * Update the program counter but restore it at the end of the TlbMiss branch.
6535 * This should allow delaying more program counter updates for the TlbLookup and hit paths,
6536 * which are hopefully much more frequent, reducing the number of memory accesses.
6537 */
6538 /* Allocate a temporary PC register. */
6539 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6540
6541 /* Perform the addition and store the result. */
6542 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6543 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6544
6545 /* Free and flush the PC register. */
6546 iemNativeRegFreeTmp(pReNative, idxPcReg);
6547 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
6548 }
6549#endif
6550
6551#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6552 /* Save variables in volatile registers. */
6553 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6554 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
6555 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
6556 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6557#endif
6558
6559 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
6560 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
6561#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6562 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6563 {
6564 /*
6565 * For SIMD-based variables we pass the reference on the stack for both fetches and stores.
6566 *
6567 * @note A host register was assigned to the variable for the TlbLookup case above and must not be
6568 *       freed here, or the value will not be synced into that register further down the road
6569 *       because the variable no longer knows it has a register assigned.
6570 *
6571 * @note For loads it is not required to sync what is in the assigned register with the stack slot
6572 *       as it will be overwritten anyway.
6573 */
6574 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
6575 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
6576 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
6577 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
6578 }
6579 else
6580#endif
6581 if (enmOp == kIemNativeEmitMemOp_Store)
6582 {
6583 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
6584 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
6585#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6586 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6587#else
6588 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
6589 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
6590#endif
6591 }
6592
6593 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
6594 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
6595#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6596 fVolGregMask);
6597#else
6598 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
6599#endif
6600
6601 if (iSegReg != UINT8_MAX)
6602 {
6603 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
6604 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
6605 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
6606 }
6607
6608 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6609 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6610
6611 /* Done setting up parameters, make the call. */
6612 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6613
6614 /*
6615 * Put the result in the right register if this is a fetch.
6616 */
6617 if (enmOp != kIemNativeEmitMemOp_Store)
6618 {
6619#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6620 if ( cbMem == sizeof(RTUINT128U)
6621 || cbMem == sizeof(RTUINT256U))
6622 {
6623 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
6624
6625 /* Sync the value on the stack with the host register assigned to the variable. */
6626 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
6627 }
6628 else
6629#endif
6630 {
6631 Assert(idxRegValueFetch == pVarValue->idxReg);
6632 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
6633 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
6634 }
6635 }
6636
6637#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6638 /* Restore variables and guest shadow registers to volatile registers. */
6639 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6640 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6641#endif
6642
6643#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6644 if (pReNative->Core.offPc)
6645 {
6646 /*
6647 * Time to restore the program counter to its original value.
6648 */
6649 /* Allocate a temporary PC register. */
6650 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
6651 kIemNativeGstRegUse_ForUpdate);
6652
6653 /* Restore the original value. */
6654 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6655 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6656
6657 /* Free and flush the PC register. */
6658 iemNativeRegFreeTmp(pReNative, idxPcReg);
6659 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
6660 }
6661#endif
6662
6663#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6664 if (!TlbState.fSkip)
6665 {
6666 /* end of TlbMiss - Jump to the done label. */
6667 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6668 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6669
6670 /*
6671 * TlbLookup:
6672 */
6673 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl,
6674 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
6675 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
6676
6677 /*
6678 * Emit code to do the actual storing / fetching.
6679 */
6680 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
6681# ifdef IEM_WITH_TLB_STATISTICS
6682 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6683 enmOp == kIemNativeEmitMemOp_Store
6684 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
6685 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
6686# endif
6687 switch (enmOp)
6688 {
6689 case kIemNativeEmitMemOp_Store:
6690 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
6691 {
6692 switch (cbMem)
6693 {
6694 case 1:
6695 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6696 break;
6697 case 2:
6698 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6699 break;
6700 case 4:
6701 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6702 break;
6703 case 8:
6704 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6705 break;
6706#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6707 case sizeof(RTUINT128U):
6708 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6709 break;
6710 case sizeof(RTUINT256U):
6711 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6712 break;
6713#endif
6714 default:
6715 AssertFailed();
6716 }
6717 }
6718 else
6719 {
6720 switch (cbMem)
6721 {
6722 case 1:
6723 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
6724 idxRegMemResult, TlbState.idxReg1);
6725 break;
6726 case 2:
6727 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
6728 idxRegMemResult, TlbState.idxReg1);
6729 break;
6730 case 4:
6731 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
6732 idxRegMemResult, TlbState.idxReg1);
6733 break;
6734 case 8:
6735 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
6736 idxRegMemResult, TlbState.idxReg1);
6737 break;
6738 default:
6739 AssertFailed();
6740 }
6741 }
6742 break;
6743
6744 case kIemNativeEmitMemOp_Fetch:
6745 case kIemNativeEmitMemOp_Fetch_Zx_U16:
6746 case kIemNativeEmitMemOp_Fetch_Zx_U32:
6747 case kIemNativeEmitMemOp_Fetch_Zx_U64:
6748 switch (cbMem)
6749 {
6750 case 1:
6751 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6752 break;
6753 case 2:
6754 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6755 break;
6756 case 4:
6757 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6758 break;
6759 case 8:
6760 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6761 break;
6762#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6763 case sizeof(RTUINT128U):
6764 /*
6765 * No need to sync the register back to the stack; this is done by the generic variable handling
6766 * code if there is a register assigned to a variable and the stack must be accessed.
6767 */
6768 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6769 break;
6770 case sizeof(RTUINT256U):
6771 /*
6772 * No need to sync the register back to the stack; this is done by the generic variable handling
6773 * code if there is a register assigned to a variable and the stack must be accessed.
6774 */
6775 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6776 break;
6777#endif
6778 default:
6779 AssertFailed();
6780 }
6781 break;
6782
6783 case kIemNativeEmitMemOp_Fetch_Sx_U16:
6784 Assert(cbMem == 1);
6785 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6786 break;
6787
6788 case kIemNativeEmitMemOp_Fetch_Sx_U32:
6789 Assert(cbMem == 1 || cbMem == 2);
6790 if (cbMem == 1)
6791 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6792 else
6793 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6794 break;
6795
6796 case kIemNativeEmitMemOp_Fetch_Sx_U64:
6797 switch (cbMem)
6798 {
6799 case 1:
6800 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6801 break;
6802 case 2:
6803 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6804 break;
6805 case 4:
6806 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6807 break;
6808 default:
6809 AssertFailed();
6810 }
6811 break;
6812
6813 default:
6814 AssertFailed();
6815 }
6816
6817 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6818
6819 /*
6820 * TlbDone:
6821 */
6822 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6823
6824 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
6825
6826# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6827 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
6828 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6829# endif
6830 }
6831#else
6832 RT_NOREF(fAlignMaskAndCtl, idxLabelTlbMiss);
6833#endif
6834
6835 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
6836 iemNativeVarRegisterRelease(pReNative, idxVarValue);
6837 return off;
6838}
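/* A rough sketch, for orientation only, of the native code layout the emitter above produces when the
 * TLB lookup is not skipped (the exact instruction selection depends on the host architecture and the
 * build options mentioned in the #ifdefs):
 *
 *          jmp     TlbLookup<seqno>
 *      TlbMiss<seqno>:
 *          ; optionally record idxInstr and materialize the delayed PC update
 *          ; save volatile registers, load pVCpu / GCPtrMem / value into the call argument registers
 *          call    pfnFunction
 *          ; move a fetched result into the variable's register, restore volatiles, undo the PC update
 *          jmp     TlbDone<seqno>
 *      TlbLookup<seqno>:
 *          ; inline TLB probe emitted by iemNativeEmitTlbLookup<true>, branching to TlbMiss<seqno> on a miss
 *          ; on a hit the load/store is done directly through idxRegMemResult
 *      TlbDone<seqno>:
 */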
6839
6840
6841
6842/*********************************************************************************************************************************
6843* Memory fetches (IEM_MEM_FETCH_XXX). *
6844*********************************************************************************************************************************/
6845
6846/* 8-bit segmented: */
6847#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
6848 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
6849 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
6850 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6851
6852#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6853 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6854 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
6855 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6856
6857#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6858 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6859 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6860 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6861
6862#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6863 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6864 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6865 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6866
6867#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6868 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6869 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
6870 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
6871
6872#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6873 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6874 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6875 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
6876
6877#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6878 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6879 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6880 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
6881
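/* These IEM_MC_FETCH_MEM_* wrappers are expanded inside generated MC blocks where pReNative, off and
 * pCallEntry are in scope. A minimal sketch of such an expansion, using hypothetical local variable
 * indices u8Value and GCPtrEffSrc created earlier in the block:
 *
 *      IEM_MC_FETCH_MEM_U8(u8Value, X86_SREG_DS, GCPtrEffSrc);
 *      ...expands to...
 *      off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, u8Value, X86_SREG_DS, GCPtrEffSrc,
 *                                                 sizeof(uint8_t), 0, kIemNativeEmitMemOp_Fetch,
 *                                                 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr);
 */
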
6882/* 16-bit segmented: */
6883#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6884 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6885 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6886 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6887
6888#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
6889 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6890 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6891 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
6892
6893#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6894 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6895 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6896 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6897
6898#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6899 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6900 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6901 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6902
6903#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6904 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6905 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6906 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
6907
6908#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6909 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6910 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6911 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
6912
6913
6914/* 32-bit segmented: */
6915#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6916 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6917 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6918 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6919
6920#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
6921 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6922 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6923 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
6924
6925#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6926 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6927 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6928 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6929
6930#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6931 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6932 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6933 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
6934
6935#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
6936 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
6937 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6938 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
6939
6940#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
6941 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
6942 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6943 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6944
6945#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
6946 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
6947 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
6948 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
6949
6950AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
6951#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
6952 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
6953 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
6954 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6955
6956
6957/* 64-bit segmented: */
6958#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6959 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6960 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
6961 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
6962
6963AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
6964#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
6965 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
6966 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
6967 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
6968
6969
6970/* 8-bit flat: */
6971#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
6972 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
6973 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
6974 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
6975
6976#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
6977 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
6978 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
6979 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
6980
6981#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
6982 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
6983 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6984 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
6985
6986#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
6987 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
6988 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6989 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
6990
6991#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
6992 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
6993 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
6994 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
6995
6996#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
6997 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
6998 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6999 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7000
7001#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7002 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7003 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7004 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7005
7006
7007/* 16-bit flat: */
7008#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7009 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7010 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7011 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7012
7013#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7014 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7015 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7016 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7017
7018#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
7019 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7020 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7021 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7022
7023#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
7024 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7025 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7026 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7027
7028#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
7029 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7030 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7031 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7032
7033#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
7034 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7035 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7036 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7037
7038/* 32-bit flat: */
7039#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7040 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7041 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7042 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7043
7044#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
7045 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7046 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7047 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7048
7049#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
7050 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7051 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7052 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7053
7054#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
7055 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7056 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7057 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7058
7059#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
7060 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7061 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7062 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7063
7064#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
7065 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7066 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7067 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7068
7069#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
7070 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
7071 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7072 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7073
7074#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
7075 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
7076 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7077 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7078
7079
7080/* 64-bit flat: */
7081#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7082 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7083 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7084 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7085
7086#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
7087 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
7088 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7089 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7090
7091#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7092/* 128-bit segmented: */
7093#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
7094 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7095 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7096 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
7097
7098#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
7099 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7100 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7101 kIemNativeEmitMemOp_Fetch, \
7102 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7103
7104AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7105#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
7106 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, sizeof(X86XMMREG), \
7107 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7108 kIemNativeEmitMemOp_Fetch, \
7109 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7110
7111#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7112 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7113 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7114 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7115
7116#define IEM_MC_FETCH_MEM_XMM_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7117 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7118 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7119 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7120
7121
7122/* 128-bit flat: */
7123#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
7124 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7125 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7126 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
7127
7128#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
7129 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7130 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7131 kIemNativeEmitMemOp_Fetch, \
7132 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7133
7134#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
7135 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, sizeof(X86XMMREG), \
7136 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7137 kIemNativeEmitMemOp_Fetch, \
7138 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7139
7140#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
7141 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7142 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7143 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7144
7145#define IEM_MC_FETCH_MEM_FLAT_XMM_NO_AC(a_uXmmDst, a_GCPtrMem) \
7146 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
7147 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7148 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7149
7150/* 256-bit segmented: */
7151#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
7152 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7153 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7154 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7155
7156#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7157 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7158 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7159 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7160
7161#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
7162 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
7163 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7164 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7165
7166#define IEM_MC_FETCH_MEM_YMM_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7167 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7168 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7169 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7170
7171
7172/* 256-bit flat: */
7173#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
7174 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7175 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7176 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7177
7178#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
7179 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7180 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7181 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7182
7183#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
7184 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
7185 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7186 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7187
7188#define IEM_MC_FETCH_MEM_FLAT_YMM_NO_AC(a_uYmmDst, a_GCPtrMem) \
7189 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uYmmDst, UINT8_MAX, a_GCPtrMem, \
7190 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7191 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7192
7193#endif
7194
7195
7196/*********************************************************************************************************************************
7197* Memory stores (IEM_MEM_STORE_XXX). *
7198*********************************************************************************************************************************/
7199
7200#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
7201 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
7202 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
7203 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7204
7205#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
7206 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
7207 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7208 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7209
7210#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
7211 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
7212 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
7213 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7214
7215#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
7216 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
7217 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
7218 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7219
7220
7221#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
7222 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
7223 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
7224 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7225
7226#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
7227 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
7228 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7229 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7230
7231#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
7232 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
7233 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
7234 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7235
7236#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
7237 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
7238 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
7239 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7240
7241
7242#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
7243 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
7244 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7245
7246#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
7247 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7248 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7249
7250#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
7251 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7252 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7253
7254#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
7255 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7256 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7257
7258
7259#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
7260 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7261 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7262
7263#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
7264 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7265 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7266
7267#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
7268 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7269 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7270
7271#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
7272 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7273 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7274
7275/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
7276 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
7277DECL_INLINE_THROW(uint32_t)
7278iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
7279 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
7280{
7281 /*
7282 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
7283 * to do the grunt work.
7284 */
7285 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
7286 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
7287 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
7288 pfnFunction, idxInstr);
7289 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
7290 return off;
7291}
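/* A minimal sketch of how the *_CONST wrappers above reach this helper, assuming a hypothetical
 * effective-address variable GCPtrEffDst created earlier in the MC block:
 *
 *      IEM_MC_STORE_MEM_U8_CONST(X86_SREG_ES, GCPtrEffDst, 0xff);
 *      ...expands to...
 *      off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, 0xff, X86_SREG_ES, GCPtrEffDst, sizeof(uint8_t),
 *                                                 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr);
 *
 * Keeping the value as an immediate variable lets the TLB-hit path in iemNativeEmitMemFetchStoreDataCommon
 * store it directly via iemNativeEmitStoreImm8ByGprEx and friends instead of loading it into a register first.
 */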
7292
7293
7294#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7295# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
7296 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7297 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7298 kIemNativeEmitMemOp_Store, \
7299 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
7300
7301# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
7302 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
7303 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7304 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
7305
7306# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
7307 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
7308 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7309 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
7310
7311# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
7312 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
7313 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
7314 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
7315
7316
7317# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
7318 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7319 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7320 kIemNativeEmitMemOp_Store, \
7321 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
7322
7323# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
7324 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
7325 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7326 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
7327
7328# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
7329 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
7330 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7331 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
7332
7333# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
7334 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
7335 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
7336 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
7337#endif
7338
7339
7340
7341/*********************************************************************************************************************************
7342* Stack Accesses. *
7343*********************************************************************************************************************************/
7344/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
7345#define IEM_MC_PUSH_U16(a_u16Value) \
7346 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
7347 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
7348#define IEM_MC_PUSH_U32(a_u32Value) \
7349 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
7350 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
7351#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
7352 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
7353 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
7354#define IEM_MC_PUSH_U64(a_u64Value) \
7355 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
7356 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
7357
7358#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
7359 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
7360 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
7361#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
7362 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
7363 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
7364#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
7365 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
7366 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
7367
7368#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
7369 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
7370 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
7371#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
7372 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
7373 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
7374
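/* The cBitsVarAndFlat parameter packs the pushed operand width, the flat-mode stack width and the
 * segment-register flag into a single value which the emitter below unpacks again with RT_BYTE1/2/3.
 * A small worked example, matching the IEM_MC_FLAT32_PUSH_U16 wrapper above:
 *
 *      uint32_t const cBitsVarAndFlat = RT_MAKE_U32_FROM_U8(16, 32, 0, 0);
 *      RT_BYTE1(cBitsVarAndFlat) / 8  == 2     (two bytes are pushed)
 *      RT_BYTE2(cBitsVarAndFlat)      == 32    (flat 32-bit stack, so ESP doubles as the effective SP)
 *      RT_BYTE3(cBitsVarAndFlat)      == 0     (a plain value, not a segment register push)
 */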
7375
7376/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
7377DECL_INLINE_THROW(uint32_t)
7378iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
7379 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
7380{
7381 /*
7382 * Assert sanity.
7383 */
7384 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7385 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7386#ifdef VBOX_STRICT
7387 if (RT_BYTE2(cBitsVarAndFlat) != 0)
7388 {
7389 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7390 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7391 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7392 Assert( pfnFunction
7393 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
7394 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
7395 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
7396 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
7397 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
7398 : UINT64_C(0xc000b000a0009000) ));
7399 }
7400 else
7401 Assert( pfnFunction
7402 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
7403 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
7404 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
7405 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
7406 : UINT64_C(0xc000b000a0009000) ));
7407#endif
7408
7409#ifdef VBOX_STRICT
7410 /*
7411 * Check that the fExec flags we've got make sense.
7412 */
7413 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7414#endif
7415
7416 /*
7417 * To keep things simple we have to commit any pending writes first as we
7418 * may end up making calls.
7419 */
7420 /** @todo we could postpone this till we make the call and reload the
7421 * registers after returning from the call. Not sure if that's sensible or
7422 * not, though. */
7423 off = iemNativeRegFlushPendingWrites(pReNative, off);
7424
7425 /*
7426 * First we calculate the new RSP and the effective stack pointer value.
7427 * For 64-bit mode and flat 32-bit these two are the same.
7428 * (Code structure is very similar to that of PUSH)
7429 */
7430 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
7431 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
7432 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
7433 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
7434 ? cbMem : sizeof(uint16_t);
7435 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
7436 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
7437 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
7438 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
7439 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
7440 if (cBitsFlat != 0)
7441 {
7442 Assert(idxRegEffSp == idxRegRsp);
7443 Assert(cBitsFlat == 32 || cBitsFlat == 64);
7444 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
7445 if (cBitsFlat == 64)
7446 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
7447 else
7448 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
7449 }
7450 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
7451 {
7452 Assert(idxRegEffSp != idxRegRsp);
7453 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
7454 kIemNativeGstRegUse_ReadOnly);
7455#ifdef RT_ARCH_AMD64
7456 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7457#else
7458 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7459#endif
7460 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
7461 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
7462 offFixupJumpToUseOtherBitSp = off;
7463 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7464 {
7465 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
7466 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7467 }
7468 else
7469 {
7470 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
7471 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7472 }
7473 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7474 }
7475 /* SpUpdateEnd: */
7476 uint32_t const offLabelSpUpdateEnd = off;
7477
7478 /*
7479 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
7480 * we're skipping lookup).
7481 */
7482 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
7483 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
7484 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7485 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
7486 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7487 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7488 : UINT32_MAX;
7489 uint8_t const idxRegValue = !TlbState.fSkip
7490 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7491 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
7492 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
7493 : UINT8_MAX;
7494 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7495
7496
7497 if (!TlbState.fSkip)
7498 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7499 else
7500 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
7501
7502 /*
7503 * Use16BitSp:
7504 */
7505 if (cBitsFlat == 0)
7506 {
7507#ifdef RT_ARCH_AMD64
7508 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7509#else
7510 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7511#endif
7512 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
7513 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7514 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7515 else
7516 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7517 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
7518 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7519 }
7520
7521 /*
7522 * TlbMiss:
7523 *
7524 * Call helper to do the pushing.
7525 */
7526 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
7527
7528#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7529 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7530#else
7531 RT_NOREF(idxInstr);
7532#endif
7533
7534 /* Save variables in volatile registers. */
7535 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7536 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
7537 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
7538 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
7539 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7540
7541 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
7542 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
7543 {
7544 /* Swap them using ARG0 as temp register: */
7545 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
7546 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
7547 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
7548 }
7549 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
7550 {
7551 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
7552 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
7553 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7554
7555 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
7556 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
7557 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7558 }
7559 else
7560 {
7561 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
7562 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7563
7564 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
7565 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
7566 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
7567 }
7568
7569 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7570 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7571
7572 /* Done setting up parameters, make the call. */
7573 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7574
7575 /* Restore variables and guest shadow registers to volatile registers. */
7576 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7577 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7578
7579#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7580 if (!TlbState.fSkip)
7581 {
7582 /* end of TlbMiss - Jump to the done label. */
7583 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7584 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7585
7586 /*
7587 * TlbLookup:
7588 */
7589 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
7590 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7591
7592 /*
7593 * Emit code to do the actual storing / fetching.
7594 */
7595 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7596# ifdef IEM_WITH_TLB_STATISTICS
7597 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7598 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
7599# endif
7600 if (idxRegValue != UINT8_MAX)
7601 {
7602 switch (cbMemAccess)
7603 {
7604 case 2:
7605 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7606 break;
7607 case 4:
7608 if (!fIsIntelSeg)
7609 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7610 else
7611 {
7612                        /* Intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
7613                           PUSH FS in real mode, so we have to try to emulate that here.
7614 We borrow the now unused idxReg1 from the TLB lookup code here. */
7615 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
7616 kIemNativeGstReg_EFlags);
7617 if (idxRegEfl != UINT8_MAX)
7618 {
7619#ifdef RT_ARCH_AMD64
7620 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
7621 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
7622 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7623#else
7624 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
7625 off, TlbState.idxReg1, idxRegEfl,
7626 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7627#endif
7628 iemNativeRegFreeTmp(pReNative, idxRegEfl);
7629 }
7630 else
7631 {
7632 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
7633 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
7634 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
7635 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7636 }
7637 /* ASSUMES the upper half of idxRegValue is ZERO. */
7638 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
7639 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
7640 }
7641 break;
7642 case 8:
7643 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7644 break;
7645 default:
7646 AssertFailed();
7647 }
7648 }
7649 else
7650 {
7651 switch (cbMemAccess)
7652 {
7653 case 2:
7654 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7655 idxRegMemResult, TlbState.idxReg1);
7656 break;
7657 case 4:
7658 Assert(!fIsSegReg);
7659 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7660 idxRegMemResult, TlbState.idxReg1);
7661 break;
7662 case 8:
7663 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
7664 break;
7665 default:
7666 AssertFailed();
7667 }
7668 }
7669
7670 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7671 TlbState.freeRegsAndReleaseVars(pReNative);
7672
7673 /*
7674 * TlbDone:
7675 *
7676 * Commit the new RSP value.
7677 */
7678 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7679 }
7680#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
7681
7682#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7683 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
7684#endif
7685 iemNativeRegFreeTmp(pReNative, idxRegRsp);
7686 if (idxRegEffSp != idxRegRsp)
7687 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
7688
7689    /* The value variable is implicitly flushed. */
7690 if (idxRegValue != UINT8_MAX)
7691 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7692 iemNativeVarFreeLocal(pReNative, idxVarValue);
7693
7694 return off;
7695}
7696
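/* Layout sketch of what iemNativeEmitStackPush() above emits (an informal
   reading of the labels used in the function, not a specification): the
   straight-line path updates RSP/ESP (or, in segmented modes, whichever of
   SP/ESP the SS.attr D bit selects), reaches SpUpdateEnd, jumps to TlbLookup,
   stores the value on a TLB hit and falls into TlbDone where the new RSP is
   committed.  The alternate Use16BitSp/Use32BitSp block and the TlbMiss
   helper call are placed out of line and jump back to SpUpdateEnd and
   TlbDone respectively. */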
7697
7698
7699/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
7700#define IEM_MC_POP_GREG_U16(a_iGReg) \
7701 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
7702 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
7703#define IEM_MC_POP_GREG_U32(a_iGReg) \
7704 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
7705 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
7706#define IEM_MC_POP_GREG_U64(a_iGReg) \
7707 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
7708 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
7709
7710#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
7711 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
7712 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
7713#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
7714 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
7715 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
7716
7717#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
7718 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
7719 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
7720#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
7721 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
7722 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
7723
7724
7725DECL_FORCE_INLINE_THROW(uint32_t)
7726iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
7727 uint8_t idxRegTmp)
7728{
7729 /* Use16BitSp: */
7730#ifdef RT_ARCH_AMD64
7731 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
7732 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
7733 RT_NOREF(idxRegTmp);
7734#else
7735 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
7736 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
7737 /* add tmp, regrsp, #cbMem */
7738 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
7739 /* and tmp, tmp, #0xffff */
7740 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
7741 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
7742    /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
7743 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
7744#endif
7745 return off;
7746}
7747
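/* In effect the 16-bit variant above computes, on both host architectures
   (a sketch of the intended semantics, not additional code):
       idxRegEffSp = RSP & 0xffff;             // address to pop from
       SP          = (SP + cbMem) & 0xffff;    // only bits 15:0 updated
   leaving RSP bits 63:16 untouched, as the AMD64 path explicitly assumes. */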
7748
7749DECL_FORCE_INLINE(uint32_t)
7750iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
7751{
7752 /* Use32BitSp: */
7753 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
7754 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
7755 return off;
7756}
7757
7758
7759/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
7760DECL_INLINE_THROW(uint32_t)
7761iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
7762 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
7763{
7764 /*
7765 * Assert sanity.
7766 */
7767 Assert(idxGReg < 16);
7768#ifdef VBOX_STRICT
7769 if (RT_BYTE2(cBitsVarAndFlat) != 0)
7770 {
7771 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7772 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7773 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7774 Assert( pfnFunction
7775 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
7776 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
7777 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
7778 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
7779 : UINT64_C(0xc000b000a0009000) ));
7780 }
7781 else
7782 Assert( pfnFunction
7783 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
7784 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
7785 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
7786 : UINT64_C(0xc000b000a0009000) ));
7787#endif
7788
7789#ifdef VBOX_STRICT
7790 /*
7791 * Check that the fExec flags we've got make sense.
7792 */
7793 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7794#endif
7795
7796 /*
7797 * To keep things simple we have to commit any pending writes first as we
7798 * may end up making calls.
7799 */
7800 off = iemNativeRegFlushPendingWrites(pReNative, off);
7801
7802 /*
7803 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
7804 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
7805 * directly as the effective stack pointer.
7806 * (Code structure is very similar to that of PUSH)
7807 */
7808 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
7809 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
7810 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
7811 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
7812 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
7813 /** @todo can do a better job picking the register here. For cbMem >= 4 this
7814 * will be the resulting register value. */
7815 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
7816
7817 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
7818 if (cBitsFlat != 0)
7819 {
7820 Assert(idxRegEffSp == idxRegRsp);
7821 Assert(cBitsFlat == 32 || cBitsFlat == 64);
7822 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
7823 }
7824 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
7825 {
7826 Assert(idxRegEffSp != idxRegRsp);
7827 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
7828 kIemNativeGstRegUse_ReadOnly);
7829#ifdef RT_ARCH_AMD64
7830 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7831#else
7832 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7833#endif
7834 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
7835 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
7836 offFixupJumpToUseOtherBitSp = off;
7837 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7838 {
7839/** @todo can skip idxRegRsp updating when popping ESP. */
7840 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
7841 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7842 }
7843 else
7844 {
7845 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
7846 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
7847 }
7848 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7849 }
7850 /* SpUpdateEnd: */
7851 uint32_t const offLabelSpUpdateEnd = off;
7852
7853 /*
7854 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
7855 * we're skipping lookup).
7856 */
7857 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
7858 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
7859 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7860 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
7861 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7862 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7863 : UINT32_MAX;
7864
7865 if (!TlbState.fSkip)
7866 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7867 else
7868 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
7869
7870 /*
7871 * Use16BitSp:
7872 */
7873 if (cBitsFlat == 0)
7874 {
7875#ifdef RT_ARCH_AMD64
7876 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7877#else
7878 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7879#endif
7880 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
7881 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7882 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
7883 else
7884 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7885 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
7886 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7887 }
7888
7889 /*
7890 * TlbMiss:
7891 *
7892     * Call helper to do the popping.
7893 */
7894 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
7895
7896#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7897 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7898#else
7899 RT_NOREF(idxInstr);
7900#endif
7901
7902 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7903 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
7904 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
7905 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7906
7907
7908 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
7909 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
7910 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7911
7912 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7913 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7914
7915 /* Done setting up parameters, make the call. */
7916 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7917
7918 /* Move the return register content to idxRegMemResult. */
7919 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
7920 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
7921
7922 /* Restore variables and guest shadow registers to volatile registers. */
7923 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7924 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7925
7926#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7927 if (!TlbState.fSkip)
7928 {
7929 /* end of TlbMiss - Jump to the done label. */
7930 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7931 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7932
7933 /*
7934 * TlbLookup:
7935 */
7936 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
7937 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7938
7939 /*
7940         * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
7941 */
7942 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7943# ifdef IEM_WITH_TLB_STATISTICS
7944 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7945 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
7946# endif
7947 switch (cbMem)
7948 {
7949 case 2:
7950 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
7951 break;
7952 case 4:
7953 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
7954 break;
7955 case 8:
7956 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
7957 break;
7958 default:
7959 AssertFailed();
7960 }
7961
7962 TlbState.freeRegsAndReleaseVars(pReNative);
7963
7964 /*
7965 * TlbDone:
7966 *
7967 * Set the new RSP value (FLAT accesses needs to calculate it first) and
7968 * commit the popped register value.
7969 */
7970 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7971 }
7972#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
7973
7974 if (idxGReg != X86_GREG_xSP)
7975 {
7976 /* Set the register. */
7977 if (cbMem >= sizeof(uint32_t))
7978 {
7979#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
7980 AssertMsg( pReNative->idxCurCall == 0
7981 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
7982 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName,
7983 iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
7984#endif
7985 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
7986#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7987 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
7988#endif
7989#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7990 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
7991 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
7992#endif
7993 }
7994 else
7995 {
7996 Assert(cbMem == sizeof(uint16_t));
7997 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
7998 kIemNativeGstRegUse_ForUpdate);
7999 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
8000#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8001 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8002#endif
8003 iemNativeRegFreeTmp(pReNative, idxRegDst);
8004 }
8005
8006 /* Complete RSP calculation for FLAT mode. */
8007 if (idxRegEffSp == idxRegRsp)
8008 {
8009 if (cBitsFlat == 64)
8010 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
8011 else
8012 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
8013 }
8014 }
8015 else
8016 {
8017        /* We're popping RSP, ESP or SP. Only the last one needs a bit of extra work, of course. */
8018 if (cbMem == sizeof(uint64_t))
8019 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
8020 else if (cbMem == sizeof(uint32_t))
8021 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
8022 else
8023 {
8024 if (idxRegEffSp == idxRegRsp)
8025 {
8026 if (cBitsFlat == 64)
8027 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
8028 else
8029 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
8030 }
8031 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
8032 }
8033 }
8034
8035#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8036 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
8037#endif
8038
8039 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8040 if (idxRegEffSp != idxRegRsp)
8041 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8042 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8043
8044 return off;
8045}
8046
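/* Summary note on the commit logic in iemNativeEmitStackPopGReg() above (a
   reading aid, not new behaviour): a 32/64-bit pop into a regular GPR simply
   makes the loaded value the new shadowed register value (with the RSP update
   for FLAT modes completed afterwards), while a 16-bit pop is merged into the
   low word of the destination with iemNativeEmitGprMergeInGpr16().  When the
   destination is xSP itself, the loaded value (or its low word) becomes the
   new RSP, so no separate increment is emitted for the 32/64-bit SP cases. */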
8047
8048
8049/*********************************************************************************************************************************
8050* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
8051*********************************************************************************************************************************/
8052
8053#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8054 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8055 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8056 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
8057
8058#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8059 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8060 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8061 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
8062
8063#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8064 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8065 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8066 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
8067
8068#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8069 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8070 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8071 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
8072
8073
8074#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8075 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8076 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8077 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
8078
8079#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8080 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8081 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8082 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
8083
8084#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8085 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8086 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8087 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8088
8089#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8090 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8091 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8092 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
8093
8094#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8095 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
8096 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8097 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8098
8099
8100#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8101 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8102 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8103 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
8104
8105#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8106 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8107 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8108 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
8109
8110#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8111 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8112 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8113 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8114
8115#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8116 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8117 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8118 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
8119
8120#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8121 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
8122 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8123 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8124
8125
8126#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8127 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8128 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8129 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
8130
8131#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8132 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8133 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8134 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
8135#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8136 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8137 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8138 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8139
8140#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8141 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8142 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8143 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
8144
8145#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8146 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
8147 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8148 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8149
8150
8151#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8152 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8153 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8154 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
8155
8156#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8157 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8158 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
8159 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
8160
8161
8162#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8163 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8164 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8165 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
8166
8167#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8168 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8169 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8170 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
8171
8172#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8173 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8174 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8175 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
8176
8177#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8178 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8179 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8180 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
8181
8182
8183
8184#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8185 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8186 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8187 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
8188
8189#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8190 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8191 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8192 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
8193
8194#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8195 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8196 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8197 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
8198
8199#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8200 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8201 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8202 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
8203
8204
8205#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8206 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8207 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8208 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
8209
8210#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8211 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8212 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8213 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
8214
8215#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8216 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8217 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8218 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
8219
8220#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8221 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8222 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8223 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
8224
8225#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
8226 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
8227 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8228 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
8229
8230
8231#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8232 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8233 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8234 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
8235
8236#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8237 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8238 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8239 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
8240
8241#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8242 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8243 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8244 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
8245
8246#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8247 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8248 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8249 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
8250
8251#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
8252 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
8253 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8254 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
8255
8256
8257#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8258 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8259 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8260 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
8261
8262#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8263 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8264 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8265 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
8266
8267#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8268 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8269 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8270 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
8271
8272#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8273 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8274 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8275 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
8276
8277#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
8278 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
8279 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8280 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
8281
8282
8283#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
8284 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
8285 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8286 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
8287
8288#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
8289 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
8290 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
8291 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
8292
8293
8294#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8295 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8296 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8297 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
8298
8299#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8300 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8301 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8302 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
8303
8304#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8305 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8306 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8307 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
8308
8309#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8310 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8311 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8312 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
8313
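/* Note on the fAlignMaskAndCtl values used by the IEM_MC_MEM_MAP_XXX and
   IEM_MC_MEM_FLAT_MAP_XXX statements above (an observation, not a general
   rule): byte accesses pass 0, the 80-bit R80/D80 mappings pass
   sizeof(uint64_t) - 1, and all other sizes pass their natural alignment mask
   (sizeof(type) - 1).  The strict-build asserts in iemNativeEmitMemMapCommon()
   below only check that the value stays below the access size, i.e. these
   mappings rely on plain natural-alignment masks without extra control bits. */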
8314
8315DECL_INLINE_THROW(uint32_t)
8316iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
8317 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint32_t fAlignMaskAndCtl,
8318 uintptr_t pfnFunction, uint8_t idxInstr)
8319{
8320 /*
8321 * Assert sanity.
8322 */
8323 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
8324 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
8325 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
8326 && pVarMem->cbVar == sizeof(void *),
8327 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8328
8329 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
8330 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
8331 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
8332 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
8333 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8334
8335 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
8336 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
8337 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
8338 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
8339 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8340
8341 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
8342
8343 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
8344
8345#ifdef VBOX_STRICT
8346# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
8347 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
8348 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
8349 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
8350 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
8351# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
8352 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
8353 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
8354 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
8355
8356 if (iSegReg == UINT8_MAX)
8357 {
8358 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8359 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8360 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8361 switch (cbMem)
8362 {
8363 case 1:
8364 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8));
8365 Assert(!fAlignMaskAndCtl);
8366 break;
8367 case 2:
8368 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16));
8369 Assert(fAlignMaskAndCtl < 2);
8370 break;
8371 case 4:
8372 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32));
8373 Assert(fAlignMaskAndCtl < 4);
8374 break;
8375 case 8:
8376 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64));
8377 Assert(fAlignMaskAndCtl < 8);
8378 break;
8379 case 10:
8380 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
8381 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
8382 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
8383 Assert(fAlignMaskAndCtl < 8);
8384 break;
8385 case 16:
8386 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128));
8387 Assert(fAlignMaskAndCtl < 16);
8388 break;
8389# if 0
8390 case 32:
8391 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256));
8392 Assert(fAlignMaskAndCtl < 32);
8393 break;
8394 case 64:
8395 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512));
8396 Assert(fAlignMaskAndCtl < 64);
8397 break;
8398# endif
8399 default: AssertFailed(); break;
8400 }
8401 }
8402 else
8403 {
8404 Assert(iSegReg < 6);
8405 switch (cbMem)
8406 {
8407 case 1:
8408 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8));
8409 Assert(!fAlignMaskAndCtl);
8410 break;
8411 case 2:
8412 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16));
8413 Assert(fAlignMaskAndCtl < 2);
8414 break;
8415 case 4:
8416 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32));
8417 Assert(fAlignMaskAndCtl < 4);
8418 break;
8419 case 8:
8420 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64));
8421 Assert(fAlignMaskAndCtl < 8);
8422 break;
8423 case 10:
8424 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
8425 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
8426 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
8427 Assert(fAlignMaskAndCtl < 8);
8428 break;
8429 case 16:
8430 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128));
8431 Assert(fAlignMaskAndCtl < 16);
8432 break;
8433# if 0
8434 case 32:
8435 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256));
8436 Assert(fAlignMaskAndCtl < 32);
8437 break;
8438 case 64:
8439 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512));
8440 Assert(fAlignMaskAndCtl < 64);
8441 break;
8442# endif
8443 default: AssertFailed(); break;
8444 }
8445 }
8446# undef IEM_MAP_HLP_FN
8447# undef IEM_MAP_HLP_FN_NO_AT
8448#endif
8449
8450#ifdef VBOX_STRICT
8451 /*
8452 * Check that the fExec flags we've got make sense.
8453 */
8454 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8455#endif
8456
8457 /*
8458 * To keep things simple we have to commit any pending writes first as we
8459 * may end up making calls.
8460 */
8461 off = iemNativeRegFlushPendingWrites(pReNative, off);
8462
8463#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8464 /*
8465 * Move/spill/flush stuff out of call-volatile registers.
8466 * This is the easy way out. We could contain this to the tlb-miss branch
8467 * by saving and restoring active stuff here.
8468 */
8469 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
8470 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
8471#endif
8472
8473 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
8474 while the tlb-miss codepath will temporarily put it on the stack.
8475       Set the type to stack here so we don't need to do it twice below. */
8476 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
8477 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
8478 /** @todo use a tmp register from TlbState, since they'll be free after tlb
8479 * lookup is done. */
8480
8481 /*
8482 * Define labels and allocate the result register (trying for the return
8483 * register if we can).
8484 */
8485 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8486 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
8487 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
8488 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
8489 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
8490 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8491 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8492 : UINT32_MAX;
8493//off=iemNativeEmitBrk(pReNative, off, 0);
8494 /*
8495 * Jump to the TLB lookup code.
8496 */
8497 if (!TlbState.fSkip)
8498 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8499
8500 /*
8501 * TlbMiss:
8502 *
8503 * Call helper to do the fetching.
8504 * We flush all guest register shadow copies here.
8505 */
8506 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
8507
8508#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8509 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8510#else
8511 RT_NOREF(idxInstr);
8512#endif
8513
8514#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8515 /* Save variables in volatile registers. */
8516 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
8517 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8518#endif
8519
8520 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
8521    off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*offAddend*/,
8522#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8523 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
8524#else
8525 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8526#endif
8527
8528 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
8529 if (iSegReg != UINT8_MAX)
8530 {
8531 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
8532 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
8533 }
8534
8535 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
8536 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
8537 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
8538
8539 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8540 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8541
8542 /* Done setting up parameters, make the call. */
8543 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8544
8545 /*
8546 * Put the output in the right registers.
8547 */
8548 Assert(idxRegMemResult == pVarMem->idxReg);
8549 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8550 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8551
8552#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8553 /* Restore variables and guest shadow registers to volatile registers. */
8554 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8555 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8556#endif
8557
8558 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
8559 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
8560
8561#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8562 if (!TlbState.fSkip)
8563 {
8564        /* end of TlbMiss - Jump to the done label. */
8565 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8566 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8567
8568 /*
8569 * TlbLookup:
8570 */
8571 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl, fAccess,
8572 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8573# ifdef IEM_WITH_TLB_STATISTICS
8574 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
8575 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
8576# endif
8577
8578 /* [idxVarUnmapInfo] = 0; */
8579 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
8580
8581 /*
8582 * TlbDone:
8583 */
8584 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8585
8586 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
8587
8588# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8589 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
8590 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8591# endif
8592 }
8593#else
8594 RT_NOREF(fAccess, fAlignMaskAndCtl, idxLabelTlbMiss);
8595#endif
8596
8597 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
8598 iemNativeVarRegisterRelease(pReNative, idxVarMem);
8599
8600 return off;
8601}
8602
8603
8604#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
8605 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
8606 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
8607
8608#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
8609 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
8610 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
8611
8612#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
8613 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
8614 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
8615
8616#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
8617 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
8618 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
8619
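/*
 * Usage sketch (illustrative only, not part of the emitted code): in an MC block these
 * statements terminate a mapped memory access, roughly along the lines of the following,
 * where the map statement name and operands are placeholders:
 *     IEM_MC_MEM_MAP_XXX(pu32Dst, bUnmapInfo, iEffSeg, GCPtrEff);
 *     ... read and/or modify *pu32Dst ...
 *     IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
 * The a_bMapInfo argument must be the unmap-info variable produced by the map statement.
 */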
8620DECL_INLINE_THROW(uint32_t)
8621iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
8622 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
8623{
8624 /*
8625 * Assert sanity.
8626 */
8627 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
8628#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
8629 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
8630#endif
8631 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
8632 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
8633 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
8634#ifdef VBOX_STRICT
8635 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
8636 {
8637 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
8638 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
8639 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
8640 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
8641 case IEM_ACCESS_TYPE_WRITE:
8642 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
8643 case IEM_ACCESS_TYPE_READ:
8644 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
8645 default: AssertFailed();
8646 }
8647#else
8648 RT_NOREF(fAccess);
8649#endif
8650
8651 /*
8652 * To keep things simple we have to commit any pending writes first as we
8653 * may end up making calls (there shouldn't be any at this point, so this
8654 * is just for consistency).
8655 */
8656 /** @todo we could postpone this till we make the call and reload the
8657 * registers after returning from the call. Not sure if that's sensible or
8658 * not, though. */
8659 off = iemNativeRegFlushPendingWrites(pReNative, off);
8660
8661 /*
8662 * Move/spill/flush stuff out of call-volatile registers.
8663 *
8664 * We exclude any register holding the bUnmapInfo variable, as we'll be
8665 * checking it after returning from the call and will free it afterwards.
8666 */
8667 /** @todo save+restore active registers and maybe guest shadows in miss
8668 * scenario. */
8669 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
8670 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
8671
8672 /*
8673 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
8674 * to call the unmap helper function.
8675 *
8676 * The likelihood of it being zero is higher than for the TLB hit when doing
8677 * the mapping, as a TLB miss for a well aligned and unproblematic memory
8678 * access should also end up with a mapping that won't need special unmapping.
8679 */
8680 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
8681 * should speed up things for the pure interpreter as well when TLBs
8682 * are enabled. */
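    /* On AMD64 we can test the unmap-info byte directly in its stack slot when the variable
       isn't currently in a host register; otherwise (and on other hosts) it is loaded into a
       register (preferring the ARG1 register, which the helper call needs anyway) and tested
       there. */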
8683#ifdef RT_ARCH_AMD64
8684 if (pVarUnmapInfo->idxReg == UINT8_MAX)
8685 {
8686 /* test byte [rbp - xxx], 0ffh */
8687 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
8688 pbCodeBuf[off++] = 0xf6;
8689 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
8690 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
8691 pbCodeBuf[off++] = 0xff;
8692 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8693 }
8694 else
8695#endif
8696 {
8697 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
8698 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
8699 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
8700 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
8701 }
8702 uint32_t const offJmpFixup = off;
8703 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
8704
8705 /*
8706 * Call the unmap helper function.
8707 */
8708#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
8709 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8710#else
8711 RT_NOREF(idxInstr);
8712#endif
8713
8714 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
8715 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
8716 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8717
8718 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8719 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8720
8721 /* Done setting up parameters, make the call. */
8722 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8723
8724 /* The bUnmapInfo variable is implicitly freed by these MCs. */
8725 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
8726
8727 /*
8728 * Done, just fixup the jump for the non-call case.
8729 */
8730 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
8731
8732 return off;
8733}
8734
8735
8736
8737/*********************************************************************************************************************************
8738* State and Exceptions *
8739*********************************************************************************************************************************/
8740
8741#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8742#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8743
8744#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8745#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8746#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8747
8748#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8749#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8750#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8751
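/* Note: all of the actualize/prepare statements above currently map to the same helper
   below, which is still little more than a stub (see the @todo). */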
8752
8753DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
8754{
8755 /** @todo this needs a lot more work later. */
8756 RT_NOREF(pReNative, fForChange);
8757 return off;
8758}
8759
8760
8761
8762/*********************************************************************************************************************************
8763* Emitters for FPU related operations. *
8764*********************************************************************************************************************************/
8765
8766#define IEM_MC_FETCH_FCW(a_u16Fcw) \
8767 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
8768
8769/** Emits code for IEM_MC_FETCH_FCW. */
8770DECL_INLINE_THROW(uint32_t)
8771iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
8772{
8773 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8774 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8775
8776 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8777
8778 /* Allocate a temporary FCW register. */
8779 /** @todo eliminate extra register */
8780 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
8781 kIemNativeGstRegUse_ReadOnly);
8782
8783 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
8784
8785 /* Free but don't flush the FCW register. */
8786 iemNativeRegFreeTmp(pReNative, idxFcwReg);
8787 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8788
8789 return off;
8790}
8791
8792
8793#define IEM_MC_FETCH_FSW(a_u16Fsw) \
8794 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
8795
8796/** Emits code for IEM_MC_FETCH_FSW. */
8797DECL_INLINE_THROW(uint32_t)
8798iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
8799{
8800 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8801 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8802
8803 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
8804 /* Allocate a temporary FSW register. */
8805 /** @todo eliminate extra register */
8806 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
8807 kIemNativeGstRegUse_ReadOnly);
8808
8809 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
8810
8811 /* Free but don't flush the FSW register. */
8812 iemNativeRegFreeTmp(pReNative, idxFswReg);
8813 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8814
8815 return off;
8816}
8817
8818
8819
8820#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8821
8822
8823/*********************************************************************************************************************************
8824* Emitters for SSE/AVX specific operations. *
8825*********************************************************************************************************************************/
8826
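/*
 * Note: most emitters below follow the same basic pattern:
 *   1. Allocate a host SIMD register shadowing the guest XMM/YMM register
 *      (read-only, for-update or for-full-write, as appropriate).
 *   2. Acquire the host register backing the IEM_MC variable.
 *   3. Emit the actual load/store/broadcast between the two.
 *   4. Free the temporary SIMD register and release the variable register,
 *      without flushing anything back to CPUMCTX.
 */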
8827#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
8828 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
8829
8830/** Emits code for IEM_MC_COPY_XREG_U128. */
8831DECL_INLINE_THROW(uint32_t)
8832iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
8833{
8834 /* This is a nop if the source and destination register are the same. */
8835 if (iXRegDst != iXRegSrc)
8836 {
8837 /* Allocate destination and source register. */
8838 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
8839 kIemNativeGstSimdRegLdStSz_Low128,
8840 kIemNativeGstRegUse_ForFullWrite);
8841 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
8842 kIemNativeGstSimdRegLdStSz_Low128,
8843 kIemNativeGstRegUse_ReadOnly);
8844
8845 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
8846
8847 /* Free but don't flush the source and destination register. */
8848 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8849 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8850 }
8851
8852 return off;
8853}
8854
8855
8856#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
8857 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
8858
8859/** Emits code for IEM_MC_FETCH_XREG_U128. */
8860DECL_INLINE_THROW(uint32_t)
8861iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
8862{
8863 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8864 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
8865
8866 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8867 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8868
8869 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
8870
8871 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
8872
8873 /* Free but don't flush the source register. */
8874 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8875 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
8876
8877 return off;
8878}
8879
8880
8881#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
8882 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
8883
8884#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
8885 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
8886
8887/** Emits code for IEM_MC_FETCH_XREG_U64/IEM_MC_FETCH_XREG_R64. */
8888DECL_INLINE_THROW(uint32_t)
8889iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
8890{
8891 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8892 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
8893
8894 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8895 kIemNativeGstSimdRegLdStSz_Low128,
8896 kIemNativeGstRegUse_ReadOnly);
8897
8898 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8899 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8900
8901 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
8902
8903 /* Free but don't flush the source register. */
8904 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8905 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8906
8907 return off;
8908}
8909
8910
8911#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
8912 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
8913
8914#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
8915 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
8916
8917/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
8918DECL_INLINE_THROW(uint32_t)
8919iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
8920{
8921 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8922 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
8923
8924 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8925 kIemNativeGstSimdRegLdStSz_Low128,
8926 kIemNativeGstRegUse_ReadOnly);
8927
8928 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8929 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8930
8931 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
8932
8933 /* Free but don't flush the source register. */
8934 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8935 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8936
8937 return off;
8938}
8939
8940
8941#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
8942 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
8943
8944/** Emits code for IEM_MC_FETCH_XREG_U16. */
8945DECL_INLINE_THROW(uint32_t)
8946iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
8947{
8948 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8949 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8950
8951 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8952 kIemNativeGstSimdRegLdStSz_Low128,
8953 kIemNativeGstRegUse_ReadOnly);
8954
8955 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8956 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8957
8958 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
8959
8960 /* Free but don't flush the source register. */
8961 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8962 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8963
8964 return off;
8965}
8966
8967
8968#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
8969 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
8970
8971/** Emits code for IEM_MC_FETCH_XREG_U8. */
8972DECL_INLINE_THROW(uint32_t)
8973iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
8974{
8975 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8976 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
8977
8978 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8979 kIemNativeGstSimdRegLdStSz_Low128,
8980 kIemNativeGstRegUse_ReadOnly);
8981
8982 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8983 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8984
8985 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
8986
8987 /* Free but don't flush the source register. */
8988 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8989 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8990
8991 return off;
8992}
8993
8994
8995#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
8996 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
8997
8998AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
8999#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
9000 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
9001
9002
9003/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
9004DECL_INLINE_THROW(uint32_t)
9005iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9006{
9007 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9008 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9009
9010 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9011 kIemNativeGstSimdRegLdStSz_Low128,
9012 kIemNativeGstRegUse_ForFullWrite);
9013 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9014
9015 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9016
9017 /* Free but don't flush the destination register. */
9018 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9019 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9020
9021 return off;
9022}
9023
9024
9025#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
9026 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
9027
9028#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
9029 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
9030
9031#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
9032 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
9033
9034#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
9035 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
9036
9037#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
9038 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
9039
9040#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
9041 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
9042
9043/** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8 and the IEM_MC_STORE_XREG_R32/IEM_MC_STORE_XREG_R64 variants. */
9044DECL_INLINE_THROW(uint32_t)
9045iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
9046 uint8_t cbLocal, uint8_t iElem)
9047{
9048 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9049 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
9050
9051#ifdef VBOX_STRICT
9052 switch (cbLocal)
9053 {
9054 case sizeof(uint64_t): Assert(iElem < 2); break;
9055 case sizeof(uint32_t): Assert(iElem < 4); break;
9056 case sizeof(uint16_t): Assert(iElem < 8); break;
9057 case sizeof(uint8_t): Assert(iElem < 16); break;
9058 default: AssertFailed();
9059 }
9060#endif
9061
9062 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9063 kIemNativeGstSimdRegLdStSz_Low128,
9064 kIemNativeGstRegUse_ForUpdate);
9065 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9066
9067 switch (cbLocal)
9068 {
9069 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9070 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9071 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9072 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9073 default: AssertFailed();
9074 }
9075
9076 /* Free but don't flush the destination register. */
9077 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9078 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9079
9080 return off;
9081}
9082
9083
9084#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
9085 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
9086
9087/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
9088DECL_INLINE_THROW(uint32_t)
9089iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
9090{
9091 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9092 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9093
9094 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9095 kIemNativeGstSimdRegLdStSz_Low128,
9096 kIemNativeGstRegUse_ForUpdate);
9097 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9098
9099 /* Zero the vector register first, then store the 64-bit value to the lower 64 bits. */
9100 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
9101 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
9102
9103 /* Free but don't flush the destination register. */
9104 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9105 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9106
9107 return off;
9108}
9109
9110
9111#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
9112 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
9113
9114/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
9115DECL_INLINE_THROW(uint32_t)
9116iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
9117{
9118 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9119 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9120
9121 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9122 kIemNativeGstSimdRegLdStSz_Low128,
9123 kIemNativeGstRegUse_ForUpdate);
9124 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9125
9126 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
9127 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
9128 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
9129
9130 /* Free but don't flush the destination register. */
9131 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9132 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9133
9134 return off;
9135}
9136
9137
9138#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
9139 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
9140
9141/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
9142DECL_INLINE_THROW(uint32_t)
9143iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
9144 uint8_t idxSrcVar, uint8_t iDwSrc)
9145{
9146 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9147 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9148
9149 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9150 kIemNativeGstSimdRegLdStSz_Low128,
9151 kIemNativeGstRegUse_ForUpdate);
9152 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9153
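    /* Bounce the selected dword through the fixed temporary register: read it from the
       source value, then insert it into the destination XMM register at iDwDst. */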
9154 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
9155 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
9156
9157 /* Free but don't flush the destination register. */
9158 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9159 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9160
9161 return off;
9162}
9163
9164
9165#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
9166 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
9167
9168/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
9169DECL_INLINE_THROW(uint32_t)
9170iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
9171{
9172 /*
9173 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
9174 * if iYRegDst gets allocated first for the full write it won't load the
9175 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
9176 * duplicated from the already allocated host register for iYRegDst containing
9177 * garbage. This will be caught by the guest register value checking in debug
9178 * builds.
9179 */
9180 if (iYRegDst != iYRegSrc)
9181 {
9182 /* Allocate destination and source register. */
9183 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9184 kIemNativeGstSimdRegLdStSz_256,
9185 kIemNativeGstRegUse_ForFullWrite);
9186 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9187 kIemNativeGstSimdRegLdStSz_Low128,
9188 kIemNativeGstRegUse_ReadOnly);
9189
9190 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9191 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9192
9193 /* Free but don't flush the source and destination register. */
9194 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9195 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9196 }
9197 else
9198 {
9199 /* This effectively only clears the upper 128-bits of the register. */
9200 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9201 kIemNativeGstSimdRegLdStSz_High128,
9202 kIemNativeGstRegUse_ForFullWrite);
9203
9204 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
9205
9206 /* Free but don't flush the destination register. */
9207 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
9208 }
9209
9210 return off;
9211}
9212
9213
9214#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
9215 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
9216
9217/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
9218DECL_INLINE_THROW(uint32_t)
9219iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
9220{
9221 /*
9222 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
9223 * if iYRegDst gets allocated first for the full write it won't load the
9224 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
9225 * duplicated from the already allocated host register for iYRegDst containing
9226 * garbage. This will be caught by the guest register value checking in debug
9227 * builds. The iYRegSrc == iYRegDst case would effectively only clear the upper 256 bits
9228 * of a zmm register, which we don't support yet, so this is just a nop.
9229 */
9230 if (iYRegDst != iYRegSrc)
9231 {
9232 /* Allocate destination and source register. */
9233 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9234 kIemNativeGstSimdRegLdStSz_256,
9235 kIemNativeGstRegUse_ReadOnly);
9236 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9237 kIemNativeGstSimdRegLdStSz_256,
9238 kIemNativeGstRegUse_ForFullWrite);
9239
9240 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9241
9242 /* Free but don't flush the source and destination register. */
9243 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9244 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9245 }
9246
9247 return off;
9248}
9249
9250
9251#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
9252 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
9253
9254/** Emits code for IEM_MC_FETCH_YREG_U128. */
9255DECL_INLINE_THROW(uint32_t)
9256iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
9257{
9258 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9259 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9260
9261 Assert(iDQWord <= 1);
9262 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9263 iDQWord == 1
9264 ? kIemNativeGstSimdRegLdStSz_High128
9265 : kIemNativeGstSimdRegLdStSz_Low128,
9266 kIemNativeGstRegUse_ReadOnly);
9267
9268 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9269 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9270
9271 if (iDQWord == 1)
9272 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9273 else
9274 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9275
9276 /* Free but don't flush the source register. */
9277 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9278 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9279
9280 return off;
9281}
9282
9283
9284#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
9285 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
9286
9287/** Emits code for IEM_MC_FETCH_YREG_U64. */
9288DECL_INLINE_THROW(uint32_t)
9289iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
9290{
9291 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9292 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9293
9294 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9295 iQWord >= 2
9296 ? kIemNativeGstSimdRegLdStSz_High128
9297 : kIemNativeGstSimdRegLdStSz_Low128,
9298 kIemNativeGstRegUse_ReadOnly);
9299
9300 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9301 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9302
9303 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9304
9305 /* Free but don't flush the source register. */
9306 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9307 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9308
9309 return off;
9310}
9311
9312
9313#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
9314 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
9315
9316/** Emits code for IEM_MC_FETCH_YREG_U32. */
9317DECL_INLINE_THROW(uint32_t)
9318iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
9319{
9320 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9321 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9322
9323 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9324 iDWord >= 4
9325 ? kIemNativeGstSimdRegLdStSz_High128
9326 : kIemNativeGstSimdRegLdStSz_Low128,
9327 kIemNativeGstRegUse_ReadOnly);
9328
9329 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9330 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9331
9332 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9333
9334 /* Free but don't flush the source register. */
9335 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9336 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9337
9338 return off;
9339}
9340
9341
9342#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
9343 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
9344
9345/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
9346DECL_INLINE_THROW(uint32_t)
9347iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
9348{
9349 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9350 kIemNativeGstSimdRegLdStSz_High128,
9351 kIemNativeGstRegUse_ForFullWrite);
9352
9353 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
9354
9355 /* Free but don't flush the register. */
9356 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
9357
9358 return off;
9359}
9360
9361
9362#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
9363 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
9364
9365/** Emits code for IEM_MC_STORE_YREG_U128. */
9366DECL_INLINE_THROW(uint32_t)
9367iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
9368{
9369 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9370 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9371
9372 Assert(iDQword <= 1);
9373 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9374 iDQword == 0
9375 ? kIemNativeGstSimdRegLdStSz_Low128
9376 : kIemNativeGstSimdRegLdStSz_High128,
9377 kIemNativeGstRegUse_ForFullWrite);
9378
9379 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9380
9381 if (iDQword == 0)
9382 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9383 else
9384 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
9385
9386 /* Free but don't flush the destination register. */
9387 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9388 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9389
9390 return off;
9391}
9392
9393
9394#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
9395 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
9396
9397/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
9398DECL_INLINE_THROW(uint32_t)
9399iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9400{
9401 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9402 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9403
9404 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9405 kIemNativeGstSimdRegLdStSz_256,
9406 kIemNativeGstRegUse_ForFullWrite);
9407
9408 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9409
9410 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9411 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9412
9413 /* Free but don't flush the destination register. */
9414 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9415 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9416
9417 return off;
9418}
9419
9420
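/* Note: the XREG broadcast emitters below broadcast into the low 128 bits only (f256Bit=false)
   and then explicitly zero the upper half, while the YREG variants further down broadcast
   across the full 256 bits in one go. */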
9421#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
9422 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
9423
9424/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
9425DECL_INLINE_THROW(uint32_t)
9426iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9427{
9428 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9429 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
9430
9431 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9432 kIemNativeGstSimdRegLdStSz_256,
9433 kIemNativeGstRegUse_ForFullWrite);
9434
9435 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9436
9437 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9438 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9439
9440 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9441 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9442
9443 return off;
9444}
9445
9446
9447#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
9448 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
9449
9450/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
9451DECL_INLINE_THROW(uint32_t)
9452iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9453{
9454 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9455 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
9456
9457 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9458 kIemNativeGstSimdRegLdStSz_256,
9459 kIemNativeGstRegUse_ForFullWrite);
9460
9461 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9462
9463 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9464 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9465
9466 /* Free but don't flush the destination register. */
9467 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9468 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9469
9470 return off;
9471}
9472
9473
9474#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
9475 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
9476
9477/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
9478DECL_INLINE_THROW(uint32_t)
9479iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9480{
9481 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9482 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9483
9484 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9485 kIemNativeGstSimdRegLdStSz_256,
9486 kIemNativeGstRegUse_ForFullWrite);
9487
9488 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9489
9490 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9491 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9492
9493 /* Free but don't flush the destination register. */
9494 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9495 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9496
9497 return off;
9498}
9499
9500
9501#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
9502 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
9503
9504/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
9505DECL_INLINE_THROW(uint32_t)
9506iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9507{
9508 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9509 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9510
9511 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9512 kIemNativeGstSimdRegLdStSz_256,
9513 kIemNativeGstRegUse_ForFullWrite);
9514
9515 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9516
9517 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9518 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9519
9520 /* Free but don't flush the destination register. */
9521 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9522 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9523
9524 return off;
9525}
9526
9527
9528#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
9529 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
9530
9531/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
9532DECL_INLINE_THROW(uint32_t)
9533iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9534{
9535 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9536 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
9537
9538 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9539 kIemNativeGstSimdRegLdStSz_256,
9540 kIemNativeGstRegUse_ForFullWrite);
9541
9542 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9543
9544 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9545
9546 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9547 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9548
9549 return off;
9550}
9551
9552
9553#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
9554 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
9555
9556/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
9557DECL_INLINE_THROW(uint32_t)
9558iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9559{
9560 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9561 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
9562
9563 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9564 kIemNativeGstSimdRegLdStSz_256,
9565 kIemNativeGstRegUse_ForFullWrite);
9566
9567 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9568
9569 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9570
9571 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9572 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9573
9574 return off;
9575}
9576
9577
9578#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
9579 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
9580
9581/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
9582DECL_INLINE_THROW(uint32_t)
9583iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9584{
9585 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9586 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9587
9588 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9589 kIemNativeGstSimdRegLdStSz_256,
9590 kIemNativeGstRegUse_ForFullWrite);
9591
9592 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9593
9594 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9595
9596 /* Free but don't flush the destination register. */
9597 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9598 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9599
9600 return off;
9601}
9602
9603
9604#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
9605 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
9606
9607/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
9608DECL_INLINE_THROW(uint32_t)
9609iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9610{
9611 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9612 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9613
9614 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9615 kIemNativeGstSimdRegLdStSz_256,
9616 kIemNativeGstRegUse_ForFullWrite);
9617
9618 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9619
9620 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9621
9622 /* Free but don't flush the destination register. */
9623 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9624 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9625
9626 return off;
9627}
9628
9629
9630#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
9631 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
9632
9633/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
9634DECL_INLINE_THROW(uint32_t)
9635iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9636{
9637 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9638 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9639
9640 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9641 kIemNativeGstSimdRegLdStSz_256,
9642 kIemNativeGstRegUse_ForFullWrite);
9643
9644 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9645
9646 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
9647
9648 /* Free but don't flush the destination register. */
9649 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9650 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9651
9652 return off;
9653}
9654
9655
9656#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
9657 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
9658
9659/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
9660DECL_INLINE_THROW(uint32_t)
9661iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9662{
9663 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9664 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9665
9666 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9667 kIemNativeGstSimdRegLdStSz_256,
9668 kIemNativeGstRegUse_ForFullWrite);
9669
9670 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9671
9672 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
9673 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
9674
9675 /* Free but don't flush the destination register. */
9676 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9677 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9678
9679 return off;
9680}
9681
9682
9683#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
9684 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
9685
9686/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
9687DECL_INLINE_THROW(uint32_t)
9688iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9689{
9690 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9691 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9692
9693 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9694 kIemNativeGstSimdRegLdStSz_256,
9695 kIemNativeGstRegUse_ForFullWrite);
9696
9697 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9698
9699 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
9700 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
9701
9702 /* Free but don't flush the destination register. */
9703 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9704 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9705
9706 return off;
9707}
9708
9709
9710#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
9711 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
9712
9713/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
9714DECL_INLINE_THROW(uint32_t)
9715iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
9716{
9717 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9718 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9719
9720 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9721 kIemNativeGstSimdRegLdStSz_256,
9722 kIemNativeGstRegUse_ForFullWrite);
9723 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
9724 kIemNativeGstSimdRegLdStSz_Low128,
9725 kIemNativeGstRegUse_ReadOnly);
9726 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9727
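    /* Result: dst[63:0] = u64Local, dst[127:64] = SrcHx[127:64], upper half zeroed. Copy
       SrcHx's low 128 bits first, then overwrite qword 0 and clear the high 128 bits. */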
9728 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
9729 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
9730 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9731
9732 /* Free but don't flush the source and destination registers. */
9733 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
9734 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9735 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9736
9737 return off;
9738}
9739
9740
9741#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
9742 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
9743
9744/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
9745DECL_INLINE_THROW(uint32_t)
9746iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
9747{
9748 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9749 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9750
9751 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9752 kIemNativeGstSimdRegLdStSz_256,
9753 kIemNativeGstRegUse_ForFullWrite);
9754 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
9755 kIemNativeGstSimdRegLdStSz_Low128,
9756 kIemNativeGstRegUse_ReadOnly);
9757 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9758
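    /* Result: dst[63:0] = SrcHx[63:0], dst[127:64] = u64Local, upper half zeroed. */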
9759 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
9760 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
9761 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9762
9763 /* Free but don't flush the source and destination registers. */
9764 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
9765 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9766 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9767
9768 return off;
9769}
9770
9771
9772#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
9773 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
9774
9775
9776/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
9777DECL_INLINE_THROW(uint32_t)
9778iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
9779{
9780 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9781 kIemNativeGstSimdRegLdStSz_Low128,
9782 kIemNativeGstRegUse_ForUpdate);
9783
9784 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
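    /* (E.g. clearing two adjacent dwords is equivalent to clearing one qword, and a mask of
       0xf could zero the entire low 128 bits in one go.) */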
9785 if (bImm8Mask & RT_BIT(0))
9786 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
9787 if (bImm8Mask & RT_BIT(1))
9788 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
9789 if (bImm8Mask & RT_BIT(2))
9790 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
9791 if (bImm8Mask & RT_BIT(3))
9792 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
9793
9794 /* Free but don't flush the destination register. */
9795 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9796
9797 return off;
9798}
9799
9800
9801#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
9802 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
9803
9804#define IEM_MC_FETCH_YREG_YMM(a_uYmmDst, a_iYRegSrc) \
9805 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_uYmmDst, a_iYRegSrc)
9806
9807/** Emits code for IEM_MC_FETCH_YREG_U256/IEM_MC_FETCH_YREG_YMM. */
9808DECL_INLINE_THROW(uint32_t)
9809iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
9810{
9811 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9812 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
9813
9814 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9815 kIemNativeGstSimdRegLdStSz_256,
9816 kIemNativeGstRegUse_ReadOnly);
9817 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9818
9819 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
9820
9821 /* Free but don't flush the source register. */
9822 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9823 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9824
9825 return off;
9826}
9827
9828
9829#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
9830 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
9831
9832#define IEM_MC_STORE_YREG_YMM_ZX_VLMAX(a_iYRegDst, a_uYmmSrc) \
9833 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_uYmmSrc)
9834
9835/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX/IEM_MC_STORE_YREG_YMM_ZX_VLMAX. */
9836DECL_INLINE_THROW(uint32_t)
9837iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
9838{
9839 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9840 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
9841
9842 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9843 kIemNativeGstSimdRegLdStSz_256,
9844 kIemNativeGstRegUse_ForFullWrite);
9845 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9846
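    /* Copy the full 256-bit source value; the destination was allocated for a full write, so the entire guest register gets overwritten. */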
9847 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
9848
9849 /* Free but don't flush the source and destination registers. */
9850 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9851 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9852
9853 return off;
9854}
9855
9856
9857#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
9858 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
9859
9860
9861/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
9862DECL_INLINE_THROW(uint32_t)
9863iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
9864 uint8_t idxSrcVar, uint8_t iDwSrc)
9865{
9866 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9867 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
9868
9869 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9870 iDwDst < 4
9871 ? kIemNativeGstSimdRegLdStSz_Low128
9872 : kIemNativeGstSimdRegLdStSz_High128,
9873 kIemNativeGstRegUse_ForUpdate);
9874 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9875 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
9876
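    /* Bounce the dword through a temporary GPR: read element iDwSrc from the source variable and write it to element iDwDst of the guest register (only the affected 128-bit half was loaded above). */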
9877 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
9878 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
9879
9881 /* Free but don't flush the source and destination registers. */
9881 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9882 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9883 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9884
9885 return off;
9886}
9887
9888
9889#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
9890 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
9891
9892
9893/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
9894DECL_INLINE_THROW(uint32_t)
9895iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
9896 uint8_t idxSrcVar, uint8_t iQwSrc)
9897{
9898 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9899 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
9900
9901 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9902 iQwDst < 2
9903 ? kIemNativeGstSimdRegLdStSz_Low128
9904 : kIemNativeGstSimdRegLdStSz_High128,
9905 kIemNativeGstRegUse_ForUpdate);
9906 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9907 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
9908
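    /* Same approach as the dword variant: bounce the qword through a temporary GPR. */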
9909 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
9910 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
9911
9913 /* Free but don't flush the source and destination registers. */
9913 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9914 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9915 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9916
9917 return off;
9918}
9919
9920
9921#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
9922 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
9923
9924
9925/** Emits code for IEM_MC_STORE_YREG_U64. */
9926DECL_INLINE_THROW(uint32_t)
9927iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
9928{
9929 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9930 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9931
9932 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9933 iQwDst < 2
9934 ? kIemNativeGstSimdRegLdStSz_Low128
9935 : kIemNativeGstSimdRegLdStSz_High128,
9936 kIemNativeGstRegUse_ForUpdate);
9937
9938 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9939
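    /* Write the 64-bit value into qword iQwDst; only the containing 128-bit half was loaded for update, so the remaining qwords are left untouched. */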
9940 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
9941
9942 /* Free but don't flush the source and destination registers. */
9943 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9944 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9945
9946 return off;
9947}
9948
9949
9950#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
9951 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
9952
9953/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
9954DECL_INLINE_THROW(uint32_t)
9955iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
9956{
9957 RT_NOREF(pReNative, iYReg);
9958 /** @todo Needs to be implemented when support for AVX-512 is added. */
9959 return off;
9960}
9961
9962
9963
9964/*********************************************************************************************************************************
9965* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
9966*********************************************************************************************************************************/
9967
9968/**
9969 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
9970 */
9971DECL_INLINE_THROW(uint32_t)
9972iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs, uint8_t idxInstr)
9973{
9974 /* Grab the MXCSR register; it must not be call volatile or we would end up freeing it when setting up the call below. */
9975 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
9976 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
9977 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
9978
9979 /*
9980 * Need to do the FPU preparation.
9981 */
9982 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
9983
9984 /*
9985 * Do all the call setup and cleanup.
9986 */
9987 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS,
9988 false /*fFlushPendingWrites*/);
9989
9990 /*
9991 * Load the MXCSR register into the first argument and mask out the current exception flags.
9992 */
9993 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
9994 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
9995
9996 /*
9997 * Make the call.
9998 */
9999 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
10000
10001 /*
10002 * The updated MXCSR is in the return register, update exception status flags.
10003 *
10004 * The return register is marked allocated as a temporary because it is required for the
10005 * exception generation check below.
10006 */
10007 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG)));
10008 uint8_t const idxRegTmp = iemNativeRegMarkAllocated(pReNative, IEMNATIVE_CALL_RET_GREG, kIemNativeWhat_Tmp);
10009 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegTmp);
10010
10011#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
10012 /* Writeback the MXCSR register value (there is no delayed writeback for such registers at the moment). */
10013 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
10014#endif
10015
10016 /*
10017 * Make sure we don't have any outstanding guest register writes as we may
10018 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
10019 */
10020 off = iemNativeRegFlushPendingWrites(pReNative, off);
10021
10022#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10023 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10024#else
10025 RT_NOREF(idxInstr);
10026#endif
10027
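    /*
     * Raise the exception if the helper left any exception status flag set that is unmasked
     * in MXCSR, i.e.:
     *     if (uMxCsr & ~((uMxCsr & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT) & X86_MXCSR_XCPT_FLAGS)
     *         -> TB exit (RaiseSseAvxFpRelated).
     * The mask bits are shifted down over the flag bits and inverted, so only flags whose
     * corresponding exception is unmasked survive the final AND.
     */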
10028 /** @todo r=aeichner ANDN from BMI1 would save us a temporary and an additional instruction here, but I don't
10029 * want to assume the existence of this instruction at the moment. */
10030 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
10031
10032 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp2, idxRegTmp);
10033 /* tmp &= X86_MXCSR_XCPT_MASK */
10034 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
10035 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
10036 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
10037 /* tmp = ~tmp */
10038 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
10039 /* tmp &= mxcsr */
10040 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegTmp2);
10041 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
10042 kIemNativeLabelType_RaiseSseAvxFpRelated);
10043
10044 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
10045 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10046 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
10047
10048 return off;
10049}
10050
10051
10052#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
10053 off = iemNativeEmitCallSseAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
10054
10055/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
10056DECL_INLINE_THROW(uint32_t)
10057iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10058{
10059 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10060 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10061 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
10062}
10063
10064
10065#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
10066 off = iemNativeEmitCallSseAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
10067
10068/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
10069DECL_INLINE_THROW(uint32_t)
10070iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
10071 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10072{
10073 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10074 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10075 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10076 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
10077}
10078
10079
10080/*********************************************************************************************************************************
10081* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
10082*********************************************************************************************************************************/
10083
10084#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
10085 off = iemNativeEmitCallAvxAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
10086
10087/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
10088DECL_INLINE_THROW(uint32_t)
10089iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10090{
10091 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10092 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10093 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
10094}
10095
10096
10097#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
10098 off = iemNativeEmitCallAvxAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
10099
10100/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
10101DECL_INLINE_THROW(uint32_t)
10102iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
10103 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10104{
10105 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10106 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10107 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10108 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
10109}
10110
10111
10112#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
10113
10114
10115/*********************************************************************************************************************************
10116* Include instruction emitters. *
10117*********************************************************************************************************************************/
10118#include "target-x86/IEMAllN8veEmit-x86.h"
10119