VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@105724

Last change on this file since 105724 was 105657, checked in by vboxsync, 3 months ago

VMM/IEM: Fix bound instruction emulation when running in the recompiler on ARM, bugref:10741 [Follow up fixes]

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 502.1 KB
1/* $Id: IEMAllN8veRecompFuncs.h 105657 2024-08-13 07:40:48Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down the configs here to avoid wasting time on unused configs.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81#if defined(IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS) && !defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
82# error "IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS requires IEMNATIVE_WITH_SIMD_REG_ALLOCATOR"
83#endif
84
85
86/*********************************************************************************************************************************
87* Code emitters for flushing pending guest register writes and sanity checks *
88*********************************************************************************************************************************/
89
90#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
91# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
92DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
93{
94 /* Compare the shadow with the context value; they should match. */
95 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
96 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
97 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
98 return off;
99}
100# endif
101#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
102
103/**
104 * Flushes delayed write of a specific guest register.
105 *
106 * This must be called prior to calling CImpl functions and any helpers that use
107 * the guest state (such as helpers raising exceptions).
108 *
109 * This optimization has not yet been implemented. The first target would be
110 * RIP updates, since these are the most common ones.
111 */
112DECL_INLINE_THROW(uint32_t)
113iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
114{
115#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
116 /* If, for whatever reason, it is possible to reference the PC register at some point, we need to do the writeback here first. */
117#endif
118
119#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
120#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
121 if ( enmClass == kIemNativeGstRegRef_EFlags
122 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
123 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
124#else
125 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
126#endif
127
128 if ( enmClass == kIemNativeGstRegRef_Gpr
129 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
130 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
131#endif
132
133#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
134 if ( enmClass == kIemNativeGstRegRef_XReg
135 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
136 {
137 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
138 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
139 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
140
141 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
142 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
143 }
144#endif
145 RT_NOREF(pReNative, enmClass, idxReg);
146 return off;
147}
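/* Editorial example (not part of the original source; identifiers taken from the code
   above, the concrete register choice is illustrative only): a caller about to reference
   guest RAX directly would flush any delayed write for it first, e.g.:
       off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, X86_GREG_xAX);
   With IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK this emits the store for a dirty GPR
   shadow; otherwise it is effectively a no-op and just returns 'off' unchanged. */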
148
149
150
151/*********************************************************************************************************************************
152* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
153*********************************************************************************************************************************/
154
155#undef IEM_MC_BEGIN /* unused */
156#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
157 { \
158 Assert(pReNative->Core.bmVars == 0); \
159 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
160 Assert(pReNative->Core.bmStack == 0); \
161 pReNative->fMc = (a_fMcFlags); \
162 pReNative->fCImpl = (a_fCImplFlags); \
163 pReNative->cArgsX = (a_cArgsIncludingHidden)
164
165/** We have to get to the end in recompilation mode, as otherwise we won't
166 * generate code for all the IEM_MC_IF_XXX branches. */
167#define IEM_MC_END() \
168 iemNativeVarFreeAll(pReNative); \
169 } return off
170
171
172
173/*********************************************************************************************************************************
174* Native Emitter Support. *
175*********************************************************************************************************************************/
176
177#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
178
179#define IEM_MC_NATIVE_ELSE() } else {
180
181#define IEM_MC_NATIVE_ENDIF() } ((void)0)
182
183
184#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
185 off = a_fnEmitter(pReNative, off)
186
187#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
188 off = a_fnEmitter(pReNative, off, (a0))
189
190#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
191 off = a_fnEmitter(pReNative, off, (a0), (a1))
192
193#define IEM_MC_NATIVE_EMIT_2_EX(a_fnEmitter, a0, a1) \
194 off = a_fnEmitter(pReNative, off, pCallEntry->idxInstr, (a0), (a1))
195
196#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
197 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
198
199#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
200 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
201
202#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
203 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
204
205#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
206 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
207
208#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
209 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
210
211#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
212 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
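/* Editorial note (not part of the original source): each IEM_MC_NATIVE_EMIT_<N> macro
   simply forwards its arguments to the given emitter together with the current 'off',
   e.g. with a hypothetical emitter name:
       IEM_MC_NATIVE_EMIT_2(iemNativeEmitSomething, idxVarDst, idxVarSrc);
   expands to:
       off = iemNativeEmitSomething(pReNative, off, (idxVarDst), (idxVarSrc));
   The _2_EX variant additionally passes pCallEntry->idxInstr to the emitter. */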
213
214
215#ifndef RT_ARCH_AMD64
216# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
217#else
218/** @note This is a naive approach that ASSUMES that the register isn't
219 * allocated, so it only works safely for the first allocation(s) in
220 * an MC block. */
221# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
222 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
223
224DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off);
225
226DECL_INLINE_THROW(uint32_t)
227iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
228{
229 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
230 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
231 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
232
233# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
234 /* Must flush the register if it holds pending writes. */
235 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
236 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
237 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
238# endif
239
240 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off);
241 return off;
242}
243
244#endif /* RT_ARCH_AMD64 */
245
246
247
248/*********************************************************************************************************************************
249* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
250*********************************************************************************************************************************/
251
252#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
253 pReNative->fMc = 0; \
254 pReNative->fCImpl = (a_fFlags); \
255 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, \
256 a_cbInstr) /** @todo not used ... */
257
258
259#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
260 pReNative->fMc = 0; \
261 pReNative->fCImpl = (a_fFlags); \
262 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
263
264DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
265 uint8_t idxInstr, uint64_t a_fGstShwFlush,
266 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
267{
268 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
269}
270
271
272#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
273 pReNative->fMc = 0; \
274 pReNative->fCImpl = (a_fFlags); \
275 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
276 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
277
278DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
279 uint8_t idxInstr, uint64_t a_fGstShwFlush,
280 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
281{
282 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
283}
284
285
286#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
287 pReNative->fMc = 0; \
288 pReNative->fCImpl = (a_fFlags); \
289 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
290 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
291
292DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
293 uint8_t idxInstr, uint64_t a_fGstShwFlush,
294 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
295 uint64_t uArg2)
296{
297 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
298}
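/* Editorial note (not part of the original source): the one, two and three argument
   variants above all funnel into iemNativeEmitCImplCall() with the unused trailing
   arguments passed as zero, e.g. the single argument case effectively performs:
       iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
   */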
299
300
301
302/*********************************************************************************************************************************
303* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
304*********************************************************************************************************************************/
305
306/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
307 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
308DECL_INLINE_THROW(uint32_t)
309iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
310{
311 /*
312 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
313 * return with a special status code and make the execution loop deal with
314 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
315 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
316 * could continue w/o interruption, it probably will drop into the
317 * debugger, so it is not worth the effort of trying to service it here and we
318 * just lump it in with the handling of the others.
319 *
320 * To simplify the code and the register state management even more (wrt
321 * immediate in the AND operation), we always update the flags and skip the
322 * conditional jump associated with the extra check.
323 */
324 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
325 <= UINT32_MAX);
326#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
327 AssertMsg( pReNative->idxCurCall == 0
328 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
329 IEMLIVENESSBIT_IDX_EFL_OTHER)),
330 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
331 IEMLIVENESSBIT_IDX_EFL_OTHER)));
332#endif
333
334 /*
335 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
336 * any pending register writes must be flushed.
337 */
338 off = iemNativeRegFlushPendingWrites(pReNative, off);
339
340 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
341 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
342 true /*fSkipLivenessAssert*/);
343 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxEflReg,
344 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
345 kIemNativeLabelType_ReturnWithFlags);
346 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
347 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
348
349 /* Free but don't flush the EFLAGS register. */
350 iemNativeRegFreeTmp(pReNative, idxEflReg);
351
352 return off;
353}
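/* Editorial summary (not part of the original source) of the sequence emitted above,
   expressed in guest terms:
       if (eflags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
           exit the TB via the ReturnWithFlags path;
       eflags &= ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);   // then stored back to CPUMCTX
   Pending register writes are flushed first because the ReturnWithFlags exit leaves the TB. */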
354
355
356/** Helper for iemNativeEmitFinishInstructionWithStatus. */
357DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
358{
359 unsigned const offOpcodes = pCallEntry->offOpcode;
360 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
361 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
362 {
363 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
364 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
365 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
366 }
367 AssertFailedReturn(NIL_RTGCPHYS);
368}
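/* Editorial note (not part of the original source): the helper above translates the call
   entry's opcode offset into a guest physical PC by locating the opcode range containing
   it, i.e. conceptually:
       GCPhysPc = iemTbGetRangePhysPageAddr(pTb, idxRange) + aRanges[idxRange].offPhysPage
                + (offOpcodes - aRanges[idxRange].offOpcodes);
   */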
369
370
371/** The VINF_SUCCESS dummy. */
372template<int const a_rcNormal, bool const a_fIsJump>
373DECL_FORCE_INLINE_THROW(uint32_t)
374iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
375 int32_t const offJump)
376{
377 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
378 if (a_rcNormal != VINF_SUCCESS)
379 {
380#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
381 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
382#else
383 RT_NOREF_PV(pCallEntry);
384#endif
385
386 /* As this code returns from the TB any pending register writes must be flushed. */
387 off = iemNativeRegFlushPendingWrites(pReNative, off);
388
389 /*
390 * Use the lookup table for getting to the next TB quickly.
391 * Note! In this code path there can only be one entry at present.
392 */
393 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
394 PCIEMTB const pTbOrg = pReNative->pTbOrg;
395 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
396 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
397
398#if 0
399 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
400 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
401 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
402 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
403 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
404
405 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreak);
406
407#else
408 /* Load the index as argument #1 for the helper call at the given label. */
409 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
410
411 /*
412 * Figure out the physical address of the current instruction and see
413 * whether the next instruction we're about to execute is in the same
414 * page, so we can optimistically skip TLB loading.
415 *
416 * - This is safe for all cases in FLAT mode.
417 * - In segmented modes it is complicated, given that a negative
418 * jump may underflow EIP and a forward jump may overflow or run into
419 * CS.LIM and trigger a #GP. The only thing we can get away with
420 * now at compile time is forward jumps w/o CS.LIM checks, since the
421 * lack of CS.LIM checks means we're good for the entire physical page
422 * we're executing on and another 15 bytes before we run into CS.LIM.
423 */
424 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
425# if 0 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. See also iemNativeHlpReturnBreakViaLookup. */
426 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS)
427# endif
428 )
429 {
430 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
431 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
432 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
433 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
434
435 {
436 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
437 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
438
439 /* Load the key lookup flags into the 2nd argument for the helper call.
440 - This is safe wrt CS limit checking since we're only here for FLAT modes.
441 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
442 interrupt shadow.
443 - The NMI inhibiting is more questionable, though... */
444 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
445 * Should we copy it into fExec to simplify this? OTOH, it's just a
446 * couple of extra instructions if EFLAGS are already in a register. */
447 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
448 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
449
450 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
451 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookup);
452 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithIrq);
453 }
454 }
455 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
456 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlb);
457 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq);
458#endif
459 }
460 return off;
461}
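/* Editorial note (not part of the original source): the exit label chosen above encodes
   two independent decisions: whether the TLB load can be skipped (next PC on the same
   physical page in FLAT mode) and whether an IRQ check is still outstanding (no previous
   check recorded in idxLastCheckIrqCallNo), yielding the kIemNativeLabelType_ReturnBreakViaLookup,
   ...WithIrq, ...WithTlb and ...WithTlbAndIrq variants. */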
462
463
464#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
465 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
466 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
467
468#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
469 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
470 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
471 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
472
473/** Same as iemRegAddToRip64AndFinishingNoFlags. */
474DECL_INLINE_THROW(uint32_t)
475iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
476{
477#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
478# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
479 if (!pReNative->Core.offPc)
480 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
481# endif
482
483 /* Allocate a temporary PC register. */
484 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
485
486 /* Perform the addition and store the result. */
487 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
488 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
489
490 /* Free but don't flush the PC register. */
491 iemNativeRegFreeTmp(pReNative, idxPcReg);
492#endif
493
494#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
495 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
496
497 pReNative->Core.offPc += cbInstr;
498# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
499 off = iemNativePcAdjustCheck(pReNative, off);
500# endif
501 if (pReNative->cCondDepth)
502 off = iemNativeEmitPcWriteback(pReNative, off);
503 else
504 pReNative->Core.cInstrPcUpdateSkipped++;
505#endif
506
507 return off;
508}
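/* Editorial note (not part of the original source): with IEMNATIVE_WITH_DELAYED_PC_UPDATING
   the instruction length is merely accumulated in pReNative->Core.offPc above and the RIP
   store is left to iemNativeEmitPcWriteback (emitted immediately only when inside a
   conditional, see cCondDepth); without that config the RIP is updated and stored right away. */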
509
510
511#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
512 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
513 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
514
515#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
516 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
517 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
518 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
519
520/** Same as iemRegAddToEip32AndFinishingNoFlags. */
521DECL_INLINE_THROW(uint32_t)
522iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
523{
524#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
525# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
526 if (!pReNative->Core.offPc)
527 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
528# endif
529
530 /* Allocate a temporary PC register. */
531 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
532
533 /* Perform the addition and store the result. */
534 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
535 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
536
537 /* Free but don't flush the PC register. */
538 iemNativeRegFreeTmp(pReNative, idxPcReg);
539#endif
540
541#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
542 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
543
544 pReNative->Core.offPc += cbInstr;
545# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
546 off = iemNativePcAdjustCheck(pReNative, off);
547# endif
548 if (pReNative->cCondDepth)
549 off = iemNativeEmitPcWriteback(pReNative, off);
550 else
551 pReNative->Core.cInstrPcUpdateSkipped++;
552#endif
553
554 return off;
555}
556
557
558#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
559 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
560 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
561
562#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
563 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
564 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
565 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
566
567/** Same as iemRegAddToIp16AndFinishingNoFlags. */
568DECL_INLINE_THROW(uint32_t)
569iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
570{
571#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
572# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
573 if (!pReNative->Core.offPc)
574 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
575# endif
576
577 /* Allocate a temporary PC register. */
578 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
579
580 /* Perform the addition and store the result. */
581 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
582 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
583 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
584
585 /* Free but don't flush the PC register. */
586 iemNativeRegFreeTmp(pReNative, idxPcReg);
587#endif
588
589#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
590 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
591
592 pReNative->Core.offPc += cbInstr;
593# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
594 off = iemNativePcAdjustCheck(pReNative, off);
595# endif
596 if (pReNative->cCondDepth)
597 off = iemNativeEmitPcWriteback(pReNative, off);
598 else
599 pReNative->Core.cInstrPcUpdateSkipped++;
600#endif
601
602 return off;
603}
604
605
606
607/*********************************************************************************************************************************
608* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
609*********************************************************************************************************************************/
610
611#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
612 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
613 (a_enmEffOpSize), pCallEntry->idxInstr); \
614 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
615
616#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
617 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
618 (a_enmEffOpSize), pCallEntry->idxInstr); \
619 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
620 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
621
622#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
623 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
624 IEMMODE_16BIT, pCallEntry->idxInstr); \
625 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
626
627#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
628 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
629 IEMMODE_16BIT, pCallEntry->idxInstr); \
630 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
631 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
632
633#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
634 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
635 IEMMODE_64BIT, pCallEntry->idxInstr); \
636 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
637
638#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
639 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
640 IEMMODE_64BIT, pCallEntry->idxInstr); \
641 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
642 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
643
644/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
645 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
646 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
647DECL_INLINE_THROW(uint32_t)
648iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
649 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
650{
651 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
652
653 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
654 off = iemNativeRegFlushPendingWrites(pReNative, off);
655
656#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
657 Assert(pReNative->Core.offPc == 0);
658
659 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
660#endif
661
662 /* Allocate a temporary PC register. */
663 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
664
665 /* Perform the addition. */
666 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
667
668 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
669 {
670 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
671 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
672 }
673 else
674 {
675 /* Just truncate the result to 16-bit IP. */
676 Assert(enmEffOpSize == IEMMODE_16BIT);
677 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
678 }
679 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
680
681 /* Free but don't flush the PC register. */
682 iemNativeRegFreeTmp(pReNative, idxPcReg);
683
684 return off;
685}
686
687
688#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
689 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
690 (a_enmEffOpSize), pCallEntry->idxInstr); \
691 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
692
693#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
694 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
695 (a_enmEffOpSize), pCallEntry->idxInstr); \
696 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
697 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
698
699#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
700 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
701 IEMMODE_16BIT, pCallEntry->idxInstr); \
702 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
703
704#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
705 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
706 IEMMODE_16BIT, pCallEntry->idxInstr); \
707 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
708 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
709
710#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
711 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
712 IEMMODE_32BIT, pCallEntry->idxInstr); \
713 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
714
715#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
716 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
717 IEMMODE_32BIT, pCallEntry->idxInstr); \
718 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
719 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
720
721/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
722 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
723 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
724DECL_INLINE_THROW(uint32_t)
725iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
726 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
727{
728 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
729
730 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
731 off = iemNativeRegFlushPendingWrites(pReNative, off);
732
733#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
734 Assert(pReNative->Core.offPc == 0);
735
736 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
737#endif
738
739 /* Allocate a temporary PC register. */
740 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
741
742 /* Perform the addition. */
743 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
744
745 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
746 if (enmEffOpSize == IEMMODE_16BIT)
747 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
748
749 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
750/** @todo we can skip this in 32-bit FLAT mode. */
751 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
752
753 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
754
755 /* Free but don't flush the PC register. */
756 iemNativeRegFreeTmp(pReNative, idxPcReg);
757
758 return off;
759}
760
761
762#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
763 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
764 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
765
766#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
767 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
768 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
769 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
770
771#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
772 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
773 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
774
775#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
776 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
777 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
778 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
779
780#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
781 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
782 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
783
784#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
785 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
786 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
787 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
788
789/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
790DECL_INLINE_THROW(uint32_t)
791iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
792 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
793{
794 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
795 off = iemNativeRegFlushPendingWrites(pReNative, off);
796
797#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
798 Assert(pReNative->Core.offPc == 0);
799
800 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
801#endif
802
803 /* Allocate a temporary PC register. */
804 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
805
806 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
807 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
808 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
809 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
810 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
811
812 /* Free but don't flush the PC register. */
813 iemNativeRegFreeTmp(pReNative, idxPcReg);
814
815 return off;
816}
817
818
819
820/*********************************************************************************************************************************
821* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
822*********************************************************************************************************************************/
823
824/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
825#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
826 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
827
828/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
829#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
830 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
831
832/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
833#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
834 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
835
836/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
837 * clears flags. */
838#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
839 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
840 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
841
842/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
843 * clears flags. */
844#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
845 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
846 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
847
848/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
849 * clears flags. */
850#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
851 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
852 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
853
854#undef IEM_MC_SET_RIP_U16_AND_FINISH
855
856
857/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
858#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
859 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
860
861/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
862#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
863 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
864
865/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
866 * clears flags. */
867#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
868 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
869 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
870
871/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
872 * and clears flags. */
873#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
874 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
875 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
876
877#undef IEM_MC_SET_RIP_U32_AND_FINISH
878
879
880/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
881#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
882 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
883
884/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
885 * and clears flags. */
886#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
887 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
888 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
889
890#undef IEM_MC_SET_RIP_U64_AND_FINISH
891
892
893/** Same as iemRegRipJumpU16AndFinishNoFlags,
894 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
895DECL_INLINE_THROW(uint32_t)
896iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
897 uint8_t idxInstr, uint8_t cbVar)
898{
899 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
900 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
901
902 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
903 off = iemNativeRegFlushPendingWrites(pReNative, off);
904
905#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
906 Assert(pReNative->Core.offPc == 0);
907
908 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
909#endif
910
911 /* Get a register with the new PC loaded from idxVarPc.
912 Note! This ASSUMES that the high bits of the GPR are zeroed. */
913 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
914
915 /* Check limit (may #GP(0) + exit TB). */
916 if (!f64Bit)
917/** @todo we can skip this test in FLAT 32-bit mode. */
918 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
919 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
920 else if (cbVar > sizeof(uint32_t))
921 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
922
923 /* Store the result. */
924 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
925
926 iemNativeVarRegisterRelease(pReNative, idxVarPc);
927 /** @todo implicitly free the variable? */
928
929 return off;
930}
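/* Editorial note (not part of the original source): the checks above mirror the threaded
   variants: non-64-bit targets are checked against the CS segment limit, while 64-bit
   targets wider than 32 bits are checked for canonical form; either check may raise
   #GP(0) and exit the TB before the new PC is stored. */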
931
932
933
934/*********************************************************************************************************************************
935* Emitters for changing PC/RIP/EIP/IP with a relative call jump (IEM_MC_IND_CALL_UXX_AND_FINISH) (requires stack emitters). *
936*********************************************************************************************************************************/
937
938/** @todo These helpers belong to the stack push API naturally but we already need them up here (we could of course move
939 * this below the stack emitters, but then it would not be close to the rest of the PC/RIP handling...). */
940DECL_FORCE_INLINE_THROW(uint32_t)
941iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
942{
943 /* Use16BitSp: */
944#ifdef RT_ARCH_AMD64
945 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
946 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
947#else
948 /* sub regeff, regrsp, #cbMem */
949 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
950 /* and regeff, regeff, #0xffff */
951 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
952 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
953 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp into idxRegRsp bits 15:0. */
954 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
955#endif
956 return off;
957}
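/* Editorial note (not part of the original source): for a 16-bit stack the sequence above
   effectively computes
       effSp = (rsp - cbMem) & 0xffff;
   and then merges effSp back into RSP bits 15:0, matching the AMD64 path where the 16-bit
   subtraction leaves RSP bits 63:16 untouched. */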
958
959
960DECL_FORCE_INLINE(uint32_t)
961iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
962{
963 /* Use32BitSp: */
964 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
965 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
966 return off;
967}
968
969
970DECL_INLINE_THROW(uint32_t)
971iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
972 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
973{
974 /*
975 * Assert sanity.
976 */
977#ifdef VBOX_STRICT
978 if (RT_BYTE2(cBitsVarAndFlat) != 0)
979 {
980 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
981 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
982 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
983 Assert( pfnFunction
984 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
985 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
986 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
987 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
988 : UINT64_C(0xc000b000a0009000) ));
989 }
990 else
991 Assert( pfnFunction
992 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
993 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
994 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
995 : UINT64_C(0xc000b000a0009000) ));
996#endif
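/* Editorial note (not part of the original source): cBitsVarAndFlat packs, via
   RT_MAKE_U32_FROM_U8, the variable width in bits (byte 1), the flat-mode width or zero
   (byte 2) and a segment-register flag (byte 3); e.g. RT_MAKE_U32_FROM_U8(16, 32, 0, 0)
   is a 16-bit push in flat 32-bit mode. The bytes are unpacked again a little further down. */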
997
998#ifdef VBOX_STRICT
999 /*
1000 * Check that the fExec flags we've got make sense.
1001 */
1002 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1003#endif
1004
1005 /*
1006 * To keep things simple we have to commit any pending writes first as we
1007 * may end up making calls.
1008 */
1009 /** @todo we could postpone this till we make the call and reload the
1010 * registers after returning from the call. Not sure if that's sensible or
1011 * not, though. */
1012 off = iemNativeRegFlushPendingWrites(pReNative, off);
1013
1014 /*
1015 * First we calculate the new RSP and the effective stack pointer value.
1016 * For 64-bit mode and flat 32-bit these two are the same.
1017 * (Code structure is very similar to that of PUSH)
1018 */
1019 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
1020 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
1021 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
1022 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
1023 ? cbMem : sizeof(uint16_t);
1024 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
1025 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1026 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1027 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1028 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1029 if (cBitsFlat != 0)
1030 {
1031 Assert(idxRegEffSp == idxRegRsp);
1032 Assert(cBitsFlat == 32 || cBitsFlat == 64);
1033 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1034 if (cBitsFlat == 64)
1035 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1036 else
1037 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1038 }
1039 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1040 {
1041 Assert(idxRegEffSp != idxRegRsp);
1042 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1043 kIemNativeGstRegUse_ReadOnly);
1044#ifdef RT_ARCH_AMD64
1045 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1046#else
1047 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1048#endif
1049 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1050 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1051 offFixupJumpToUseOtherBitSp = off;
1052 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1053 {
1054 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1055 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1056 }
1057 else
1058 {
1059 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1060 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1061 }
1062 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1063 }
1064 /* SpUpdateEnd: */
1065 uint32_t const offLabelSpUpdateEnd = off;
1066
1067 /*
1068 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1069 * we're skipping lookup).
1070 */
1071 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1072 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
1073 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1074 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1075 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1076 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1077 : UINT32_MAX;
1078 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1079
1080
1081 if (!TlbState.fSkip)
1082 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1083 else
1084 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1085
1086 /*
1087 * Use16BitSp:
1088 */
1089 if (cBitsFlat == 0)
1090 {
1091#ifdef RT_ARCH_AMD64
1092 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1093#else
1094 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1095#endif
1096 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1097 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1098 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1099 else
1100 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1101 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1102 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1103 }
1104
1105 /*
1106 * TlbMiss:
1107 *
1108 * Call helper to do the pushing.
1109 */
1110 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1111
1112#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1113 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1114#else
1115 RT_NOREF(idxInstr);
1116#endif
1117
1118 /* Save variables in volatile registers. */
1119 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1120 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1121 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1122 | (RT_BIT_32(idxRegPc));
1123 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1124
1125 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1126 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1127 {
1128 /* Swap them using ARG0 as temp register: */
1129 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1130 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1131 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1132 }
1133 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1134 {
1135 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1136 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1137
1138 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1139 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1140 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1141 }
1142 else
1143 {
1144 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1145 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1146
1147 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1148 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1149 }
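/* Editorial note (not part of the original source): the three cases above only ensure that
   idxRegPc ends up in IEMNATIVE_CALL_ARG2_GREG and idxRegEffSp in IEMNATIVE_CALL_ARG1_GREG
   without clobbering either, using ARG0 as a scratch register only when the two would
   otherwise swap places. */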
1150
1151 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1152 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1153
1154 /* Done setting up parameters, make the call. */
1155 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1156
1157 /* Restore variables and guest shadow registers to volatile registers. */
1158 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1159 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1160
1161#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1162 if (!TlbState.fSkip)
1163 {
1164 /* end of TlbMiss - Jump to the done label. */
1165 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1166 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1167
1168 /*
1169 * TlbLookup:
1170 */
1171 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
1172 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1173
1174 /*
1175 * Emit code to do the actual storing / fetching.
1176 */
1177 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1178# ifdef IEM_WITH_TLB_STATISTICS
1179 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1180 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1181# endif
1182 switch (cbMemAccess)
1183 {
1184 case 2:
1185 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1186 break;
1187 case 4:
1188 if (!fIsIntelSeg)
1189 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1190 else
1191 {
1192 /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
1193 PUSH FS in real mode, so we have to try to emulate that here.
1194 We borrow the now unused idxReg1 from the TLB lookup code here. */
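/* For clarity (a sketch of the intent, not extra emitted code): the 32-bit value stored
   below works out to roughly (EFLAGS & (0xffff0000 & ~X86_EFL_RAZ_MASK)) | (IP & 0xffff),
   relying on the zero upper half of idxRegPc as asserted further down. */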
1195 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
1196 kIemNativeGstReg_EFlags);
1197 if (idxRegEfl != UINT8_MAX)
1198 {
1199#ifdef RT_ARCH_AMD64
1200 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
1201 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1202 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1203#else
1204 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
1205 off, TlbState.idxReg1, idxRegEfl,
1206 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1207#endif
1208 iemNativeRegFreeTmp(pReNative, idxRegEfl);
1209 }
1210 else
1211 {
1212 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
1213 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
1214 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1215 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1216 }
1217 /* ASSUMES the upper half of idxRegPc is ZERO. */
1218 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegPc);
1219 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
1220 }
1221 break;
1222 case 8:
1223 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1224 break;
1225 default:
1226 AssertFailed();
1227 }
1228
1229 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1230 TlbState.freeRegsAndReleaseVars(pReNative);
1231
1232 /*
1233 * TlbDone:
1234 *
1235 * Commit the new RSP value.
1236 */
1237 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1238 }
1239#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1240
1241#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1242 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
1243#endif
1244 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1245 if (idxRegEffSp != idxRegRsp)
1246 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1247
1248 return off;
1249}
1250
1251
1252/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1253#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1254 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1255
1256/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1257 * clears flags. */
1258#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1259 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1260 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1261
1262/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1263#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1264 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1265
1266/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1267 * clears flags. */
1268#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1269 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1270 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1271
1272#undef IEM_MC_IND_CALL_U16_AND_FINISH
1273
1274
1275/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1276#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1277 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1278
1279/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1280 * clears flags. */
1281#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1282 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1283 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1284
1285#undef IEM_MC_IND_CALL_U32_AND_FINISH
1286
1287
1288/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1289 * an extra parameter, for use in 64-bit code. */
1290#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1291 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1292
1293
1294/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1295 * an extra parameter, for use in 64-bit code and we need to check and clear
1296 * flags. */
1297#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1298 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1299 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1300
1301#undef IEM_MC_IND_CALL_U64_AND_FINISH
1302
1303/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1304 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1305DECL_INLINE_THROW(uint32_t)
1306iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1307 uint8_t idxInstr, uint8_t cbVar)
1308{
1309 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1310 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1311
1312 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1313 off = iemNativeRegFlushPendingWrites(pReNative, off);
1314
1315#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1316 Assert(pReNative->Core.offPc == 0);
1317
1318 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1319#endif
1320
1321 /* Get a register with the new PC loaded from idxVarPc.
1322 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1323 uint8_t const idxNewPcReg = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1324
1325 /* Check limit (may #GP(0) + exit TB). */
1326 if (!f64Bit)
1327/** @todo we can skip this test in FLAT 32-bit mode. */
1328 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1329 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1330 else if (cbVar > sizeof(uint32_t))
1331 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1332
1333#if 1
1334 /* Allocate a temporary PC register, we don't want it shadowed. */
1335 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1336 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1337#else
1338 /* Allocate a temporary PC register. */
1339 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1340 true /*fNoVolatileRegs*/);
1341#endif
1342
1343 /* Perform the addition and push the variable to the guest stack. */
1344 /** @todo Flat variants for PC32 variants. */
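/* Note! The RT_MAKE_U32_FROM_U8 values below appear to pack the push width in the first
   byte and the flat-mode bit count in the second (0 = not flat), matching the cBitsFlat
   check in the push helper above; e.g. (16, 0, ...) is a non-flat 16-bit push while
   (64, 64, ...) is a flat 64-bit push. */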
1345 switch (cbVar)
1346 {
1347 case sizeof(uint16_t):
1348 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1349 /* Truncate the result to 16-bit IP. */
1350 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1351 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1352 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1353 break;
1354 case sizeof(uint32_t):
1355 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1356 /** @todo In FLAT mode we can use the flat variant. */
1357 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1358 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1359 break;
1360 case sizeof(uint64_t):
1361 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1362 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1363 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1364 break;
1365 default:
1366 AssertFailed();
1367 }
1368
1369 /* RSP got changed, so do this again. */
1370 off = iemNativeRegFlushPendingWrites(pReNative, off);
1371
1372 /* Store the result. */
1373 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1374
1375#if 1
1376 /* Need to transfer the shadow information to the new RIP register. */
1377 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxNewPcReg, kIemNativeGstReg_Pc, off);
1378#else
1379 /* Sync the new PC. */
1380 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxNewPcReg);
1381#endif
1382 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1383 iemNativeRegFreeTmp(pReNative, idxPcReg);
1384 /** @todo implicitly free the variable? */
1385
1386 return off;
1387}
1388
1389
1390/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1391 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1392#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1393 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1394
1395/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1396 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
1397 * flags. */
1398#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
1399 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
1400 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1401
1402/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1403 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1404#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
1405 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1406
1407/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1408 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1409 * flags. */
1410#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
1411 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
1412 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1413
1414/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1415 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1416#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
1417 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1418
1419/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1420 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1421 * flags. */
1422#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
1423 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
1424 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1425
1426#undef IEM_MC_REL_CALL_S16_AND_FINISH
1427
1428/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1429 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1430DECL_INLINE_THROW(uint32_t)
1431iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
1432 uint8_t idxInstr)
1433{
1434 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1435 off = iemNativeRegFlushPendingWrites(pReNative, off);
1436
1437#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1438 Assert(pReNative->Core.offPc == 0);
1439
1440 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1441#endif
1442
1443 /* Allocate a temporary PC register. */
1444 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1445 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1446 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1447
1448 /* Calculate the return address (IP + cbInstr) and the new target IP. */
1449 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
1450 /* Truncate the result to 16-bit IP. */
1451 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
1452 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
1453 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
1454
1455 /* Truncate the result to 16-bit IP. */
1456 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
1457
1458 /* Check limit (may #GP(0) + exit TB). */
1459 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1460
1461 /* Push the return address to the guest stack. */
1462 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1463 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1464
1465 /* RSP got changed, so flush again. */
1466 off = iemNativeRegFlushPendingWrites(pReNative, off);
1467
1468 /* Store the result. */
1469 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1470
1471 /* Need to transfer the shadow information to the new RIP register. */
1472 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1473 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1474 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1475
1476 return off;
1477}
1478
1479
1480/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
1481 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1482#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
1483 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
1484
1485/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
1486 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1487 * flags. */
1488#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
1489 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
1490 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1491
1492#undef IEM_MC_REL_CALL_S32_AND_FINISH
1493
1494/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1495 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1496DECL_INLINE_THROW(uint32_t)
1497iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
1498 uint8_t idxInstr)
1499{
1500 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1501 off = iemNativeRegFlushPendingWrites(pReNative, off);
1502
1503#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1504 Assert(pReNative->Core.offPc == 0);
1505
1506 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1507#endif
1508
1509 /* Allocate a temporary PC register. */
1510 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1511 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1512 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1513
1514 /* Update the EIP to get the return address. */
1515 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
1516
1517 /* Load the address, add the displacement and check that it is within the CS segment limit, raising #GP(0) + exit TB if it isn't. */
1518 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
1519 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
1520 /** @todo we can skip this test in FLAT 32-bit mode. */
1521 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1522
1523 /* Push the return address to the guest stack. */
1524 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
1525 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1526 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1527
1528 /* RSP got changed, so do this again. */
1529 off = iemNativeRegFlushPendingWrites(pReNative, off);
1530
1531 /* Store the result. */
1532 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1533
1534 /* Need to transfer the shadow information to the new RIP register. */
1535 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1536 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1537 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1538
1539 return off;
1540}
1541
1542
1543/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
1544 * an extra parameter, for use in 64-bit code. */
1545#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
1546 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
1547
1548/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
1549 * an extra parameter, for use in 64-bit code and we need to check and clear
1550 * flags. */
1551#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
1552 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
1553 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1554
1555#undef IEM_MC_REL_CALL_S64_AND_FINISH
1556
1557/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1558 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1559DECL_INLINE_THROW(uint32_t)
1560iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
1561 uint8_t idxInstr)
1562{
1563 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1564 off = iemNativeRegFlushPendingWrites(pReNative, off);
1565
1566#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1567 Assert(pReNative->Core.offPc == 0);
1568
1569 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1570#endif
1571
1572 /* Allocate a temporary PC register. */
1573 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1574 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1575 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1576
1577 /* Update the RIP to get the return address. */
1578 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
1579
1580 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1581 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
1582 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
1583 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1584
1585 /* Push the return address to the guest stack. */
1586 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1587 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1588
1589 /* RSP got changed, so do this again. */
1590 off = iemNativeRegFlushPendingWrites(pReNative, off);
1591
1592 /* Store the result. */
1593 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1594
1595 /* Need to transfer the shadow information to the new RIP register. */
1596 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1597 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1598 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1599
1600 return off;
1601}
1602
1603
1604/*********************************************************************************************************************************
1605* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters). *
1606*********************************************************************************************************************************/
1607
1608DECL_FORCE_INLINE_THROW(uint32_t)
1609iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
1610 uint16_t cbPopAdd, uint8_t idxRegTmp)
1611{
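/* In scalar terms this helper is roughly (a sketch, not extra emitted code):
       EffSp = RSP & 0xffff;                       - the old 16-bit stack pointer
       SP    = (uint16_t)(SP + cbMem + cbPopAdd);  - only the low 16 bits of RSP advance
   i.e. bits 63:16 of RSP are left untouched on both the AMD64 and ARM64 paths. */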
1612 /* Use16BitSp: */
1613#ifdef RT_ARCH_AMD64
1614 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1615 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1616 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
1617 RT_NOREF(idxRegTmp);
1618#elif defined(RT_ARCH_ARM64)
1619 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
1620 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
1621 /* add tmp, regrsp, #(cbMem + cbPopAdd) - low 12 bits here; any higher bits are added just below. */
1622 uint16_t const cbCombined = cbMem + cbPopAdd;
1623 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
1624 if (cbCombined >= RT_BIT_32(12))
1625 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
1626 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
1627 /* and tmp, tmp, #0xffff */
1628 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1629 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
1630 /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
1631 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
1632#else
1633# error "Port me"
1634#endif
1635 return off;
1636}
1637
1638
1639DECL_FORCE_INLINE_THROW(uint32_t)
1640iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
1641 uint16_t cbPopAdd)
1642{
1643 /* Use32BitSp: */
1644 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1645 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
1646 return off;
1647}
1648
1649
1650/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
1651#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr) \
1652 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, IEMMODE_16BIT, pCallEntry->idxInstr)
1653
1654/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
1655#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1656 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
1657
1658/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
1659#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1660 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), true /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
1661
1662/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
1663 * clears flags. */
1664#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16Pop, a_cbInstr) \
1665 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr); \
1666 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1667
1668/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
1669 * clears flags. */
1670#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1671 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
1672 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1673
1674/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
1675 * clears flags. */
1676#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1677 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
1678 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1679
1680/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
1681DECL_INLINE_THROW(uint32_t)
1682iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPop, bool f64Bit,
1683 IEMMODE enmEffOpSize, uint8_t idxInstr)
1684{
1685 RT_NOREF(cbInstr);
1686
1687#ifdef VBOX_STRICT
1688 /*
1689 * Check that the fExec flags we've got make sense.
1690 */
1691 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1692#endif
1693
1694 /*
1695 * To keep things simple we have to commit any pending writes first as we
1696 * may end up making calls.
1697 */
1698 off = iemNativeRegFlushPendingWrites(pReNative, off);
1699
1700 /*
1701 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
1702 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
1703 * directly as the effective stack pointer.
1704 * (Code structure is very similar to that of PUSH)
1705 *
1706 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
1707 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
1708 * aren't commonly used (or useful) and thus not in need of optimizing.
1709 *
1710 * Note! For non-flat modes the guest RSP is not allocated for update but rather for calculation,
1711 * as the shadowed register would otherwise remain modified even if the return address throws a \#GP(0)
1712 * due to being outside the CS limit, causing a wrong stack pointer value in the guest (see
1713 * the near return testcase in bs3-cpu-basic-2). If no exception is thrown, the shadowing is transferred
1714 * to the new register returned by iemNativeRegAllocTmpForGuestReg() at the end.
1715 */
1716 uint8_t const cbMem = enmEffOpSize == IEMMODE_64BIT
1717 ? sizeof(uint64_t)
1718 : enmEffOpSize == IEMMODE_32BIT
1719 ? sizeof(uint32_t)
1720 : sizeof(uint16_t);
1721 bool const fFlat = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) && enmEffOpSize != IEMMODE_16BIT; /* see note */
1722 uintptr_t const pfnFunction = fFlat
1723 ? enmEffOpSize == IEMMODE_64BIT
1724 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
1725 : (uintptr_t)iemNativeHlpStackFlatFetchU32
1726 : enmEffOpSize == IEMMODE_32BIT
1727 ? (uintptr_t)iemNativeHlpStackFetchU32
1728 : (uintptr_t)iemNativeHlpStackFetchU16;
1729 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1730 fFlat ? kIemNativeGstRegUse_ForUpdate
1731 : kIemNativeGstRegUse_Calculation,
1732 true /*fNoVolatileRegs*/);
1733 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1734 /** @todo can do a better job picking the register here. For cbMem >= 4 this
1735 * will be the resulting register value. */
1736 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
1737
1738 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1739 if (fFlat)
1740 Assert(idxRegEffSp == idxRegRsp);
1741 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1742 {
1743 Assert(idxRegEffSp != idxRegRsp);
1744 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1745 kIemNativeGstRegUse_ReadOnly);
1746#ifdef RT_ARCH_AMD64
1747 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1748#else
1749 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1750#endif
1751 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1752 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1753 offFixupJumpToUseOtherBitSp = off;
1754 if (enmEffOpSize == IEMMODE_32BIT)
1755 {
1756 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1757 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
1758 }
1759 else
1760 {
1761 Assert(enmEffOpSize == IEMMODE_16BIT);
1762 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1763 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
1764 idxRegMemResult);
1765 }
1766 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1767 }
1768 /* SpUpdateEnd: */
1769 uint32_t const offLabelSpUpdateEnd = off;
1770
1771 /*
1772 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1773 * we're skipping lookup).
1774 */
1775 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
1776 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
1777 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1778 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1779 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1780 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1781 : UINT32_MAX;
1782
1783 if (!TlbState.fSkip)
1784 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1785 else
1786 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1787
1788 /*
1789 * Use16BitSp:
1790 */
1791 if (!fFlat)
1792 {
1793#ifdef RT_ARCH_AMD64
1794 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1795#else
1796 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1797#endif
1798 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1799 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1800 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
1801 idxRegMemResult);
1802 else
1803 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
1804 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1805 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1806 }
1807
1808 /*
1809 * TlbMiss:
1810 *
1811 * Call helper to do the pushing.
1812 */
1813 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1814
1815#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1816 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1817#else
1818 RT_NOREF(idxInstr);
1819#endif
1820
1821 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1822 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1823 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
1824 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1825
1826
1827 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
1828 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1829 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1830
1831 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1832 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1833
1834 /* Done setting up parameters, make the call. */
1835 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1836
1837 /* Move the return register content to idxRegMemResult. */
1838 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
1839 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
1840
1841 /* Restore variables and guest shadow registers to volatile registers. */
1842 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1843 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1844
1845#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1846 if (!TlbState.fSkip)
1847 {
1848 /* end of TlbMiss - Jump to the done label. */
1849 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1850 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1851
1852 /*
1853 * TlbLookup:
1854 */
1855 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
1856 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1857
1858 /*
1859 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
1860 */
1861 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1862# ifdef IEM_WITH_TLB_STATISTICS
1863 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1864 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1865# endif
1866 switch (cbMem)
1867 {
1868 case 2:
1869 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1870 break;
1871 case 4:
1872 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1873 break;
1874 case 8:
1875 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1876 break;
1877 default:
1878 AssertFailed();
1879 }
1880
1881 TlbState.freeRegsAndReleaseVars(pReNative);
1882
1883 /*
1884 * TlbDone:
1885 *
1886 * Set the new RSP value (FLAT accesses need to calculate it first) and
1887 * commit the popped register value.
1888 */
1889 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1890 }
1891#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1892
1893 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
1894 if (!f64Bit)
1895/** @todo we can skip this test in FLAT 32-bit mode. */
1896 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
1897 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1898 else if (enmEffOpSize == IEMMODE_64BIT)
1899 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
1900
1901 /* Complete RSP calculation for FLAT mode. */
1902 if (idxRegEffSp == idxRegRsp)
1903 {
1904 if (enmEffOpSize == IEMMODE_64BIT)
1905 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPop);
1906 else
1907 {
1908 Assert(enmEffOpSize == IEMMODE_32BIT);
1909 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPop);
1910 }
1911 }
1912
1913 /* Commit the result and clear any current guest shadows for RIP. */
1914 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
1915 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1916 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
1917
1918 /* Need to transfer the shadowing information to the host register containing the updated value now. */
1919 if (!fFlat)
1920 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
1921
1922 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1923 if (idxRegEffSp != idxRegRsp)
1924 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1925 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1926 return off;
1927}
1928
1929
1930/*********************************************************************************************************************************
1931* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
1932*********************************************************************************************************************************/
1933
1934#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
1935 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
1936
1937/**
1938 * Emits code to check if a \#NM exception should be raised.
1939 *
1940 * @returns New code buffer offset, UINT32_MAX on failure.
1941 * @param pReNative The native recompile state.
1942 * @param off The code buffer offset.
1943 * @param idxInstr The current instruction.
1944 */
1945DECL_INLINE_THROW(uint32_t)
1946iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1947{
1948#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1949 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
1950
1951 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
1952 {
1953#endif
1954 /*
1955 * Make sure we don't have any outstanding guest register writes as we may
1956 * raise an #NM and all guest registers must be up to date in CPUMCTX.
1957 */
1958 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
1959 off = iemNativeRegFlushPendingWrites(pReNative, off);
1960
1961#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1962 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1963#else
1964 RT_NOREF(idxInstr);
1965#endif
1966
1967 /* Allocate a temporary CR0 register. */
1968 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
1969 kIemNativeGstRegUse_ReadOnly);
1970
1971 /*
1972 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
1973 * return raisexcpt();
1974 */
1975 /* Test and jump. */
1976 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS,
1977 kIemNativeLabelType_RaiseNm);
1978
1979 /* Free but don't flush the CR0 register. */
1980 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1981
1982#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1983 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
1984 }
1985 else
1986 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
1987#endif
1988
1989 return off;
1990}
1991
1992
1993#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
1994 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
1995
1996/**
1997 * Emits code to check if a \#NM exception should be raised.
1998 *
1999 * @returns New code buffer offset, UINT32_MAX on failure.
2000 * @param pReNative The native recompile state.
2001 * @param off The code buffer offset.
2002 * @param idxInstr The current instruction.
2003 */
2004DECL_INLINE_THROW(uint32_t)
2005iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2006{
2007#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2008 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
2009
2010 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
2011 {
2012#endif
2013 /*
2014 * Make sure we don't have any outstanding guest register writes as we may
2015 * raise an #NM and all guest registers must be up to date in CPUMCTX.
2016 */
2017 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2018 off = iemNativeRegFlushPendingWrites(pReNative, off);
2019
2020#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2021 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2022#else
2023 RT_NOREF(idxInstr);
2024#endif
2025
2026 /* Allocate a temporary CR0 register. */
2027 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2028 kIemNativeGstRegUse_Calculation);
2029
2030 /*
2031 * if ((cr0 & (X86_CR0_MP | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))
2032 * return raisexcpt();
2033 */
2034 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2035 /* Test and jump. */
2036 off = iemNativeEmitTestIfGpr32EqualsImmAndTbExit(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS,
2037 kIemNativeLabelType_RaiseNm);
2038
2039 /* Free the CR0 register. */
2040 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2041
2042#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2043 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2044 }
2045 else
2046 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2047#endif
2048
2049 return off;
2050}
2051
2052
2053#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2054 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2055
2056/**
2057 * Emits code to check if a \#MF exception should be raised.
2058 *
2059 * @returns New code buffer offset, UINT32_MAX on failure.
2060 * @param pReNative The native recompile state.
2061 * @param off The code buffer offset.
2062 * @param idxInstr The current instruction.
2063 */
2064DECL_INLINE_THROW(uint32_t)
2065iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2066{
2067 /*
2068 * Make sure we don't have any outstanding guest register writes as we may
2069 * raise an #MF and all guest registers must be up to date in CPUMCTX.
2070 */
2071 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2072 off = iemNativeRegFlushPendingWrites(pReNative, off);
2073
2074#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2075 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2076#else
2077 RT_NOREF(idxInstr);
2078#endif
2079
2080 /* Allocate a temporary FSW register. */
2081 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
2082 kIemNativeGstRegUse_ReadOnly);
2083
2084 /*
2085 * if ((FSW & X86_FSW_ES) != 0)
2086 * return raisexcpt();
2087 */
2088 /* Test and jump. */
2089 off = iemNativeEmitTestBitInGprAndTbExitIfSet(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT, kIemNativeLabelType_RaiseMf);
2090
2091 /* Free but don't flush the FSW register. */
2092 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2093
2094 return off;
2095}
2096
2097
2098#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2099 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2100
2101/**
2102 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2103 *
2104 * @returns New code buffer offset, UINT32_MAX on failure.
2105 * @param pReNative The native recompile state.
2106 * @param off The code buffer offset.
2107 * @param idxInstr The current instruction.
2108 */
2109DECL_INLINE_THROW(uint32_t)
2110iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2111{
2112#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2113 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2114
2115 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2116 {
2117#endif
2118 /*
2119 * Make sure we don't have any outstanding guest register writes as we may
2120 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2121 */
2122 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2123 off = iemNativeRegFlushPendingWrites(pReNative, off);
2124
2125#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2126 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2127#else
2128 RT_NOREF(idxInstr);
2129#endif
2130
2131 /* Allocate a temporary CR0 and CR4 register. */
2132 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2133 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2134 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2135
2136 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2137#ifdef RT_ARCH_AMD64
2138 /*
2139 * We do a modified test here:
2140 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
2141 * else { goto RaiseSseRelated; }
2142 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
2143 * all targets except the 386, which doesn't support SSE, this should
2144 * be a safe assumption.
2145 */
2146 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
2147 //pCodeBuf[off++] = 0xcc;
2148 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2149 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2150 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2151 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2152 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2153 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseSseRelated, kIemNativeInstrCond_ne);
2154
2155#elif defined(RT_ARCH_ARM64)
2156 /*
2157 * We do a modified test here:
2158 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2159 * else { goto RaiseSseRelated; }
2160 */
2161 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
2162 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2163 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2164 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2165 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2166 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2167 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2168 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2169 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2170 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2171 kIemNativeLabelType_RaiseSseRelated);
2172
2173#else
2174# error "Port me!"
2175#endif
2176
2177 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2178 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2179 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2180 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2181
2182#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2183 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2184 }
2185 else
2186 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2187#endif
2188
2189 return off;
2190}
2191
2192
2193#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2194 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2195
2196/**
2197 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2198 *
2199 * @returns New code buffer offset, UINT32_MAX on failure.
2200 * @param pReNative The native recompile state.
2201 * @param off The code buffer offset.
2202 * @param idxInstr The current instruction.
2203 */
2204DECL_INLINE_THROW(uint32_t)
2205iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2206{
2207#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2208 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2209
2210 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2211 {
2212#endif
2213 /*
2214 * Make sure we don't have any outstanding guest register writes as we may
2215 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2216 */
2217 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2218 off = iemNativeRegFlushPendingWrites(pReNative, off);
2219
2220#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2221 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2222#else
2223 RT_NOREF(idxInstr);
2224#endif
2225
2226 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2227 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2228 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2229 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2230 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2231
2232 /*
2233 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2234 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2235 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2236 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2237 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2238 * { likely }
2239 * else { goto RaiseAvxRelated; }
2240 */
2241#ifdef RT_ARCH_AMD64
2242 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2243 | (((cr4 >> X86_CR4_OSXSAVE_BIT) & 1) << 1)
2244 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2245 ^ 0x1a) ) { likely }
2246 else { goto RaiseAvxRelated; } */
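/* The XOR constant 0x1a is the expected "all good" pattern after the two rotates below:
   CR0.TS clear (bit 0), CR4.OSXSAVE set (bit 1), XCR0.SSE set (bit 3), XCR0.YMM set (bit 4),
   i.e. ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | RT_BIT_32(1) == 0x1a. */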
2247 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
2248 //pCodeBuf[off++] = 0xcc;
2249 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2250 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2251 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2252 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2253 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2254 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2255 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2256 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2257 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2258 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2259 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseAvxRelated, kIemNativeInstrCond_ne);
2260
2261#elif defined(RT_ARCH_ARM64)
2262 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSXSAVE_BIT) & 1)) ^ 7) << 1)
2263 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
2264 else { goto RaiseAvxRelated; } */
2265 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
2266 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2267 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2268 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2269 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2270 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2271 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2272 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2273 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2274 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2275 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2276 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2277 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2278 kIemNativeLabelType_RaiseAvxRelated);
2279
2280#else
2281# error "Port me!"
2282#endif
2283
2284 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2285 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2286 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2287 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2288#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2289 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2290 }
2291 else
2292 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2293#endif
2294
2295 return off;
2296}
2297
2298
2299#define IEM_MC_RAISE_DIVIDE_ERROR() \
2300 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
2301
2302/**
2303 * Emits code to raise a \#DE.
2304 *
2305 * @returns New code buffer offset, UINT32_MAX on failure.
2306 * @param pReNative The native recompile state.
2307 * @param off The code buffer offset.
2308 * @param idxInstr The current instruction.
2309 */
2310DECL_INLINE_THROW(uint32_t)
2311iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2312{
2313 /*
2314 * Make sure we don't have any outstanding guest register writes as we may raise a #DE and all guest registers must be up to date in CPUMCTX.
2315 */
2316 off = iemNativeRegFlushPendingWrites(pReNative, off);
2317
2318#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2319 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2320#else
2321 RT_NOREF(idxInstr);
2322#endif
2323
2324 /* raise \#DE exception unconditionally. */
2325 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseDe);
2326}
2327
2328
2329#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2330 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2331
2332/**
2333 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2334 *
2335 * @returns New code buffer offset, UINT32_MAX on failure.
2336 * @param pReNative The native recompile state.
2337 * @param off The code buffer offset.
2338 * @param idxInstr The current instruction.
2339 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2340 * @param cbAlign The alignment in bytes to check against.
2341 */
2342DECL_INLINE_THROW(uint32_t)
2343iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
2344 uint8_t idxVarEffAddr, uint8_t cbAlign)
2345{
2346 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2347 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2348
2349 /*
2350 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2351 */
2352 off = iemNativeRegFlushPendingWrites(pReNative, off);
2353
2354#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2355 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2356#else
2357 RT_NOREF(idxInstr);
2358#endif
2359
2360 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2361
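/* ASSUMES cbAlign is a power of two: the address is misaligned iff (EffAddr & (cbAlign - 1)) != 0,
   e.g. for cbAlign=16 any of the low four address bits being set raises #GP(0). */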
2362 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxVarReg, cbAlign - 1,
2363 kIemNativeLabelType_RaiseGp0);
2364
2365 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2366 return off;
2367}
2368
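/*
 * A minimal plain-C sketch of the predicate the emitter above encodes, assuming
 * cbAlign is a power of two (which the mask trick requires); the function name
 * is illustrative only and not part of the recompiler API:
 *
 *      static bool iemExampleIsEffAddrMisaligned(uint64_t uEffAddr, uint8_t cbAlign)
 *      {
 *          // Any bit set below the alignment boundary means the address is unaligned -> #GP(0).
 *          return (uEffAddr & (uint64_t)(cbAlign - 1)) != 0;
 *      }
 */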
2369
2370/*********************************************************************************************************************************
2371* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2372*********************************************************************************************************************************/
2373
2374/**
2375 * Pushes an IEM_MC_IF_XXX onto the condition stack.
2376 *
2377 * @returns Pointer to the condition stack entry on success; throws
2378 * VERR_IEM_COND_TOO_DEEPLY_NESTED if nested too deeply.
2379 */
2380DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
2381{
2382#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2383 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
2384#endif
2385
2386 uint32_t const idxStack = pReNative->cCondDepth;
2387 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
2388
2389 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
2390 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
2391
2392 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
2393 pEntry->fInElse = false;
2394 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
2395 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
2396
2397 return pEntry;
2398}
2399
2400
2401/**
2402 * Start of the if-block, snapshotting the register and variable state.
2403 */
2404DECL_INLINE_THROW(void)
2405iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
2406{
2407 Assert(offIfBlock != UINT32_MAX);
2408 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2409 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2410 Assert(!pEntry->fInElse);
2411
2412 /* Define the start of the IF block if requested or for disassembly purposes. */
2413 if (idxLabelIf != UINT32_MAX)
2414 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
2415#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2416 else
2417 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
2418#else
2419 RT_NOREF(offIfBlock);
2420#endif
2421
2422#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2423 Assert(pReNative->Core.offPc == 0);
2424#endif
2425
2426 /* Copy the initial state so we can restore it in the 'else' block. */
2427 pEntry->InitialState = pReNative->Core;
2428}
2429
2430
2431#define IEM_MC_ELSE() } while (0); \
2432 off = iemNativeEmitElse(pReNative, off); \
2433 do {
2434
2435/** Emits code related to IEM_MC_ELSE. */
2436DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2437{
2438 /* Check sanity and get the conditional stack entry. */
2439 Assert(off != UINT32_MAX);
2440 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2441 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2442 Assert(!pEntry->fInElse);
2443
2444#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2445 /* Writeback any dirty shadow registers. */
2446 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
2447 * in one of the branches and leave guest registers already dirty before the start of the if
2448 * block alone. */
2449 off = iemNativeRegFlushDirtyGuest(pReNative, off);
2450#endif
2451
2452 /* Jump to the endif */
2453 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
2454
2455 /* Define the else label and enter the else part of the condition. */
2456 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2457 pEntry->fInElse = true;
2458
2459#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2460 Assert(pReNative->Core.offPc == 0);
2461#endif
2462
2463 /* Snapshot the core state so we can do a merge at the endif and restore
2464 the snapshot we took at the start of the if-block. */
2465 pEntry->IfFinalState = pReNative->Core;
2466 pReNative->Core = pEntry->InitialState;
2467
2468 return off;
2469}
2470
2471
2472#define IEM_MC_ENDIF() } while (0); \
2473 off = iemNativeEmitEndIf(pReNative, off)
2474
2475/** Emits code related to IEM_MC_ENDIF. */
2476DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2477{
2478 /* Check sanity and get the conditional stack entry. */
2479 Assert(off != UINT32_MAX);
2480 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2481 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2482
2483#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2484 Assert(pReNative->Core.offPc == 0);
2485#endif
2486#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2487 /* Writeback any dirty shadow registers (else branch). */
2488 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
2489 * in one of the branches and leave guest registers already dirty before the start of the if
2490 * block alone. */
2491 off = iemNativeRegFlushDirtyGuest(pReNative, off);
2492#endif
2493
2494 /*
2495 * Now we have to find common ground with the core state at the end of the
2496 * if-block (or the initial state when there is no else-block). Use the smallest
2497 * common denominator and just drop anything that isn't the same in both states.
2498 */
2499 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
2500 * which is why we're doing this at the end of the else-block.
2501 * But we'd need more info about the future for that to be worth the effort. */
2502 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
2503#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2504 Assert( pOther->bmGstRegShadowDirty == 0
2505 && pReNative->Core.bmGstRegShadowDirty == 0);
2506#endif
2507
2508 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
2509 {
2510 /* shadow guest stuff first. */
2511 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
2512 if (fGstRegs)
2513 {
2514 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
2515 do
2516 {
2517 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2518 fGstRegs &= ~RT_BIT_64(idxGstReg);
2519
2520 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
2521 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
2522 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
2523 {
2524 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
2525 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
2526
2527#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2528 /* Writeback any dirty shadow registers we are about to unshadow. */
2529 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxHstReg);
2530#endif
2531 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
2532 }
2533 } while (fGstRegs);
2534 }
2535 else
2536 {
2537 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
2538#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2539 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
2540#endif
2541 }
2542
2543 /* Check variables next. For now we must require them to be identical
2544 or stuff we can recreate. */
2545 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
2546 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
2547 if (fVars)
2548 {
2549 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
2550 do
2551 {
2552 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
2553 fVars &= ~RT_BIT_32(idxVar);
2554
2555 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
2556 {
2557 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
2558 continue;
2559 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2560 {
2561 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
2562 if (idxHstReg != UINT8_MAX)
2563 {
2564 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2565 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2566 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
2567 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2568 }
2569 continue;
2570 }
2571 }
2572 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
2573 continue;
2574
2575 /* Irreconcilable, so drop it. */
2576 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
2577 if (idxHstReg != UINT8_MAX)
2578 {
2579 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2580 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2581 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
2582 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2583 }
2584 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2585 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
2586 } while (fVars);
2587 }
2588
2589 /* Finally, check that the host register allocations match. */
2590 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
2591 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
2592 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
2593 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
2594 }
2595
2596 /*
2597 * Define the endif label and maybe the else one if we're still in the 'if' part.
2598 */
2599 if (!pEntry->fInElse)
2600 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2601 else
2602 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
2603 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
2604
2605 /* Pop the conditional stack. */
2606 pReNative->cCondDepth -= 1;
2607
2608 return off;
2609}
2610
2611
2612#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
2613 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
2614 do {
2615
2616/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
2617DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
2618{
2619 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
2620 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2621
2622 /* Get the eflags. */
2623 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2624 kIemNativeGstRegUse_ReadOnly);
2625
2626 /* Test and jump. */
2627 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
2628
2629 /* Free but don't flush the EFlags register. */
2630 iemNativeRegFreeTmp(pReNative, idxEflReg);
2631
2632 /* Make a copy of the core state now as we start the if-block. */
2633 iemNativeCondStartIfBlock(pReNative, off);
2634
2635 return off;
2636}
2637
2638
2639#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
2640 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
2641 do {
2642
2643/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
2644DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
2645{
2646 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
2647 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2648
2649 /* Get the eflags. */
2650 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2651 kIemNativeGstRegUse_ReadOnly);
2652
2653 /* Test and jump. */
2654 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
2655
2656 /* Free but don't flush the EFlags register. */
2657 iemNativeRegFreeTmp(pReNative, idxEflReg);
2658
2659 /* Make a copy of the core state now as we start the if-block. */
2660 iemNativeCondStartIfBlock(pReNative, off);
2661
2662 return off;
2663}
2664
2665
2666#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
2667 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
2668 do {
2669
2670/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
2671DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
2672{
2673 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2674 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2675
2676 /* Get the eflags. */
2677 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2678 kIemNativeGstRegUse_ReadOnly);
2679
2680 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2681 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2682
2683 /* Test and jump. */
2684 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2685
2686 /* Free but don't flush the EFlags register. */
2687 iemNativeRegFreeTmp(pReNative, idxEflReg);
2688
2689 /* Make a copy of the core state now as we start the if-block. */
2690 iemNativeCondStartIfBlock(pReNative, off);
2691
2692 return off;
2693}
2694
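/*
 * How the pieces above fit together, as a hedged sketch: a decoder MC body
 * written with these macros (the branch bodies below are placeholders, not
 * actual MC statements from this file) ...
 *
 *      IEM_MC_IF_EFL_BIT_SET(X86_EFL_CF) {
 *          // ... true-branch MC statements ...
 *      } IEM_MC_ELSE() {
 *          // ... false-branch MC statements ...
 *      } IEM_MC_ENDIF();
 *
 * ... expands so that iemNativeEmitIfEflagsBitSet pushes a condition-stack
 * entry and branches to the else-label when CF is clear, iemNativeEmitElse
 * jumps to the endif-label, defines the else-label and restores the snapshot
 * taken at the start of the if-block, and iemNativeEmitEndIf reconciles the
 * two register/variable states and defines the endif-label.
 */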
2695
2696#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
2697 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
2698 do {
2699
2700/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
2701DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
2702{
2703 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2704 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2705
2706 /* Get the eflags. */
2707 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2708 kIemNativeGstRegUse_ReadOnly);
2709
2710 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2711 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2712
2713 /* Test and jump. */
2714 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2715
2716 /* Free but don't flush the EFlags register. */
2717 iemNativeRegFreeTmp(pReNative, idxEflReg);
2718
2719 /* Make a copy of the core state now as we start the if-block. */
2720 iemNativeCondStartIfBlock(pReNative, off);
2721
2722 return off;
2723}
2724
2725
2726#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
2727 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
2728 do {
2729
2730#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
2731 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
2732 do {
2733
2734/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
2735DECL_INLINE_THROW(uint32_t)
2736iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2737 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
2738{
2739 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
2740 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2741
2742 /* Get the eflags. */
2743 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2744 kIemNativeGstRegUse_ReadOnly);
2745
2746 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
2747 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
2748
2749 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
2750 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
2751 Assert(iBitNo1 != iBitNo2);
2752
2753#ifdef RT_ARCH_AMD64
2754 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
2755
2756 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2757 if (iBitNo1 > iBitNo2)
2758 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
2759 else
2760 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
2761 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2762
2763#elif defined(RT_ARCH_ARM64)
2764 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2765 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2766
2767 /* and tmpreg, eflreg, #1<<iBitNo1 */
2768 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
2769
2770 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
2771 if (iBitNo1 > iBitNo2)
2772 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2773 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
2774 else
2775 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2776 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
2777
2778 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2779
2780#else
2781# error "Port me"
2782#endif
2783
2784 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
2785 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
2786 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
2787
2788 /* Free but don't flush the EFlags and tmp registers. */
2789 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2790 iemNativeRegFreeTmp(pReNative, idxEflReg);
2791
2792 /* Make a copy of the core state now as we start the if-block. */
2793 iemNativeCondStartIfBlock(pReNative, off);
2794
2795 return off;
2796}
2797
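/*
 * Plain-C sketch of the bit-comparison trick emitted above (illustrative name,
 * not recompiler API): isolate flag bit #1, shift it onto bit #2's position and
 * XOR with the original EFLAGS value; bit #2 of the result is then set exactly
 * when the two flag bits differ.
 *
 *      static bool iemExampleEflBitsDiffer(uint32_t fEfl, unsigned iBitNo1, unsigned iBitNo2)
 *      {
 *          uint32_t uTmp = fEfl & RT_BIT_32(iBitNo1);
 *          uTmp = iBitNo1 > iBitNo2 ? uTmp >> (iBitNo1 - iBitNo2) : uTmp << (iBitNo2 - iBitNo1);
 *          uTmp ^= fEfl;
 *          return (uTmp & RT_BIT_32(iBitNo2)) != 0; // set -> bits not equal
 *      }
 */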
2798
2799#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
2800 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
2801 do {
2802
2803#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
2804 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
2805 do {
2806
2807/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
2808 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
2809DECL_INLINE_THROW(uint32_t)
2810iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
2811 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
2812{
2813 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
2814 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2815
2816 /* We need an if-block label for the non-inverted variant. */
2817 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
2818 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
2819
2820 /* Get the eflags. */
2821 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2822 kIemNativeGstRegUse_ReadOnly);
2823
2824 /* Translate the flag masks to bit numbers. */
2825 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2826 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2827
2828 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
2829 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
2830 Assert(iBitNo1 != iBitNo);
2831
2832 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
2833 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
2834 Assert(iBitNo2 != iBitNo);
2835 Assert(iBitNo2 != iBitNo1);
2836
2837#ifdef RT_ARCH_AMD64
2838 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
2839#elif defined(RT_ARCH_ARM64)
2840 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2841#endif
2842
2843 /* Check for the lone bit first. */
2844 if (!fInverted)
2845 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2846 else
2847 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
2848
2849 /* Then extract and compare the other two bits. */
2850#ifdef RT_ARCH_AMD64
2851 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2852 if (iBitNo1 > iBitNo2)
2853 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
2854 else
2855 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
2856 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2857
2858#elif defined(RT_ARCH_ARM64)
2859 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2860
2861 /* and tmpreg, eflreg, #1<<iBitNo1 */
2862 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
2863
2864 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
2865 if (iBitNo1 > iBitNo2)
2866 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2867 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
2868 else
2869 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2870 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
2871
2872 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2873
2874#else
2875# error "Port me"
2876#endif
2877
2878 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
2879 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
2880 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
2881
2882 /* Free but don't flush the EFlags and tmp registers. */
2883 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2884 iemNativeRegFreeTmp(pReNative, idxEflReg);
2885
2886 /* Make a copy of the core state now as we start the if-block. */
2887 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
2888
2889 return off;
2890}
2891
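/*
 * The non-inverted variant above therefore enters the if-block only when the
 * lone flag bit is clear and the other two bits are equal; as a hedged plain-C
 * sketch (illustrative name, not recompiler API):
 *
 *      static bool iemExampleBitClearAndBitsEqual(uint32_t fEfl, uint32_t fBit, uint32_t fBit1, uint32_t fBit2)
 *      {
 *          bool const fBit1Set = RT_BOOL(fEfl & fBit1);
 *          bool const fBit2Set = RT_BOOL(fEfl & fBit2);
 *          return !(fEfl & fBit) && fBit1Set == fBit2Set;
 *      }
 *
 * With ZF as the lone bit and SF/OF as the pair this gives the classic
 * 'greater (signed)' style condition.
 */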
2892
2893#define IEM_MC_IF_CX_IS_NZ() \
2894 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
2895 do {
2896
2897/** Emits code for IEM_MC_IF_CX_IS_NZ. */
2898DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2899{
2900 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2901
2902 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2903 kIemNativeGstRegUse_ReadOnly);
2904 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
2905 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2906
2907 iemNativeCondStartIfBlock(pReNative, off);
2908 return off;
2909}
2910
2911
2912#define IEM_MC_IF_ECX_IS_NZ() \
2913 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
2914 do {
2915
2916#define IEM_MC_IF_RCX_IS_NZ() \
2917 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
2918 do {
2919
2920/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
2921DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
2922{
2923 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2924
2925 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2926 kIemNativeGstRegUse_ReadOnly);
2927 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
2928 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2929
2930 iemNativeCondStartIfBlock(pReNative, off);
2931 return off;
2932}
2933
2934
2935#define IEM_MC_IF_CX_IS_NOT_ONE() \
2936 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
2937 do {
2938
2939/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
2940DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2941{
2942 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2943
2944 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2945 kIemNativeGstRegUse_ReadOnly);
2946#ifdef RT_ARCH_AMD64
2947 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2948#else
2949 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2950 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
2951 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2952#endif
2953 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2954
2955 iemNativeCondStartIfBlock(pReNative, off);
2956 return off;
2957}
2958
2959
2960#define IEM_MC_IF_ECX_IS_NOT_ONE() \
2961 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
2962 do {
2963
2964#define IEM_MC_IF_RCX_IS_NOT_ONE() \
2965 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
2966 do {
2967
2968/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
2969DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
2970{
2971 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2972
2973 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2974 kIemNativeGstRegUse_ReadOnly);
2975 if (f64Bit)
2976 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2977 else
2978 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2979 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2980
2981 iemNativeCondStartIfBlock(pReNative, off);
2982 return off;
2983}
2984
2985
2986#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
2987 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
2988 do {
2989
2990#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
2991 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
2992 do {
2993
2994/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
2995 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
2996DECL_INLINE_THROW(uint32_t)
2997iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
2998{
2999 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3000 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3001
3002 /* We have to load both RCX and EFLAGS before we can start branching,
3003 otherwise we'll end up in the else-block with an inconsistent
3004 register allocator state.
3005 Doing EFLAGS first as it's more likely to be loaded, right? */
3006 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3007 kIemNativeGstRegUse_ReadOnly);
3008 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3009 kIemNativeGstRegUse_ReadOnly);
3010
3011 /** @todo we could reduce this to a single branch instruction by spending a
3012 * temporary register and some setnz stuff. Not sure if loops are
3013 * worth it. */
3014 /* Check CX. */
3015#ifdef RT_ARCH_AMD64
3016 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3017#else
3018 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3019 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3020 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3021#endif
3022
3023 /* Check the EFlags bit. */
3024 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3025 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3026 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3027 !fCheckIfSet /*fJmpIfSet*/);
3028
3029 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3030 iemNativeRegFreeTmp(pReNative, idxEflReg);
3031
3032 iemNativeCondStartIfBlock(pReNative, off);
3033 return off;
3034}
3035
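/*
 * Hedged plain-C sketch of the combined predicate checked above (the
 * fCheckIfSet variant); the name is illustrative only:
 *
 *      static bool iemExampleCxNotOneAndEflBitSet(uint16_t uCx, uint32_t fEfl, uint32_t fBitInEfl)
 *      {
 *          return uCx != 1 && (fEfl & fBitInEfl) != 0;
 *      }
 *
 * The comparison against 1 rather than 0 matches loop-style flow where CX is
 * only decremented after the condition has been evaluated.
 */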
3036
3037#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3038 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
3039 do {
3040
3041#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3042 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
3043 do {
3044
3045#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3046 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
3047 do {
3048
3049#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3050 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
3051 do {
3052
3053/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3054 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3055 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3056 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3057DECL_INLINE_THROW(uint32_t)
3058iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3059 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
3060{
3061 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3062 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3063
3064 /* We have to load both RCX and EFLAGS before we can start branching,
3065 otherwise we'll end up in the else-block with an inconsistent
3066 register allocator state.
3067 Doing EFLAGS first as it's more likely to be loaded, right? */
3068 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3069 kIemNativeGstRegUse_ReadOnly);
3070 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3071 kIemNativeGstRegUse_ReadOnly);
3072
3073 /** @todo we could reduce this to a single branch instruction by spending a
3074 * temporary register and some setnz stuff. Not sure if loops are
3075 * worth it. */
3076 /* Check RCX/ECX. */
3077 if (f64Bit)
3078 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3079 else
3080 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3081
3082 /* Check the EFlags bit. */
3083 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3084 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3085 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3086 !fCheckIfSet /*fJmpIfSet*/);
3087
3088 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3089 iemNativeRegFreeTmp(pReNative, idxEflReg);
3090
3091 iemNativeCondStartIfBlock(pReNative, off);
3092 return off;
3093}
3094
3095
3096#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3097 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3098 do {
3099
3100/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3101DECL_INLINE_THROW(uint32_t)
3102iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3103{
3104 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3105
3106 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3107 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3108 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3109 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3110
3111 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3112
3113 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3114
3115 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3116
3117 iemNativeCondStartIfBlock(pReNative, off);
3118 return off;
3119}
3120
3121
3122#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3123 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3124 do {
3125
3126/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3127DECL_INLINE_THROW(uint32_t)
3128iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3129{
3130 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3131 Assert(iGReg < 16);
3132
3133 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3134 kIemNativeGstRegUse_ReadOnly);
3135
3136 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3137
3138 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3139
3140 iemNativeCondStartIfBlock(pReNative, off);
3141 return off;
3142}
3143
3144
3145
3146/*********************************************************************************************************************************
3147* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3148*********************************************************************************************************************************/
3149
3150#define IEM_MC_NOREF(a_Name) \
3151 RT_NOREF_PV(a_Name)
3152
3153#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3154 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3155
3156#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3157 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3158
3159#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3160 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3161
3162#define IEM_MC_LOCAL(a_Type, a_Name) \
3163 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3164
3165#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3166 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3167
3168#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3169 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
3170
3171
3172/**
3173 * Sets the host register for @a idxVarRc to @a idxReg.
3174 *
3175 * The register must not be allocated. Any guest register shadowing will be
3176 * implicitly dropped by this call.
3177 *
3178 * The variable must not have any register associated with it (causes
3179 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3180 * implied.
3181 *
3182 * @returns idxReg
3183 * @param pReNative The recompiler state.
3184 * @param idxVar The variable.
3185 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3186 * @param off For recording in debug info.
3187 *
3188 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
3189 */
3190DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
3191{
3192 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3193 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3194 Assert(!pVar->fRegAcquired);
3195 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3196 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
3197 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3198
3199 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3200 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3201
3202 iemNativeVarSetKindToStack(pReNative, idxVar);
3203 pVar->idxReg = idxReg;
3204
3205 return idxReg;
3206}
3207
3208
3209/**
3210 * A convenient helper function.
3211 */
3212DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
3213 uint8_t idxReg, uint32_t *poff)
3214{
3215 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
3216 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
3217 return idxReg;
3218}
3219
3220
3221/**
3222 * This is called by IEM_MC_END() to clean up all variables.
3223 */
3224DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
3225{
3226 uint32_t const bmVars = pReNative->Core.bmVars;
3227 if (bmVars != 0)
3228 iemNativeVarFreeAllSlow(pReNative, bmVars);
3229 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
3230 Assert(pReNative->Core.bmStack == 0);
3231}
3232
3233
3234#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
3235
3236/**
3237 * This is called by IEM_MC_FREE_LOCAL.
3238 */
3239DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3240{
3241 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3242 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
3243 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3244}
3245
3246
3247#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
3248
3249/**
3250 * This is called by IEM_MC_FREE_ARG.
3251 */
3252DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3253{
3254 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3255 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
3256 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3257}
3258
3259
3260#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
3261
3262/**
3263 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
3264 */
3265DECL_INLINE_THROW(uint32_t)
3266iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
3267{
3268 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
3269 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
3270 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3271 Assert( pVarDst->cbVar == sizeof(uint16_t)
3272 || pVarDst->cbVar == sizeof(uint32_t));
3273
3274 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
3275 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
3276 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
3277 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
3278 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3279
3280 Assert(pVarDst->cbVar < pVarSrc->cbVar);
3281
3282 /*
3283 * Special case for immediates.
3284 */
3285 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
3286 {
3287 switch (pVarDst->cbVar)
3288 {
3289 case sizeof(uint16_t):
3290 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
3291 break;
3292 case sizeof(uint32_t):
3293 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
3294 break;
3295 default: AssertFailed(); break;
3296 }
3297 }
3298 else
3299 {
3300 /*
3301 * The generic solution for now.
3302 */
3303 /** @todo optimize this by having the python script make sure the source
3304 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
3305 * statement. Then we could just transfer the register assignments. */
3306 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
3307 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
3308 switch (pVarDst->cbVar)
3309 {
3310 case sizeof(uint16_t):
3311 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
3312 break;
3313 case sizeof(uint32_t):
3314 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
3315 break;
3316 default: AssertFailed(); break;
3317 }
3318 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
3319 iemNativeVarRegisterRelease(pReNative, idxVarDst);
3320 }
3321 return off;
3322}
3323
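/*
 * Semantically IEM_MC_ASSIGN_TO_SMALLER is just a narrowing copy; a hedged
 * plain-C sketch of the 32-bit destination case (illustrative name only):
 *
 *      static uint32_t iemExampleAssignToSmallerU32(uint64_t uSrc)
 *      {
 *          return (uint32_t)uSrc; // the upper bits of the wider source are dropped
 *      }
 *
 * The emitter merely picks between doing this at recompile time (immediate
 * source) and emitting a 16/32-bit register-to-register load at runtime.
 */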
3324
3325
3326/*********************************************************************************************************************************
3327* Emitters for IEM_MC_CALL_CIMPL_XXX *
3328*********************************************************************************************************************************/
3329
3330/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
3331DECL_INLINE_THROW(uint32_t)
3332iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
3333 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
3334
3335{
3336 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
3337
3338#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3339 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
3340 when a call clobbers any of the relevant control registers. */
3341# if 1
3342 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
3343 {
3344 /* Likely as long as call+ret are done via cimpl. */
3345 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
3346 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
3347 }
3348 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
3349 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
3350 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
3351 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3352 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
3353 else
3354 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3355 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
3356 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
3357
3358# else
3359 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
3360 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
3361 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
3362 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3363 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
3364 || pfnCImpl == (uintptr_t)iemCImpl_callf
3365 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
3366 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
3367 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3368 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
3369 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
3370# endif
3371
3372# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
3373 /* Mark the host floating point control register as not synced if MXCSR is modified. */
3374 if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_MxCsr))
3375 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
3376# endif
3377#endif
3378
3379 /*
3380 * Do all the call setup and cleanup.
3381 */
3382 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
3383
3384 /*
3385 * Load the two or three hidden arguments.
3386 */
3387#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
3388 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3389 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3390 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
3391#else
3392 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3393 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
3394#endif
3395
3396 /*
3397 * Make the call and check the return code.
3398 *
3399 * Shadow PC copies are always flushed here; other stuff depends on flags.
3400 * Segment and general purpose registers are explicitly flushed via the
3401 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
3402 * macros.
3403 */
3404 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
3405#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
3406 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3407#endif
3408 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
3409 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
3410 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
3411 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
3412
3413 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3414}
3415
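/*
 * Conceptually the code emitted above boils down to something like the
 * following call (hedged sketch; the Windows/AMD64 strict-rc configuration
 * additionally passes a hidden shadow argument for the VBOXSTRICTRC return):
 *
 *      rcStrict = pfnCImpl(pVCpu, cbInstr, a0, a1, ...);
 *
 * i.e. the hidden arguments are the VMCPU pointer and the instruction length,
 * followed by the explicit IEM_MC_ARG values, with the return code then fed
 * into iemNativeEmitCheckCallRetAndPassUp().
 */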
3416
3417#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
3418 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
3419
3420/** Emits code for IEM_MC_CALL_CIMPL_1. */
3421DECL_INLINE_THROW(uint32_t)
3422iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3423 uintptr_t pfnCImpl, uint8_t idxArg0)
3424{
3425 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3426 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
3427}
3428
3429
3430#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
3431 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
3432
3433/** Emits code for IEM_MC_CALL_CIMPL_2. */
3434DECL_INLINE_THROW(uint32_t)
3435iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3436 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
3437{
3438 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3439 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3440 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
3441}
3442
3443
3444#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
3445 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3446 (uintptr_t)a_pfnCImpl, a0, a1, a2)
3447
3448/** Emits code for IEM_MC_CALL_CIMPL_3. */
3449DECL_INLINE_THROW(uint32_t)
3450iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3451 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
3452{
3453 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3454 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3455 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3456 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
3457}
3458
3459
3460#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
3461 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3462 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
3463
3464/** Emits code for IEM_MC_CALL_CIMPL_4. */
3465DECL_INLINE_THROW(uint32_t)
3466iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3467 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
3468{
3469 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3470 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3471 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3472 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
3473 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
3474}
3475
3476
3477#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
3478 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3479 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
3480
3481/** Emits code for IEM_MC_CALL_VOID_AIMPL_5 and IEM_MC_CALL_CIMPL_5. */
3482DECL_INLINE_THROW(uint32_t)
3483iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3484 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
3485{
3486 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3487 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3488 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3489 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
3490 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
3491 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
3492}
3493
3494
3495/** Recompiler debugging: Flush guest register shadow copies. */
3496#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
3497
3498
3499
3500/*********************************************************************************************************************************
3501* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
3502*********************************************************************************************************************************/
3503
3504/**
3505 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
3506 */
3507DECL_INLINE_THROW(uint32_t)
3508iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3509 uintptr_t pfnAImpl, uint8_t cArgs)
3510{
3511 if (idxVarRc != UINT8_MAX)
3512 {
3513 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
3514 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
3515 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3516 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3517 }
3518
3519 /*
3520 * Do all the call setup and cleanup.
3521 *
3522 * It is only required to flush pending guest register writes in call volatile registers as
3523 * assembly helpers can't throw and don't access anything living in CPUMCTX, they only
3524 * access parameters. The flushing of call volatile registers is always done in iemNativeEmitCallCommon()
3525 * no matter the fFlushPendingWrites parameter.
3526 */
3527 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
3528
3529 /*
3530 * Make the call and update the return code variable if we've got one.
3531 */
3532 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
3533 if (idxVarRc != UINT8_MAX)
3534 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
3535
3536 return off;
3537}
3538
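/*
 * Hedged sketch of what the emitted call amounts to for the variants with a
 * return value (note there are no hidden arguments here, unlike the C-impl
 * calls above):
 *
 *      a_rc = pfnAImpl(a0, a1, ...);
 *
 * where the return value simply stays in IEMNATIVE_CALL_RET_GREG and that
 * register is bound to the a_rc variable via iemNativeVarRegisterSet().
 */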
3539
3540
3541#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
3542 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
3543
3544#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
3545 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
3546
3547/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
3548DECL_INLINE_THROW(uint32_t)
3549iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
3550{
3551 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
3552}
3553
3554
3555#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
3556 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
3557
3558#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
3559 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
3560
3561/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
3562DECL_INLINE_THROW(uint32_t)
3563iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
3564{
3565 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3566 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
3567}
3568
3569
3570#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
3571 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
3572
3573#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
3574 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
3575
3576/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
3577DECL_INLINE_THROW(uint32_t)
3578iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3579 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
3580{
3581 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3582 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3583 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
3584}
3585
3586
3587#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
3588 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
3589
3590#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
3591 IEM_MC_LOCAL(a_rcType, a_rc); \
3592 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
3593
3594/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
3595DECL_INLINE_THROW(uint32_t)
3596iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3597 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
3598{
3599 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3600 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3601 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
3602 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
3603}
3604
3605
3606#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
3607 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
3608
3609#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
3610 IEM_MC_LOCAL(a_rcType, a_rc); \
3611 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
3612
3613/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
3614DECL_INLINE_THROW(uint32_t)
3615iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3616 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
3617{
3618 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3619 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3620 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
3621 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
3622 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
3623}
3624
3625
3626
3627/*********************************************************************************************************************************
3628* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
3629*********************************************************************************************************************************/
3630
3631#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
3632 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
3633
3634#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
3635 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
3636
3637#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
3638 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
3639
3640#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
3641 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
3642
3643
3644/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
3645 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
3646DECL_INLINE_THROW(uint32_t)
3647iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
3648{
3649 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3650 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3651 Assert(iGRegEx < 20);
3652
3653 /* Same discussion as in iemNativeEmitFetchGregU16 */
3654 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3655 kIemNativeGstRegUse_ReadOnly);
3656
3657 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3658 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3659
3660 /* The value is zero-extended to the full 64-bit host register width. */
3661 if (iGRegEx < 16)
3662 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3663 else
3664 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
3665
3666 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3667 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3668 return off;
3669}
3670
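/*
 * Note: going by the 'iGRegEx & 15' masking and the 'iGRegEx < 20' assertion,
 * extended 8-bit register indexes 0..15 select the low byte of the
 * corresponding GPR while 16..19 select the high byte (AH, CH, DH, BH) of
 * the first four GPRs.  A (hypothetical) fetch with iGRegEx = 17 would thus
 * zero-extend guest CH, i.e. bits 15:8 of RCX, into the host register
 * backing the destination variable.
 */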
3671
3672#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
3673 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
3674
3675#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
3676 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
3677
3678#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
3679 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
3680
3681/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
3682DECL_INLINE_THROW(uint32_t)
3683iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
3684{
3685 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3686 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
3687 Assert(iGRegEx < 20);
3688
3689 /* Same discussion as in iemNativeEmitFetchGregU16 */
3690 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3691 kIemNativeGstRegUse_ReadOnly);
3692
3693 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3694 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3695
3696 if (iGRegEx < 16)
3697 {
3698 switch (cbSignExtended)
3699 {
3700 case sizeof(uint16_t):
3701 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3702 break;
3703 case sizeof(uint32_t):
3704 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3705 break;
3706 case sizeof(uint64_t):
3707 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3708 break;
3709 default: AssertFailed(); break;
3710 }
3711 }
3712 else
3713 {
3714 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
3715 switch (cbSignExtended)
3716 {
3717 case sizeof(uint16_t):
3718 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3719 break;
3720 case sizeof(uint32_t):
3721 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3722 break;
3723 case sizeof(uint64_t):
3724 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3725 break;
3726 default: AssertFailed(); break;
3727 }
3728 }
3729
3730 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3731 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3732 return off;
3733}
3734
3735
3736
3737#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
3738 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
3739
3740#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
3741 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
3742
3743#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
3744 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
3745
3746/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
3747DECL_INLINE_THROW(uint32_t)
3748iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
3749{
3750 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3751 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3752 Assert(iGReg < 16);
3753
3754 /*
3755 * We can either just load the low 16-bit of the GPR into a host register
3756 * for the variable, or we can do so via a shadow copy host register. The
3757 * latter will avoid having to reload it if it's being stored later, but
3758 * will waste a host register if it isn't touched again. Since we don't
3759 * know what's going to happen, we choose the latter for now.
3760 */
3761 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3762 kIemNativeGstRegUse_ReadOnly);
3763
3764 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3765 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3766 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3767 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3768
3769 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3770 return off;
3771}
3772
3773#define IEM_MC_FETCH_GREG_I16(a_i16Dst, a_iGReg) \
3774 off = iemNativeEmitFetchGregI16(pReNative, off, a_i16Dst, a_iGReg)
3775
3776/** Emits code for IEM_MC_FETCH_GREG_I16. */
3777DECL_INLINE_THROW(uint32_t)
3778iemNativeEmitFetchGregI16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
3779{
3780 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3781 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(int16_t));
3782 Assert(iGReg < 16);
3783
3784 /*
3785 * We can either just load the low 16-bit of the GPR into a host register
3786 * for the variable, or we can do so via a shadow copy host register. The
3787 * latter will avoid having to reload it if it's being stored later, but
3788 * will waste a host register if it isn't touched again. Since we don't
3789 * know what's going to happen, we choose the latter for now.
3790 */
3791 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3792 kIemNativeGstRegUse_ReadOnly);
3793
3794 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3795 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3796#ifdef RT_ARCH_AMD64
3797 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3798#elif defined(RT_ARCH_ARM64) /* Note! There are no 16-bit registers on ARM; we emulate them through 32-bit registers, which requires sign extension. */
3799 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3800#endif
3801 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3802
3803 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3804 return off;
3805}
3806
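/*
 * Note: on ARM64 the int16_t ends up sign-extended in the 32-bit host
 * register (e.g. guest BX = 0xfff0 becomes 0xfffffff0), presumably so that
 * later 32-bit operations on the emulated 16-bit quantity see the correct
 * sign, whereas the AMD64 path can keep using the native 16-bit register
 * view.
 */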
3807
3808#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
3809 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
3810
3811#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
3812 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
3813
3814/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
3815DECL_INLINE_THROW(uint32_t)
3816iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
3817{
3818 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3819 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
3820 Assert(iGReg < 16);
3821
3822 /*
3823 * We can either just load the low 16-bit of the GPR into a host register
3824 * for the variable, or we can do so via a shadow copy host register. The
3825 * latter will avoid having to reload it if it's being stored later, but
3826 * will waste a host register if it isn't touched again. Since we don't
3827 * know what's going to happen, we choose the latter for now.
3828 */
3829 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3830 kIemNativeGstRegUse_ReadOnly);
3831
3832 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3833 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3834 if (cbSignExtended == sizeof(uint32_t))
3835 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3836 else
3837 {
3838 Assert(cbSignExtended == sizeof(uint64_t));
3839 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3840 }
3841 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3842
3843 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3844 return off;
3845}
3846
3847
3848#define IEM_MC_FETCH_GREG_I32(a_i32Dst, a_iGReg) \
3849 off = iemNativeEmitFetchGregU32(pReNative, off, a_i32Dst, a_iGReg, sizeof(uint32_t))
3850
3851#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
3852 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
3853
3854#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
3855 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
3856
3857/** Emits code for IEM_MC_FETCH_GREG_U32, IEM_MC_FETCH_GREG_I32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
3858DECL_INLINE_THROW(uint32_t)
3859iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
3860{
3861 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3862 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3863 Assert(iGReg < 16);
3864
3865 /*
3866 * We can either just load the low 32 bits of the GPR into a host register
3867 * for the variable, or we can do so via a shadow copy host register. The
3868 * latter will avoid having to reload it if it's being stored later, but
3869 * will waste a host register if it isn't touched again. Since we don't
3870 * know what's going to happen, we choose the latter for now.
3871 */
3872 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3873 kIemNativeGstRegUse_ReadOnly);
3874
3875 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3876 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3877 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
3878 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3879
3880 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3881 return off;
3882}
3883
3884
3885#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
3886 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
3887
3888/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
3889DECL_INLINE_THROW(uint32_t)
3890iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
3891{
3892 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3893 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
3894 Assert(iGReg < 16);
3895
3896 /*
3897 * We can either just load the low 32-bit of the GPR into a host register
3898 * for the variable, or we can do so via a shadow copy host register. The
3899 * latter will avoid having to reload it if it's being stored later, but
3900 * will waste a host register if it isn't touched again. Since we don't
3901 * know what's going to happen, we choose the latter for now.
3902 */
3903 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3904 kIemNativeGstRegUse_ReadOnly);
3905
3906 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3907 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3908 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
3909 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3910
3911 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3912 return off;
3913}
3914
3915
3916#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
3917 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
3918
3919#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
3920 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
3921
3922/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
3923 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
3924DECL_INLINE_THROW(uint32_t)
3925iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
3926{
3927 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3928 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
3929 Assert(iGReg < 16);
3930
3931 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3932 kIemNativeGstRegUse_ReadOnly);
3933
3934 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3935 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3936 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
3937 /** @todo name the register a shadow one already? */
3938 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3939
3940 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3941 return off;
3942}
3943
3944
3945#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3946#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
3947 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
3948
3949/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
3950DECL_INLINE_THROW(uint32_t)
3951iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
3952{
3953 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3954 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
3955 Assert(iGRegLo < 16 && iGRegHi < 16);
3956
3957 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
3958 kIemNativeGstRegUse_ReadOnly);
3959 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
3960 kIemNativeGstRegUse_ReadOnly);
3961
3962 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3963 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
3964 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
3965 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
3966
3967 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
3968 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
3969 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
3970 return off;
3971}
3972#endif
3973
3974
3975/*********************************************************************************************************************************
3976* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
3977*********************************************************************************************************************************/
3978
3979#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
3980 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
3981
3982/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
3983DECL_INLINE_THROW(uint32_t)
3984iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
3985{
3986 Assert(iGRegEx < 20);
3987 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3988 kIemNativeGstRegUse_ForUpdate);
3989#ifdef RT_ARCH_AMD64
3990 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
3991
3992 /* To the lowest byte of the register: mov r8, imm8 */
3993 if (iGRegEx < 16)
3994 {
3995 if (idxGstTmpReg >= 8)
3996 pbCodeBuf[off++] = X86_OP_REX_B;
3997 else if (idxGstTmpReg >= 4)
3998 pbCodeBuf[off++] = X86_OP_REX;
3999 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4000 pbCodeBuf[off++] = u8Value;
4001 }
4002 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
4003 else if (idxGstTmpReg < 4)
4004 {
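        /* Encodings 0xb4..0xb7 are mov ah/ch/dh/bh, imm8 when no REX prefix is present. */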
4005 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
4006 pbCodeBuf[off++] = u8Value;
4007 }
4008 else
4009 {
4010 /* ror reg64, 8 */
4011 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4012 pbCodeBuf[off++] = 0xc1;
4013 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4014 pbCodeBuf[off++] = 8;
4015
4016 /* mov reg8, imm8 */
4017 if (idxGstTmpReg >= 8)
4018 pbCodeBuf[off++] = X86_OP_REX_B;
4019 else if (idxGstTmpReg >= 4)
4020 pbCodeBuf[off++] = X86_OP_REX;
4021 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4022 pbCodeBuf[off++] = u8Value;
4023
4024 /* rol reg64, 8 */
4025 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4026 pbCodeBuf[off++] = 0xc1;
4027 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4028 pbCodeBuf[off++] = 8;
4029 }
4030
4031#elif defined(RT_ARCH_ARM64)
4032 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4033 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4034 if (iGRegEx < 16)
4035 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4036 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4037 else
4038 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4039 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4040 iemNativeRegFreeTmp(pReNative, idxImmReg);
4041
4042#else
4043# error "Port me!"
4044#endif
4045
4046 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4047
4048#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4049 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4050#endif
4051
4052 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4053 return off;
4054}
4055
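/*
 * Rough sketch of the rotate path above, assuming the guest register is
 * shadowed by host register r8 (any encoding of r8b needs a REX prefix,
 * which rules out addressing ah/ch/dh/bh directly):
 *      49 c1 c8 08         ror r8, 8       ; bits 15:8 -> bits 7:0
 *      41 b0 xx            mov r8b, imm8   ; patch what is now the low byte
 *      49 c1 c0 08         rol r8, 8       ; rotate the register back
 * The bare REX prefix emitted when the host register is in the 4..7 range
 * makes the mov address spl/bpl/sil/dil rather than ah/ch/dh/bh.
 */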
4056
4057#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4058 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4059
4060/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4061DECL_INLINE_THROW(uint32_t)
4062iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4063{
4064 Assert(iGRegEx < 20);
4065 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4066
4067 /*
4068 * If it's a constant value (unlikely), we treat this as an
4069 * IEM_MC_STORE_GREG_U8_CONST statement.
4070 */
4071 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4072 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4073 { /* likely */ }
4074 else
4075 {
4076 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4077 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4078 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4079 }
4080
4081 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4082 kIemNativeGstRegUse_ForUpdate);
4083 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4084
4085#ifdef RT_ARCH_AMD64
4086 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4087 if (iGRegEx < 16)
4088 {
4089 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4090 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4091 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4092 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4093 pbCodeBuf[off++] = X86_OP_REX;
4094 pbCodeBuf[off++] = 0x8a;
4095 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4096 }
4097 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
4098 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4099 {
4100 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
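        /* Without a REX prefix, reg encodings 4..7 select ah/ch/dh/bh, so
           idxGstTmpReg + 4 addresses the guest high byte directly. */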
4101 pbCodeBuf[off++] = 0x8a;
4102 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4103 }
4104 else
4105 {
4106 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4107
4108 /* ror reg64, 8 */
4109 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4110 pbCodeBuf[off++] = 0xc1;
4111 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4112 pbCodeBuf[off++] = 8;
4113
4114 /* mov reg8, reg8(r/m) */
4115 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4116 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4117 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4118 pbCodeBuf[off++] = X86_OP_REX;
4119 pbCodeBuf[off++] = 0x8a;
4120 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4121
4122 /* rol reg64, 8 */
4123 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4124 pbCodeBuf[off++] = 0xc1;
4125 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4126 pbCodeBuf[off++] = 8;
4127 }
4128
4129#elif defined(RT_ARCH_ARM64)
4130 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4131 or
4132 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4133 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4134 if (iGRegEx < 16)
4135 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4136 else
4137 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4138
4139#else
4140# error "Port me!"
4141#endif
4142 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4143
4144 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4145
4146#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4147 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4148#endif
4149 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4150 return off;
4151}
4152
4153
4154
4155#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4156 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4157
4158/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4159DECL_INLINE_THROW(uint32_t)
4160iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4161{
4162 Assert(iGReg < 16);
4163 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4164 kIemNativeGstRegUse_ForUpdate);
4165#ifdef RT_ARCH_AMD64
4166 /* mov reg16, imm16 */
4167 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4168 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4169 if (idxGstTmpReg >= 8)
4170 pbCodeBuf[off++] = X86_OP_REX_B;
4171 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4172 pbCodeBuf[off++] = RT_BYTE1(uValue);
4173 pbCodeBuf[off++] = RT_BYTE2(uValue);
4174
4175#elif defined(RT_ARCH_ARM64)
4176 /* movk xdst, #uValue, lsl #0 */
4177 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4178 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
4179
4180#else
4181# error "Port me!"
4182#endif
4183
4184 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4185
4186#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4187 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4188#endif
4189 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4190 return off;
4191}
4192
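/*
 * Note: MOVK with LSL #0 replaces only bits 15:0 of the destination and
 * leaves the rest untouched, which matches the x86 semantics of a 16-bit
 * register write.  E.g. with guest RDX = 0x1122334455667788, storing 0xbeef
 * would yield 0x112233445566beef.
 */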
4193
4194#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4195 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4196
4197/** Emits code for IEM_MC_STORE_GREG_U16. */
4198DECL_INLINE_THROW(uint32_t)
4199iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4200{
4201 Assert(iGReg < 16);
4202 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4203
4204 /*
4205 * If it's a constant value (unlikely), we treat this as an
4206 * IEM_MC_STORE_GREG_U16_CONST statement.
4207 */
4208 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4209 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4210 { /* likely */ }
4211 else
4212 {
4213 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4214 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4215 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
4216 }
4217
4218 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4219 kIemNativeGstRegUse_ForUpdate);
4220
4221#ifdef RT_ARCH_AMD64
4222 /* mov reg16, reg16 or [mem16] */
4223 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4224 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4225 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
4226 {
4227 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
4228 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
4229 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
4230 pbCodeBuf[off++] = 0x8b;
4231 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
4232 }
4233 else
4234 {
4235 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
4236 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
4237 if (idxGstTmpReg >= 8)
4238 pbCodeBuf[off++] = X86_OP_REX_R;
4239 pbCodeBuf[off++] = 0x8b;
4240 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
4241 }
4242
4243#elif defined(RT_ARCH_ARM64)
4244 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
4245 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4246 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4247 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
4248 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4249
4250#else
4251# error "Port me!"
4252#endif
4253
4254 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4255
4256#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4257 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4258#endif
4259 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4260 return off;
4261}
4262
4263
4264#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
4265 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
4266
4267/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
4268DECL_INLINE_THROW(uint32_t)
4269iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
4270{
4271 Assert(iGReg < 16);
4272 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4273 kIemNativeGstRegUse_ForFullWrite);
4274 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4275#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4276 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4277#endif
4278 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4279 return off;
4280}
4281
4282
4283#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
4284 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
4285
4286#define IEM_MC_STORE_GREG_I32(a_iGReg, a_i32Value) \
4287 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_i32Value)
4288
4289/** Emits code for IEM_MC_STORE_GREG_U32/IEM_MC_STORE_GREG_I32. */
4290DECL_INLINE_THROW(uint32_t)
4291iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4292{
4293 Assert(iGReg < 16);
4294 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4295
4296 /*
4297 * If it's a constant value (unlikely), we treat this as an
4298 * IEM_MC_STORE_GREG_U32_CONST statement.
4299 */
4300 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4301 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4302 { /* likely */ }
4303 else
4304 {
4305 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4306 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4307 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
4308 }
4309
4310 /*
4311 * For the rest we allocate a guest register for the variable and write
4312 * it to the CPUMCTX structure.
4313 */
4314 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
4315#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4316 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4317#else
4318 RT_NOREF(idxVarReg);
4319#endif
4320#ifdef VBOX_STRICT
4321 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
4322#endif
4323 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4324 return off;
4325}
4326
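/*
 * Note: as on real hardware, a 32-bit GPR write clears bits 63:32 of the
 * guest register, so the variable's host register must hold the value
 * zero-extended to 64 bits; that is what the strict-build check above
 * verifies.  E.g. storing 0xdeadbeef to ECX leaves guest RCX as
 * 0x00000000deadbeef.
 */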
4327
4328#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
4329 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
4330
4331/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
4332DECL_INLINE_THROW(uint32_t)
4333iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
4334{
4335 Assert(iGReg < 16);
4336 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4337 kIemNativeGstRegUse_ForFullWrite);
4338 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4339#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4340 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4341#endif
4342 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4343 return off;
4344}
4345
4346
4347#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
4348 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
4349
4350#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
4351 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
4352
4353/** Emits code for IEM_MC_STORE_GREG_U64. */
4354DECL_INLINE_THROW(uint32_t)
4355iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4356{
4357 Assert(iGReg < 16);
4358 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4359
4360 /*
4361 * If it's a constant value (unlikely), we treat this as an
4362 * IEM_MC_STORE_GREG_U64_CONST statement.
4363 */
4364 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4365 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4366 { /* likely */ }
4367 else
4368 {
4369 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4370 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4371 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
4372 }
4373
4374 /*
4375 * For the rest we allocate a guest register for the variable and write
4376 * it to the CPUMCTX structure.
4377 */
4378 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
4379#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4380 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4381#else
4382 RT_NOREF(idxVarReg);
4383#endif
4384 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4385 return off;
4386}
4387
4388
4389#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
4390 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
4391
4392/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
4393DECL_INLINE_THROW(uint32_t)
4394iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
4395{
4396 Assert(iGReg < 16);
4397 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4398 kIemNativeGstRegUse_ForUpdate);
4399 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
4400#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4401 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4402#endif
4403 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4404 return off;
4405}
4406
4407
4408#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4409#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
4410 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
4411
4412/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
4413DECL_INLINE_THROW(uint32_t)
4414iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
4415{
4416 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4417 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4418 Assert(iGRegLo < 16 && iGRegHi < 16);
4419
4420 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4421 kIemNativeGstRegUse_ForFullWrite);
4422 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4423 kIemNativeGstRegUse_ForFullWrite);
4424
4425 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4426 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
4427 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
4428 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
4429
4430 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4431 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4432 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4433 return off;
4434}
4435#endif
4436
4437
4438/*********************************************************************************************************************************
4439* General purpose register manipulation (add, sub). *
4440*********************************************************************************************************************************/
4441
4442#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
4443 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
4444
4445/** Emits code for IEM_MC_ADD_GREG_U16. */
4446DECL_INLINE_THROW(uint32_t)
4447iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
4448{
4449 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4450 kIemNativeGstRegUse_ForUpdate);
4451
4452#ifdef RT_ARCH_AMD64
4453 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4454 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4455 if (idxGstTmpReg >= 8)
4456 pbCodeBuf[off++] = X86_OP_REX_B;
4457 if (uAddend == 1)
4458 {
4459 pbCodeBuf[off++] = 0xff; /* inc */
4460 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4461 }
4462 else
4463 {
4464 pbCodeBuf[off++] = 0x81;
4465 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4466 pbCodeBuf[off++] = uAddend;
4467 pbCodeBuf[off++] = 0;
4468 }
4469
4470#else
4471 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4472 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4473
4474 /* add tmp, gstgrp, uAddend */
4475 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
4476
4477 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
4478 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4479
4480 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4481#endif
4482
4483 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4484
4485#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4486 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4487#endif
4488
4489 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4490 return off;
4491}
4492
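/*
 * Note: a 16-bit add must not carry into bit 16 of the guest register,
 * hence the add-into-temporary + BFI sequence on ARM64 and the operand-size
 * prefixed add on AMD64.  E.g. with guest RCX = 0x0000000012345678 and an
 * addend of 1 the result is 0x0000000012345679, while RCX = 0x1234ffff
 * wraps to 0x12340000 rather than becoming 0x12350000.
 */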
4493
4494#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
4495 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4496
4497#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
4498 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4499
4500/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
4501DECL_INLINE_THROW(uint32_t)
4502iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
4503{
4504 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4505 kIemNativeGstRegUse_ForUpdate);
4506
4507#ifdef RT_ARCH_AMD64
4508 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4509 if (f64Bit)
4510 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4511 else if (idxGstTmpReg >= 8)
4512 pbCodeBuf[off++] = X86_OP_REX_B;
4513 if (uAddend == 1)
4514 {
4515 pbCodeBuf[off++] = 0xff; /* inc */
4516 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4517 }
4518 else if (uAddend < 128)
4519 {
4520 pbCodeBuf[off++] = 0x83; /* add */
4521 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4522 pbCodeBuf[off++] = RT_BYTE1(uAddend);
4523 }
4524 else
4525 {
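        /* Opcode 0x83 sign-extends its imm8, so addends 128..255 need the imm32 form. */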
4526 pbCodeBuf[off++] = 0x81; /* add */
4527 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4528 pbCodeBuf[off++] = RT_BYTE1(uAddend);
4529 pbCodeBuf[off++] = 0;
4530 pbCodeBuf[off++] = 0;
4531 pbCodeBuf[off++] = 0;
4532 }
4533
4534#else
4535 /* add gstgrp, gstgrp, uAddend */
4536 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4537 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
4538
4539#endif
4540
4541 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4542
4543#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4544 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4545#endif
4546
4547 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4548 return off;
4549}
4550
4551
4552
4553#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
4554 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
4555
4556/** Emits code for IEM_MC_SUB_GREG_U16. */
4557DECL_INLINE_THROW(uint32_t)
4558iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
4559{
4560 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4561 kIemNativeGstRegUse_ForUpdate);
4562
4563#ifdef RT_ARCH_AMD64
4564 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4565 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4566 if (idxGstTmpReg >= 8)
4567 pbCodeBuf[off++] = X86_OP_REX_B;
4568 if (uSubtrahend == 1)
4569 {
4570 pbCodeBuf[off++] = 0xff; /* dec */
4571 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4572 }
4573 else
4574 {
4575 pbCodeBuf[off++] = 0x81;
4576 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4577 pbCodeBuf[off++] = uSubtrahend;
4578 pbCodeBuf[off++] = 0;
4579 }
4580
4581#else
4582 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4583 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4584
4585 /* sub tmp, gstgrp, uSubtrahend */
4586 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
4587
4588 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
4589 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4590
4591 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4592#endif
4593
4594 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4595
4596#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4597 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4598#endif
4599
4600 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4601 return off;
4602}
4603
4604
4605#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
4606 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4607
4608#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
4609 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4610
4611/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
4612DECL_INLINE_THROW(uint32_t)
4613iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
4614{
4615 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4616 kIemNativeGstRegUse_ForUpdate);
4617
4618#ifdef RT_ARCH_AMD64
4619 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4620 if (f64Bit)
4621 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4622 else if (idxGstTmpReg >= 8)
4623 pbCodeBuf[off++] = X86_OP_REX_B;
4624 if (uSubtrahend == 1)
4625 {
4626 pbCodeBuf[off++] = 0xff; /* dec */
4627 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4628 }
4629 else if (uSubtrahend < 128)
4630 {
4631 pbCodeBuf[off++] = 0x83; /* sub */
4632 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4633 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4634 }
4635 else
4636 {
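        /* Opcode 0x83 sign-extends its imm8, so subtrahends 128..255 need the imm32 form. */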
4637 pbCodeBuf[off++] = 0x81; /* sub */
4638 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4639 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4640 pbCodeBuf[off++] = 0;
4641 pbCodeBuf[off++] = 0;
4642 pbCodeBuf[off++] = 0;
4643 }
4644
4645#else
4646 /* sub gstgrp, gstgrp, uSubtrahend */
4647 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4648 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
4649
4650#endif
4651
4652 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4653
4654#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4655 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4656#endif
4657
4658 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4659 return off;
4660}
4661
4662
4663#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
4664 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
4665
4666#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
4667 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
4668
4669#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
4670 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
4671
4672#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
4673 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
4674
4675/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
4676DECL_INLINE_THROW(uint32_t)
4677iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
4678{
4679#ifdef VBOX_STRICT
4680 switch (cbMask)
4681 {
4682 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4683 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4684 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4685 case sizeof(uint64_t): break;
4686 default: AssertFailedBreak();
4687 }
4688#endif
4689
4690 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4691 kIemNativeGstRegUse_ForUpdate);
4692
4693 switch (cbMask)
4694 {
4695 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
4696 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
4697 break;
4698 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
4699 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
4700 break;
4701 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
4702 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
4703 break;
4704 case sizeof(uint64_t):
4705 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
4706 break;
4707 default: AssertFailedBreak();
4708 }
4709
4710 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4711
4712#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4713 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4714#endif
4715
4716 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4717 return off;
4718}
4719
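/*
 * Note on the masking above: for 8- and 16-bit operands the mask is widened
 * with all-ones in the untouched bit positions so that a full 64-bit AND
 * leaves them alone, mirroring x86 partial-register behaviour, while the
 * 32-bit case uses a 32-bit AND which (as on real hardware) zeroes bits
 * 63:32.  E.g. IEM_MC_AND_GREG_U8(X86_GREG_xAX, 0x0f) effectively performs
 * rax &= 0xffffffffffffff0f.
 */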
4720
4721#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
4722 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
4723
4724#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
4725 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
4726
4727#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
4728 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
4729
4730#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
4731 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
4732
4733/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
4734DECL_INLINE_THROW(uint32_t)
4735iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
4736{
4737#ifdef VBOX_STRICT
4738 switch (cbMask)
4739 {
4740 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4741 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4742 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4743 case sizeof(uint64_t): break;
4744 default: AssertFailedBreak();
4745 }
4746#endif
4747
4748 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4749 kIemNativeGstRegUse_ForUpdate);
4750
4751 switch (cbMask)
4752 {
4753 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
4754 case sizeof(uint16_t):
4755 case sizeof(uint64_t):
4756 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
4757 break;
4758 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
4759 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
4760 break;
4761 default: AssertFailedBreak();
4762 }
4763
4764 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4765
4766#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4767 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4768#endif
4769
4770 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4771 return off;
4772}
4773
4774
4775/*********************************************************************************************************************************
4776* Local/Argument variable manipulation (add, sub, and, or). *
4777*********************************************************************************************************************************/
4778
4779#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
4780 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
4781
4782#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
4783 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
4784
4785#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
4786 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
4787
4788#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
4789 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
4790
4791
4792#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
4793 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
4794
4795#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
4796 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
4797
4798#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
4799 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
4800
4801/** Emits code for AND'ing a local and a constant value. */
4802DECL_INLINE_THROW(uint32_t)
4803iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
4804{
4805#ifdef VBOX_STRICT
4806 switch (cbMask)
4807 {
4808 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4809 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4810 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4811 case sizeof(uint64_t): break;
4812 default: AssertFailedBreak();
4813 }
4814#endif
4815
4816 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4817 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
4818
4819 if (cbMask <= sizeof(uint32_t))
4820 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
4821 else
4822 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
4823
4824 iemNativeVarRegisterRelease(pReNative, idxVar);
4825 return off;
4826}
4827
4828
4829#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
4830 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
4831
4832#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
4833 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
4834
4835#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
4836 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
4837
4838#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
4839 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
4840
4841/** Emits code for OR'ing a local and a constant value. */
4842DECL_INLINE_THROW(uint32_t)
4843iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
4844{
4845#ifdef VBOX_STRICT
4846 switch (cbMask)
4847 {
4848 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4849 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4850 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4851 case sizeof(uint64_t): break;
4852 default: AssertFailedBreak();
4853 }
4854#endif
4855
4856 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4857 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
4858
4859 if (cbMask <= sizeof(uint32_t))
4860 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
4861 else
4862 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
4863
4864 iemNativeVarRegisterRelease(pReNative, idxVar);
4865 return off;
4866}
4867
4868
4869#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
4870 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
4871
4872#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
4873 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
4874
4875#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
4876 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
4877
4878/** Emits code for reversing the byte order in a local value. */
4879DECL_INLINE_THROW(uint32_t)
4880iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
4881{
4882 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4883 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4884
4885 switch (cbLocal)
4886 {
4887 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
4888 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
4889 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
4890 default: AssertFailedBreak();
4891 }
4892
4893 iemNativeVarRegisterRelease(pReNative, idxVar);
4894 return off;
4895}
4896
4897
4898#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
4899 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
4900
4901#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
4902 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
4903
4904#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
4905 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
4906
4907/** Emits code for shifting left a local value. */
4908DECL_INLINE_THROW(uint32_t)
4909iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
4910{
4911#ifdef VBOX_STRICT
4912 switch (cbLocal)
4913 {
4914 case sizeof(uint8_t): Assert(cShift < 8); break;
4915 case sizeof(uint16_t): Assert(cShift < 16); break;
4916 case sizeof(uint32_t): Assert(cShift < 32); break;
4917 case sizeof(uint64_t): Assert(cShift < 64); break;
4918 default: AssertFailedBreak();
4919 }
4920#endif
4921
4922 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4923 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4924
4925 if (cbLocal <= sizeof(uint32_t))
4926 {
4927 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
4928 if (cbLocal < sizeof(uint32_t))
4929 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
4930 cbLocal == sizeof(uint16_t)
4931 ? UINT32_C(0xffff)
4932 : UINT32_C(0xff));
4933 }
4934 else
4935 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
4936
4937 iemNativeVarRegisterRelease(pReNative, idxVar);
4938 return off;
4939}
4940
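/*
 * Note: the AND after the 32-bit shift discards bits shifted beyond the
 * width of a narrow local.  E.g. a 16-bit local holding 0x8000 shifted left
 * by one becomes 0x10000 in the host register, and the 0xffff mask brings
 * it back to 0x0000, just as a real 16-bit shl would.
 */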
4941
4942#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
4943 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
4944
4945#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
4946 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
4947
4948#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
4949 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
4950
4951/** Emits code for arithmetically shifting a local value right. */
4952DECL_INLINE_THROW(uint32_t)
4953iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
4954{
4955#ifdef VBOX_STRICT
4956 switch (cbLocal)
4957 {
4958 case sizeof(int8_t): Assert(cShift < 8); break;
4959 case sizeof(int16_t): Assert(cShift < 16); break;
4960 case sizeof(int32_t): Assert(cShift < 32); break;
4961 case sizeof(int64_t): Assert(cShift < 64); break;
4962 default: AssertFailedBreak();
4963 }
4964#endif
4965
4966 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4967 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4968
4969 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
4970 if (cbLocal == sizeof(uint8_t))
4971 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4972 else if (cbLocal == sizeof(uint16_t))
4973 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
4974
4975 if (cbLocal <= sizeof(uint32_t))
4976 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
4977 else
4978 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
4979
4980 iemNativeVarRegisterRelease(pReNative, idxVar);
4981 return off;
4982}
4983
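/*
 * Note: the sign extension above is what turns the 32-bit arithmetic shift
 * into a correct 8/16-bit one.  E.g. if the host register holds 0x00008000
 * for an int16_t of -32768, shifting right arithmetically by one would give
 * 0x00004000 (+16384); sign-extending to 0xffff8000 first yields the
 * expected 0xffffc000 (-16384).
 */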
4984
4985#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
4986 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
4987
4988#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
4989 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
4990
4991#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
4992 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
4993
4994/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
4995DECL_INLINE_THROW(uint32_t)
4996iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
4997{
4998 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
4999 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
5000 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5001 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5002
5003 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5004 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
5005
5006 /* Need to sign extend the value. */
5007 if (cbLocal <= sizeof(uint32_t))
5008 {
5009/** @todo ARM64: In case of boredom, the extended add instruction can do the
5010 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
5011 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5012
5013 switch (cbLocal)
5014 {
5015 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
5016 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
5017 default: AssertFailed();
5018 }
5019
5020 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
5021 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5022 }
5023 else
5024 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
5025
5026 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
5027 iemNativeVarRegisterRelease(pReNative, idxVar);
5028 return off;
5029}
5030
5031
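/*
 * Plain C sketch (illustrative only, kept out of the build with #if 0) of what
 * the emitter above computes for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR and friends:
 * a 16/32-bit local is sign extended to 64 bits and added to the effective
 * address, while a 64-bit local is added as-is.  The helper name is made up
 * for this example.
 */
#if 0
static uint64_t iemExampleAddLocalS16ToEffAddr(uint64_t GCPtrEff, int16_t i16Local)
{
    return GCPtrEff + (uint64_t)(int64_t)i16Local;  /* sign extend to 64 bits, then wraparound add. */
}
#endif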
5032
5033/*********************************************************************************************************************************
5034* EFLAGS *
5035*********************************************************************************************************************************/
5036
5037#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
5038# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
5039#else
5040# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
5041 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
5042
5043DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
5044{
5045 if (fEflOutput)
5046 {
5047 PVMCPUCC const pVCpu = pReNative->pVCpu;
5048# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5049 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5050 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5051 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5052# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5053 if (fEflOutput & (a_fEfl)) \
5054 { \
5055 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5056 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5057 else \
5058 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5059 } else do { } while (0)
5060# else
5061 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
5062 IEMLIVENESSBIT const LivenessClobbered =
5063 {
5064 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5065 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5066 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5067 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5068 };
5069 IEMLIVENESSBIT const LivenessDelayable =
5070 {
5071 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5072 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5073 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5074 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5075 };
5076# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5077 if (fEflOutput & (a_fEfl)) \
5078 { \
5079 if (LivenessClobbered.a_fLivenessMember) \
5080 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5081 else if (LivenessDelayable.a_fLivenessMember) \
5082 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5083 else \
5084 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5085 } else do { } while (0)
5086# endif
5087 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5088 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5089 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5090 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5091 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5092 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5093 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5094# undef CHECK_FLAG_AND_UPDATE_STATS
5095 }
5096 RT_NOREF(fEflInput);
5097}
5098#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5099
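/*
 * Plain C sketch (illustrative only, kept out of the build with #if 0) of the
 * classification the statistics code above derives from the extended-layout
 * liveness bitmaps: a flag write is "clobbered" (skippable) when nothing later
 * reads it, and "delayable" when only a potential exception/call path may need
 * it.  The helper name is made up for this example.
 */
#if 0
static void iemExampleClassifyEflWrites(PCIEMLIVENESSENTRY pLivenessEntry,
                                        uint64_t *pbmClobbered, uint64_t *pbmDelayable)
{
    uint64_t const bmWrite = pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64;
    uint64_t const bmRead  = pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64;
    uint64_t const bmXcpt  = pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64;
    uint64_t const bmOther = pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64;

    *pbmClobbered = bmWrite & ~(bmRead | bmXcpt | bmOther);   /* written and never needed afterwards -> skippable */
    *pbmDelayable = bmWrite &  bmXcpt & ~(bmRead | bmOther);  /* only a potential exception/call path may need it */
}
#endif
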
5100#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5101#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5102 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
5103
5104/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5105DECL_INLINE_THROW(uint32_t)
5106iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
5107 uint32_t fEflInput, uint32_t fEflOutput)
5108{
5109 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5110 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5111 RT_NOREF(fEflInput, fEflOutput);
5112
5113#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5114# ifdef VBOX_STRICT
5115 if ( pReNative->idxCurCall != 0
5116 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
5117 {
5118 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5119 uint32_t const fBoth = fEflInput | fEflOutput;
5120# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
5121 AssertMsg( !(fBoth & (a_fElfConst)) \
5122 || (!(fEflInput & (a_fElfConst)) \
5123 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5124 : !(fEflOutput & (a_fElfConst)) \
5125 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5126 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5127 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5128 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5129 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5130 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5131 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5132 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5133 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5134 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5135# undef ASSERT_ONE_EFL
5136 }
5137# endif
5138#endif
5139
5140 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5141
5142 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
5143 * the existing shadow copy. */
5144 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
5145 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5146 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5147 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5148 return off;
5149}
5150
5151
5152
5153/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5154 * start using it with custom native code emission (inlining assembly
5155 * instruction helpers). */
5156#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5157#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5158 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5159 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
5160
5161#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5162#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5163 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5164 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
5165
5166/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
5167DECL_INLINE_THROW(uint32_t)
5168iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
5169 bool fUpdateSkipping)
5170{
5171 RT_NOREF(fEflOutput);
5172 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
5173 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5174
5175#ifdef VBOX_STRICT
5176 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
5177 uint32_t offFixup = off;
5178 off = iemNativeEmitJnzToFixed(pReNative, off, off);
5179 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
5180 iemNativeFixupFixedJump(pReNative, offFixup, off);
5181
5182 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
5183 offFixup = off;
5184 off = iemNativeEmitJzToFixed(pReNative, off, off);
5185 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
5186 iemNativeFixupFixedJump(pReNative, offFixup, off);
5187
5188 /** @todo validate that only bits in the fEflOutput mask changed. */
5189#endif
5190
5191#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5192 if (fUpdateSkipping)
5193 {
5194 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5195 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5196 else
5197 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5198 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5199 }
5200#else
5201 RT_NOREF_PV(fUpdateSkipping);
5202#endif
5203
5204 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5205 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
5206 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5207 return off;
5208}
5209
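/*
 * Plain C sketch (illustrative only, kept out of the build with #if 0) of the
 * invariant the strict-build checks above enforce before committing EFLAGS:
 * the reserved-as-one bit must be set and the reserved-as-zero bits within the
 * hardware mask must be clear.  The helper name is made up for this example.
 */
#if 0
static bool iemExampleEFlagsLooksValid(uint32_t fEfl)
{
    return (fEfl & X86_EFL_RA1_MASK) == X86_EFL_RA1_MASK            /* the reserved-as-one bit must be set */
        && !(fEfl & X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);   /* the reserved-as-zero bits must be clear */
}
#endif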
5210
5211typedef enum IEMNATIVEMITEFLOP
5212{
5213 kIemNativeEmitEflOp_Invalid = 0,
5214 kIemNativeEmitEflOp_Set,
5215 kIemNativeEmitEflOp_Clear,
5216 kIemNativeEmitEflOp_Flip
5217} IEMNATIVEMITEFLOP;
5218
5219#define IEM_MC_SET_EFL_BIT(a_fBit) \
5220 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Set);
5221
5222#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
5223 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Clear);
5224
5225#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
5226 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Flip);
5227
5228/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
5229DECL_INLINE_THROW(uint32_t)
5230iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflBit, IEMNATIVEMITEFLOP enmOp)
5231{
5232 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5233 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/);
5234
5235 switch (enmOp)
5236 {
5237 case kIemNativeEmitEflOp_Set:
5238 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5239 break;
5240 case kIemNativeEmitEflOp_Clear:
5241 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~fEflBit);
5242 break;
5243 case kIemNativeEmitEflOp_Flip:
5244 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5245 break;
5246 default:
5247 AssertFailed();
5248 break;
5249 }
5250
5251 /** @todo No delayed writeback for EFLAGS right now. */
5252 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
5253
5254 /* Free but don't flush the EFLAGS register. */
5255 iemNativeRegFreeTmp(pReNative, idxEflReg);
5256
5257 return off;
5258}
5259
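/*
 * Plain C sketch (illustrative only, kept out of the build with #if 0) of the
 * three EFLAGS operations the emitter above performs on the guest copy.  The
 * helper name is made up for this example.
 */
#if 0
static uint32_t iemExampleModifyEFlagsBit(uint32_t fEfl, uint32_t fEflBit, IEMNATIVEMITEFLOP enmOp)
{
    switch (enmOp)
    {
        case kIemNativeEmitEflOp_Set:   return fEfl | fEflBit;      /* IEM_MC_SET_EFL_BIT */
        case kIemNativeEmitEflOp_Clear: return fEfl & ~fEflBit;     /* IEM_MC_CLEAR_EFL_BIT */
        case kIemNativeEmitEflOp_Flip:  return fEfl ^ fEflBit;      /* IEM_MC_FLIP_EFL_BIT */
        default:                        return fEfl;
    }
}
#endif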
5260
5261/*********************************************************************************************************************************
5262* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
5263*********************************************************************************************************************************/
5264
5265#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
5266 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
5267
5268#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
5269 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
5270
5271#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
5272 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
5273
5274
5275/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
5276 * IEM_MC_FETCH_SREG_ZX_U64. */
5277DECL_INLINE_THROW(uint32_t)
5278iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
5279{
5280 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5281 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
5282 Assert(iSReg < X86_SREG_COUNT);
5283
5284 /*
5285 * For now, we will not create a shadow copy of a selector. The rationale
5286 * is that since we do not recompile the popping and loading of segment
5287 * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for
5288 * pushing and moving to registers, there is only a small chance that the
5289 * shadow copy will be accessed again before the register is reloaded. One
5290 * scenario would be nested calls in 16-bit code, but I doubt it's worth
5291 * the extra register pressure atm.
5292 *
5293 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
5294 * and iemNativeVarRegisterAcquire for a load scenario. We only got the
5295 * store scenario covered at present (r160730).
5296 */
5297 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5298 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
5299 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
5300 iemNativeVarRegisterRelease(pReNative, idxDstVar);
5301 return off;
5302}
5303
5304
5305
5306/*********************************************************************************************************************************
5307* Register references. *
5308*********************************************************************************************************************************/
5309
5310#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
5311 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
5312
5313#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
5314 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
5315
5316/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
5317DECL_INLINE_THROW(uint32_t)
5318iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
5319{
5320 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
5321 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5322 Assert(iGRegEx < 20);
5323
5324 if (iGRegEx < 16)
5325 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
5326 else
5327 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
5328
5329 /* If we've delayed writing back the register value, flush it now. */
5330 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
5331
5332 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5333 if (!fConst)
5334 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
5335
5336 return off;
5337}
5338
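/*
 * Plain C sketch (illustrative only, kept out of the build with #if 0) of how
 * the extended register index used above encodes the legacy high-byte
 * registers: 0..15 reference the full GPRs, while 16..19 reference
 * AH/CH/DH/BH, i.e. bits 8..15 of GPRs 0..3.  The helper name is made up for
 * this example.
 */
#if 0
static void iemExampleDecodeGRegEx(uint8_t iGRegEx, uint8_t *piGReg, bool *pfHighByte)
{
    *piGReg     = iGRegEx & 15;     /* the underlying GPR index (0..15) */
    *pfHighByte = iGRegEx >= 16;    /* 16..19 select AH/CH/DH/BH, i.e. bits 8..15 of GPR 0..3 */
}
#endif
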
5339#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
5340 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
5341
5342#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
5343 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
5344
5345#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
5346 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
5347
5348#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
5349 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
5350
5351#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
5352 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
5353
5354#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
5355 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
5356
5357#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
5358 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
5359
5360#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
5361 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
5362
5363#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
5364 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
5365
5366#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
5367 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
5368
5369/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
5370DECL_INLINE_THROW(uint32_t)
5371iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
5372{
5373 Assert(iGReg < 16);
5374 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
5375 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5376
5377 /* If we've delayed writing back the register value, flush it now. */
5378 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
5379
5380 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5381 if (!fConst)
5382 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
5383
5384 return off;
5385}
5386
5387
5388#undef IEM_MC_REF_EFLAGS /* should not be used. */
5389#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
5390 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5391 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
5392
5393/** Handles IEM_MC_REF_EFLAGS. */
5394DECL_INLINE_THROW(uint32_t)
5395iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
5396{
5397 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
5398 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5399
5400#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5401 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5402
5403 /* Updating the skipping according to the outputs is a little early, but
5404 we don't have any other hooks for references atm. */
5405 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5406 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5407 else if (fEflOutput & X86_EFL_STATUS_BITS)
5408 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5409 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5410#else
5411 RT_NOREF(fEflInput, fEflOutput);
5412#endif
5413
5414 /* If we've delayed writing back the register value, flush it now. */
5415 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
5416
5417 /* If there is a shadow copy of guest EFLAGS, flush it now. */
5418 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
5419
5420 return off;
5421}
5422
5423
5424/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
5425 * different code from the threaded recompiler, maybe it would be helpful. For now
5426 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
5427#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
5428
5429
5430#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
5431 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
5432
5433#define IEM_MC_REF_XREG_XMM(a_puXmmDst, a_iXReg) \
5434 off = iemNativeEmitRefXregXxx(pReNative, off, a_puXmmDst, a_iXReg, false /*fConst*/)
5435
5436#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
5437 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
5438
5439#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
5440 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
5441
5442#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5443/* Just being paranoid here. */
5444# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
5445AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
5446AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
5447AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
5448AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
5449# endif
5450AssertCompileMemberOffset(X86XMMREG, au64, 0);
5451AssertCompileMemberOffset(X86XMMREG, au32, 0);
5452AssertCompileMemberOffset(X86XMMREG, ar64, 0);
5453AssertCompileMemberOffset(X86XMMREG, ar32, 0);
5454
5455# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
5456 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
5457# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
5458 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
5459# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
5460 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
5461# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
5462 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
5463#endif
5464
5465/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
5466DECL_INLINE_THROW(uint32_t)
5467iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
5468{
5469 Assert(iXReg < 16);
5470 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
5471 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5472
5473 /* If we've delayed writing back the register value, flush it now. */
5474 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
5475
5476#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5477 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5478 if (!fConst)
5479 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
5480#else
5481 RT_NOREF(fConst);
5482#endif
5483
5484 return off;
5485}
5486
5487
5488
5489/*********************************************************************************************************************************
5490* Effective Address Calculation *
5491*********************************************************************************************************************************/
5492#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
5493 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
5494
5495/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
5496 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
5497DECL_INLINE_THROW(uint32_t)
5498iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5499 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
5500{
5501 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5502
5503 /*
5504 * Handle the disp16 form with no registers first.
5505 *
5506 * Convert to an immediate value, as that'll delay the register allocation
5507 * and assignment till the memory access / call / whatever and we can use
5508 * a more appropriate register (or none at all).
5509 */
5510 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
5511 {
5512 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
5513 return off;
5514 }
5515
5516 /* Determine the displacement. */
5517 uint16_t u16EffAddr;
5518 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5519 {
5520 case 0: u16EffAddr = 0; break;
5521 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
5522 case 2: u16EffAddr = u16Disp; break;
5523 default: AssertFailedStmt(u16EffAddr = 0);
5524 }
5525
5526 /* Determine the registers involved. */
5527 uint8_t idxGstRegBase;
5528 uint8_t idxGstRegIndex;
5529 switch (bRm & X86_MODRM_RM_MASK)
5530 {
5531 case 0:
5532 idxGstRegBase = X86_GREG_xBX;
5533 idxGstRegIndex = X86_GREG_xSI;
5534 break;
5535 case 1:
5536 idxGstRegBase = X86_GREG_xBX;
5537 idxGstRegIndex = X86_GREG_xDI;
5538 break;
5539 case 2:
5540 idxGstRegBase = X86_GREG_xBP;
5541 idxGstRegIndex = X86_GREG_xSI;
5542 break;
5543 case 3:
5544 idxGstRegBase = X86_GREG_xBP;
5545 idxGstRegIndex = X86_GREG_xDI;
5546 break;
5547 case 4:
5548 idxGstRegBase = X86_GREG_xSI;
5549 idxGstRegIndex = UINT8_MAX;
5550 break;
5551 case 5:
5552 idxGstRegBase = X86_GREG_xDI;
5553 idxGstRegIndex = UINT8_MAX;
5554 break;
5555 case 6:
5556 idxGstRegBase = X86_GREG_xBP;
5557 idxGstRegIndex = UINT8_MAX;
5558 break;
5559#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
5560 default:
5561#endif
5562 case 7:
5563 idxGstRegBase = X86_GREG_xBX;
5564 idxGstRegIndex = UINT8_MAX;
5565 break;
5566 }
5567
5568 /*
5569 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
5570 */
5571 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5572 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5573 kIemNativeGstRegUse_ReadOnly);
5574 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
5575 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5576 kIemNativeGstRegUse_ReadOnly)
5577 : UINT8_MAX;
5578#ifdef RT_ARCH_AMD64
5579 if (idxRegIndex == UINT8_MAX)
5580 {
5581 if (u16EffAddr == 0)
5582 {
5583 /* movzx ret, base */
5584 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
5585 }
5586 else
5587 {
5588 /* lea ret32, [base64 + disp32] */
5589 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5590 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5591 if (idxRegRet >= 8 || idxRegBase >= 8)
5592 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
5593 pbCodeBuf[off++] = 0x8d;
5594 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5595 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
5596 else
5597 {
5598 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
5599 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5600 }
5601 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
5602 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
5603 pbCodeBuf[off++] = 0;
5604 pbCodeBuf[off++] = 0;
5605 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5606
5607 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
5608 }
5609 }
5610 else
5611 {
5612 /* lea ret32, [index64 + base64 (+ disp32)] */
5613 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5614 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5615 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5616 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5617 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5618 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5619 pbCodeBuf[off++] = 0x8d;
5620 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
5621 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5622 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
5623 if (bMod == X86_MOD_MEM4)
5624 {
5625 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
5626 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
5627 pbCodeBuf[off++] = 0;
5628 pbCodeBuf[off++] = 0;
5629 }
5630 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5631 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
5632 }
5633
5634#elif defined(RT_ARCH_ARM64)
5635 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5636 if (u16EffAddr == 0)
5637 {
5638 if (idxRegIndex == UINT8_MAX)
5639 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
5640 else
5641 {
5642 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
5643 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
5644 }
5645 }
5646 else
5647 {
5648 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
5649 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
5650 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
5651 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
5652 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
5653 else
5654 {
5655 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
5656 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
5657 }
5658 if (idxRegIndex != UINT8_MAX)
5659 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
5660 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
5661 }
5662
5663#else
5664# error "port me"
5665#endif
5666
5667 if (idxRegIndex != UINT8_MAX)
5668 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5669 iemNativeRegFreeTmp(pReNative, idxRegBase);
5670 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5671 return off;
5672}
5673
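/*
 * Plain C sketch (illustrative only, kept out of the build with #if 0) of the
 * 16-bit effective address the code above computes: base/index pair selected
 * by ModRM.rm, plus the 0/8/16-bit displacement selected by ModRM.mod,
 * truncated to 16 bits.  The helper name and the register array parameter are
 * made up; see iemOpHlpCalcRmEffAddrThreadedAddr16 for the real interpreted
 * version.
 */
#if 0
static uint16_t iemExampleCalcEffAddr16(uint8_t bRm, uint16_t u16Disp, uint16_t const auGRegs[8])
{
    /* Base/index registers selected by ModRM.rm (X86_GREG_xAX=0 .. X86_GREG_xDI=7). */
    static uint8_t const s_aiBase[8]  = { 3 /*BX*/, 3 /*BX*/, 5 /*BP*/, 5 /*BP*/, 6 /*SI*/, 7 /*DI*/, 5 /*BP*/, 3 /*BX*/ };
    static int8_t  const s_aiIndex[8] = { 6 /*SI*/, 7 /*DI*/, 6 /*SI*/, 7 /*DI*/, -1, -1, -1, -1 };

    uint8_t const iRm  = bRm & 7;
    uint8_t const iMod = (bRm >> 6) & 3;
    if (iMod == 0 && iRm == 6)                      /* mod=0, rm=6: plain disp16, no registers. */
        return u16Disp;

    uint16_t uEffAddr = iMod == 1 ? (uint16_t)(int16_t)(int8_t)u16Disp /* sign-extended disp8 */
                      : iMod == 2 ? u16Disp                            /* disp16 */
                      :             0;                                 /* no displacement */
    uEffAddr += auGRegs[s_aiBase[iRm]];
    if (s_aiIndex[iRm] >= 0)
        uEffAddr += auGRegs[s_aiIndex[iRm]];
    return uEffAddr;                                /* 16-bit arithmetic wraps just like on real hardware. */
}
#endif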
5674
5675#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
5676 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
5677
5678/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
5679 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
5680DECL_INLINE_THROW(uint32_t)
5681iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5682 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
5683{
5684 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5685
5686 /*
5687 * Handle the disp32 form with no registers first.
5688 *
5689 * Convert to an immediate value, as that'll delay the register allocation
5690 * and assignment till the memory access / call / whatever and we can use
5691 * a more appropriate register (or none at all).
5692 */
5693 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
5694 {
5695 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
5696 return off;
5697 }
5698
5699 /* Calculate the fixed displacement (more on SIB.B=4 and SIB.B=5 further down). */
5700 uint32_t u32EffAddr = 0;
5701 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5702 {
5703 case 0: break;
5704 case 1: u32EffAddr = (int8_t)u32Disp; break;
5705 case 2: u32EffAddr = u32Disp; break;
5706 default: AssertFailed();
5707 }
5708
5709 /* Get the register (or SIB) value. */
5710 uint8_t idxGstRegBase = UINT8_MAX;
5711 uint8_t idxGstRegIndex = UINT8_MAX;
5712 uint8_t cShiftIndex = 0;
5713 switch (bRm & X86_MODRM_RM_MASK)
5714 {
5715 case 0: idxGstRegBase = X86_GREG_xAX; break;
5716 case 1: idxGstRegBase = X86_GREG_xCX; break;
5717 case 2: idxGstRegBase = X86_GREG_xDX; break;
5718 case 3: idxGstRegBase = X86_GREG_xBX; break;
5719 case 4: /* SIB */
5720 {
5721 /* index w/ scaling. */
5722 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
5723 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
5724 {
5725 case 0: idxGstRegIndex = X86_GREG_xAX; break;
5726 case 1: idxGstRegIndex = X86_GREG_xCX; break;
5727 case 2: idxGstRegIndex = X86_GREG_xDX; break;
5728 case 3: idxGstRegIndex = X86_GREG_xBX; break;
5729 case 4: cShiftIndex = 0; /*no index*/ break;
5730 case 5: idxGstRegIndex = X86_GREG_xBP; break;
5731 case 6: idxGstRegIndex = X86_GREG_xSI; break;
5732 case 7: idxGstRegIndex = X86_GREG_xDI; break;
5733 }
5734
5735 /* base */
5736 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
5737 {
5738 case 0: idxGstRegBase = X86_GREG_xAX; break;
5739 case 1: idxGstRegBase = X86_GREG_xCX; break;
5740 case 2: idxGstRegBase = X86_GREG_xDX; break;
5741 case 3: idxGstRegBase = X86_GREG_xBX; break;
5742 case 4:
5743 idxGstRegBase = X86_GREG_xSP;
5744 u32EffAddr += uSibAndRspOffset >> 8;
5745 break;
5746 case 5:
5747 if ((bRm & X86_MODRM_MOD_MASK) != 0)
5748 idxGstRegBase = X86_GREG_xBP;
5749 else
5750 {
5751 Assert(u32EffAddr == 0);
5752 u32EffAddr = u32Disp;
5753 }
5754 break;
5755 case 6: idxGstRegBase = X86_GREG_xSI; break;
5756 case 7: idxGstRegBase = X86_GREG_xDI; break;
5757 }
5758 break;
5759 }
5760 case 5: idxGstRegBase = X86_GREG_xBP; break;
5761 case 6: idxGstRegBase = X86_GREG_xSI; break;
5762 case 7: idxGstRegBase = X86_GREG_xDI; break;
5763 }
5764
5765 /*
5766 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
5767 * the start of the function.
5768 */
5769 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
5770 {
5771 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
5772 return off;
5773 }
5774
5775 /*
5776 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
5777 */
5778 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5779 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
5780 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5781 kIemNativeGstRegUse_ReadOnly);
5782 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
5783 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5784 kIemNativeGstRegUse_ReadOnly);
5785
5786 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
5787 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
5788 {
5789 idxRegBase = idxRegIndex;
5790 idxRegIndex = UINT8_MAX;
5791 }
5792
5793#ifdef RT_ARCH_AMD64
5794 if (idxRegIndex == UINT8_MAX)
5795 {
5796 if (u32EffAddr == 0)
5797 {
5798 /* mov ret, base */
5799 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5800 }
5801 else
5802 {
5803 /* lea ret32, [base64 + disp32] */
5804 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5805 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5806 if (idxRegRet >= 8 || idxRegBase >= 8)
5807 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
5808 pbCodeBuf[off++] = 0x8d;
5809 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5810 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5811 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
5812 else
5813 {
5814 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5815 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5816 }
5817 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5818 if (bMod == X86_MOD_MEM4)
5819 {
5820 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5821 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5822 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5823 }
5824 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5825 }
5826 }
5827 else
5828 {
5829 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5830 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5831 if (idxRegBase == UINT8_MAX)
5832 {
5833 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
5834 if (idxRegRet >= 8 || idxRegIndex >= 8)
5835 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5836 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5837 pbCodeBuf[off++] = 0x8d;
5838 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
5839 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
5840 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5841 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5842 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5843 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5844 }
5845 else
5846 {
5847 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
5848 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5849 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5850 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5851 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5852 pbCodeBuf[off++] = 0x8d;
5853 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
5854 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5855 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5856 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
5857 if (bMod != X86_MOD_MEM0)
5858 {
5859 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5860 if (bMod == X86_MOD_MEM4)
5861 {
5862 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5863 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5864 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5865 }
5866 }
5867 }
5868 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5869 }
5870
5871#elif defined(RT_ARCH_ARM64)
5872 if (u32EffAddr == 0)
5873 {
5874 if (idxRegIndex == UINT8_MAX)
5875 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5876 else if (idxRegBase == UINT8_MAX)
5877 {
5878 if (cShiftIndex == 0)
5879 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
5880 else
5881 {
5882 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5883 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
5884 }
5885 }
5886 else
5887 {
5888 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5889 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
5890 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
5891 }
5892 }
5893 else
5894 {
5895 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
5896 {
5897 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5898 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
5899 }
5900 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
5901 {
5902 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5903 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
5904 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
5905 }
5906 else
5907 {
5908 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
5909 if (idxRegBase != UINT8_MAX)
5910 {
5911 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5912 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
5913 }
5914 }
5915 if (idxRegIndex != UINT8_MAX)
5916 {
5917 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5918 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
5919 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
5920 }
5921 }
5922
5923#else
5924# error "port me"
5925#endif
5926
5927 if (idxRegIndex != UINT8_MAX)
5928 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5929 if (idxRegBase != UINT8_MAX)
5930 iemNativeRegFreeTmp(pReNative, idxRegBase);
5931 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5932 return off;
5933}
5934
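/*
 * Plain C sketch (illustrative only, kept out of the build with #if 0) of the
 * 32-bit effective address the code above computes once base, index, scale
 * and the fixed displacement have been picked apart from ModRM and SIB.  The
 * helper name is made up for this example.
 */
#if 0
static uint32_t iemExampleCalcEffAddr32(uint32_t u32Disp, bool fBase, uint32_t uBase,
                                        bool fIndex, uint32_t uIndex, uint8_t cShiftIndex)
{
    uint32_t uEffAddr = u32Disp;            /* 0, sign-extended disp8 or disp32, depending on ModRM.mod */
    if (fBase)
        uEffAddr += uBase;                  /* no base when mod=0 and SIB.base=5 */
    if (fIndex)
        uEffAddr += uIndex << cShiftIndex;  /* no index when SIB.index=4; cShiftIndex is SIB.scale */
    return uEffAddr;                        /* 32-bit arithmetic wraps just like on real hardware. */
}
#endif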
5935
5936#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5937 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5938 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
5939
5940#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5941 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5942 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
5943
5944#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5945 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5946 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
5947
5948/**
5949 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
5950 *
5951 * @returns New off.
5952 * @param pReNative The native recompile state.
5953 * @param off The code buffer offset.
5954 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
5955 * bit 4 to REX.X. The two bits are part of the
5956 * REG sub-field, which isn't needed in this
5957 * function.
5958 * @param uSibAndRspOffset Two parts:
5959 * - The first 8 bits make up the SIB byte.
5960 * - The next 8 bits are the fixed RSP/ESP offset
5961 * in case of a pop [xSP].
5962 * @param u32Disp The displacement byte/word/dword, if any.
5963 * @param cbInstr The size of the fully decoded instruction. Used
5964 * for RIP relative addressing.
5965 * @param idxVarRet The result variable number.
5966 * @param f64Bit Whether to use a 64-bit or 32-bit address size
5967 * when calculating the address.
5968 *
5969 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
5970 */
5971DECL_INLINE_THROW(uint32_t)
5972iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
5973 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
5974{
5975 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5976
5977 /*
5978 * Special case the rip + disp32 form first.
5979 */
5980 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
5981 {
5982#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5983 /* Need to take the current PC offset into account for the displacement; no need to flush here
5984 * as the PC is only read and there is no branching or calling of helpers involved. */
5985 u32Disp += pReNative->Core.offPc;
5986#endif
5987
5988 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5989 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
5990 kIemNativeGstRegUse_ReadOnly);
5991#ifdef RT_ARCH_AMD64
5992 if (f64Bit)
5993 {
5994 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
5995 if ((int32_t)offFinalDisp == offFinalDisp)
5996 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
5997 else
5998 {
5999 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
6000 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
6001 }
6002 }
6003 else
6004 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
6005
6006#elif defined(RT_ARCH_ARM64)
6007 if (f64Bit)
6008 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
6009 (int64_t)(int32_t)u32Disp + cbInstr);
6010 else
6011 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
6012 (int32_t)u32Disp + cbInstr);
6013
6014#else
6015# error "Port me!"
6016#endif
6017 iemNativeRegFreeTmp(pReNative, idxRegPc);
6018 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6019 return off;
6020 }
6021
6022 /* Calculate the fixed displacement (more on SIB.B=4 and SIB.B=5 further down). */
6023 int64_t i64EffAddr = 0;
6024 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6025 {
6026 case 0: break;
6027 case 1: i64EffAddr = (int8_t)u32Disp; break;
6028 case 2: i64EffAddr = (int32_t)u32Disp; break;
6029 default: AssertFailed();
6030 }
6031
6032 /* Get the register (or SIB) value. */
6033 uint8_t idxGstRegBase = UINT8_MAX;
6034 uint8_t idxGstRegIndex = UINT8_MAX;
6035 uint8_t cShiftIndex = 0;
6036 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6037 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6038 else /* SIB: */
6039 {
6040 /* index w/ scaling. */
6041 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6042 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6043 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6044 if (idxGstRegIndex == 4)
6045 {
6046 /* no index */
6047 cShiftIndex = 0;
6048 idxGstRegIndex = UINT8_MAX;
6049 }
6050
6051 /* base */
6052 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6053 if (idxGstRegBase == 4)
6054 {
6055 /* pop [rsp] hack */
6056 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
6057 }
6058 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6059 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6060 {
6061 /* mod=0 and base=5 -> disp32, no base reg. */
6062 Assert(i64EffAddr == 0);
6063 i64EffAddr = (int32_t)u32Disp;
6064 idxGstRegBase = UINT8_MAX;
6065 }
6066 }
6067
6068 /*
6069 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6070 * the start of the function.
6071 */
6072 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6073 {
6074 if (f64Bit)
6075 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6076 else
6077 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6078 return off;
6079 }
6080
6081 /*
6082 * Now emit code that calculates:
6083 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6084 * or if !f64Bit:
6085 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6086 */
6087 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6088 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6089 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6090 kIemNativeGstRegUse_ReadOnly);
6091 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6092 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6093 kIemNativeGstRegUse_ReadOnly);
6094
6095 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6096 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6097 {
6098 idxRegBase = idxRegIndex;
6099 idxRegIndex = UINT8_MAX;
6100 }
6101
6102#ifdef RT_ARCH_AMD64
6103 uint8_t bFinalAdj;
6104 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6105 bFinalAdj = 0; /* likely */
6106 else
6107 {
6108 /* pop [rsp] with a problematic disp32 value. Split out the
6109 RSP offset and add it separately afterwards (bFinalAdj). */
6110 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
6111 Assert(idxGstRegBase == X86_GREG_xSP);
6112 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6113 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6114 Assert(bFinalAdj != 0);
6115 i64EffAddr -= bFinalAdj;
6116 Assert((int32_t)i64EffAddr == i64EffAddr);
6117 }
6118 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6119//pReNative->pInstrBuf[off++] = 0xcc;
6120
6121 if (idxRegIndex == UINT8_MAX)
6122 {
6123 if (u32EffAddr == 0)
6124 {
6125 /* mov ret, base */
6126 if (f64Bit)
6127 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6128 else
6129 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6130 }
6131 else
6132 {
6133 /* lea ret, [base + disp32] */
6134 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6135 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6136 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
6137 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6138 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6139 | (f64Bit ? X86_OP_REX_W : 0);
6140 pbCodeBuf[off++] = 0x8d;
6141 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6142 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6143 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6144 else
6145 {
6146 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6147 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6148 }
6149 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6150 if (bMod == X86_MOD_MEM4)
6151 {
6152 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6153 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6154 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6155 }
6156 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6157 }
6158 }
6159 else
6160 {
6161 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6162 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6163 if (idxRegBase == UINT8_MAX)
6164 {
6165 /* lea ret, [(index64 << cShiftIndex) + disp32] */
6166 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
6167 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6168 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6169 | (f64Bit ? X86_OP_REX_W : 0);
6170 pbCodeBuf[off++] = 0x8d;
6171 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6172 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6173 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6174 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6175 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6176 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6177 }
6178 else
6179 {
6180 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6181 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6182 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6183 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6184 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6185 | (f64Bit ? X86_OP_REX_W : 0);
6186 pbCodeBuf[off++] = 0x8d;
6187 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6188 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6189 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6190 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6191 if (bMod != X86_MOD_MEM0)
6192 {
6193 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6194 if (bMod == X86_MOD_MEM4)
6195 {
6196 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6197 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6198 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6199 }
6200 }
6201 }
6202 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6203 }
6204
6205 if (!bFinalAdj)
6206 { /* likely */ }
6207 else
6208 {
6209 Assert(f64Bit);
6210 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
6211 }
6212
6213#elif defined(RT_ARCH_ARM64)
6214 if (i64EffAddr == 0)
6215 {
6216 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6217 if (idxRegIndex == UINT8_MAX)
6218 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
6219 else if (idxRegBase != UINT8_MAX)
6220 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6221 f64Bit, false /*fSetFlags*/, cShiftIndex);
6222 else
6223 {
6224 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
6225 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
6226 }
6227 }
6228 else
6229 {
6230 if (f64Bit)
6231 { /* likely */ }
6232 else
6233 i64EffAddr = (int32_t)i64EffAddr;
6234
6235 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
6236 {
6237 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6238 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
6239 }
6240 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
6241 {
6242 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6243 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
6244 }
6245 else
6246 {
6247 if (f64Bit)
6248 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
6249 else
6250 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
6251 if (idxRegBase != UINT8_MAX)
6252 {
6253 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6254 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
6255 }
6256 }
6257 if (idxRegIndex != UINT8_MAX)
6258 {
6259 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6260 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6261 f64Bit, false /*fSetFlags*/, cShiftIndex);
6262 }
6263 }
6264
6265#else
6266# error "port me"
6267#endif
6268
6269 if (idxRegIndex != UINT8_MAX)
6270 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6271 if (idxRegBase != UINT8_MAX)
6272 iemNativeRegFreeTmp(pReNative, idxRegBase);
6273 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6274 return off;
6275}
6276
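/*
 * Plain C sketch (illustrative only, kept out of the build with #if 0) of the
 * two special 64-bit cases handled above: the RIP relative form adds the
 * sign-extended disp32 and the instruction length to the current RIP, and the
 * "pop [rsp]" hack adds the fixed RSP offset packed into bits 8..15 of
 * uSibAndRspOffset on top of the regular base + index*scale + disp sum.  The
 * helper names are made up for this example.
 */
#if 0
static uint64_t iemExampleCalcRipRelative(uint64_t uRipInstr, uint8_t cbInstr, uint32_t u32Disp)
{
    /* RIP relative addressing is relative to the *next* instruction, hence the + cbInstr. */
    return uRipInstr + cbInstr + (uint64_t)(int64_t)(int32_t)u32Disp;
}

static uint8_t iemExampleRspOffsetFromSibAndRspOffset(uint32_t uSibAndRspOffset)
{
    /* Bits 0..7 hold the SIB byte; bits 8..15 hold the fixed RSP offset for the pop [rsp] case. */
    return (uint8_t)(uSibAndRspOffset >> 8);
}
#endif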
6277
6278/*********************************************************************************************************************************
6279* Memory fetches and stores common *
6280*********************************************************************************************************************************/
6281
6282typedef enum IEMNATIVEMITMEMOP
6283{
6284 kIemNativeEmitMemOp_Store = 0,
6285 kIemNativeEmitMemOp_Fetch,
6286 kIemNativeEmitMemOp_Fetch_Zx_U16,
6287 kIemNativeEmitMemOp_Fetch_Zx_U32,
6288 kIemNativeEmitMemOp_Fetch_Zx_U64,
6289 kIemNativeEmitMemOp_Fetch_Sx_U16,
6290 kIemNativeEmitMemOp_Fetch_Sx_U32,
6291 kIemNativeEmitMemOp_Fetch_Sx_U64
6292} IEMNATIVEMITMEMOP;
6293
6294/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
6295 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
6296 * (with iSegReg = UINT8_MAX). */
6297DECL_INLINE_THROW(uint32_t)
6298iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
6299 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAlignMaskAndCtl, IEMNATIVEMITMEMOP enmOp,
6300 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
6301{
6302 /*
6303 * Assert sanity.
6304 */
6305 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
6306 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
6307 Assert( enmOp != kIemNativeEmitMemOp_Store
6308 || pVarValue->enmKind == kIemNativeVarKind_Immediate
6309 || pVarValue->enmKind == kIemNativeVarKind_Stack);
6310 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
6311 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
6312 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
6313 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
6314 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6315 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
6316#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6317 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
6318 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
6319#else
6320 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
6321#endif
6322 Assert(!(fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)));
6323 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6324#ifdef VBOX_STRICT
6325 if (iSegReg == UINT8_MAX)
6326 {
6327 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6328 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6329 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6330 switch (cbMem)
6331 {
6332 case 1:
6333 Assert( pfnFunction
6334 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
6335 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6336 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6337 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6338 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6339 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
6340 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
6341 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
6342 : UINT64_C(0xc000b000a0009000) ));
6343 Assert(!fAlignMaskAndCtl);
6344 break;
6345 case 2:
6346 Assert( pfnFunction
6347 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
6348 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6349 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6350 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6351 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
6352 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
6353 : UINT64_C(0xc000b000a0009000) ));
6354 Assert(fAlignMaskAndCtl <= 1);
6355 break;
6356 case 4:
6357 Assert( pfnFunction
6358 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
6359 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
6360 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
6361 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
6362 : UINT64_C(0xc000b000a0009000) ));
6363 Assert(fAlignMaskAndCtl <= 3);
6364 break;
6365 case 8:
6366 Assert( pfnFunction
6367 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
6368 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
6369 : UINT64_C(0xc000b000a0009000) ));
6370 Assert(fAlignMaskAndCtl <= 7);
6371 break;
6372#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6373 case sizeof(RTUINT128U):
6374 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6375 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
6376 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
6377 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
6378 || ( enmOp == kIemNativeEmitMemOp_Store
6379 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
6380 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
6381 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
6382 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
6383 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
6384 : fAlignMaskAndCtl <= 15);
6385 break;
6386 case sizeof(RTUINT256U):
6387 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6388 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
6389 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
6390 || ( enmOp == kIemNativeEmitMemOp_Store
6391 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
6392 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
6393 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx
6394 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx
6395 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
6396 : fAlignMaskAndCtl <= 31);
6397 break;
6398#endif
6399 }
6400 }
6401 else
6402 {
6403 Assert(iSegReg < 6);
6404 switch (cbMem)
6405 {
6406 case 1:
6407 Assert( pfnFunction
6408 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
6409 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
6410 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6411 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6412 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6413 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
6414 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
6415 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
6416 : UINT64_C(0xc000b000a0009000) ));
6417 Assert(!fAlignMaskAndCtl);
6418 break;
6419 case 2:
6420 Assert( pfnFunction
6421 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
6422 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
6423 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
6424 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
6425 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
6426 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
6427 : UINT64_C(0xc000b000a0009000) ));
6428 Assert(fAlignMaskAndCtl <= 1);
6429 break;
6430 case 4:
6431 Assert( pfnFunction
6432 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
6433 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
6434 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
6435 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
6436 : UINT64_C(0xc000b000a0009000) ));
6437 Assert(fAlignMaskAndCtl <= 3);
6438 break;
6439 case 8:
6440 Assert( pfnFunction
6441 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
6442 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
6443 : UINT64_C(0xc000b000a0009000) ));
6444 Assert(fAlignMaskAndCtl <= 7);
6445 break;
6446#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6447 case sizeof(RTUINT128U):
6448 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6449 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
6450 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
6451 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
6452 || ( enmOp == kIemNativeEmitMemOp_Store
6453 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
6454 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
6455 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
6456 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
6457 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
6458 : fAlignMaskAndCtl <= 15);
6459 break;
6460 case sizeof(RTUINT256U):
6461 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6462 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
6463 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
6464 || ( enmOp == kIemNativeEmitMemOp_Store
6465 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
6466 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
6467 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx
6468 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx
6469 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
6470 : fAlignMaskAndCtl <= 31);
6471 break;
6472#endif
6473 }
6474 }
6475#endif
6476
6477#ifdef VBOX_STRICT
6478 /*
6479 * Check that the fExec flags we've got make sense.
6480 */
6481 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6482#endif
6483
6484 /*
6485 * To keep things simple we have to commit any pending writes first as we
6486 * may end up making calls.
6487 */
6488 /** @todo we could postpone this till we make the call and reload the
6489 * registers after returning from the call. Not sure if that's sensible or
6490 * not, though. */
6491#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6492 off = iemNativeRegFlushPendingWrites(pReNative, off);
6493#else
6494 /* The program counter is treated differently for now. */
6495 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
6496#endif
6497
6498#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6499 /*
6500 * Move/spill/flush stuff out of call-volatile registers.
6501 * This is the easy way out. We could contain this to the tlb-miss branch
6502 * by saving and restoring active stuff here.
6503 */
6504 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
6505#endif
6506
6507 /*
6508 * Define labels and allocate the result register (trying for the return
6509 * register if we can).
6510 */
6511 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6512#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6513 uint8_t idxRegValueFetch = UINT8_MAX;
6514
6515 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6516 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6517 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
6518 else
6519 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6520 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6521 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
6522 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
6523#else
6524 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6525 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6526 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
6527 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
6528#endif
6529 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
6530
6531#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6532 uint8_t idxRegValueStore = UINT8_MAX;
6533
6534 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6535 idxRegValueStore = !TlbState.fSkip
6536 && enmOp == kIemNativeEmitMemOp_Store
6537 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6538 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6539 : UINT8_MAX;
6540 else
6541 idxRegValueStore = !TlbState.fSkip
6542 && enmOp == kIemNativeEmitMemOp_Store
6543 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6544 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6545 : UINT8_MAX;
6546
6547#else
6548 uint8_t const idxRegValueStore = !TlbState.fSkip
6549 && enmOp == kIemNativeEmitMemOp_Store
6550 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6551 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6552 : UINT8_MAX;
6553#endif
6554 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
6555 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6556 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6557 : UINT32_MAX;
6558
6559 /*
6560 * Jump to the TLB lookup code.
6561 */
6562 if (!TlbState.fSkip)
6563 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6564
6565 /*
6566 * TlbMiss:
6567 *
6568 * Call helper to do the fetching.
6569 * We flush all guest register shadow copies here.
6570 */
6571 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
6572
6573#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6574 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6575#else
6576 RT_NOREF(idxInstr);
6577#endif
6578
6579#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6580 if (pReNative->Core.offPc)
6581 {
6582 /*
6583 * Update the program counter but restore it at the end of the TlbMiss branch.
6584 * This should allow delaying more program counter updates on the TlbLookup and hit paths,
6585 * which are hopefully much more frequent, reducing the number of memory accesses.
6586 */
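 /* Example (illustrative): if the preceding instructions in this translation block have
    accumulated pReNative->Core.offPc == 5, the RIP value in CPUMCTX is 5 bytes behind the
    current instruction.  The add+store below makes cpum.GstCtx.rip current for the helper
    call (which may raise guest exceptions), and the matching subtract+store after the call
    undoes it again so the delayed-update bookkeeping stays consistent. */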
6587 /* Allocate a temporary PC register. */
6588 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6589
6590 /* Perform the addition and store the result. */
6591 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6592 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6593
6594 /* Free and flush the PC register. */
6595 iemNativeRegFreeTmp(pReNative, idxPcReg);
6596 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
6597 }
6598#endif
6599
6600#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6601 /* Save variables in volatile registers. */
6602 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6603 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
6604 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
6605 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6606#endif
6607
6608 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
6609 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
6610#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6611 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6612 {
6613 /*
6614 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
6615 *
6616 * @note The host register assigned to the variable for the TlbLookup case above must not be
6617 * freed here, or the value loaded into it would not be synced back further down the road,
6618 * because the variable would no longer know it has a register assigned.
6619 *
6620 * @note For loads it is not required to sync what is in the assigned register with the stack slot
6621 * as it will be overwritten anyway.
6622 */
6623 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
6624 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
6625 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
6626 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
6627 }
6628 else
6629#endif
6630 if (enmOp == kIemNativeEmitMemOp_Store)
6631 {
6632 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
6633 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
6634#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6635 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6636#else
6637 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
6638 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
6639#endif
6640 }
6641
6642 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
6643 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
6644#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6645 fVolGregMask);
6646#else
6647 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
6648#endif
6649
6650 if (iSegReg != UINT8_MAX)
6651 {
6652 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
6653 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
6654 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
6655 }
6656
6657 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6658 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6659
6660 /* Done setting up parameters, make the call. */
6661 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6662
6663 /*
6664 * Put the result in the right register if this is a fetch.
6665 */
6666 if (enmOp != kIemNativeEmitMemOp_Store)
6667 {
6668#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6669 if ( cbMem == sizeof(RTUINT128U)
6670 || cbMem == sizeof(RTUINT256U))
6671 {
6672 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
6673
6674 /* Sync the value on the stack with the host register assigned to the variable. */
6675 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
6676 }
6677 else
6678#endif
6679 {
6680 Assert(idxRegValueFetch == pVarValue->idxReg);
6681 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
6682 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
6683 }
6684 }
6685
6686#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6687 /* Restore variables and guest shadow registers to volatile registers. */
6688 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6689 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6690#endif
6691
6692#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6693 if (pReNative->Core.offPc)
6694 {
6695 /*
6696 * Time to restore the program counter to its original value.
6697 */
6698 /* Allocate a temporary PC register. */
6699 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
6700 kIemNativeGstRegUse_ForUpdate);
6701
6702 /* Restore the original value. */
6703 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6704 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6705
6706 /* Free and flush the PC register. */
6707 iemNativeRegFreeTmp(pReNative, idxPcReg);
6708 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
6709 }
6710#endif
6711
6712#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6713 if (!TlbState.fSkip)
6714 {
6715 /* end of TlbMiss - Jump to the done label. */
6716 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6717 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6718
6719 /*
6720 * TlbLookup:
6721 */
6722 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl,
6723 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
6724 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
6725
6726 /*
6727 * Emit code to do the actual storing / fetching.
6728 */
6729 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
6730# ifdef IEM_WITH_TLB_STATISTICS
6731 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6732 enmOp == kIemNativeEmitMemOp_Store
6733 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
6734 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
6735# endif
6736 switch (enmOp)
6737 {
6738 case kIemNativeEmitMemOp_Store:
6739 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
6740 {
6741 switch (cbMem)
6742 {
6743 case 1:
6744 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6745 break;
6746 case 2:
6747 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6748 break;
6749 case 4:
6750 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6751 break;
6752 case 8:
6753 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6754 break;
6755#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6756 case sizeof(RTUINT128U):
6757 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6758 break;
6759 case sizeof(RTUINT256U):
6760 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6761 break;
6762#endif
6763 default:
6764 AssertFailed();
6765 }
6766 }
6767 else
6768 {
6769 switch (cbMem)
6770 {
6771 case 1:
6772 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
6773 idxRegMemResult, TlbState.idxReg1);
6774 break;
6775 case 2:
6776 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
6777 idxRegMemResult, TlbState.idxReg1);
6778 break;
6779 case 4:
6780 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
6781 idxRegMemResult, TlbState.idxReg1);
6782 break;
6783 case 8:
6784 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
6785 idxRegMemResult, TlbState.idxReg1);
6786 break;
6787 default:
6788 AssertFailed();
6789 }
6790 }
6791 break;
6792
6793 case kIemNativeEmitMemOp_Fetch:
6794 case kIemNativeEmitMemOp_Fetch_Zx_U16:
6795 case kIemNativeEmitMemOp_Fetch_Zx_U32:
6796 case kIemNativeEmitMemOp_Fetch_Zx_U64:
6797 switch (cbMem)
6798 {
6799 case 1:
6800 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6801 break;
6802 case 2:
6803 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6804 break;
6805 case 4:
6806 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6807 break;
6808 case 8:
6809 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6810 break;
6811#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6812 case sizeof(RTUINT128U):
6813 /*
6814 * No need to sync back the register with the stack, this is done by the generic variable handling
6815 * code if there is a register assigned to a variable and the stack must be accessed.
6816 */
6817 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6818 break;
6819 case sizeof(RTUINT256U):
6820 /*
6821 * No need to sync back the register with the stack, this is done by the generic variable handling
6822 * code if there is a register assigned to a variable and the stack must be accessed.
6823 */
6824 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6825 break;
6826#endif
6827 default:
6828 AssertFailed();
6829 }
6830 break;
6831
6832 case kIemNativeEmitMemOp_Fetch_Sx_U16:
6833 Assert(cbMem == 1);
6834 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6835 break;
6836
6837 case kIemNativeEmitMemOp_Fetch_Sx_U32:
6838 Assert(cbMem == 1 || cbMem == 2);
6839 if (cbMem == 1)
6840 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6841 else
6842 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6843 break;
6844
6845 case kIemNativeEmitMemOp_Fetch_Sx_U64:
6846 switch (cbMem)
6847 {
6848 case 1:
6849 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6850 break;
6851 case 2:
6852 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6853 break;
6854 case 4:
6855 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6856 break;
6857 default:
6858 AssertFailed();
6859 }
6860 break;
6861
6862 default:
6863 AssertFailed();
6864 }
6865
6866 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6867
6868 /*
6869 * TlbDone:
6870 */
6871 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6872
6873 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
6874
6875# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6876 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
6877 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6878# endif
6879 }
6880#else
6881 RT_NOREF(fAlignMaskAndCtl, idxLabelTlbMiss);
6882#endif
6883
6884 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
6885 iemNativeVarRegisterRelease(pReNative, idxVarValue);
6886 return off;
6887}
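/* When the TLB lookup is not skipped, the code emitted above has the shape:
   jump TlbLookup; TlbMiss: save volatile registers, load the helper arguments, call
   pfnFunction, restore, jump TlbDone; TlbLookup: inline data TLB probe plus the actual
   load/store on the hit path; TlbDone: both paths converge here. */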
6888
6889
6890
6891/*********************************************************************************************************************************
6892* Memory fetches (IEM_MEM_FETCH_XXX). *
6893*********************************************************************************************************************************/
6894
6895/* 8-bit segmented: */
6896#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
6897 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
6898 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
6899 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6900
6901#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6902 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6903 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
6904 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6905
6906#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6907 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6908 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6909 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6910
6911#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6912 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6913 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6914 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6915
6916#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6917 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6918 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
6919 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
6920
6921#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6922 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6923 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6924 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
6925
6926#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6927 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6928 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6929 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
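
/* Illustrative expansion (not generated code): IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem)
   above becomes a call to iemNativeEmitMemFetchStoreDataCommon with cbMem = 1, fAlignMaskAndCtl = 0,
   enmOp = kIemNativeEmitMemOp_Fetch_Zx_U32 and iemNativeHlpMemFetchDataU8 as the TLB-miss fallback,
   i.e. an inlined data TLB lookup that on the hit path does an 8-bit load zero-extended into the
   host register backing a_u32Dst. */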
6930
6931/* 16-bit segmented: */
6932#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6933 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6934 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6935 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6936
6937#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
6938 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6939 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6940 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
6941
6942#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6943 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6944 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6945 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6946
6947#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6948 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6949 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6950 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6951
6952#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6953 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6954 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6955 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
6956
6957#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6958 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6959 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6960 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
6961
6962
6963/* 32-bit segmented: */
6964#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6965 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6966 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6967 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6968
6969#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
6970 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6971 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6972 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
6973
6974#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6975 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6976 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6977 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6978
6979#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6980 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6981 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6982 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
6983
6984#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
6985 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
6986 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6987 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
6988
6989#define IEM_MC_FETCH_MEM_I16_DISP(a_i16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
6990 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
6991 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6992 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
6993
6994#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
6995 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
6996 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6997 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6998
6999#define IEM_MC_FETCH_MEM_I32_DISP(a_i32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7000 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
7001 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7002 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7003
7004#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
7005 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
7006 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7007 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7008
7009AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
7010#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
7011 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
7012 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7013 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7014
7015
7016/* 64-bit segmented: */
7017#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7018 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7019 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7020 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7021
7022AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
7023#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
7024 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
7025 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7026 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7027
7028
7029/* 8-bit flat: */
7030#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
7031 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
7032 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7033 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7034
7035#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
7036 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7037 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7038 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7039
7040#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
7041 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7042 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7043 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7044
7045#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
7046 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7047 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7048 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7049
7050#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
7051 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7052 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7053 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7054
7055#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
7056 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7057 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7058 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7059
7060#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7061 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7062 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7063 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7064
7065
7066/* 16-bit flat: */
7067#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7068 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7069 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7070 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7071
7072#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7073 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7074 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7075 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7076
7077#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
7078 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7079 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7080 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7081
7082#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
7083 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7084 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7085 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7086
7087#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
7088 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7089 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7090 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7091
7092#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
7093 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7094 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7095 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7096
7097/* 32-bit flat: */
7098#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7099 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7100 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7101 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7102
7103#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
7104 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7105 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7106 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7107
7108#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
7109 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7110 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7111 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7112
7113#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
7114 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7115 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7116 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7117
7118#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
7119 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7120 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7121 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7122
7123#define IEM_MC_FETCH_MEM_FLAT_I16_DISP(a_i16Dst, a_GCPtrMem, a_offDisp) \
7124 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7125 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7126 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7127
7128#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
7129 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7130 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7131 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7132
7133#define IEM_MC_FETCH_MEM_FLAT_I32_DISP(a_i32Dst, a_GCPtrMem, a_offDisp) \
7134 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7135 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7136 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7137
7138#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
7139 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
7140 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7141 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7142
7143#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
7144 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
7145 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7146 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7147
7148
7149/* 64-bit flat: */
7150#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7151 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7152 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7153 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7154
7155#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
7156 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
7157 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7158 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7159
7160#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7161/* 128-bit segmented: */
7162#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
7163 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7164 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7165 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
7166
7167#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
7168 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7169 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7170 kIemNativeEmitMemOp_Fetch, \
7171 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7172
7173AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7174#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
7175 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, sizeof(X86XMMREG), \
7176 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7177 kIemNativeEmitMemOp_Fetch, \
7178 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7179
7180#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7181 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7182 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7183 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7184
7185#define IEM_MC_FETCH_MEM_XMM_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7186 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7187 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7188 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7189
7190
7191/* 128-bit flat: */
7192#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
7193 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7194 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7195 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
7196
7197#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
7198 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7199 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7200 kIemNativeEmitMemOp_Fetch, \
7201 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7202
7203#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
7204 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, sizeof(X86XMMREG), \
7205 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7206 kIemNativeEmitMemOp_Fetch, \
7207 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7208
7209#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
7210 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7211 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7212 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7213
7214#define IEM_MC_FETCH_MEM_FLAT_XMM_NO_AC(a_uXmmDst, a_GCPtrMem) \
7215 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
7216 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7217 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7218
7219/* 256-bit segmented: */
7220#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
7221 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7222 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7223 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7224
7225#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7226 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7227 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7228 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7229
7230#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
7231 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
7232 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7233 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7234
7235#define IEM_MC_FETCH_MEM_YMM_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7236 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7237 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7238 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7239
7240
7241/* 256-bit flat: */
7242#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
7243 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7244 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7245 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7246
7247#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
7248 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7249 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7250 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7251
7252#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
7253 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
7254 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7255 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7256
7257#define IEM_MC_FETCH_MEM_FLAT_YMM_NO_AC(a_uYmmDst, a_GCPtrMem) \
7258 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uYmmDst, UINT8_MAX, a_GCPtrMem, \
7259 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7260 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7261
7262#endif
7263
7264
7265/*********************************************************************************************************************************
7266* Memory stores (IEM_MEM_STORE_XXX). *
7267*********************************************************************************************************************************/
7268
7269#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
7270 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
7271 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
7272 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7273
7274#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
7275 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
7276 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7277 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7278
7279#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
7280 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
7281 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
7282 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7283
7284#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
7285 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
7286 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
7287 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7288
7289
7290#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
7291 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
7292 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
7293 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7294
7295#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
7296 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
7297 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7298 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7299
7300#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
7301 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
7302 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
7303 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7304
7305#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
7306 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
7307 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
7308 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7309
7310
7311#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
7312 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
7313 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7314
7315#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
7316 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7317 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7318
7319#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
7320 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7321 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7322
7323#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
7324 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7325 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7326
7327
7328#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
7329 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7330 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7331
7332#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
7333 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7334 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7335
7336#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
7337 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7338 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7339
7340#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
7341 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7342 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7343
7344/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
7345 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
7346DECL_INLINE_THROW(uint32_t)
7347iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
7348 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
7349{
7350 /*
7351 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
7352 * to do the grunt work.
7353 */
7354 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
7355 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
7356 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
7357 pfnFunction, idxInstr);
7358 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
7359 return off;
7360}
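
/* E.g. IEM_MC_STORE_MEM_U32_CONST(X86_SREG_DS, GCPtrMem, UINT32_C(0x12345678)) above (segment and
   value picked purely for illustration) allocates a 4-byte immediate variable holding 0x12345678
   and goes through iemNativeEmitMemFetchStoreDataCommon with a natural (cbMem - 1) alignment mask;
   since the variable is of the immediate kind, the TLB-hit path uses the
   iemNativeEmitStoreImm32ByGprEx fast path instead of first loading the value into a register. */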
7361
7362
7363#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7364# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
7365 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7366 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7367 kIemNativeEmitMemOp_Store, \
7368 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
7369
7370# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
7371 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
7372 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7373 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
7374
7375# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
7376 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
7377 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7378 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
7379
7380# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
7381 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
7382 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
7383 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
7384
7385
7386# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
7387 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7388 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7389 kIemNativeEmitMemOp_Store, \
7390 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
7391
7392# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
7393 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
7394 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7395 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
7396
7397# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
7398 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
7399 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7400 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
7401
7402# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
7403 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
7404 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
7405 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
7406#endif
7407
7408
7409
7410/*********************************************************************************************************************************
7411* Stack Accesses. *
7412*********************************************************************************************************************************/
7413/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
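/* E.g. RT_MAKE_U32_FROM_U8(16, 32, 0, 0) encodes a 16-bit push (cBitsVar = 16) on a flat 32-bit
   stack (cBitsFlat = 32) that is not a segment register push (fSReg = 0); the first argument ends
   up in the least significant byte, which is why the emitter below checks RT_BYTE2() to see
   whether a flat variant is being generated. */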
7414#define IEM_MC_PUSH_U16(a_u16Value) \
7415 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
7416 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
7417#define IEM_MC_PUSH_U32(a_u32Value) \
7418 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
7419 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
7420#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
7421 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
7422 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
7423#define IEM_MC_PUSH_U64(a_u64Value) \
7424 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
7425 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
7426
7427#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
7428 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
7429 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
7430#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
7431 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
7432 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
7433#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
7434 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
7435 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
7436
7437#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
7438 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
7439 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
7440#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
7441 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
7442 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
7443
7444
7445/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
7446DECL_INLINE_THROW(uint32_t)
7447iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
7448 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
7449{
7450 /*
7451 * Assert sanity.
7452 */
7453 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7454 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7455#ifdef VBOX_STRICT
7456 if (RT_BYTE2(cBitsVarAndFlat) != 0)
7457 {
7458 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7459 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7460 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7461 Assert( pfnFunction
7462 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
7463 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
7464 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
7465 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
7466 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
7467 : UINT64_C(0xc000b000a0009000) ));
7468 }
7469 else
7470 Assert( pfnFunction
7471 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
7472 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
7473 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
7474 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
7475 : UINT64_C(0xc000b000a0009000) ));
7476#endif
7477
7478#ifdef VBOX_STRICT
7479 /*
7480 * Check that the fExec flags we've got make sense.
7481 */
7482 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7483#endif
7484
7485 /*
7486 * To keep things simple we have to commit any pending writes first as we
7487 * may end up making calls.
7488 */
7489 /** @todo we could postpone this till we make the call and reload the
7490 * registers after returning from the call. Not sure if that's sensible or
7491 * not, though. */
7492 off = iemNativeRegFlushPendingWrites(pReNative, off);
7493
7494 /*
7495 * First we calculate the new RSP and the effective stack pointer value.
7496 * For 64-bit mode and flat 32-bit these two are the same.
7497 * (Code structure is very similar to that of PUSH)
7498 */
7499 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
7500 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
7501 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
7502 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
7503 ? cbMem : sizeof(uint16_t);
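    /* Note: On Intel CPUs a segment register push outside 16-bit mode only writes
       the low 16 bits while RSP is still adjusted by the full operand size, so
       cbMemAccess can be narrower than cbMem here. */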
7504 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
7505 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
7506 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
7507 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
7508 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
7509 if (cBitsFlat != 0)
7510 {
7511 Assert(idxRegEffSp == idxRegRsp);
7512 Assert(cBitsFlat == 32 || cBitsFlat == 64);
7513 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
7514 if (cBitsFlat == 64)
7515 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
7516 else
7517 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
7518 }
7519 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
7520 {
7521 Assert(idxRegEffSp != idxRegRsp);
7522 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
7523 kIemNativeGstRegUse_ReadOnly);
7524#ifdef RT_ARCH_AMD64
7525 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7526#else
7527 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7528#endif
7529 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
7530 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
7531 offFixupJumpToUseOtherBitSp = off;
7532 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7533 {
7534 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
7535 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7536 }
7537 else
7538 {
7539 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
7540 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7541 }
7542 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7543 }
7544 /* SpUpdateEnd: */
7545 uint32_t const offLabelSpUpdateEnd = off;
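    /* In the generated code idxRegEffSp now holds the effective address of the
       new stack entry and idxRegRsp the updated RSP/ESP/SP value. */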
7546
7547 /*
7548 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
7549 * we're skipping lookup).
7550 */
7551 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
7552 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
7553 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7554 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
7555 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7556 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7557 : UINT32_MAX;
7558 uint8_t const idxRegValue = !TlbState.fSkip
7559 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7560 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
7561 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
7562 : UINT8_MAX;
7563 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7564
7565
7566 if (!TlbState.fSkip)
7567 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7568 else
7569 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
7570
7571 /*
7572 * Use16BitSp:
7573 */
7574 if (cBitsFlat == 0)
7575 {
7576#ifdef RT_ARCH_AMD64
7577 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7578#else
7579 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7580#endif
7581 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
7582 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7583 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7584 else
7585 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7586 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
7587 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7588 }
7589
7590 /*
7591 * TlbMiss:
7592 *
7593 * Call helper to do the pushing.
7594 */
7595 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
7596
7597#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7598 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7599#else
7600 RT_NOREF(idxInstr);
7601#endif
7602
7603 /* Save variables in volatile registers. */
7604 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7605 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
7606 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
7607 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
7608 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7609
7610 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
7611 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
7612 {
7613 /* Swap them using ARG0 as temp register: */
7614 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
7615 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
7616 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
7617 }
7618 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
7619 {
7620 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
7621 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
7622 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7623
7624 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
7625 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
7626 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7627 }
7628 else
7629 {
7630 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
7631 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7632
7633 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
7634 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
7635 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
7636 }
7637
7638 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7639 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7640
7641 /* Done setting up parameters, make the call. */
7642 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7643
7644 /* Restore variables and guest shadow registers to volatile registers. */
7645 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7646 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7647
7648#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7649 if (!TlbState.fSkip)
7650 {
7651 /* end of TlbMiss - Jump to the done label. */
7652 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7653 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7654
7655 /*
7656 * TlbLookup:
7657 */
7658 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
7659 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7660
7661 /*
7662 * Emit code to do the actual storing / fetching.
7663 */
7664 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7665# ifdef IEM_WITH_TLB_STATISTICS
7666 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7667 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
7668# endif
7669 if (idxRegValue != UINT8_MAX)
7670 {
7671 switch (cbMemAccess)
7672 {
7673 case 2:
7674 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7675 break;
7676 case 4:
7677 if (!fIsIntelSeg)
7678 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7679 else
7680 {
7681                        /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
7682                           PUSH FS in real mode, so we have to try to emulate that here.
7683                           We borrow the now unused idxReg1 from the TLB lookup code here. */
7684 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
7685 kIemNativeGstReg_EFlags);
7686 if (idxRegEfl != UINT8_MAX)
7687 {
7688#ifdef RT_ARCH_AMD64
7689 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
7690 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
7691 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7692#else
7693 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
7694 off, TlbState.idxReg1, idxRegEfl,
7695 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7696#endif
7697 iemNativeRegFreeTmp(pReNative, idxRegEfl);
7698 }
7699 else
7700 {
7701 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
7702 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
7703 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
7704 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7705 }
7706 /* ASSUMES the upper half of idxRegValue is ZERO. */
7707 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
7708 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
7709 }
7710 break;
7711 case 8:
7712 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7713 break;
7714 default:
7715 AssertFailed();
7716 }
7717 }
7718 else
7719 {
7720 switch (cbMemAccess)
7721 {
7722 case 2:
7723 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7724 idxRegMemResult, TlbState.idxReg1);
7725 break;
7726 case 4:
7727 Assert(!fIsSegReg);
7728 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7729 idxRegMemResult, TlbState.idxReg1);
7730 break;
7731 case 8:
7732 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
7733 break;
7734 default:
7735 AssertFailed();
7736 }
7737 }
7738
7739 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7740 TlbState.freeRegsAndReleaseVars(pReNative);
7741
7742 /*
7743 * TlbDone:
7744 *
7745 * Commit the new RSP value.
7746 */
7747 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7748 }
7749#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
7750
7751#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7752 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
7753#endif
7754 iemNativeRegFreeTmp(pReNative, idxRegRsp);
7755 if (idxRegEffSp != idxRegRsp)
7756 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
7757
7758    /* The value variable is implicitly flushed. */
7759 if (idxRegValue != UINT8_MAX)
7760 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7761 iemNativeVarFreeLocal(pReNative, idxVarValue);
7762
7763 return off;
7764}
7765
7766
7767
7768/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
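/* Same packing as for the push statements above; byte 3 (fSReg) is unused here. */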
7769#define IEM_MC_POP_GREG_U16(a_iGReg) \
7770 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
7771 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
7772#define IEM_MC_POP_GREG_U32(a_iGReg) \
7773 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
7774 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
7775#define IEM_MC_POP_GREG_U64(a_iGReg) \
7776 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
7777 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
7778
7779#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
7780 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
7781 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
7782#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
7783 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
7784 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
7785
7786#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
7787 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
7788 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
7789#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
7790 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
7791 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
7792
7793
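/** Emits the 16-bit SP variant of the POP stack pointer update: zero-extends SP
 *  into idxRegEffSp as the effective address to read from and advances SP by
 *  cbMem with 16-bit wrap-around, leaving RSP bits 63:16 untouched. */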
7794DECL_FORCE_INLINE_THROW(uint32_t)
7795iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
7796 uint8_t idxRegTmp)
7797{
7798 /* Use16BitSp: */
7799#ifdef RT_ARCH_AMD64
7800 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
7801 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
7802 RT_NOREF(idxRegTmp);
7803#else
7804 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
7805 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
7806 /* add tmp, regrsp, #cbMem */
7807 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
7808 /* and tmp, tmp, #0xffff */
7809 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
7810 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
7811    /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
7812 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
7813#endif
7814 return off;
7815}
7816
7817
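/** Emits the 32-bit ESP variant of the POP stack pointer update: copies ESP into
 *  idxRegEffSp as the effective address to read from and advances ESP by cbMem. */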
7818DECL_FORCE_INLINE(uint32_t)
7819iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
7820{
7821 /* Use32BitSp: */
7822 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
7823 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
7824 return off;
7825}
7826
7827
7828/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
7829DECL_INLINE_THROW(uint32_t)
7830iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
7831 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
7832{
7833 /*
7834 * Assert sanity.
7835 */
7836 Assert(idxGReg < 16);
7837#ifdef VBOX_STRICT
7838 if (RT_BYTE2(cBitsVarAndFlat) != 0)
7839 {
7840 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7841 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7842 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7843 Assert( pfnFunction
7844 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
7845 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
7846 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
7847 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
7848 : UINT64_C(0xc000b000a0009000) ));
7849 }
7850 else
7851 Assert( pfnFunction
7852 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
7853 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
7854 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
7855 : UINT64_C(0xc000b000a0009000) ));
7856#endif
7857
7858#ifdef VBOX_STRICT
7859 /*
7860 * Check that the fExec flags we've got make sense.
7861 */
7862 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7863#endif
7864
7865 /*
7866 * To keep things simple we have to commit any pending writes first as we
7867 * may end up making calls.
7868 */
7869 off = iemNativeRegFlushPendingWrites(pReNative, off);
7870
7871 /*
7872 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
7873 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
7874 * directly as the effective stack pointer.
7875 * (Code structure is very similar to that of PUSH)
7876 */
7877 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
7878 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
7879 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
7880 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
7881 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
7882 /** @todo can do a better job picking the register here. For cbMem >= 4 this
7883 * will be the resulting register value. */
7884 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
7885
7886 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
7887 if (cBitsFlat != 0)
7888 {
7889 Assert(idxRegEffSp == idxRegRsp);
7890 Assert(cBitsFlat == 32 || cBitsFlat == 64);
7891 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
7892 }
7893 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
7894 {
7895 Assert(idxRegEffSp != idxRegRsp);
7896 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
7897 kIemNativeGstRegUse_ReadOnly);
7898#ifdef RT_ARCH_AMD64
7899 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7900#else
7901 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7902#endif
7903 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
7904 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
7905 offFixupJumpToUseOtherBitSp = off;
7906 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7907 {
7908/** @todo can skip idxRegRsp updating when popping ESP. */
7909 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
7910 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7911 }
7912 else
7913 {
7914 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
7915 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
7916 }
7917 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7918 }
7919 /* SpUpdateEnd: */
7920 uint32_t const offLabelSpUpdateEnd = off;
7921
7922 /*
7923 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
7924 * we're skipping lookup).
7925 */
7926 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
7927 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
7928 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7929 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
7930 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7931 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7932 : UINT32_MAX;
7933
7934 if (!TlbState.fSkip)
7935 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7936 else
7937 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
7938
7939 /*
7940 * Use16BitSp:
7941 */
7942 if (cBitsFlat == 0)
7943 {
7944#ifdef RT_ARCH_AMD64
7945 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7946#else
7947 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7948#endif
7949 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
7950 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7951 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
7952 else
7953 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7954 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
7955 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7956 }
7957
7958 /*
7959 * TlbMiss:
7960 *
7961 * Call helper to do the pushing.
7962     * Call helper to do the popping.
7963 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
7964
7965#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7966 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7967#else
7968 RT_NOREF(idxInstr);
7969#endif
7970
7971 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7972 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
7973 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
7974 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7975
7976
7977 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
7978 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
7979 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7980
7981 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7982 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7983
7984 /* Done setting up parameters, make the call. */
7985 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7986
7987 /* Move the return register content to idxRegMemResult. */
7988 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
7989 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
7990
7991 /* Restore variables and guest shadow registers to volatile registers. */
7992 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7993 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7994
7995#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7996 if (!TlbState.fSkip)
7997 {
7998 /* end of TlbMiss - Jump to the done label. */
7999 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8000 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8001
8002 /*
8003 * TlbLookup:
8004 */
8005 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
8006 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8007
8008 /*
8009         * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
8010 */
8011 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8012# ifdef IEM_WITH_TLB_STATISTICS
8013 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8014 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8015# endif
8016 switch (cbMem)
8017 {
8018 case 2:
8019 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8020 break;
8021 case 4:
8022 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8023 break;
8024 case 8:
8025 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8026 break;
8027 default:
8028 AssertFailed();
8029 }
8030
8031 TlbState.freeRegsAndReleaseVars(pReNative);
8032
8033 /*
8034 * TlbDone:
8035 *
8036         * Set the new RSP value (FLAT accesses need to calculate it first) and
8037 * commit the popped register value.
8038 */
8039 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8040 }
8041#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8042
8043 if (idxGReg != X86_GREG_xSP)
8044 {
8045 /* Set the register. */
8046 if (cbMem >= sizeof(uint32_t))
8047 {
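            /* A 32-bit or 64-bit pop writes the full register (32-bit pops
               zero-extend per x86-64 rules), so the guest shadow can simply be
               retargeted to idxRegMemResult instead of doing a 16-bit merge. */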
8048#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8049 AssertMsg( pReNative->idxCurCall == 0
8050 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
8051 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName,
8052 iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
8053#endif
8054 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
8055#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8056 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
8057#endif
8058#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8059 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
8060 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8061#endif
8062 }
8063 else
8064 {
8065 Assert(cbMem == sizeof(uint16_t));
8066 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
8067 kIemNativeGstRegUse_ForUpdate);
8068 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
8069#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8070 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8071#endif
8072 iemNativeRegFreeTmp(pReNative, idxRegDst);
8073 }
8074
8075 /* Complete RSP calculation for FLAT mode. */
8076 if (idxRegEffSp == idxRegRsp)
8077 {
8078 if (cBitsFlat == 64)
8079 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8080 else
8081 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8082 }
8083 }
8084 else
8085 {
8086        /* We're popping RSP, ESP or SP. Only this one needs a bit of extra work, of course. */
8087 if (cbMem == sizeof(uint64_t))
8088 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
8089 else if (cbMem == sizeof(uint32_t))
8090 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
8091 else
8092 {
8093 if (idxRegEffSp == idxRegRsp)
8094 {
8095 if (cBitsFlat == 64)
8096 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8097 else
8098 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8099 }
8100 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
8101 }
8102 }
8103
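    /* With IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK the RSP change is left to the
       delayed writeback machinery instead of being stored to CPUMCTX right here. */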
8104#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8105 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
8106#endif
8107
8108 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8109 if (idxRegEffSp != idxRegRsp)
8110 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8111 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8112
8113 return off;
8114}
8115
8116
8117
8118/*********************************************************************************************************************************
8119* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
8120*********************************************************************************************************************************/
8121
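/* Each IEM_MC_MEM_MAP_XXX statement below expands to iemNativeEmitMemMapCommon
   with the access flags, the alignment mask/control and the matching TLB-miss
   helper; the IEM_MC_MEM_FLAT_MAP_XXX variants pass UINT8_MAX as the segment. */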
8122#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8123 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8124 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8125 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
8126
8127#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8128 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8129 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8130 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
8131
8132#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8133 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8134 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8135 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
8136
8137#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8138 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8139 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8140 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
8141
8142
8143#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8144 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8145 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8146 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
8147
8148#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8149 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8150 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8151 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
8152
8153#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8154 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8155 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8156 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8157
8158#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8159 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8160 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8161 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
8162
8163#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8164 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
8165 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8166 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8167
8168
8169#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8170 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8171 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8172 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
8173
8174#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8175 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8176 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8177 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
8178
8179#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8180 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8181 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8182 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8183
8184#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8185 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8186 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8187 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
8188
8189#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8190 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
8191 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8192 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8193
8194
8195#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8196 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8197 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8198 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
8199
8200#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8201 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8202 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8203 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
8204#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8205 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8206 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8207 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8208
8209#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8210 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8211 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8212 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
8213
8214#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8215 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
8216 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8217 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8218
8219
8220#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8221 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8222 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8223 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
8224
8225#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8226 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8227 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
8228 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
8229
8230
8231#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8232 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8233 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8234 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
8235
8236#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8237 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8238 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8239 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
8240
8241#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8242 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8243 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8244 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
8245
8246#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8247 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8248 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8249 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
8250
8251
8252
8253#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8254 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8255 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8256 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
8257
8258#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8259 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8260 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8261 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
8262
8263#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8264 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8265 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8266 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
8267
8268#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8269 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8270 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8271 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
8272
8273
8274#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8275 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8276 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8277 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
8278
8279#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8280 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8281 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8282 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
8283
8284#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8285 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8286 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8287 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
8288
8289#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8290 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8291 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8292 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
8293
8294#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
8295 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
8296 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8297 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
8298
8299
8300#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8301 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8302 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8303 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
8304
8305#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8306 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8307 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8308 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
8309
8310#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8311 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8312 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8313 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
8314
8315#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8316 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8317 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8318 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
8319
8320#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
8321 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
8322 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8323 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
8324
8325
8326#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8327 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8328 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8329 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
8330
8331#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8332 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8333 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8334 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
8335
8336#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8337 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8338 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8339 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
8340
8341#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8342 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8343 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8344 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
8345
8346#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
8347 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
8348 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8349 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
8350
8351
8352#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
8353 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
8354 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8355 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
8356
8357#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
8358 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
8359 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
8360 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
8361
8362
8363#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8364 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8365 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8366 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
8367
8368#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8369 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8370 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8371 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
8372
8373#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8374 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8375 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8376 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
8377
8378#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8379 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8380 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8381 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
8382
8383
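/** Common emitter for the IEM_MC_MEM_MAP_XXX and IEM_MC_MEM_FLAT_MAP_XXX
 *  statements above; yields the host mapping pointer in idxVarMem and the
 *  unmap-info cookie in idxVarUnmapInfo. */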
8384DECL_INLINE_THROW(uint32_t)
8385iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
8386 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint32_t fAlignMaskAndCtl,
8387 uintptr_t pfnFunction, uint8_t idxInstr)
8388{
8389 /*
8390 * Assert sanity.
8391 */
8392 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
8393 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
8394 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
8395 && pVarMem->cbVar == sizeof(void *),
8396 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8397
8398 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
8399 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
8400 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
8401 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
8402 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8403
8404 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
8405 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
8406 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
8407 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
8408 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8409
8410 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
8411
8412 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
8413
8414#ifdef VBOX_STRICT
8415# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
8416 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
8417 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
8418 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
8419 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
8420# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
8421 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
8422 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
8423 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
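/* The IEM_MAP_HLP_FN* helpers above only serve the strict-build asserts below,
   mapping the access flags onto the expected Rw/Ro/Wo/Atomic helper variant. */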
8424
8425 if (iSegReg == UINT8_MAX)
8426 {
8427 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8428 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8429 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8430 switch (cbMem)
8431 {
8432 case 1:
8433 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8));
8434 Assert(!fAlignMaskAndCtl);
8435 break;
8436 case 2:
8437 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16));
8438 Assert(fAlignMaskAndCtl < 2);
8439 break;
8440 case 4:
8441 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32));
8442 Assert(fAlignMaskAndCtl < 4);
8443 break;
8444 case 8:
8445 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64));
8446 Assert(fAlignMaskAndCtl < 8);
8447 break;
8448 case 10:
8449 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
8450 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
8451 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
8452 Assert(fAlignMaskAndCtl < 8);
8453 break;
8454 case 16:
8455 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128));
8456 Assert(fAlignMaskAndCtl < 16);
8457 break;
8458# if 0
8459 case 32:
8460 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256));
8461 Assert(fAlignMaskAndCtl < 32);
8462 break;
8463 case 64:
8464 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512));
8465 Assert(fAlignMaskAndCtl < 64);
8466 break;
8467# endif
8468 default: AssertFailed(); break;
8469 }
8470 }
8471 else
8472 {
8473 Assert(iSegReg < 6);
8474 switch (cbMem)
8475 {
8476 case 1:
8477 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8));
8478 Assert(!fAlignMaskAndCtl);
8479 break;
8480 case 2:
8481 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16));
8482 Assert(fAlignMaskAndCtl < 2);
8483 break;
8484 case 4:
8485 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32));
8486 Assert(fAlignMaskAndCtl < 4);
8487 break;
8488 case 8:
8489 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64));
8490 Assert(fAlignMaskAndCtl < 8);
8491 break;
8492 case 10:
8493 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
8494 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
8495 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
8496 Assert(fAlignMaskAndCtl < 8);
8497 break;
8498 case 16:
8499 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128));
8500 Assert(fAlignMaskAndCtl < 16);
8501 break;
8502# if 0
8503 case 32:
8504 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256));
8505 Assert(fAlignMaskAndCtl < 32);
8506 break;
8507 case 64:
8508 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512));
8509 Assert(fAlignMaskAndCtl < 64);
8510 break;
8511# endif
8512 default: AssertFailed(); break;
8513 }
8514 }
8515# undef IEM_MAP_HLP_FN
8516# undef IEM_MAP_HLP_FN_NO_AT
8517#endif
8518
8519#ifdef VBOX_STRICT
8520 /*
8521 * Check that the fExec flags we've got make sense.
8522 */
8523 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8524#endif
8525
8526 /*
8527 * To keep things simple we have to commit any pending writes first as we
8528 * may end up making calls.
8529 */
8530 off = iemNativeRegFlushPendingWrites(pReNative, off);
8531
8532#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8533 /*
8534 * Move/spill/flush stuff out of call-volatile registers.
8535 * This is the easy way out. We could contain this to the tlb-miss branch
8536 * by saving and restoring active stuff here.
8537 */
8538 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
8539 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
8540#endif
8541
8542 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
8543 while the tlb-miss codepath will temporarily put it on the stack.
8544       Set the type to stack here so we don't need to do it twice below. */
8545 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
8546 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
8547 /** @todo use a tmp register from TlbState, since they'll be free after tlb
8548 * lookup is done. */
8549
8550 /*
8551 * Define labels and allocate the result register (trying for the return
8552 * register if we can).
8553 */
8554 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8555 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
8556 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
8557 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
8558 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
8559 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8560 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8561 : UINT32_MAX;
8562//off=iemNativeEmitBrk(pReNative, off, 0);
8563 /*
8564 * Jump to the TLB lookup code.
8565 */
8566 if (!TlbState.fSkip)
8567 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8568
8569 /*
8570 * TlbMiss:
8571 *
8572 * Call helper to do the fetching.
8573 * We flush all guest register shadow copies here.
8574 */
8575 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
8576
8577#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8578 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8579#else
8580 RT_NOREF(idxInstr);
8581#endif
8582
8583#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8584 /* Save variables in volatile registers. */
8585 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
8586 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8587#endif
8588
8589 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
8590 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
8591#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8592 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
8593#else
8594 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8595#endif
8596
8597 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
8598 if (iSegReg != UINT8_MAX)
8599 {
8600 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
8601 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
8602 }
8603
8604 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
8605 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
8606 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
8607
8608 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8609 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8610
8611 /* Done setting up parameters, make the call. */
8612 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8613
8614 /*
8615 * Put the output in the right registers.
8616 */
8617 Assert(idxRegMemResult == pVarMem->idxReg);
8618 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8619 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8620
8621#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8622 /* Restore variables and guest shadow registers to volatile registers. */
8623 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8624 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8625#endif
8626
8627 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
8628 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
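 /* Note: the helper returns the host mapping address in the return register (moved
  above if needed) and writes the unmap info through the stack-slot pointer passed as
  its second argument, which is why bUnmapInfo is reloaded from the frame here. */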
8629
8630#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8631 if (!TlbState.fSkip)
8632 {
8633 /* End of TlbMiss - jump to the done label. */
8634 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8635 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8636
8637 /*
8638 * TlbLookup:
8639 */
8640 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl, fAccess,
8641 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8642# ifdef IEM_WITH_TLB_STATISTICS
8643 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
8644 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
8645# endif
8646
8647 /* [idxVarUnmapInfo] = 0; */
8648 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
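 /* Note: a zero bUnmapInfo is the 'nothing to unmap' marker, so the commit-and-unmap
  emitter below can skip its helper call for this direct-access (TLB hit) path. */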
8649
8650 /*
8651 * TlbDone:
8652 */
8653 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8654
8655 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
8656
8657# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8658 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
8659 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8660# endif
8661 }
8662#else
8663 RT_NOREF(fAccess, fAlignMaskAndCtl, idxLabelTlbMiss);
8664#endif
8665
8666 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
8667 iemNativeVarRegisterRelease(pReNative, idxVarMem);
8668
8669 return off;
8670}
8671
8672
8673#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
8674 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
8675 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
8676
8677#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
8678 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
8679 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
8680
8681#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
8682 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
8683 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
8684
8685#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
8686 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
8687 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
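/* Illustrative pairing (sketch only; the IEM_MC_MEM_MAP_* name and parameter order
 * below are assumed for the example and live in IEMMc.h / the instruction decoders,
 * not in this file): an MC block maps guest memory, receiving a host pointer plus a
 * bUnmapInfo cookie, and must hand that cookie to exactly one of the commit-and-unmap
 * MCs above on every path out of the block, roughly like:
 *
 *      IEM_MC_MEM_MAP_U32_RW(pu32Dst, bUnmapInfo, iEffSeg, GCPtrEffDst);
 *      ... operate on *pu32Dst ...
 *      IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
 */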
8688
8689DECL_INLINE_THROW(uint32_t)
8690iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
8691 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
8692{
8693 /*
8694 * Assert sanity.
8695 */
8696 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
8697#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
8698 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
8699#endif
8700 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
8701 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
8702 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
8703#ifdef VBOX_STRICT
8704 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
8705 {
8706 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
8707 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
8708 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
8709 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
8710 case IEM_ACCESS_TYPE_WRITE:
8711 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
8712 case IEM_ACCESS_TYPE_READ:
8713 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
8714 default: AssertFailed();
8715 }
8716#else
8717 RT_NOREF(fAccess);
8718#endif
8719
8720 /*
8721 * To keep things simple we have to commit any pending writes first as we
8722 * may end up making calls (there shouldn't be any at this point, so this
8723 * is just for consistency).
8724 */
8725 /** @todo we could postpone this till we make the call and reload the
8726 * registers after returning from the call. Not sure if that's sensible or
8727 * not, though. */
8728 off = iemNativeRegFlushPendingWrites(pReNative, off);
8729
8730 /*
8731 * Move/spill/flush stuff out of call-volatile registers.
8732 *
8733 * We exclude any register holding the bUnmapInfo variable, as we'll be
8734 * checking it after returning from the call and will free it afterwards.
8735 */
8736 /** @todo save+restore active registers and maybe guest shadows in miss
8737 * scenario. */
8738 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
8739 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
8740
8741 /*
8742 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
8743 * to call the unmap helper function.
8744 *
8745 * The likelihood of it being zero is higher than for the TLB hit when doing
8746 * the mapping, as a TLB miss for a well aligned and unproblematic memory
8747 * access should also end up with a mapping that won't need special unmapping.
8748 */
8749 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
8750 * should speed up things for the pure interpreter as well when TLBs
8751 * are enabled. */
8752#ifdef RT_ARCH_AMD64
8753 if (pVarUnmapInfo->idxReg == UINT8_MAX)
8754 {
8755 /* test byte [rbp - xxx], 0ffh */
8756 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
8757 pbCodeBuf[off++] = 0xf6;
8758 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
8759 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
8760 pbCodeBuf[off++] = 0xff;
8761 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8762 }
8763 else
8764#endif
8765 {
8766 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
8767 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
8768 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
8769 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
8770 }
8771 uint32_t const offJmpFixup = off;
8772 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
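 /* Note: the jz target isn't known yet; offJmpFixup records the jump so that
  iemNativeFixupFixedJump at the end of this function can patch it to skip the
  helper call below when bUnmapInfo is zero (nothing to unmap). */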
8773
8774 /*
8775 * Call the unmap helper function.
8776 */
8777#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
8778 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8779#else
8780 RT_NOREF(idxInstr);
8781#endif
8782
8783 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
8784 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
8785 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8786
8787 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8788 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8789
8790 /* Done setting up parameters, make the call. */
8791 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8792
8793 /* The bUnmapInfo variable is implicitly freed by these MCs. */
8794 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
8795
8796 /*
8797 * Done, just fixup the jump for the non-call case.
8798 */
8799 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
8800
8801 return off;
8802}
8803
8804
8805
8806/*********************************************************************************************************************************
8807* State and Exceptions *
8808*********************************************************************************************************************************/
8809
8810#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8811#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8812
8813#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8814#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8815#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8816
8817#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8818#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8819#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8820
8821
8822DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
8823{
8824#ifndef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
8825 RT_NOREF(pReNative, fForChange);
8826#else
8827 if ( !(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED)
8828 && fForChange)
8829 {
8830# ifdef RT_ARCH_AMD64
8831
8832 /* Need to save the host MXCSR the first time, and clear the exception flags. */
8833 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
8834 {
8835 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
8836
8837 /* stmxcsr */
8838 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
8839 pbCodeBuf[off++] = X86_OP_REX_B;
8840 pbCodeBuf[off++] = 0x0f;
8841 pbCodeBuf[off++] = 0xae;
8842 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 3, IEMNATIVE_REG_FIXED_PVMCPU & 7);
8843 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
8844 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
8845 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
8846 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
8847 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8848
8849 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
8850 }
8851
8852 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
8853 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
8854
8855 /*
8856 * Mask any exceptions and clear the exception status and save into MXCSR,
8857 * taking a detour through memory here because ldmxcsr/stmxcsr don't support
8858 * a register source/target (sigh).
8859 */
8860 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr);
8861 off = iemNativeEmitOrGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, X86_MXCSR_XCPT_MASK);
8862 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, ~X86_MXCSR_XCPT_FLAGS);
8863 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
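 /* I.e. uRegMxcsrTmp = (guest MXCSR | X86_MXCSR_XCPT_MASK) & ~X86_MXCSR_XCPT_FLAGS,
  which the ldmxcsr below then loads into the host MXCSR. */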
8864
8865 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
8866
8867 /* ldmxcsr */
8868 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
8869 pbCodeBuf[off++] = X86_OP_REX_B;
8870 pbCodeBuf[off++] = 0x0f;
8871 pbCodeBuf[off++] = 0xae;
8872 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 2, IEMNATIVE_REG_FIXED_PVMCPU & 7);
8873 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
8874 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
8875 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
8876 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
8877 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8878
8879 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
8880 iemNativeRegFreeTmp(pReNative, idxRegTmp);
8881
8882# elif defined(RT_ARCH_ARM64)
8883 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
8884
8885 /* Need to save the host floating point control register the first time, clear FPSR. */
8886 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
8887 {
8888 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8889 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(ARMV8_A64_REG_XZR, ARMV8_AARCH64_SYSREG_FPSR);
8890 pu32CodeBuf[off++] = Armv8A64MkInstrMrs(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
8891 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
8892 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
8893 }
8894
8895 /*
8896 * Translate MXCSR to FPCR.
8897 *
8898 * Unfortunately we can't emulate the exact behavior of MXCSR as we can't take
8899 * FEAT_AFP on arm64 for granted (My M2 MacBook doesn't have it). So we can't map
8900 * MXCSR.DAZ to FPCR.FIZ and MXCSR.FZ to FPCR.FZ with FPCR.AH being set.
8901 * We can only use FPCR.FZ which will flush inputs _and_ output de-normals to zero.
8902 */
8903 /** @todo Check the host supported flags (needs additional work to get the host features from CPUM)
8904 * and implement alternate handling if FEAT_AFP is present. */
8905 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
8906
8907 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8908
8909 /* First make sure that there is nothing set for the upper 16-bits (X86_MXCSR_MM, which we don't emulate right now). */
8910 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegTmp, idxRegMxCsr);
8911
8912 /* If either MXCSR.FZ or MXCSR.DAZ is set FPCR.FZ will be set. */
8913 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(IEMNATIVE_REG_FIXED_TMP0, idxRegTmp, X86_MXCSR_DAZ_BIT, 1);
8914 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegTmp, X86_MXCSR_FZ_BIT);
8915 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(idxRegTmp, idxRegTmp, IEMNATIVE_REG_FIXED_TMP0);
8916 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegTmp, idxRegTmp, ARMV8_FPCR_FZ_BIT);
8917
8918 /*
8919 * Init the rounding mode, the layout differs between MXCSR.RM[14:13] and FPCR.RMode[23:22]:
8920 *
8921 *   Value    MXCSR   FPCR
8922 *     0       RN      RN
8923 *     1       R-      R+
8924 *     2       R+      R-
8925 *     3       RZ      RZ
8926 *
8927 * Conversion can be achieved by switching bit positions
8928 */
8929 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT);
8930 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 14, 1);
8931 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT + 1);
8932 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 13, 1);
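 /* Worked example of the swap: MXCSR.RC = 01b (round towards negative infinity)
  becomes FPCR.RMode = 10b, the ARM encoding of the same rounding direction; the
  symmetric values 00b (nearest) and 11b (towards zero) are unaffected. */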
8933
8934 /* Write the value to FPCR. */
8935 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
8936
8937 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8938 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
8939 iemNativeRegFreeTmp(pReNative, idxRegTmp);
8940# else
8941# error "Port me"
8942# endif
8943 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
8944 }
8945#endif
8946 return off;
8947}
8948
8949
8950
8951/*********************************************************************************************************************************
8952* Emitters for FPU related operations. *
8953*********************************************************************************************************************************/
8954
8955#define IEM_MC_FETCH_FCW(a_u16Fcw) \
8956 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
8957
8958/** Emits code for IEM_MC_FETCH_FCW. */
8959DECL_INLINE_THROW(uint32_t)
8960iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
8961{
8962 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8963 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8964
8965 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8966
8967 /* Allocate a temporary FCW register. */
8968 /** @todo eliminate extra register */
8969 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
8970 kIemNativeGstRegUse_ReadOnly);
8971
8972 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
8973
8974 /* Free but don't flush the FCW register. */
8975 iemNativeRegFreeTmp(pReNative, idxFcwReg);
8976 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8977
8978 return off;
8979}
8980
8981
8982#define IEM_MC_FETCH_FSW(a_u16Fsw) \
8983 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
8984
8985/** Emits code for IEM_MC_FETCH_FSW. */
8986DECL_INLINE_THROW(uint32_t)
8987iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
8988{
8989 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8990 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8991
8992 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
8993 /* Allocate a temporary FSW register. */
8994 /** @todo eliminate extra register */
8995 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
8996 kIemNativeGstRegUse_ReadOnly);
8997
8998 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
8999
9000 /* Free but don't flush the FSW register. */
9001 iemNativeRegFreeTmp(pReNative, idxFswReg);
9002 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9003
9004 return off;
9005}
9006
9007
9008
9009#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9010
9011
9012/*********************************************************************************************************************************
9013* Emitters for SSE/AVX specific operations. *
9014*********************************************************************************************************************************/
9015
9016#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
9017 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
9018
9019/** Emits code for IEM_MC_COPY_XREG_U128. */
9020DECL_INLINE_THROW(uint32_t)
9021iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
9022{
9023 /* This is a nop if the source and destination registers are the same. */
9024 if (iXRegDst != iXRegSrc)
9025 {
9026 /* Allocate destination and source register. */
9027 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
9028 kIemNativeGstSimdRegLdStSz_Low128,
9029 kIemNativeGstRegUse_ForFullWrite);
9030 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
9031 kIemNativeGstSimdRegLdStSz_Low128,
9032 kIemNativeGstRegUse_ReadOnly);
9033
9034 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9035
9036 /* Free but don't flush the source and destination register. */
9037 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9038 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9039 }
9040
9041 return off;
9042}
9043
9044
9045#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
9046 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
9047
9048/** Emits code for IEM_MC_FETCH_XREG_U128. */
9049DECL_INLINE_THROW(uint32_t)
9050iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
9051{
9052 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9053 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9054
9055 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9056 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9057
9058 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9059
9060 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9061
9062 /* Free but don't flush the source register. */
9063 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9064 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9065
9066 return off;
9067}
9068
9069
9070#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
9071 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
9072
9073#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
9074 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
9075
9076 /** Emits code for IEM_MC_FETCH_XREG_U64 and IEM_MC_FETCH_XREG_R64. */
9077DECL_INLINE_THROW(uint32_t)
9078iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
9079{
9080 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9081 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9082
9083 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9084 kIemNativeGstSimdRegLdStSz_Low128,
9085 kIemNativeGstRegUse_ReadOnly);
9086
9087 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9088 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9089
9090 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9091
9092 /* Free but don't flush the source register. */
9093 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9094 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9095
9096 return off;
9097}
9098
9099
9100#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
9101 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
9102
9103#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
9104 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
9105
9106/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
9107DECL_INLINE_THROW(uint32_t)
9108iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
9109{
9110 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9111 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9112
9113 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9114 kIemNativeGstSimdRegLdStSz_Low128,
9115 kIemNativeGstRegUse_ReadOnly);
9116
9117 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9118 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9119
9120 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9121
9122 /* Free but don't flush the source register. */
9123 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9124 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9125
9126 return off;
9127}
9128
9129
9130#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
9131 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
9132
9133/** Emits code for IEM_MC_FETCH_XREG_U16. */
9134DECL_INLINE_THROW(uint32_t)
9135iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
9136{
9137 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9138 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9139
9140 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9141 kIemNativeGstSimdRegLdStSz_Low128,
9142 kIemNativeGstRegUse_ReadOnly);
9143
9144 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9145 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9146
9147 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
9148
9149 /* Free but don't flush the source register. */
9150 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9151 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9152
9153 return off;
9154}
9155
9156
9157#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
9158 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
9159
9160/** Emits code for IEM_MC_FETCH_XREG_U8. */
9161DECL_INLINE_THROW(uint32_t)
9162iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
9163{
9164 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9165 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
9166
9167 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9168 kIemNativeGstSimdRegLdStSz_Low128,
9169 kIemNativeGstRegUse_ReadOnly);
9170
9171 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9172 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9173
9174 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
9175
9176 /* Free but don't flush the source register. */
9177 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9178 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9179
9180 return off;
9181}
9182
9183
9184#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
9185 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
9186
9187AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
9188#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
9189 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
9190
9191
9192/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
9193DECL_INLINE_THROW(uint32_t)
9194iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9195{
9196 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9197 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9198
9199 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9200 kIemNativeGstSimdRegLdStSz_Low128,
9201 kIemNativeGstRegUse_ForFullWrite);
9202 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9203
9204 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9205
9206 /* Free but don't flush the source register. */
9207 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9208 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9209
9210 return off;
9211}
9212
9213
9214#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
9215 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
9216
9217#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
9218 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
9219
9220#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
9221 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
9222
9223#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
9224 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
9225
9226#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
9227 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
9228
9229#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
9230 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
9231
9232 /** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8, as well as IEM_MC_STORE_XREG_R32/IEM_MC_STORE_XREG_R64. */
9233DECL_INLINE_THROW(uint32_t)
9234iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
9235 uint8_t cbLocal, uint8_t iElem)
9236{
9237 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9238 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
9239
9240#ifdef VBOX_STRICT
9241 switch (cbLocal)
9242 {
9243 case sizeof(uint64_t): Assert(iElem < 2); break;
9244 case sizeof(uint32_t): Assert(iElem < 4); break;
9245 case sizeof(uint16_t): Assert(iElem < 8); break;
9246 case sizeof(uint8_t): Assert(iElem < 16); break;
9247 default: AssertFailed();
9248 }
9249#endif
9250
9251 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9252 kIemNativeGstSimdRegLdStSz_Low128,
9253 kIemNativeGstRegUse_ForUpdate);
9254 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9255
9256 switch (cbLocal)
9257 {
9258 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9259 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9260 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9261 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9262 default: AssertFailed();
9263 }
9264
9265 /* Free but don't flush the source register. */
9266 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9267 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9268
9269 return off;
9270}
9271
9272
9273#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
9274 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
9275
9276/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
9277DECL_INLINE_THROW(uint32_t)
9278iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
9279{
9280 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9281 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9282
9283 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9284 kIemNativeGstSimdRegLdStSz_Low128,
9285 kIemNativeGstRegUse_ForUpdate);
9286 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9287
9288 /* Zero the vector register first, then store the 64-bit value to the lower 64-bit. */
9289 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
9290 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
9291
9292 /* Free but don't flush the source register. */
9293 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9294 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9295
9296 return off;
9297}
9298
9299
9300#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
9301 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
9302
9303/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
9304DECL_INLINE_THROW(uint32_t)
9305iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
9306{
9307 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9308 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9309
9310 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9311 kIemNativeGstSimdRegLdStSz_Low128,
9312 kIemNativeGstRegUse_ForUpdate);
9313 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9314
9315 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
9316 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
9317 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
9318
9319 /* Free but don't flush the source register. */
9320 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9321 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9322
9323 return off;
9324}
9325
9326
9327#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
9328 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
9329
9330/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
9331DECL_INLINE_THROW(uint32_t)
9332iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
9333 uint8_t idxSrcVar, uint8_t iDwSrc)
9334{
9335 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9336 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9337
9338 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9339 kIemNativeGstSimdRegLdStSz_Low128,
9340 kIemNativeGstRegUse_ForUpdate);
9341 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9342
9343 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
9344 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
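 /* Note: the dword is staged through the fixed temporary GPR - element iDwSrc is read
  from the source variable's vector register and written to element iDwDst of the
  destination XMM register. */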
9345
9346 /* Free but don't flush the destination register. */
9347 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9348 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9349
9350 return off;
9351}
9352
9353
9354#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
9355 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
9356
9357/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
9358DECL_INLINE_THROW(uint32_t)
9359iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
9360{
9361 /*
9362 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
9363 * if iYRegDst gets allocated first for the full write it won't load the
9364 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
9365 * duplicated from the already allocated host register for iYRegDst containing
9366 * garbage. This will be caught by the guest register value checking in debug
9367 * builds.
9368 */
9369 if (iYRegDst != iYRegSrc)
9370 {
9371 /* Allocate destination and source register. */
9372 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9373 kIemNativeGstSimdRegLdStSz_256,
9374 kIemNativeGstRegUse_ForFullWrite);
9375 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9376 kIemNativeGstSimdRegLdStSz_Low128,
9377 kIemNativeGstRegUse_ReadOnly);
9378
9379 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9380 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9381
9382 /* Free but don't flush the source and destination register. */
9383 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9384 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9385 }
9386 else
9387 {
9388 /* This effectively only clears the upper 128-bits of the register. */
9389 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9390 kIemNativeGstSimdRegLdStSz_High128,
9391 kIemNativeGstRegUse_ForFullWrite);
9392
9393 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
9394
9395 /* Free but don't flush the destination register. */
9396 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
9397 }
9398
9399 return off;
9400}
9401
9402
9403#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
9404 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
9405
9406/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
9407DECL_INLINE_THROW(uint32_t)
9408iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
9409{
9410 /*
9411 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
9412 * if iYRegDst gets allocated first for the full write it won't load the
9413 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
9414 * duplicated from the already allocated host register for iYRegDst containing
9415 * garbage. This will be caught by the guest register value checking in debug
9416 * builds. iYRegSrc == iYRegDst would effectively only clear any upper 256-bits
9417 * for a zmm register we don't support yet, so this is just a nop.
9418 */
9419 if (iYRegDst != iYRegSrc)
9420 {
9421 /* Allocate destination and source register. */
9422 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9423 kIemNativeGstSimdRegLdStSz_256,
9424 kIemNativeGstRegUse_ReadOnly);
9425 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9426 kIemNativeGstSimdRegLdStSz_256,
9427 kIemNativeGstRegUse_ForFullWrite);
9428
9429 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9430
9431 /* Free but don't flush the source and destination register. */
9432 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9433 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9434 }
9435
9436 return off;
9437}
9438
9439
9440#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
9441 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
9442
9443/** Emits code for IEM_MC_FETCH_YREG_U128. */
9444DECL_INLINE_THROW(uint32_t)
9445iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
9446{
9447 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9448 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9449
9450 Assert(iDQWord <= 1);
9451 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9452 iDQWord == 1
9453 ? kIemNativeGstSimdRegLdStSz_High128
9454 : kIemNativeGstSimdRegLdStSz_Low128,
9455 kIemNativeGstRegUse_ReadOnly);
9456
9457 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9458 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9459
9460 if (iDQWord == 1)
9461 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9462 else
9463 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9464
9465 /* Free but don't flush the source register. */
9466 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9467 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9468
9469 return off;
9470}
9471
9472
9473#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
9474 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
9475
9476/** Emits code for IEM_MC_FETCH_YREG_U64. */
9477DECL_INLINE_THROW(uint32_t)
9478iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
9479{
9480 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9481 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9482
9483 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9484 iQWord >= 2
9485 ? kIemNativeGstSimdRegLdStSz_High128
9486 : kIemNativeGstSimdRegLdStSz_Low128,
9487 kIemNativeGstRegUse_ReadOnly);
9488
9489 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9490 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9491
9492 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9493
9494 /* Free but don't flush the source register. */
9495 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9496 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9497
9498 return off;
9499}
9500
9501
9502#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
9503 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
9504
9505/** Emits code for IEM_MC_FETCH_YREG_U32. */
9506DECL_INLINE_THROW(uint32_t)
9507iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
9508{
9509 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9510 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9511
9512 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9513 iDWord >= 4
9514 ? kIemNativeGstSimdRegLdStSz_High128
9515 : kIemNativeGstSimdRegLdStSz_Low128,
9516 kIemNativeGstRegUse_ReadOnly);
9517
9518 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9519 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9520
9521 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9522
9523 /* Free but don't flush the source register. */
9524 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9525 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9526
9527 return off;
9528}
9529
9530
9531#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
9532 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
9533
9534/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
9535DECL_INLINE_THROW(uint32_t)
9536iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
9537{
9538 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9539 kIemNativeGstSimdRegLdStSz_High128,
9540 kIemNativeGstRegUse_ForFullWrite);
9541
9542 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
9543
9544 /* Free but don't flush the register. */
9545 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
9546
9547 return off;
9548}
9549
9550
9551#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
9552 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
9553
9554/** Emits code for IEM_MC_STORE_YREG_U128. */
9555DECL_INLINE_THROW(uint32_t)
9556iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
9557{
9558 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9559 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9560
9561 Assert(iDQword <= 1);
9562 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9563 iDQword == 0
9564 ? kIemNativeGstSimdRegLdStSz_Low128
9565 : kIemNativeGstSimdRegLdStSz_High128,
9566 kIemNativeGstRegUse_ForFullWrite);
9567
9568 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9569
9570 if (iDQword == 0)
9571 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9572 else
9573 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
9574
9575 /* Free but don't flush the source register. */
9576 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9577 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9578
9579 return off;
9580}
9581
9582
9583#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
9584 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
9585
9586/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
9587DECL_INLINE_THROW(uint32_t)
9588iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9589{
9590 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9591 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9592
9593 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9594 kIemNativeGstSimdRegLdStSz_256,
9595 kIemNativeGstRegUse_ForFullWrite);
9596
9597 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9598
9599 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9600 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9601
9602 /* Free but don't flush the source register. */
9603 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9604 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9605
9606 return off;
9607}
9608
9609
9610#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
9611 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
9612
9613/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
9614DECL_INLINE_THROW(uint32_t)
9615iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9616{
9617 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9618 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
9619
9620 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9621 kIemNativeGstSimdRegLdStSz_256,
9622 kIemNativeGstRegUse_ForFullWrite);
9623
9624 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9625
9626 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9627 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9628
9629 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9630 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9631
9632 return off;
9633}
9634
9635
9636#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
9637 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
9638
9639/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
9640DECL_INLINE_THROW(uint32_t)
9641iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9642{
9643 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9644 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
9645
9646 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9647 kIemNativeGstSimdRegLdStSz_256,
9648 kIemNativeGstRegUse_ForFullWrite);
9649
9650 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9651
9652 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9653 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9654
9655 /* Free but don't flush the source register. */
9656 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9657 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9658
9659 return off;
9660}
9661
9662
9663#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
9664 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
9665
9666/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
9667DECL_INLINE_THROW(uint32_t)
9668iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9669{
9670 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9671 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9672
9673 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9674 kIemNativeGstSimdRegLdStSz_256,
9675 kIemNativeGstRegUse_ForFullWrite);
9676
9677 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9678
9679 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9680 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9681
9682 /* Free but don't flush the source register. */
9683 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9684 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9685
9686 return off;
9687}
9688
9689
9690#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
9691 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
9692
9693/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
9694DECL_INLINE_THROW(uint32_t)
9695iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9696{
9697 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9698 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9699
9700 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9701 kIemNativeGstSimdRegLdStSz_256,
9702 kIemNativeGstRegUse_ForFullWrite);
9703
9704 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9705
9706 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9707 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9708
9709 /* Free but don't flush the source register. */
9710 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9711 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9712
9713 return off;
9714}
9715
9716
9717#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
9718 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
9719
9720/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
9721DECL_INLINE_THROW(uint32_t)
9722iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9723{
9724 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9725 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
9726
9727 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9728 kIemNativeGstSimdRegLdStSz_256,
9729 kIemNativeGstRegUse_ForFullWrite);
9730
9731 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9732
9733 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9734
9735 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9736 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9737
9738 return off;
9739}
9740
9741
9742#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
9743 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
9744
9745/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
9746DECL_INLINE_THROW(uint32_t)
9747iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9748{
9749 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9750 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
9751
9752 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9753 kIemNativeGstSimdRegLdStSz_256,
9754 kIemNativeGstRegUse_ForFullWrite);
9755
9756 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9757
9758 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9759
9760 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9761 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9762
9763 return off;
9764}
9765
9766
9767#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
9768 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
9769
9770/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
9771DECL_INLINE_THROW(uint32_t)
9772iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9773{
9774 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9775 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9776
9777 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9778 kIemNativeGstSimdRegLdStSz_256,
9779 kIemNativeGstRegUse_ForFullWrite);
9780
9781 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9782
9783 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9784
9785 /* Free but don't flush the source register. */
9786 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9787 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9788
9789 return off;
9790}
9791
9792
9793#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
9794 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
9795
9796/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
9797DECL_INLINE_THROW(uint32_t)
9798iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9799{
9800 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9801 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9802
9803 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9804 kIemNativeGstSimdRegLdStSz_256,
9805 kIemNativeGstRegUse_ForFullWrite);
9806
9807 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9808
9809 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9810
9811 /* Free but don't flush the source register. */
9812 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9813 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9814
9815 return off;
9816}
9817
9818
9819#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
9820 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
9821
9822/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
9823DECL_INLINE_THROW(uint32_t)
9824iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9825{
9826 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9827 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9828
9829 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9830 kIemNativeGstSimdRegLdStSz_256,
9831 kIemNativeGstRegUse_ForFullWrite);
9832
9833 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9834
9835 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
9836
9837 /* Free but don't flush the destination register, and release the source variable. */
9838 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9839 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9840
9841 return off;
9842}
9843
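/*
 * The four IEM_MC_BROADCAST_YREG_*_ZX_VLMAX emitters above share one pattern: allocate the
 * guest YMM register for a full 256-bit write, acquire the source variable (in a GPR, or in
 * a SIMD register for the U128 case) and emit a host-level broadcast into all lanes. The
 * disabled block below is only an illustrative scalar restatement of the intended guest
 * semantics for the U32 case (VPBROADCASTD-style); the helper name is made up and the code
 * is never compiled.
 */
#if 0
static void iemIllustrateBroadcastYregU32ZxVlmax(RTUINT256U *pYmmDst, uint32_t u32Src)
{
    /* Every 32-bit lane of the full 256-bit destination receives the source value,
       which also zero-extends the register to VLMAX (256 bits with AVX2). */
    for (unsigned iDWord = 0; iDWord < RT_ELEMENTS(pYmmDst->au32); iDWord++)
        pYmmDst->au32[iDWord] = u32Src;
}
#endif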
9844
9845#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
9846 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
9847
9848/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
9849DECL_INLINE_THROW(uint32_t)
9850iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9851{
9852 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9853 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9854
9855 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9856 kIemNativeGstSimdRegLdStSz_256,
9857 kIemNativeGstRegUse_ForFullWrite);
9858
9859 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9860
9861 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
9862 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
9863
9864 /* Free but don't flush the destination register, and release the source variable. */
9865 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9866 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9867
9868 return off;
9869}
9870
9871
9872#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
9873 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
9874
9875/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
9876DECL_INLINE_THROW(uint32_t)
9877iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9878{
9879 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9880 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9881
9882 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9883 kIemNativeGstSimdRegLdStSz_256,
9884 kIemNativeGstRegUse_ForFullWrite);
9885
9886 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9887
9888 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
9889 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
9890
9891 /* Free but don't flush the destination register, and release the source variable. */
9892 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9893 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9894
9895 return off;
9896}
9897
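/*
 * IEM_MC_STORE_YREG_U32_ZX_VLMAX and IEM_MC_STORE_YREG_U64_ZX_VLMAX first zero the whole
 * 256-bit guest register and then store the source into element 0, which is why the
 * destination is allocated for a full write rather than an update. The disabled block below
 * is an illustrative scalar sketch of the U64 variant with a made-up helper name; it is not
 * compiled.
 */
#if 0
static void iemIllustrateStoreYregU64ZxVlmax(RTUINT256U *pYmmDst, uint64_t u64Src)
{
    pYmmDst->au64[0] = u64Src;  /* the low qword gets the value */
    pYmmDst->au64[1] = 0;       /* everything above is zero-extended to VLMAX */
    pYmmDst->au64[2] = 0;
    pYmmDst->au64[3] = 0;
}
#endif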
9898
9899#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
9900 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
9901
9902/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
9903DECL_INLINE_THROW(uint32_t)
9904iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
9905{
9906 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9907 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9908
9909 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9910 kIemNativeGstSimdRegLdStSz_256,
9911 kIemNativeGstRegUse_ForFullWrite);
9912 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
9913 kIemNativeGstSimdRegLdStSz_Low128,
9914 kIemNativeGstRegUse_ReadOnly);
9915 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9916
9917 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
9918 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
9919 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9920
9921 /* Free but don't flush the source and destination registers. */
9922 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
9923 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9924 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9925
9926 return off;
9927}
9928
9929
9930#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
9931 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
9932
9933/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
9934DECL_INLINE_THROW(uint32_t)
9935iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
9936{
9937 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9938 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9939
9940 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9941 kIemNativeGstSimdRegLdStSz_256,
9942 kIemNativeGstRegUse_ForFullWrite);
9943 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
9944 kIemNativeGstSimdRegLdStSz_Low128,
9945 kIemNativeGstRegUse_ReadOnly);
9946 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9947
9948 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
9949 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
9950 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9951
9952 /* Free but don't flush the source and destination registers. */
9953 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
9954 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9955 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9956
9957 return off;
9958}
9959
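/*
 * The two merge emitters above combine a 64-bit local variable with one qword of an XMM
 * source and zero the upper 128 bits: copy the low 128 bits of the source register,
 * overwrite qword 0 (U64LOCAL_U64HI) or qword 1 (U64LO_U64LOCAL) with the GPR value, then
 * clear bits 255:128. The disabled block below is an illustrative scalar sketch of
 * IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX with a made-up helper name; it is not compiled.
 */
#if 0
static void iemIllustrateMergeYregU64LocalU64HiZxVlmax(RTUINT256U *pYmmDst, uint64_t u64Local, PCRTUINT128U pu128SrcHx)
{
    pYmmDst->au64[0] = u64Local;            /* qword 0 comes from the local variable */
    pYmmDst->au64[1] = pu128SrcHx->s.Hi;    /* qword 1 comes from the high qword of the XMM source */
    pYmmDst->au64[2] = 0;                   /* upper half zero-extended to VLMAX */
    pYmmDst->au64[3] = 0;
}
#endif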
9960
9961#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
9962 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
9963
9964
9965/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
9966DECL_INLINE_THROW(uint32_t)
9967iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
9968{
9969 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9970 kIemNativeGstSimdRegLdStSz_Low128,
9971 kIemNativeGstRegUse_ForUpdate);
9972
9973 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
9974 if (bImm8Mask & RT_BIT(0))
9975 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
9976 if (bImm8Mask & RT_BIT(1))
9977 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
9978 if (bImm8Mask & RT_BIT(2))
9979 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
9980 if (bImm8Mask & RT_BIT(3))
9981 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
9982
9983 /* Free but don't flush the destination register. */
9984 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9985
9986 return off;
9987}
9988
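/*
 * IEM_MC_CLEAR_XREG_U32_MASK clears the dwords of an XMM register selected by an immediate
 * 4-bit mask; the register is allocated for update with only the low 128 bits loaded, so the
 * YMM high half is left untouched. The disabled block below is an illustrative scalar sketch
 * with a made-up helper name; it is not compiled.
 */
#if 0
static void iemIllustrateClearXregU32Mask(RTUINT128U *pXmmDst, uint8_t bImm8Mask)
{
    for (unsigned iDWord = 0; iDWord < RT_ELEMENTS(pXmmDst->au32); iDWord++)
        if (bImm8Mask & RT_BIT(iDWord))
            pXmmDst->au32[iDWord] = 0;  /* only the dwords selected by the mask are cleared */
}
#endif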
9989
9990#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
9991 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
9992
9993#define IEM_MC_FETCH_YREG_YMM(a_uYmmDst, a_iYRegSrc) \
9994 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_uYmmDst, a_iYRegSrc)
9995
9996/** Emits code for IEM_MC_FETCH_YREG_U256/IEM_MC_FETCH_YREG_YMM. */
9997DECL_INLINE_THROW(uint32_t)
9998iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
9999{
10000 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10001 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
10002
10003 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10004 kIemNativeGstSimdRegLdStSz_256,
10005 kIemNativeGstRegUse_ReadOnly);
10006 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10007
10008 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
10009
10010 /* Free but don't flush the source register. */
10011 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10012 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10013
10014 return off;
10015}
10016
10017
10018#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
10019 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
10020
10021#define IEM_MC_STORE_YREG_YMM_ZX_VLMAX(a_iYRegDst, a_uYmmSrc) \
10022 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_uYmmSrc)
10023
10024/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX/IEM_MC_STORE_YREG_YMM_ZX_VLMAX. */
10025DECL_INLINE_THROW(uint32_t)
10026iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
10027{
10028 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10029 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10030
10031 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10032 kIemNativeGstSimdRegLdStSz_256,
10033 kIemNativeGstRegUse_ForFullWrite);
10034 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10035
10036 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
10037
10038 /* Free but don't flush the destination register, and release the source variable. */
10039 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10040 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10041
10042 return off;
10043}
10044
10045
10046#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
10047 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
10048
10049
10050/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
10051DECL_INLINE_THROW(uint32_t)
10052iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
10053 uint8_t idxSrcVar, uint8_t iDwSrc)
10054{
10055 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10056 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10057
10058 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10059 iDwDst < 4
10060 ? kIemNativeGstSimdRegLdStSz_Low128
10061 : kIemNativeGstSimdRegLdStSz_High128,
10062 kIemNativeGstRegUse_ForUpdate);
10063 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10064 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10065
10066 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
10067 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
10068
10069 /* Free but don't flush the destination register, free the temporary and release the source variable. */
10070 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10071 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10072 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10073
10074 return off;
10075}
10076
10077
10078#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
10079 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
10080
10081
10082/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
10083DECL_INLINE_THROW(uint32_t)
10084iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
10085 uint8_t idxSrcVar, uint8_t iQwSrc)
10086{
10087 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10088 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10089
10090 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10091 iQwDst < 2
10092 ? kIemNativeGstSimdRegLdStSz_Low128
10093 : kIemNativeGstSimdRegLdStSz_High128,
10094 kIemNativeGstRegUse_ForUpdate);
10095 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10096 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10097
10098 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
10099 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
10100
10101 /* Free but don't flush the destination register, free the temporary and release the source variable. */
10102 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10103 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10104 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10105
10106 return off;
10107}
10108
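/*
 * IEM_MC_STORE_YREG_U32_U256 and IEM_MC_STORE_YREG_U64_U256 copy a single element out of a
 * 256-bit local variable into a selected element of the guest YMM register. Because the
 * destination index decides which 128-bit half is touched, only that half is loaded and
 * marked dirty (Low128 for iDwDst < 4 resp. iQwDst < 2, High128 otherwise). The disabled
 * block below is an illustrative scalar sketch of the dword variant with a made-up helper
 * name; it is not compiled.
 */
#if 0
static void iemIllustrateStoreYregU32FromU256(RTUINT256U *pYmmDst, uint8_t iDwDst, PCRTUINT256U pu256Src, uint8_t iDwSrc)
{
    /* All other elements of the destination keep their current value. */
    pYmmDst->au32[iDwDst] = pu256Src->au32[iDwSrc];
}
#endif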
10109
10110#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
10111 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
10112
10113
10114/** Emits code for IEM_MC_STORE_YREG_U64. */
10115DECL_INLINE_THROW(uint32_t)
10116iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
10117{
10118 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10119 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10120
10121 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10122 iQwDst < 2
10123 ? kIemNativeGstSimdRegLdStSz_Low128
10124 : kIemNativeGstSimdRegLdStSz_High128,
10125 kIemNativeGstRegUse_ForUpdate);
10126
10127 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10128
10129 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
10130
10131 /* Free but don't flush the destination register, and release the source variable. */
10132 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10133 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10134
10135 return off;
10136}
10137
10138
10139#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
10140 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
10141
10142/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
10143DECL_INLINE_THROW(uint32_t)
10144iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10145{
10146 RT_NOREF(pReNative, iYReg);
10147 /** @todo Needs to be implemented when support for AVX-512 is added. */
10148 return off;
10149}
10150
10151
10152
10153/*********************************************************************************************************************************
10154* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
10155*********************************************************************************************************************************/
10156
10157/**
10158 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
10159 */
10160DECL_INLINE_THROW(uint32_t)
10161iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs, uint8_t idxInstr)
10162{
10163 /* Grab the MXCSR register; it must not be call-volatile or we would end up freeing it when setting up the call below. */
10164 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
10165 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
10166 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
10167
10168#if 0 /* This is not required right now as the called helper will set up the SSE/AVX state if it is an assembly one. */
10169 /*
10170 * Need to do the FPU preparation.
10171 */
10172 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
10173#endif
10174
10175 /*
10176 * Do all the call setup and cleanup.
10177 */
10178 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS,
10179 false /*fFlushPendingWrites*/);
10180
10181 /*
10182 * Load the MXCSR register into the first argument and mask out the current exception flags.
10183 */
10184 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
10185 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
10186
10187 /*
10188 * Make the call.
10189 */
10190 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
10191
10192 /*
10193 * The updated MXCSR is in the return register, update exception status flags.
10194 *
10195 * The return register is marked allocated as a temporary because it is required for the
10196 * exception generation check below.
10197 */
10198 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG)));
10199 uint8_t const idxRegTmp = iemNativeRegMarkAllocated(pReNative, IEMNATIVE_CALL_RET_GREG, kIemNativeWhat_Tmp);
10200 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegTmp);
10201
10202#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
10203 /* Write back the MXCSR register value (there is no delayed writeback for such registers at the moment). */
10204 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
10205#endif
10206
10207 /*
10208 * Make sure we don't have any outstanding guest register writes as we may
10209 * raise an \#UD or \#XF and all guest register must be up to date in CPUMCTX.
10210 */
10211 off = iemNativeRegFlushPendingWrites(pReNative, off);
10212
10213#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10214 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10215#else
10216 RT_NOREF(idxInstr);
10217#endif
10218
10219 /** @todo r=aeichner ANDN from BMI1 would save us a temporary and an additional instruction here, but I don't
10220 * want to assume the existence of this instruction at the moment. */
10221 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
10222
10223 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp2, idxRegTmp);
10224 /* tmp &= X86_MXCSR_XCPT_MASK */
10225 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
10226 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
10227 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
10228 /* tmp = ~tmp */
10229 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
10230 /* tmp &= mxcsr */
10231 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegTmp2);
10232 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
10233 kIemNativeLabelType_RaiseSseAvxFpRelated);
10234
10235 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
10236 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10237 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
10238
10239 return off;
10240}
10241
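/*
 * After the call, the helper's updated MXCSR lands in the return register; it is OR'ed into
 * the cached MXCSR and then checked for exception flags whose mask bits are clear, in which
 * case the code exits via the RaiseSseAvxFpRelated label (\#XF/\#UD). The disabled block
 * below restates the emitted register sequence as plain C; the helper name and parameter are
 * made up for illustration and the code is never compiled.
 */
#if 0
static bool iemIllustrateMxCsrXcptCheck(uint32_t uMxCsrRet)
{
    /* An exception is pending iff one of the flag bits (0..5) is set while the
       corresponding mask bit (7..12) is clear. */
    uint32_t const fUnmaskedXcpts = uMxCsrRet
                                  & ~((uMxCsrRet & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT)
                                  & X86_MXCSR_XCPT_FLAGS;
    return fUnmaskedXcpts != 0; /* true -> take the kIemNativeLabelType_RaiseSseAvxFpRelated exit */
}
#endif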
10242
10243#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
10244 off = iemNativeEmitCallSseAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
10245
10246/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
10247DECL_INLINE_THROW(uint32_t)
10248iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10249{
10250 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10251 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10252 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
10253}
10254
10255
10256#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
10257 off = iemNativeEmitCallSseAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
10258
10259/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
10260DECL_INLINE_THROW(uint32_t)
10261iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
10262 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10263{
10264 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10265 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10266 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10267 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
10268}
10269
10270
10271/*********************************************************************************************************************************
10272* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
10273*********************************************************************************************************************************/
10274
10275#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
10276 off = iemNativeEmitCallAvxAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
10277
10278/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
10279DECL_INLINE_THROW(uint32_t)
10280iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10281{
10282 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10283 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10284 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
10285}
10286
10287
10288#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
10289 off = iemNativeEmitCallAvxAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
10290
10291/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
10292DECL_INLINE_THROW(uint32_t)
10293iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
10294 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10295{
10296 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10297 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10298 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10299 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
10300}
10301
10302
10303#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
10304
10305
10306/*********************************************************************************************************************************
10307* Include instruction emitters. *
10308*********************************************************************************************************************************/
10309#include "target-x86/IEMAllN8veEmit-x86.h"
10310