VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@104627

Last change on this file since 104627 was 104468, checked in by vboxsync, 8 months ago

VMM/IEM: Deal with the simplest direct 'linking' of TBs scenario for relative jumps, when staying within the same code page. bugref:10656

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 482.5 KB
1/* $Id: IEMAllN8veRecompFuncs.h 104468 2024-05-01 00:43:28Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down configs here to avoid wasting time on unused configs.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81
82
83/*********************************************************************************************************************************
84* Code emitters for flushing pending guest register writes and sanity checks *
85*********************************************************************************************************************************/
86
87#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
88# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
89DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
90{
91 /* Compare the shadow with the context value; they should match. */
92 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
93 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
94 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
95 return off;
96}
97# endif
98#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
99
100/**
101 * Flushes the delayed write of a specific guest register.
102 *
103 * This must be called prior to calling CImpl functions and any other helpers
104 * that use the guest state (such as code raising exceptions).
105 *
106 * This optimization has not yet been implemented. The first target would be
107 * RIP updates, since these are the most common ones.
108 */
109DECL_INLINE_THROW(uint32_t)
110iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
111{
112#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
113 /* If it ever becomes possible to reference the PC register, the writeback needs to be done here first. */
114#endif
115
116#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
117#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
118 if ( enmClass == kIemNativeGstRegRef_EFlags
119 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
120 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
121#else
122 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
123#endif
124
125 if ( enmClass == kIemNativeGstRegRef_Gpr
126 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
127 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
128#endif
129
130#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
131 if ( enmClass == kIemNativeGstRegRef_XReg
132 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
133 {
134 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
135 /* Flush the shadows as the register needs to be reloaded (there is currently no guarantee that the referenced register doesn't change). */
136 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
137
138 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
139 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
140 }
141#endif
142 RT_NOREF(pReNative, enmClass, idxReg);
143 return off;
144}
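/*
 * Illustrative sketch (not part of the upstream file): how a reference-taking
 * emitter might flush a dirty GPR shadow before handing a register reference
 * to a helper.  The surrounding context (an IEM_MC_REF_GREG-style emitter) is
 * an assumption made purely for illustration.
 */
#if 0
    /* About to hand out a reference to guest xAX: make sure any delayed
       write to that register has hit CPUMCTX first. */
    off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, X86_GREG_xAX);
#endif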
145
146
147
148/*********************************************************************************************************************************
149* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
150*********************************************************************************************************************************/
151
152#undef IEM_MC_BEGIN /* unused */
153#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
154 { \
155 Assert(pReNative->Core.bmVars == 0); \
156 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
157 Assert(pReNative->Core.bmStack == 0); \
158 pReNative->fMc = (a_fMcFlags); \
159 pReNative->fCImpl = (a_fCImplFlags); \
160 pReNative->cArgsX = (a_cArgsIncludingHidden)
161
162/** We have to get to the end in recompilation mode, as otherwise we won't
163 * generate code for all the IEM_MC_IF_XXX branches. */
164#define IEM_MC_END() \
165 iemNativeVarFreeAll(pReNative); \
166 } return off
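/*
 * Illustrative sketch (assumption, not generated output): roughly the shape a
 * generated emitter body takes once IEM_MC_BEGIN_EX/IEM_MC_END expand.  The
 * function name, instruction length and the exact flag values below are made
 * up for illustration.
 */
#if 0
static uint32_t iemNativeRecompFunc_Example(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
{
    IEM_MC_BEGIN_EX(IEM_MC_F_MIN_386, 0 /*fCImplFlags*/, 0 /*cArgsIncludingHidden*/);
    /* ... IEM_MC_XXX statements for the instruction body ... */
    IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(2 /*cbInstr*/, VINF_SUCCESS);
    IEM_MC_END();
}
#endif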
167
168
169
170/*********************************************************************************************************************************
171* Native Emitter Support. *
172*********************************************************************************************************************************/
173
174#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
175
176#define IEM_MC_NATIVE_ELSE() } else {
177
178#define IEM_MC_NATIVE_ENDIF() } ((void)0)
179
180
181#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
182 off = a_fnEmitter(pReNative, off)
183
184#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
185 off = a_fnEmitter(pReNative, off, (a0))
186
187#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
188 off = a_fnEmitter(pReNative, off, (a0), (a1))
189
190#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
191 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
192
193#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
194 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
195
196#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
197 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
198
199#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
200 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
201
202#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
203 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
204
205#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
206 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
207
208
209#ifndef RT_ARCH_AMD64
210# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
211#else
212/** @note This is a naive approach that ASSUMES that the register isn't
213 * allocated, so it only works safely for the first allocation(s) in
214 * an MC block. */
215# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
216 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
217
218DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off);
219
220DECL_INLINE_THROW(uint32_t)
221iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
222{
223 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
224 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
225 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
226
227# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
228 /* Must flush the register if it holds pending writes. */
229 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
230 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
231 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
232# endif
233
234 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off);
235 return off;
236}
237
238#endif /* RT_ARCH_AMD64 */
239
240
241
242/*********************************************************************************************************************************
243* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
244*********************************************************************************************************************************/
245
246#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
247 pReNative->fMc = 0; \
248 pReNative->fCImpl = (a_fFlags); \
249 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
250
251
252#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
253 pReNative->fMc = 0; \
254 pReNative->fCImpl = (a_fFlags); \
255 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
256
257DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
258 uint8_t idxInstr, uint64_t a_fGstShwFlush,
259 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
260{
261 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
262}
263
264
265#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
266 pReNative->fMc = 0; \
267 pReNative->fCImpl = (a_fFlags); \
268 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
269 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
270
271DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
272 uint8_t idxInstr, uint64_t a_fGstShwFlush,
273 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
274{
275 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
276}
277
278
279#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
280 pReNative->fMc = 0; \
281 pReNative->fCImpl = (a_fFlags); \
282 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
283 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
284
285DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
286 uint8_t idxInstr, uint64_t a_fGstShwFlush,
287 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
288 uint64_t uArg2)
289{
290 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
291}
292
293
294
295/*********************************************************************************************************************************
296* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
297*********************************************************************************************************************************/
298
299/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
300 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
301DECL_INLINE_THROW(uint32_t)
302iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
303{
304 /*
305 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
306 * return with a special status code and make the execution loop deal with
307 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
308 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
309 * could continue w/o interruption, it will probably drop into the
310 * debugger, so it is not worth the effort of trying to service it here; we
311 * just lump it in with the handling of the others.
312 *
313 * To simplify the code and the register state management even more (wrt
314 * the immediate in the AND operation), we always update the flags and skip
315 * the extra conditional jump associated with the check.
316 */
317 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
318 <= UINT32_MAX);
319#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
320 AssertMsg( pReNative->idxCurCall == 0
321 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
322 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
323#endif
324
325 /*
326 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
327 * any pending register writes must be flushed.
328 */
329 off = iemNativeRegFlushPendingWrites(pReNative, off);
330
331 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
332 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
333 true /*fSkipLivenessAssert*/);
334 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
335 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
336 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
337 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
338 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
339
340 /* Free but don't flush the EFLAGS register. */
341 iemNativeRegFreeTmp(pReNative, idxEflReg);
342
343 return off;
344}
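/*
 * Rough shape of what the above emits on the host side (illustrative pseudo
 * assembly only, assuming an AMD64 host; the exact sequence depends on the
 * emitters and on whether EFLAGS is already shadowed in a host register):
 *      mov   eflReg, [pVCpu + cpum.GstCtx.eflags]        ; ForUpdate allocation
 *      test  eflReg, X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK
 *      jnz   ReturnWithFlags
 *      and   eflReg, ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW)
 *      mov   [pVCpu + cpum.GstCtx.eflags], eflReg
 */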
345
346
347/** Helper for iemNativeEmitFinishInstructionWithStatus. */
348DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
349{
350 unsigned const offOpcodes = pCallEntry->offOpcode;
351 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
352 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
353 {
354 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
355 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
356 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
357 }
358 AssertFailedReturn(NIL_RTGCPHYS);
359}
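/*
 * Worked example for the range walk above (values are illustrative): with
 * pCallEntry->offOpcode = 0x22 and aRanges[0] covering offOpcodes 0x00..0x1f
 * while aRanges[1] starts at offOpcodes 0x20 with cbOpcodes 0x15, the loop
 * settles on idxRange = 1 and returns that range's physical page address plus
 * offPhysPage plus the 0x02 byte offset into the range.
 */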
360
361
362/** Emits the a_rcNormal handling; a no-op (the VINF_SUCCESS dummy) when a_rcNormal is VINF_SUCCESS. */
363template<int const a_rcNormal, bool const a_fIsJump>
364DECL_FORCE_INLINE_THROW(uint32_t)
365iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
366 int32_t const offJump)
367{
368 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
369 if (a_rcNormal != VINF_SUCCESS)
370 {
371#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
372 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
373#else
374 RT_NOREF_PV(pCallEntry);
375#endif
376
377 /* As this code returns from the TB any pending register writes must be flushed. */
378 off = iemNativeRegFlushPendingWrites(pReNative, off);
379
380 /*
381 * Use the lookup table for getting to the next TB quickly.
382 * Note! In this code path there can only be one entry at present.
383 */
384 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
385 PCIEMTB const pTbOrg = pReNative->pTbOrg;
386 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
387 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
388
389#if 0
390 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
391 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
392 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
393 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
394 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
395
396 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
397
398#else
399 /* Load the index as argument #1 for the helper call at the given label. */
400 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
401
402 /*
403 * Figure out the physical address of the current instruction and see
404 * whether the next instruction we're about to execute is in the same
405 * page, so we can optimistically skip TLB loading.
406 *
407 * - This is safe for all cases in FLAT mode.
408 * - In segmented modes it is complicated, given that a negative
409 * jump may underflow EIP and a forward jump may overflow or run into
410 * CS.LIM and trigger a #GP. The only thing we can get away with
411 * now at compile time is forward jumps w/o CS.LIM checks, since the
412 * lack of CS.LIM checks means we're good for the entire physical page
413 * we're executing on and another 15 bytes before we run into CS.LIM.
414 */
415 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
416 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS) )
417 {
418 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
419 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
420 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
421 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
422
423 {
424 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
425 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
426
427 /* Load the key lookup flags into the 2nd argument for the helper call.
428 - This is safe wrt CS limit checking since we're only here for FLAT modes.
429 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
430 interrupt shadow.
431 - The NMI inhibiting is more questionable, though... */
432 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
433 * Should we copy it into fExec to simplify this? OTOH, it's just a
434 * couple of extra instructions if EFLAGS are already in a register. */
435 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
436 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
437
438 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
439 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookup);
440 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithIrq);
441 }
442 }
443 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
444 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlb);
445 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq);
446#endif
447 }
448 return off;
449}
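/*
 * Worked example for the same-page test above (illustrative numbers): for a
 * 2-byte relative JMP at GCPhysPcCurrent = 0x123f00 with offJump = +0x10 we
 * get GCPhysPcNext = 0x123f12, which shares the 4 KiB page with the current
 * instruction, so one of the ReturnBreakViaLookup labels (no TLB reload) is
 * taken.  With GCPhysPcCurrent = 0x123ffd instead, GCPhysPcNext = 0x12400f
 * lands on the next page and we fall back to the ...WithTlb variants.
 */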
450
451
452#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
453 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
454 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
455
456#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
457 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
458 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
459 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
460
461/** Same as iemRegAddToRip64AndFinishingNoFlags. */
462DECL_INLINE_THROW(uint32_t)
463iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
464{
465#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
466# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
467 if (!pReNative->Core.offPc)
468 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
469# endif
470
471 /* Allocate a temporary PC register. */
472 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
473
474 /* Perform the addition and store the result. */
475 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
476 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
477
478 /* Free but don't flush the PC register. */
479 iemNativeRegFreeTmp(pReNative, idxPcReg);
480#endif
481
482#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
483 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
484
485 pReNative->Core.offPc += cbInstr;
486# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
487 off = iemNativePcAdjustCheck(pReNative, off);
488# endif
489 if (pReNative->cCondDepth)
490 off = iemNativeEmitPcWriteback(pReNative, off);
491 else
492 pReNative->Core.cInstrPcUpdateSkipped++;
493#endif
494
495 return off;
496}
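/*
 * Illustrative note on IEMNATIVE_WITH_DELAYED_PC_UPDATING (assumption about
 * typical use): for a straight-line run of, say, three 2-byte instructions
 * outside any conditional, the calls above merely bump pReNative->Core.offPc
 * to 6 and count the skipped updates; a single writeback emitted later (e.g.
 * by iemNativeRegFlushPendingWrites) then adds the accumulated 6 bytes to
 * CPUMCTX::rip in one go.
 */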
497
498
499#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
500 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
501 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
502
503#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
504 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
505 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
506 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
507
508/** Same as iemRegAddToEip32AndFinishingNoFlags. */
509DECL_INLINE_THROW(uint32_t)
510iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
511{
512#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
513# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
514 if (!pReNative->Core.offPc)
515 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
516# endif
517
518 /* Allocate a temporary PC register. */
519 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
520
521 /* Perform the addition and store the result. */
522 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
523 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
524
525 /* Free but don't flush the PC register. */
526 iemNativeRegFreeTmp(pReNative, idxPcReg);
527#endif
528
529#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
530 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
531
532 pReNative->Core.offPc += cbInstr;
533# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
534 off = iemNativePcAdjustCheck(pReNative, off);
535# endif
536 if (pReNative->cCondDepth)
537 off = iemNativeEmitPcWriteback(pReNative, off);
538 else
539 pReNative->Core.cInstrPcUpdateSkipped++;
540#endif
541
542 return off;
543}
544
545
546#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
547 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
548 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
549
550#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
551 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
552 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
553 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
554
555/** Same as iemRegAddToIp16AndFinishingNoFlags. */
556DECL_INLINE_THROW(uint32_t)
557iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
558{
559#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
560# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
561 if (!pReNative->Core.offPc)
562 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
563# endif
564
565 /* Allocate a temporary PC register. */
566 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
567
568 /* Perform the addition and store the result. */
569 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
570 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
571 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
572
573 /* Free but don't flush the PC register. */
574 iemNativeRegFreeTmp(pReNative, idxPcReg);
575#endif
576
577#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
578 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
579
580 pReNative->Core.offPc += cbInstr;
581# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
582 off = iemNativePcAdjustCheck(pReNative, off);
583# endif
584 if (pReNative->cCondDepth)
585 off = iemNativeEmitPcWriteback(pReNative, off);
586 else
587 pReNative->Core.cInstrPcUpdateSkipped++;
588#endif
589
590 return off;
591}
592
593
594
595/*********************************************************************************************************************************
596* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
597*********************************************************************************************************************************/
598
599#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
600 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
601 (a_enmEffOpSize), pCallEntry->idxInstr); \
602 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
603
604#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
605 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
606 (a_enmEffOpSize), pCallEntry->idxInstr); \
607 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
608 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
609
610#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
611 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
612 IEMMODE_16BIT, pCallEntry->idxInstr); \
613 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
614
615#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
616 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
617 IEMMODE_16BIT, pCallEntry->idxInstr); \
618 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
619 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
620
621#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
622 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
623 IEMMODE_64BIT, pCallEntry->idxInstr); \
624 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
625
626#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
627 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
628 IEMMODE_64BIT, pCallEntry->idxInstr); \
629 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
630 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
631
632/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
633 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
634 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
635DECL_INLINE_THROW(uint32_t)
636iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
637 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
638{
639 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
640
641 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
642 off = iemNativeRegFlushPendingWrites(pReNative, off);
643
644#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
645 Assert(pReNative->Core.offPc == 0);
646
647 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
648#endif
649
650 /* Allocate a temporary PC register. */
651 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
652
653 /* Perform the addition. */
654 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
655
656 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
657 {
658 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
659 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
660 }
661 else
662 {
663 /* Just truncate the result to 16-bit IP. */
664 Assert(enmEffOpSize == IEMMODE_16BIT);
665 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
666 }
667 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
668
669 /* Free but don't flush the PC register. */
670 iemNativeRegFreeTmp(pReNative, idxPcReg);
671
672 return off;
673}
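/*
 * Illustrative example for the canonical check above: a near JMP with a
 * positive displacement taking RIP from 0x00007ffffffffffd across the
 * 0x0000800000000000 boundary produces a non-canonical address, so the
 * emitted check raises #GP(0) and exits the TB instead of storing the value.
 */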
674
675
676#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
677 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
678 (a_enmEffOpSize), pCallEntry->idxInstr); \
679 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
680
681#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
682 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
683 (a_enmEffOpSize), pCallEntry->idxInstr); \
684 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
685 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
686
687#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
688 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
689 IEMMODE_16BIT, pCallEntry->idxInstr); \
690 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
691
692#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
693 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
694 IEMMODE_16BIT, pCallEntry->idxInstr); \
695 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
696 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
697
698#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
699 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
700 IEMMODE_32BIT, pCallEntry->idxInstr); \
701 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
702
703#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
704 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
705 IEMMODE_32BIT, pCallEntry->idxInstr); \
706 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
707 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
708
709/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
710 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
711 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
712DECL_INLINE_THROW(uint32_t)
713iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
714 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
715{
716 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
717
718 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
719 off = iemNativeRegFlushPendingWrites(pReNative, off);
720
721#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
722 Assert(pReNative->Core.offPc == 0);
723
724 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
725#endif
726
727 /* Allocate a temporary PC register. */
728 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
729
730 /* Perform the addition. */
731 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
732
733 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
734 if (enmEffOpSize == IEMMODE_16BIT)
735 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
736
737 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
738/** @todo we can skip this in 32-bit FLAT mode. */
739 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
740
741 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
742
743 /* Free but don't flush the PC register. */
744 iemNativeRegFreeTmp(pReNative, idxPcReg);
745
746 return off;
747}
748
749
750#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
751 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
752 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
753
754#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
755 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
756 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
757 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
758
759#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
760 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
761 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
762
763#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
764 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
765 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
766 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
767
768#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
769 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
770 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
771
772#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
773 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
774 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
775 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
776
777/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
778DECL_INLINE_THROW(uint32_t)
779iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
780 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
781{
782 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
783 off = iemNativeRegFlushPendingWrites(pReNative, off);
784
785#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
786 Assert(pReNative->Core.offPc == 0);
787
788 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
789#endif
790
791 /* Allocate a temporary PC register. */
792 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
793
794 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
795 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
796 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
797 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
798 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
799
800 /* Free but don't flush the PC register. */
801 iemNativeRegFreeTmp(pReNative, idxPcReg);
802
803 return off;
804}
805
806
807
808/*********************************************************************************************************************************
809* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
810*********************************************************************************************************************************/
811
812/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
813#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
814 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
815
816/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
817#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
818 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
819
820/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
821#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
822 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
823
824/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
825 * clears flags. */
826#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
827 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
828 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
829
830/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
831 * clears flags. */
832#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
833 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
834 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
835
836/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
837 * clears flags. */
838#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
839 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
840 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
841
842#undef IEM_MC_SET_RIP_U16_AND_FINISH
843
844
845/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
846#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
847 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
848
849/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
850#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
851 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
852
853/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
854 * clears flags. */
855#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
856 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
857 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
858
859/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
860 * and clears flags. */
861#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
862 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
863 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
864
865#undef IEM_MC_SET_RIP_U32_AND_FINISH
866
867
868/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
869#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
870 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
871
872/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
873 * and clears flags. */
874#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
875 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
876 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
877
878#undef IEM_MC_SET_RIP_U64_AND_FINISH
879
880
881/** Same as iemRegRipJumpU16AndFinishNoFlags,
882 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
883DECL_INLINE_THROW(uint32_t)
884iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
885 uint8_t idxInstr, uint8_t cbVar)
886{
887 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
888 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
889
890 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
891 off = iemNativeRegFlushPendingWrites(pReNative, off);
892
893#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
894 Assert(pReNative->Core.offPc == 0);
895
896 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
897#endif
898
899 /* Get a register with the new PC loaded from idxVarPc.
900 Note! This ASSUMES that the high bits of the GPR are zeroed. */
901 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
902
903 /* Check limit (may #GP(0) + exit TB). */
904 if (!f64Bit)
905/** @todo we can skip this test in FLAT 32-bit mode. */
906 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
907 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
908 else if (cbVar > sizeof(uint32_t))
909 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
910
911 /* Store the result. */
912 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
913
914 iemNativeVarRegisterRelease(pReNative, idxVarPc);
915 /** @todo implicitly free the variable? */
916
917 return off;
918}
919
920
921
922/*********************************************************************************************************************************
923* Emitters for changing PC/RIP/EIP/IP with an indirect call (IEM_MC_IND_CALL_UXX_AND_FINISH) (requires stack emitters). *
924*********************************************************************************************************************************/
925
926/** @todo These helpers naturally belong to the stack push API, but we already need them up here (we could of course move
927 * this below the stack emitters, but then it would not be close to the rest of the PC/RIP handling...). */
928DECL_FORCE_INLINE_THROW(uint32_t)
929iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
930{
931 /* Use16BitSp: */
932#ifdef RT_ARCH_AMD64
933 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
934 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
935#else
936 /* sub regeff, regrsp, #cbMem */
937 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
938 /* and regeff, regeff, #0xffff */
939 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
940 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
941 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp into bits 15:0 of idxRegRsp. */
942 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
943#endif
944 return off;
945}
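/*
 * Worked example for the 16-bit SP update above (illustrative): with
 * RSP = 0x00520002 and cbMem = 4, only SP (the low 16 bits) is decremented,
 * wrapping to 0xfffe; idxRegEffSp then holds 0x0000fffe for the store while
 * idxRegRsp becomes 0x0052fffe, leaving bits 31:16 untouched.
 */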
946
947
948DECL_FORCE_INLINE(uint32_t)
949iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
950{
951 /* Use32BitSp: */
952 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
953 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
954 return off;
955}
956
957
958DECL_INLINE_THROW(uint32_t)
959iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
960 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
961{
962 /*
963 * Assert sanity.
964 */
965#ifdef VBOX_STRICT
966 if (RT_BYTE2(cBitsVarAndFlat) != 0)
967 {
968 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
969 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
970 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
971 Assert( pfnFunction
972 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
973 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
974 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
975 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
976 : UINT64_C(0xc000b000a0009000) ));
977 }
978 else
979 Assert( pfnFunction
980 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
981 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
982 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
983 : UINT64_C(0xc000b000a0009000) ));
984#endif
985
986#ifdef VBOX_STRICT
987 /*
988 * Check that the fExec flags we've got make sense.
989 */
990 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
991#endif
992
993 /*
994 * To keep things simple we have to commit any pending writes first as we
995 * may end up making calls.
996 */
997 /** @todo we could postpone this till we make the call and reload the
998 * registers after returning from the call. Not sure if that's sensible or
999 * not, though. */
1000 off = iemNativeRegFlushPendingWrites(pReNative, off);
1001
1002 /*
1003 * First we calculate the new RSP and the effective stack pointer value.
1004 * For 64-bit mode and flat 32-bit these two are the same.
1005 * (Code structure is very similar to that of PUSH)
1006 */
1007 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
1008 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
1009 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
1010 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
1011 ? cbMem : sizeof(uint16_t);
1012 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
1013 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1014 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1015 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1016 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1017 if (cBitsFlat != 0)
1018 {
1019 Assert(idxRegEffSp == idxRegRsp);
1020 Assert(cBitsFlat == 32 || cBitsFlat == 64);
1021 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1022 if (cBitsFlat == 64)
1023 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1024 else
1025 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1026 }
1027 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1028 {
1029 Assert(idxRegEffSp != idxRegRsp);
1030 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1031 kIemNativeGstRegUse_ReadOnly);
1032#ifdef RT_ARCH_AMD64
1033 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1034#else
1035 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1036#endif
1037 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1038 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1039 offFixupJumpToUseOtherBitSp = off;
1040 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1041 {
1042 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1043 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1044 }
1045 else
1046 {
1047 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1048 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1049 }
1050 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1051 }
1052 /* SpUpdateEnd: */
1053 uint32_t const offLabelSpUpdateEnd = off;
1054
1055 /*
1056 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1057 * we're skipping lookup).
1058 */
1059 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1060 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
1061 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1062 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1063 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1064 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1065 : UINT32_MAX;
1066 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1067
1068
1069 if (!TlbState.fSkip)
1070 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1071 else
1072 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1073
1074 /*
1075 * Use16BitSp:
1076 */
1077 if (cBitsFlat == 0)
1078 {
1079#ifdef RT_ARCH_AMD64
1080 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1081#else
1082 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1083#endif
1084 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1085 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1086 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1087 else
1088 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1089 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1090 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1091 }
1092
1093 /*
1094 * TlbMiss:
1095 *
1096 * Call helper to do the pushing.
1097 */
1098 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1099
1100#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1101 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1102#else
1103 RT_NOREF(idxInstr);
1104#endif
1105
1106 /* Save variables in volatile registers. */
1107 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1108 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1109 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1110 | (RT_BIT_32(idxRegPc));
1111 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1112
1113 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1114 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1115 {
1116 /* Swap them using ARG0 as temp register: */
1117 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1118 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1119 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1120 }
1121 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1122 {
1123 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1124 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1125
1126 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1127 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1128 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1129 }
1130 else
1131 {
1132 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1133 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1134
1135 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1136 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1137 }
1138
1139 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1140 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1141
1142 /* Done setting up parameters, make the call. */
1143 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1144
1145 /* Restore variables and guest shadow registers to volatile registers. */
1146 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1147 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1148
1149#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1150 if (!TlbState.fSkip)
1151 {
1152 /* end of TlbMiss - Jump to the done label. */
1153 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1154 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1155
1156 /*
1157 * TlbLookup:
1158 */
1159 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
1160 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1161
1162 /*
1163 * Emit code to do the actual storing / fetching.
1164 */
1165 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1166# ifdef VBOX_WITH_STATISTICS
1167 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1168 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1169# endif
1170 switch (cbMemAccess)
1171 {
1172 case 2:
1173 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1174 break;
1175 case 4:
1176 if (!fIsIntelSeg)
1177 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1178 else
1179 {
1180                        /* intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
1181                           PUSH FS in real mode, so we have to try to emulate that here.
1182 We borrow the now unused idxReg1 from the TLB lookup code here. */
1183 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
1184 kIemNativeGstReg_EFlags);
1185 if (idxRegEfl != UINT8_MAX)
1186 {
1187#ifdef RT_ARCH_AMD64
1188 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
1189 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1190 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1191#else
1192 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
1193 off, TlbState.idxReg1, idxRegEfl,
1194 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1195#endif
1196 iemNativeRegFreeTmp(pReNative, idxRegEfl);
1197 }
1198 else
1199 {
1200 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
1201 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
1202 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1203 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1204 }
1205 /* ASSUMES the upper half of idxRegPc is ZERO. */
1206 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegPc);
1207 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
1208 }
1209 break;
1210 case 8:
1211 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1212 break;
1213 default:
1214 AssertFailed();
1215 }
1216
1217 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1218 TlbState.freeRegsAndReleaseVars(pReNative);
1219
1220 /*
1221 * TlbDone:
1222 *
1223 * Commit the new RSP value.
1224 */
1225 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1226 }
1227#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1228
1229#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1230 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
1231#endif
1232 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1233 if (idxRegEffSp != idxRegRsp)
1234 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1235
1236 return off;
1237}
1238
1239
1240/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1241#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1242 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1243
1244/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1245 * clears flags. */
1246#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1247 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1248 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1249
1250/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1251#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1252 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1253
1254/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1255 * clears flags. */
1256#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1257 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1258 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1259
1260#undef IEM_MC_IND_CALL_U16_AND_FINISH
1261
1262
1263/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1264#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1265 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1266
1267/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1268 * clears flags. */
1269#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1270 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1271 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1272
1273#undef IEM_MC_IND_CALL_U32_AND_FINISH
1274
1275
1276/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1277 * an extra parameter, for use in 64-bit code. */
1278#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1279 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1280
1281
1282/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1283 * an extra parameter, for use in 64-bit code and we need to check and clear
1284 * flags. */
1285#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1286 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1287 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1288
1289#undef IEM_MC_IND_CALL_U64_AND_FINISH
1290
1291/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1292 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1293DECL_INLINE_THROW(uint32_t)
1294iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1295 uint8_t idxInstr, uint8_t cbVar)
1296{
1297 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1298 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1299
1300 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1301 off = iemNativeRegFlushPendingWrites(pReNative, off);
1302
1303#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1304 Assert(pReNative->Core.offPc == 0);
1305
1306 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1307#endif
1308
1309 /* Get a register with the new PC loaded from idxVarPc.
1310       Note! This ASSUMES that the high bits of the GPR are zeroed. */
1311 uint8_t const idxNewPcReg = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1312
1313 /* Check limit (may #GP(0) + exit TB). */
1314 if (!f64Bit)
1315/** @todo we can skip this test in FLAT 32-bit mode. */
1316 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1317 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1318 else if (cbVar > sizeof(uint32_t))
1319 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1320
1321#if 1
1322 /* Allocate a temporary PC register, we don't want it shadowed. */
1323 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1324 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1325#else
1326 /* Allocate a temporary PC register. */
1327 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1328 true /*fNoVolatileRegs*/);
1329#endif
1330
1331    /* Perform the addition and push the return address to the guest stack. */
1332 /** @todo Flat variants for PC32 variants. */
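        /* Note: the RT_MAKE_U32_FROM_U8() argument below appears to pack the push width in bits
           into byte 0 and the flat-mode width into byte 1, matching the helper passed along with
           it (an observation from the call sites). */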
1333 switch (cbVar)
1334 {
1335 case sizeof(uint16_t):
1336 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1337 /* Truncate the result to 16-bit IP. */
1338 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1339 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1340 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1341 break;
1342 case sizeof(uint32_t):
1343 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1344 /** @todo In FLAT mode we can use the flat variant. */
1345 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1346 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1347 break;
1348 case sizeof(uint64_t):
1349 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1350 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1351 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1352 break;
1353 default:
1354 AssertFailed();
1355 }
1356
1357 /* RSP got changed, so do this again. */
1358 off = iemNativeRegFlushPendingWrites(pReNative, off);
1359
1360 /* Store the result. */
1361 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1362
1363#if 1
1364 /* Need to transfer the shadow information to the new RIP register. */
1365 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxNewPcReg, kIemNativeGstReg_Pc, off);
1366#else
1367 /* Sync the new PC. */
1368 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxNewPcReg);
1369#endif
1370 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1371 iemNativeRegFreeTmp(pReNative, idxPcReg);
1372    /** @todo implicitly free the variable? */
1373
1374 return off;
1375}
1376
1377
1378/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1379 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1380#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1381 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1382
1383/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1384 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
1385 * flags. */
1386#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
1387 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
1388 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1389
1390/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1391 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1392#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
1393 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1394
1395/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1396 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1397 * flags. */
1398#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
1399 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
1400 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1401
1402/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1403 * an extra parameter, for use in 64-bit code. */
1404#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
1405 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1406
1407/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1408 * an extra parameter, for use in 64-bit code and we need to check and clear
1409 * flags. */
1410#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
1411 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
1412 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1413
1414#undef IEM_MC_REL_CALL_S16_AND_FINISH
1415
1416/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1417 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1418DECL_INLINE_THROW(uint32_t)
1419iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
1420 uint8_t idxInstr)
1421{
1422 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1423 off = iemNativeRegFlushPendingWrites(pReNative, off);
1424
1425#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1426 Assert(pReNative->Core.offPc == 0);
1427
1428 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1429#endif
1430
1431 /* Allocate a temporary PC register. */
1432 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1433 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1434 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1435
1436 /* Calculate the new RIP. */
1437 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
1438 /* Truncate the result to 16-bit IP. */
1439 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
1440 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
1441 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
1442
1443 /* Truncate the result to 16-bit IP. */
1444 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
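        /* I.e. the branch target is computed modulo 64K, matching 16-bit relative call behaviour. */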
1445
1446 /* Check limit (may #GP(0) + exit TB). */
1447 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1448
1449    /* Push the return address to the guest stack. */
1450 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1451 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1452
1453 /* RSP got changed, so flush again. */
1454 off = iemNativeRegFlushPendingWrites(pReNative, off);
1455
1456 /* Store the result. */
1457 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1458
1459 /* Need to transfer the shadow information to the new RIP register. */
1460 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1461 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1462 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1463
1464 return off;
1465}
1466
1467
1468/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
1469 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1470#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
1471 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
1472
1473/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
1474 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1475 * flags. */
1476#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
1477 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
1478 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1479
1480#undef IEM_MC_REL_CALL_S32_AND_FINISH
1481
1482/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1483 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1484DECL_INLINE_THROW(uint32_t)
1485iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
1486 uint8_t idxInstr)
1487{
1488 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1489 off = iemNativeRegFlushPendingWrites(pReNative, off);
1490
1491#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1492 Assert(pReNative->Core.offPc == 0);
1493
1494 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1495#endif
1496
1497 /* Allocate a temporary PC register. */
1498 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1499 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1500 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1501
1502 /* Update the EIP to get the return address. */
1503 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
1504
1505    /* Load the address, add the displacement and check it against the CS segment limit, raising #GP(0) + exit TB if it's outside. */
1506 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
1507 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
1508 /** @todo we can skip this test in FLAT 32-bit mode. */
1509 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1510
1511    /* Push the return address to the guest stack. */
1512 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
1513 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1514 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1515
1516 /* RSP got changed, so do this again. */
1517 off = iemNativeRegFlushPendingWrites(pReNative, off);
1518
1519 /* Store the result. */
1520 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1521
1522 /* Need to transfer the shadow information to the new RIP register. */
1523 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1524 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1525 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1526
1527 return off;
1528}
1529
1530
1531/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
1532 * an extra parameter, for use in 64-bit code. */
1533#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
1534 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
1535
1536/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
1537 * an extra parameter, for use in 64-bit code and we need to check and clear
1538 * flags. */
1539#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
1540 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
1541 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1542
1543#undef IEM_MC_REL_CALL_S64_AND_FINISH
1544
1545/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1546 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1547DECL_INLINE_THROW(uint32_t)
1548iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
1549 uint8_t idxInstr)
1550{
1551 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1552 off = iemNativeRegFlushPendingWrites(pReNative, off);
1553
1554#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1555 Assert(pReNative->Core.offPc == 0);
1556
1557 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1558#endif
1559
1560 /* Allocate a temporary PC register. */
1561 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1562 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1563 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1564
1565 /* Update the RIP to get the return address. */
1566 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
1567
1568 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1569 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
1570 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
1571 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1572
1573    /* Push the return address to the guest stack. */
1574 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1575 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1576
1577 /* RSP got changed, so do this again. */
1578 off = iemNativeRegFlushPendingWrites(pReNative, off);
1579
1580 /* Store the result. */
1581 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1582
1583 /* Need to transfer the shadow information to the new RIP register. */
1584 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1585 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1586 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1587
1588 return off;
1589}
1590
1591
1592/*********************************************************************************************************************************
1593* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters).        *
1594*********************************************************************************************************************************/
1595
1596DECL_FORCE_INLINE_THROW(uint32_t)
1597iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
1598 uint16_t cbPopAdd, uint8_t idxRegTmp)
1599{
1600 /* Use16BitSp: */
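        /* Worked example: a 16-bit 'retn 4' with SP=0xFFFC and cbMem=2 yields idxRegEffSp=0xFFFC
           (where the return address is read from) and RSP[15:0] = (0xFFFC + 2 + 4) & 0xffff = 0x0002;
           bits 63:16 of RSP are left untouched. */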
1601#ifdef RT_ARCH_AMD64
1602 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1603 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1604 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
1605 RT_NOREF(idxRegTmp);
1606#elif defined(RT_ARCH_ARM64)
1607 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
1608 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
1609    /* add tmp, regrsp, #(cbMem + cbPopAdd) */
1610 uint16_t const cbCombined = cbMem + cbPopAdd;
1611 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
1612 if (cbCombined >= RT_BIT_32(12))
1613 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
1614 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
1615 /* and tmp, tmp, #0xffff */
1616 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1617 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
1618    /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
1619 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
1620#else
1621# error "Port me"
1622#endif
1623 return off;
1624}
1625
1626
1627DECL_FORCE_INLINE_THROW(uint32_t)
1628iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
1629 uint16_t cbPopAdd)
1630{
1631 /* Use32BitSp: */
1632 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1633 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
1634 return off;
1635}
1636
1637
1638/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
1639#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr) \
1640 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, IEMMODE_16BIT, pCallEntry->idxInstr)
1641
1642/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
1643#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1644 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
1645
1646/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
1647#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1648 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), true /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
1649
1650/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
1651 * clears flags. */
1652#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16Pop, a_cbInstr) \
1653 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr); \
1654 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1655
1656/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
1657 * clears flags. */
1658#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1659 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
1660 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1661
1662/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
1663 * clears flags. */
1664#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1665 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
1666 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1667
1668/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
1669DECL_INLINE_THROW(uint32_t)
1670iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPop, bool f64Bit,
1671 IEMMODE enmEffOpSize, uint8_t idxInstr)
1672{
1673 RT_NOREF(cbInstr);
1674
1675#ifdef VBOX_STRICT
1676 /*
1677 * Check that the fExec flags we've got make sense.
1678 */
1679 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1680#endif
1681
1682 /*
1683 * To keep things simple we have to commit any pending writes first as we
1684 * may end up making calls.
1685 */
1686 off = iemNativeRegFlushPendingWrites(pReNative, off);
1687
1688 /*
1689 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
1690 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
1691 * directly as the effective stack pointer.
1692 * (Code structure is very similar to that of PUSH)
1693 *
1694 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
1695 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
1696 * aren't commonly used (or useful) and thus not in need of optimizing.
1697 *
1698 * Note! For non flat modes the guest RSP is not allocated for update but rather for calculation
1699 * as the shadowed register would remain modified even if the return address throws a \#GP(0)
1700 * due to being outside the CS limit causing a wrong stack pointer value in the guest (see
1701     * the near return testcase in bs3-cpu-basic-2). If no exception is thrown the shadowing is transferred
1702 * to the new register returned by iemNativeRegAllocTmpForGuestReg() at the end.
1703 */
1704 uint8_t const cbMem = enmEffOpSize == IEMMODE_64BIT
1705 ? sizeof(uint64_t)
1706 : enmEffOpSize == IEMMODE_32BIT
1707 ? sizeof(uint32_t)
1708 : sizeof(uint16_t);
1709 bool const fFlat = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) && enmEffOpSize != IEMMODE_16BIT; /* see note */
1710 uintptr_t const pfnFunction = fFlat
1711 ? enmEffOpSize == IEMMODE_64BIT
1712 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
1713 : (uintptr_t)iemNativeHlpStackFlatFetchU32
1714 : enmEffOpSize == IEMMODE_32BIT
1715 ? (uintptr_t)iemNativeHlpStackFetchU32
1716 : (uintptr_t)iemNativeHlpStackFetchU16;
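        /* Note: there is no non-flat 64-bit fetch helper above; 64-bit mode is always treated as
           flat here, so IEMMODE_64BIT only ever pairs with fFlat being true. */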
1717 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1718 fFlat ? kIemNativeGstRegUse_ForUpdate : kIemNativeGstRegUse_Calculation,
1719 true /*fNoVolatileRegs*/);
1720 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1721 /** @todo can do a better job picking the register here. For cbMem >= 4 this
1722 * will be the resulting register value. */
1723 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
1724
1725 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1726 if (fFlat)
1727 Assert(idxRegEffSp == idxRegRsp);
1728 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1729 {
1730 Assert(idxRegEffSp != idxRegRsp);
1731 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1732 kIemNativeGstRegUse_ReadOnly);
1733#ifdef RT_ARCH_AMD64
1734 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1735#else
1736 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1737#endif
1738 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1739 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1740 offFixupJumpToUseOtherBitSp = off;
1741 if (enmEffOpSize == IEMMODE_32BIT)
1742 {
1743 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1744 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
1745 }
1746 else
1747 {
1748 Assert(enmEffOpSize == IEMMODE_16BIT);
1749 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1750 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
1751 idxRegMemResult);
1752 }
1753 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1754 }
1755 /* SpUpdateEnd: */
1756 uint32_t const offLabelSpUpdateEnd = off;
1757
1758 /*
1759 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1760 * we're skipping lookup).
1761 */
1762 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
1763 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
1764 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1765 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1766 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1767 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1768 : UINT32_MAX;
1769
1770 if (!TlbState.fSkip)
1771 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1772 else
1773 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1774
1775 /*
1776 * Use16BitSp:
1777 */
1778 if (!fFlat)
1779 {
1780#ifdef RT_ARCH_AMD64
1781 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1782#else
1783 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1784#endif
1785 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1786 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1787 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
1788 idxRegMemResult);
1789 else
1790 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
1791 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1792 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1793 }
1794
1795 /*
1796 * TlbMiss:
1797 *
1798     * Call helper to do the popping (stack fetch).
1799 */
1800 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1801
1802#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1803 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1804#else
1805 RT_NOREF(idxInstr);
1806#endif
1807
1808 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1809 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1810 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
1811 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1812
1813
1814 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
1815 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1816 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1817
1818 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1819 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1820
1821 /* Done setting up parameters, make the call. */
1822 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1823
1824 /* Move the return register content to idxRegMemResult. */
1825 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
1826 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
1827
1828 /* Restore variables and guest shadow registers to volatile registers. */
1829 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1830 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1831
1832#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1833 if (!TlbState.fSkip)
1834 {
1835 /* end of TlbMiss - Jump to the done label. */
1836 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1837 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1838
1839 /*
1840 * TlbLookup:
1841 */
1842 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
1843 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1844
1845 /*
1846         * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
1847 */
1848 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1849# ifdef VBOX_WITH_STATISTICS
1850 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1851 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1852# endif
1853 switch (cbMem)
1854 {
1855 case 2:
1856 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1857 break;
1858 case 4:
1859 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1860 break;
1861 case 8:
1862 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1863 break;
1864 default:
1865 AssertFailed();
1866 }
1867
1868 TlbState.freeRegsAndReleaseVars(pReNative);
1869
1870 /*
1871 * TlbDone:
1872 *
1873         * Set the new RSP value (FLAT accesses need to calculate it first) and
1874 * commit the popped register value.
1875 */
1876 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1877 }
1878#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1879
1880 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
1881 if (!f64Bit)
1882/** @todo we can skip this test in FLAT 32-bit mode. */
1883 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
1884 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1885 else if (enmEffOpSize == IEMMODE_64BIT)
1886 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
1887
1888 /* Complete RSP calculation for FLAT mode. */
1889 if (idxRegEffSp == idxRegRsp)
1890 {
1891 if (enmEffOpSize == IEMMODE_64BIT)
1892 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPop);
1893 else
1894 {
1895 Assert(enmEffOpSize == IEMMODE_32BIT);
1896 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPop);
1897 }
1898 }
1899
1900 /* Commit the result and clear any current guest shadows for RIP. */
1901 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
1902 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1903 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
1904
1905 /* Need to transfer the shadowing information to the host register containing the updated value now. */
1906 if (!fFlat)
1907 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
1908
1909 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1910 if (idxRegEffSp != idxRegRsp)
1911 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1912 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1913 return off;
1914}
1915
1916
1917/*********************************************************************************************************************************
1918* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
1919*********************************************************************************************************************************/
1920
1921#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
1922 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
1923
1924/**
1925 * Emits code to check if a \#NM exception should be raised.
1926 *
1927 * @returns New code buffer offset, UINT32_MAX on failure.
1928 * @param pReNative The native recompile state.
1929 * @param off The code buffer offset.
1930 * @param idxInstr The current instruction.
1931 */
1932DECL_INLINE_THROW(uint32_t)
1933iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1934{
1935#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1936 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
1937
1938 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
1939 {
1940#endif
1941 /*
1942 * Make sure we don't have any outstanding guest register writes as we may
1943         * raise an #NM and all guest registers must be up to date in CPUMCTX.
1944 */
1945 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
1946 off = iemNativeRegFlushPendingWrites(pReNative, off);
1947
1948#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1949 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1950#else
1951 RT_NOREF(idxInstr);
1952#endif
1953
1954 /* Allocate a temporary CR0 register. */
1955 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
1956 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
1957
1958 /*
1959 * if (cr0 & (X86_CR0_EM | X86_CR0_TS) != 0)
1960 * return raisexcpt();
1961 */
1962 /* Test and jump. */
1963 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
1964
1965 /* Free but don't flush the CR0 register. */
1966 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1967
1968#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1969 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
1970 }
1971 else
1972 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
1973#endif
1974
1975 return off;
1976}
1977
1978
1979#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
1980 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
1981
1982/**
1983 * Emits code to check if a \#NM exception should be raised.
1984 *
1985 * @returns New code buffer offset, UINT32_MAX on failure.
1986 * @param pReNative The native recompile state.
1987 * @param off The code buffer offset.
1988 * @param idxInstr The current instruction.
1989 */
1990DECL_INLINE_THROW(uint32_t)
1991iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1992{
1993#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1994 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
1995
1996 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
1997 {
1998#endif
1999 /*
2000 * Make sure we don't have any outstanding guest register writes as we may
2001         * raise an #NM and all guest registers must be up to date in CPUMCTX.
2002 */
2003 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2004 off = iemNativeRegFlushPendingWrites(pReNative, off);
2005
2006#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2007 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2008#else
2009 RT_NOREF(idxInstr);
2010#endif
2011
2012 /* Allocate a temporary CR0 register. */
2013 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_Calculation);
2014
2015 /*
2016 * if (cr0 & (X86_CR0_MP | X86_CR0_TS) == (X86_CR0_MP | X86_CR0_TS))
2017 * return raisexcpt();
2018 */
2019 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2020 /* Test and jump. */
2021 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToNewLabel(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS, kIemNativeLabelType_RaiseNm);
2022
2023 /* Free the CR0 register. */
2024 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2025
2026#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2027 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2028 }
2029 else
2030 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2031#endif
2032
2033 return off;
2034}
2035
2036
2037#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2038 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2039
2040/**
2041 * Emits code to check if a \#MF exception should be raised.
2042 *
2043 * @returns New code buffer offset, UINT32_MAX on failure.
2044 * @param pReNative The native recompile state.
2045 * @param off The code buffer offset.
2046 * @param idxInstr The current instruction.
2047 */
2048DECL_INLINE_THROW(uint32_t)
2049iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2050{
2051 /*
2052 * Make sure we don't have any outstanding guest register writes as we may
2053     * raise an #MF and all guest registers must be up to date in CPUMCTX.
2054 */
2055 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2056 off = iemNativeRegFlushPendingWrites(pReNative, off);
2057
2058#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2059 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2060#else
2061 RT_NOREF(idxInstr);
2062#endif
2063
2064 /* Allocate a temporary FSW register. */
2065 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
2066 uint8_t const idxLabelRaiseMf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseMf);
2067
2068 /*
2069 * if (FSW & X86_FSW_ES != 0)
2070 * return raisexcpt();
2071 */
2072 /* Test and jump. */
2073 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT, idxLabelRaiseMf);
2074
2075 /* Free but don't flush the FSW register. */
2076 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2077
2078 return off;
2079}
2080
2081
2082#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2083 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2084
2085/**
2086 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2087 *
2088 * @returns New code buffer offset, UINT32_MAX on failure.
2089 * @param pReNative The native recompile state.
2090 * @param off The code buffer offset.
2091 * @param idxInstr The current instruction.
2092 */
2093DECL_INLINE_THROW(uint32_t)
2094iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2095{
2096#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2097 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2098
2099 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2100 {
2101#endif
2102 /*
2103 * Make sure we don't have any outstanding guest register writes as we may
2104         * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2105 */
2106 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2107 off = iemNativeRegFlushPendingWrites(pReNative, off);
2108
2109#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2110 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2111#else
2112 RT_NOREF(idxInstr);
2113#endif
2114
2115 /* Allocate a temporary CR0 and CR4 register. */
2116 uint8_t const idxLabelRaiseSseRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseRelated);
2117 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2118 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2119 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2120
2121 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
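            /* In plain terms: branch to RaiseSseRelated unless CR4.OSFXSR is set and both CR0.EM
               and CR0.TS are clear. */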
2122#ifdef RT_ARCH_AMD64
2123 /*
2124 * We do a modified test here:
2125 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
2126 * else { goto RaiseSseRelated; }
2127         * This ASSUMES that CR0[bit 9] is always zero. This is the case on
2128         * all targets except the 386, and since the 386 doesn't support SSE
2129         * anyway, this should be a safe assumption.
2130 */
2131 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
2132 //pCodeBuf[off++] = 0xcc;
2133 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2134 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2135 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2136 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2137 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2138 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseSseRelated, kIemNativeInstrCond_ne);
2139
2140#elif defined(RT_ARCH_ARM64)
2141 /*
2142 * We do a modified test here:
2143 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2144 * else { goto RaiseSseRelated; }
2145 */
2146 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
2147 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2148 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2149 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2150 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2151 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2152 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2153 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2154 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2155 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2156 idxLabelRaiseSseRelated);
2157
2158#else
2159# error "Port me!"
2160#endif
2161
2162 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2163 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2164 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2165 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2166
2167#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2168 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2169 }
2170 else
2171 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2172#endif
2173
2174 return off;
2175}
2176
2177
2178#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2179 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2180
2181/**
2182 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2183 *
2184 * @returns New code buffer offset, UINT32_MAX on failure.
2185 * @param pReNative The native recompile state.
2186 * @param off The code buffer offset.
2187 * @param idxInstr The current instruction.
2188 */
2189DECL_INLINE_THROW(uint32_t)
2190iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2191{
2192#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2193 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2194
2195 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2196 {
2197#endif
2198 /*
2199 * Make sure we don't have any outstanding guest register writes as we may
2200         * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2201 */
2202 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2203 off = iemNativeRegFlushPendingWrites(pReNative, off);
2204
2205#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2206 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2207#else
2208 RT_NOREF(idxInstr);
2209#endif
2210
2211 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2212 uint8_t const idxLabelRaiseAvxRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseAvxRelated);
2213 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2214 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2215 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2216 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2217
2218 /*
2219 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2220 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2221 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2222 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2223 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2224 * { likely }
2225 * else { goto RaiseAvxRelated; }
2226 */
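            /* In plain terms: branch to RaiseAvxRelated unless XCR0 enables both SSE and YMM state,
               CR4.OSXSAVE is set and CR0.TS is clear. */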
2227#ifdef RT_ARCH_AMD64
2228 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2229 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
2230 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2231 ^ 0x1a) ) { likely }
2232 else { goto RaiseAvxRelated; } */
2233 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
2234 //pCodeBuf[off++] = 0xcc;
2235 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2236 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2237 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2238 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2239 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2240 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2241 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2242 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2243 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2244 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2245 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseAvxRelated, kIemNativeInstrCond_ne);
2246
2247#elif defined(RT_ARCH_ARM64)
2248 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
2249 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
2250 else { goto RaiseAvxRelated; } */
2251 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
2252 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2253 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2254 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2255 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2256 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2257 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2258 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2259 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2260 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2261 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2262 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2263 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2264 idxLabelRaiseAvxRelated);
2265
2266#else
2267# error "Port me!"
2268#endif
2269
2270 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2271 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2272 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2273 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2274#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2275 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2276 }
2277 else
2278 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2279#endif
2280
2281 return off;
2282}
2283
2284
2285#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2286#define IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
2287 off = iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(pReNative, off, pCallEntry->idxInstr)
2288
2289/** Emits code for IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT. */
2290DECL_INLINE_THROW(uint32_t)
2291iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2292{
2293 /*
2294 * Make sure we don't have any outstanding guest register writes as we may
2295     * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
2296 */
2297 off = iemNativeRegFlushPendingWrites(pReNative, off);
2298
2299#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2300 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2301#else
2302 RT_NOREF(idxInstr);
2303#endif
2304
2305 uint8_t const idxLabelRaiseSseAvxFpRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseAvxFpRelated);
2306 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
2307 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
2308
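        /* The sequence below isolates the MXCSR exception flags whose corresponding mask bits are
           clear, i.e. a pending unmasked SIMD FP exception, and jumps to the RaiseSseAvxFpRelated
           label if any such flag is set. */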
2309 /* mov tmp, varmxcsr */
2310 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
2311 /* tmp &= X86_MXCSR_XCPT_MASK */
2312 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
2313 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
2314 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
2315 /* tmp = ~tmp */
2316 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
2317 /* tmp &= mxcsr */
2318 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
2319 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
2320 idxLabelRaiseSseAvxFpRelated);
2321
2322 /* Free but don't flush the MXCSR register. */
2323 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
2324 iemNativeRegFreeTmp(pReNative, idxRegTmp);
2325
2326 return off;
2327}
2328#endif
2329
2330
2331#define IEM_MC_RAISE_DIVIDE_ERROR() \
2332 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
2333
2334/**
2335 * Emits code to raise a \#DE.
2336 *
2337 * @returns New code buffer offset, UINT32_MAX on failure.
2338 * @param pReNative The native recompile state.
2339 * @param off The code buffer offset.
2340 * @param idxInstr The current instruction.
2341 */
2342DECL_INLINE_THROW(uint32_t)
2343iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2344{
2345 /*
2346     * Make sure we don't have any outstanding guest register writes as we may raise a #DE and all guest registers must be up to date in CPUMCTX.
2347 */
2348 off = iemNativeRegFlushPendingWrites(pReNative, off);
2349
2350#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2351 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2352#else
2353 RT_NOREF(idxInstr);
2354#endif
2355
2356 uint8_t const idxLabelRaiseDe = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseDe);
2357
2358 /* raise \#DE exception unconditionally. */
2359 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseDe);
2360
2361 return off;
2362}
2363
2364
2365#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2366 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2367
2368/**
2369 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2370 *
2371 * @returns New code buffer offset, UINT32_MAX on failure.
2372 * @param pReNative The native recompile state.
2373 * @param off The code buffer offset.
2374 * @param idxInstr The current instruction.
2375 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2376 * @param cbAlign The alignment in bytes to check against.
2377 */
2378DECL_INLINE_THROW(uint32_t)
2379iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint8_t idxVarEffAddr, uint8_t cbAlign)
2380{
2381 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2382 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2383
2384 /*
2385 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2386 */
2387 off = iemNativeRegFlushPendingWrites(pReNative, off);
2388
2389#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2390 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2391#else
2392 RT_NOREF(idxInstr);
2393#endif
2394
2395 uint8_t const idxLabelRaiseGp0 = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseGp0);
2396 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2397
2398 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxVarReg, cbAlign - 1, idxLabelRaiseGp0);
2399
2400 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2401 return off;
2402}
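
/** @note Illustrative sketch (not part of the recompiler): the test emitted above is the
 *        usual power-of-two alignment check; the helper name is made up.
 * @code
 *    static bool iemSketchIsEffAddrUnaligned(uint64_t uEffAddr, uint8_t cbAlign)
 *    {
 *        // cbAlign must be a power of two; any of the low bits being set means the address is misaligned.
 *        return (uEffAddr & (uint64_t)(cbAlign - 1)) != 0;
 *    }
 * @endcode
 */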
2403
2404
2405/*********************************************************************************************************************************
2406* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2407*********************************************************************************************************************************/
2408
2409/**
2410 * Pushes an IEM_MC_IF_XXX onto the condition stack.
2411 *
2412 * @returns Pointer to the condition stack entry on success; throws
2413 * VERR_IEM_COND_TOO_DEEPLY_NESTED when the nesting is too deep.
2414 */
2415DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
2416{
2417#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2418 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
2419#endif
2420
2421 uint32_t const idxStack = pReNative->cCondDepth;
2422 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
2423
2424 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
2425 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
2426
2427 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
2428 pEntry->fInElse = false;
2429 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
2430 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
2431
2432 return pEntry;
2433}
2434
2435
2436/**
2437 * Start of the if-block, snapshotting the register and variable state.
2438 */
2439DECL_INLINE_THROW(void)
2440iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
2441{
2442 Assert(offIfBlock != UINT32_MAX);
2443 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2444 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2445 Assert(!pEntry->fInElse);
2446
2447 /* Define the start of the IF block if requested or for disassembly purposes. */
2448 if (idxLabelIf != UINT32_MAX)
2449 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
2450#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2451 else
2452 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
2453#else
2454 RT_NOREF(offIfBlock);
2455#endif
2456
2457#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2458 Assert(pReNative->Core.offPc == 0);
2459#endif
2460
2461 /* Copy the initial state so we can restore it in the 'else' block. */
2462 pEntry->InitialState = pReNative->Core;
2463}
2464
2465
2466#define IEM_MC_ELSE() } while (0); \
2467 off = iemNativeEmitElse(pReNative, off); \
2468 do {
2469
2470/** Emits code related to IEM_MC_ELSE. */
2471DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2472{
2473 /* Check sanity and get the conditional stack entry. */
2474 Assert(off != UINT32_MAX);
2475 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2476 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2477 Assert(!pEntry->fInElse);
2478
2479#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2480 /* Writeback any dirty shadow registers. */
2481 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
2482 * in one of the branches and leave guest registers already dirty before the start of the if
2483 * block alone. */
2484 off = iemNativeRegFlushDirtyGuest(pReNative, off);
2485#endif
2486
2487 /* Jump to the endif */
2488 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
2489
2490 /* Define the else label and enter the else part of the condition. */
2491 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2492 pEntry->fInElse = true;
2493
2494#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2495 Assert(pReNative->Core.offPc == 0);
2496#endif
2497
2498 /* Snapshot the core state so we can do a merge at the endif and restore
2499 the snapshot we took at the start of the if-block. */
2500 pEntry->IfFinalState = pReNative->Core;
2501 pReNative->Core = pEntry->InitialState;
2502
2503 return off;
2504}
2505
2506
2507#define IEM_MC_ENDIF() } while (0); \
2508 off = iemNativeEmitEndIf(pReNative, off)
2509
2510/** Emits code related to IEM_MC_ENDIF. */
2511DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2512{
2513 /* Check sanity and get the conditional stack entry. */
2514 Assert(off != UINT32_MAX);
2515 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2516 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2517
2518#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2519 Assert(pReNative->Core.offPc == 0);
2520#endif
2521#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2522 /* Writeback any dirty shadow registers (else branch). */
2523 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
2524 * in one of the branches and leave guest registers already dirty before the start of the if
2525 * block alone. */
2526 off = iemNativeRegFlushDirtyGuest(pReNative, off);
2527#endif
2528
2529 /*
2530 * Now we have to find common ground with the core state at the end of the
2531 * if-block (or the initial state when there is no else-block). Use the smallest
2532 * common denominator and just drop anything that isn't the same in both states.
2533 */
2534 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
2535 * which is why we're doing this at the end of the else-block.
2536 * But we'd need more info about the future for that to be worth the effort. */
2537 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
2538#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2539 Assert( pOther->bmGstRegShadowDirty == 0
2540 && pReNative->Core.bmGstRegShadowDirty == 0);
2541#endif
2542
2543 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
2544 {
2545 /* shadow guest stuff first. */
2546 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
2547 if (fGstRegs)
2548 {
2549 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
2550 do
2551 {
2552 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2553 fGstRegs &= ~RT_BIT_64(idxGstReg);
2554
2555 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
2556 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
2557 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
2558 {
2559 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
2560 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
2561
2562#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2563 /* Writeback any dirty shadow registers we are about to unshadow. */
2564 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxHstReg);
2565#endif
2566 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
2567 }
2568 } while (fGstRegs);
2569 }
2570 else
2571 {
2572 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
2573#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2574 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
2575#endif
2576 }
2577
2578 /* Check variables next. For now we must require them to be identical
2579 or stuff we can recreate. */
2580 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
2581 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
2582 if (fVars)
2583 {
2584 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
2585 do
2586 {
2587 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
2588 fVars &= ~RT_BIT_32(idxVar);
2589
2590 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
2591 {
2592 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
2593 continue;
2594 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2595 {
2596 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
2597 if (idxHstReg != UINT8_MAX)
2598 {
2599 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2600 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2601 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
2602 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2603 }
2604 continue;
2605 }
2606 }
2607 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
2608 continue;
2609
2610 /* Irreconcilable, so drop it. */
2611 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
2612 if (idxHstReg != UINT8_MAX)
2613 {
2614 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2615 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2616 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
2617 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2618 }
2619 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2620 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
2621 } while (fVars);
2622 }
2623
2624 /* Finally, check that the host register allocations matches. */
2625 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
2626 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
2627 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
2628 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
2629 }
2630
2631 /*
2632 * Define the endif label and maybe the else one if we're still in the 'if' part.
2633 */
2634 if (!pEntry->fInElse)
2635 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2636 else
2637 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
2638 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
2639
2640 /* Pop the conditional stack. */
2641 pReNative->cCondDepth -= 1;
2642
2643 return off;
2644}
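
/** @note Illustrative sketch (not part of the recompiler): how the IEM_MC_IF_XXX /
 *        IEM_MC_ELSE / IEM_MC_ENDIF macros expand inside a recompiler function body
 *        (shown for IEM_MC_IF_EFL_BIT_SET; the other conditions are analogous):
 * @code
 *    off = iemNativeEmitIfEflagsBitSet(pReNative, off, X86_EFL_ZF);  // pushes a cond stack entry and
 *    do {                                                            // jumps to the 'else' label when clear
 *        // ... emitters for the if-block statements ...
 *    } while (0);
 *    off = iemNativeEmitElse(pReNative, off);                        // jmp 'endif', define 'else', restore initial state
 *    do {
 *        // ... emitters for the else-block statements ...
 *    } while (0);
 *    off = iemNativeEmitEndIf(pReNative, off);                       // reconcile the two states, define 'endif'
 * @endcode
 */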
2645
2646
2647#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
2648 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
2649 do {
2650
2651/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
2652DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
2653{
2654 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
2655 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2656
2657 /* Get the eflags. */
2658 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2659 kIemNativeGstRegUse_ReadOnly);
2660
2661 /* Test and jump. */
2662 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
2663
2664 /* Free but don't flush the EFlags register. */
2665 iemNativeRegFreeTmp(pReNative, idxEflReg);
2666
2667 /* Make a copy of the core state now as we start the if-block. */
2668 iemNativeCondStartIfBlock(pReNative, off);
2669
2670 return off;
2671}
2672
2673
2674#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
2675 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
2676 do {
2677
2678/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
2679DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
2680{
2681 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
2682 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2683
2684 /* Get the eflags. */
2685 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2686 kIemNativeGstRegUse_ReadOnly);
2687
2688 /* Test and jump. */
2689 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
2690
2691 /* Free but don't flush the EFlags register. */
2692 iemNativeRegFreeTmp(pReNative, idxEflReg);
2693
2694 /* Make a copy of the core state now as we start the if-block. */
2695 iemNativeCondStartIfBlock(pReNative, off);
2696
2697 return off;
2698}
2699
2700
2701#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
2702 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
2703 do {
2704
2705/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
2706DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
2707{
2708 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2709 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2710
2711 /* Get the eflags. */
2712 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2713 kIemNativeGstRegUse_ReadOnly);
2714
2715 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2716 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2717
2718 /* Test and jump. */
2719 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2720
2721 /* Free but don't flush the EFlags register. */
2722 iemNativeRegFreeTmp(pReNative, idxEflReg);
2723
2724 /* Make a copy of the core state now as we start the if-block. */
2725 iemNativeCondStartIfBlock(pReNative, off);
2726
2727 return off;
2728}
2729
2730
2731#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
2732 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
2733 do {
2734
2735/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
2736DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
2737{
2738 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2739 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2740
2741 /* Get the eflags. */
2742 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2743 kIemNativeGstRegUse_ReadOnly);
2744
2745 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2746 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2747
2748 /* Test and jump. */
2749 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2750
2751 /* Free but don't flush the EFlags register. */
2752 iemNativeRegFreeTmp(pReNative, idxEflReg);
2753
2754 /* Make a copy of the core state now as we start the if-block. */
2755 iemNativeCondStartIfBlock(pReNative, off);
2756
2757 return off;
2758}
2759
2760
2761#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
2762 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
2763 do {
2764
2765#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
2766 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
2767 do {
2768
2769/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
2770DECL_INLINE_THROW(uint32_t)
2771iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2772 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
2773{
2774 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
2775 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2776
2777 /* Get the eflags. */
2778 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2779 kIemNativeGstRegUse_ReadOnly);
2780
2781 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
2782 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
2783
2784 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
2785 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
2786 Assert(iBitNo1 != iBitNo2);
2787
2788#ifdef RT_ARCH_AMD64
2789 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
2790
2791 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2792 if (iBitNo1 > iBitNo2)
2793 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
2794 else
2795 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
2796 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2797
2798#elif defined(RT_ARCH_ARM64)
2799 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2800 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2801
2802 /* and tmpreg, eflreg, #1<<iBitNo1 */
2803 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
2804
2805 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
2806 if (iBitNo1 > iBitNo2)
2807 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2808 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
2809 else
2810 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2811 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
2812
2813 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2814
2815#else
2816# error "Port me"
2817#endif
2818
2819 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
2820 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
2821 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
2822
2823 /* Free but don't flush the EFlags and tmp registers. */
2824 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2825 iemNativeRegFreeTmp(pReNative, idxEflReg);
2826
2827 /* Make a copy of the core state now as we start the if-block. */
2828 iemNativeCondStartIfBlock(pReNative, off);
2829
2830 return off;
2831}
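
/** @note Illustrative sketch (not part of the recompiler): the shift-and-xor trick used
 *        above, in plain C.  The helper name is made up; it returns true when the two
 *        EFLAGS bits differ, which is what bit iBitNo2 of the temporary ends up holding.
 * @code
 *    static bool iemSketchEflBitsDiffer(uint32_t fEfl, unsigned iBitNo1, unsigned iBitNo2)
 *    {
 *        uint32_t uTmp = fEfl & RT_BIT_32(iBitNo1);                       // isolate bit #1
 *        uTmp = iBitNo1 > iBitNo2 ? uTmp >> (iBitNo1 - iBitNo2)           // align it with bit #2
 *                                 : uTmp << (iBitNo2 - iBitNo1);
 *        uTmp ^= fEfl;                                                    // bit #2 now holds bit1 ^ bit2
 *        return RT_BOOL(uTmp & RT_BIT_32(iBitNo2));
 *    }
 * @endcode
 */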
2832
2833
2834#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
2835 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
2836 do {
2837
2838#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
2839 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
2840 do {
2841
2842/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
2843 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
2844DECL_INLINE_THROW(uint32_t)
2845iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
2846 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
2847{
2848 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
2849 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2850
2851 /* We need an if-block label for the fInverted variant (IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE). */
2852 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
2853 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
2854
2855 /* Get the eflags. */
2856 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2857 kIemNativeGstRegUse_ReadOnly);
2858
2859 /* Translate the flag masks to bit numbers. */
2860 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2861 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2862
2863 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
2864 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
2865 Assert(iBitNo1 != iBitNo);
2866
2867 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
2868 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
2869 Assert(iBitNo2 != iBitNo);
2870 Assert(iBitNo2 != iBitNo1);
2871
2872#ifdef RT_ARCH_AMD64
2873 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
2874#elif defined(RT_ARCH_ARM64)
2875 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2876#endif
2877
2878 /* Check for the lone bit first. */
2879 if (!fInverted)
2880 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2881 else
2882 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
2883
2884 /* Then extract and compare the other two bits. */
2885#ifdef RT_ARCH_AMD64
2886 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2887 if (iBitNo1 > iBitNo2)
2888 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
2889 else
2890 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
2891 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2892
2893#elif defined(RT_ARCH_ARM64)
2894 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2895
2896 /* and tmpreg, eflreg, #1<<iBitNo1 */
2897 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
2898
2899 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
2900 if (iBitNo1 > iBitNo2)
2901 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2902 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
2903 else
2904 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2905 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
2906
2907 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2908
2909#else
2910# error "Port me"
2911#endif
2912
2913 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
2914 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
2915 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
2916
2917 /* Free but don't flush the EFlags and tmp registers. */
2918 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2919 iemNativeRegFreeTmp(pReNative, idxEflReg);
2920
2921 /* Make a copy of the core state now as we start the if-block. */
2922 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
2923
2924 return off;
2925}
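
/** @note Illustrative sketch (not part of the recompiler): the overall condition the two
 *        variants above implement, in plain C (helper name made up):
 * @code
 *    static bool iemSketchEnterIfBlock(uint32_t fEfl, uint32_t fBit, unsigned iBitNo1, unsigned iBitNo2, bool fInverted)
 *    {
 *        bool const fBitsEqual = RT_BOOL(fEfl & RT_BIT_32(iBitNo1)) == RT_BOOL(fEfl & RT_BIT_32(iBitNo2));
 *        return !fInverted
 *             ? !(fEfl & fBit) && fBitsEqual    // IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ
 *             :  (fEfl & fBit) || !fBitsEqual;  // IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE
 *    }
 * @endcode
 */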
2926
2927
2928#define IEM_MC_IF_CX_IS_NZ() \
2929 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
2930 do {
2931
2932/** Emits code for IEM_MC_IF_CX_IS_NZ. */
2933DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2934{
2935 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2936
2937 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2938 kIemNativeGstRegUse_ReadOnly);
2939 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
2940 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2941
2942 iemNativeCondStartIfBlock(pReNative, off);
2943 return off;
2944}
2945
2946
2947#define IEM_MC_IF_ECX_IS_NZ() \
2948 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
2949 do {
2950
2951#define IEM_MC_IF_RCX_IS_NZ() \
2952 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
2953 do {
2954
2955/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
2956DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
2957{
2958 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2959
2960 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2961 kIemNativeGstRegUse_ReadOnly);
2962 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
2963 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2964
2965 iemNativeCondStartIfBlock(pReNative, off);
2966 return off;
2967}
2968
2969
2970#define IEM_MC_IF_CX_IS_NOT_ONE() \
2971 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
2972 do {
2973
2974/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
2975DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2976{
2977 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2978
2979 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
2980 kIemNativeGstRegUse_ReadOnly);
2981#ifdef RT_ARCH_AMD64
2982 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
2983#else
2984 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2985 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
2986 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2987#endif
2988 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
2989
2990 iemNativeCondStartIfBlock(pReNative, off);
2991 return off;
2992}
2993
2994
2995#define IEM_MC_IF_ECX_IS_NOT_ONE() \
2996 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
2997 do {
2998
2999#define IEM_MC_IF_RCX_IS_NOT_ONE() \
3000 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
3001 do {
3002
3003/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
3004DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3005{
3006 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3007
3008 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3009 kIemNativeGstRegUse_ReadOnly);
3010 if (f64Bit)
3011 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3012 else
3013 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3014 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3015
3016 iemNativeCondStartIfBlock(pReNative, off);
3017 return off;
3018}
3019
3020
3021#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3022 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
3023 do {
3024
3025#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3026 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
3027 do {
3028
3029/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
3030 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3031DECL_INLINE_THROW(uint32_t)
3032iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
3033{
3034 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3035 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3036
3037 /* We have to load both RCX and EFLAGS before we can start branching,
3038 otherwise we'll end up in the else-block with an inconsistent
3039 register allocator state.
3040 Doing EFLAGS first as it's more likely to be loaded, right? */
3041 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3042 kIemNativeGstRegUse_ReadOnly);
3043 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3044 kIemNativeGstRegUse_ReadOnly);
3045
3046 /** @todo we could reduce this to a single branch instruction by spending a
3047 * temporary register and some setnz stuff. Not sure if loops are
3048 * worth it. */
3049 /* Check CX. */
3050#ifdef RT_ARCH_AMD64
3051 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3052#else
3053 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3054 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3055 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3056#endif
3057
3058 /* Check the EFlags bit. */
3059 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3060 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3061 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3062 !fCheckIfSet /*fJmpIfSet*/);
3063
3064 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3065 iemNativeRegFreeTmp(pReNative, idxEflReg);
3066
3067 iemNativeCondStartIfBlock(pReNative, off);
3068 return off;
3069}
3070
3071
3072#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3073 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
3074 do {
3075
3076#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3077 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
3078 do {
3079
3080#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3081 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
3082 do {
3083
3084#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3085 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
3086 do {
3087
3088/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3089 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3090 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3091 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3092DECL_INLINE_THROW(uint32_t)
3093iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3094 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
3095{
3096 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3097 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3098
3099 /* We have to load both RCX and EFLAGS before we can start branching,
3100 otherwise we'll end up in the else-block with an inconsistent
3101 register allocator state.
3102 Doing EFLAGS first as it's more likely to be loaded, right? */
3103 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3104 kIemNativeGstRegUse_ReadOnly);
3105 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3106 kIemNativeGstRegUse_ReadOnly);
3107
3108 /** @todo we could reduce this to a single branch instruction by spending a
3109 * temporary register and some setnz stuff. Not sure if loops are
3110 * worth it. */
3111 /* Check RCX/ECX. */
3112 if (f64Bit)
3113 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3114 else
3115 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3116
3117 /* Check the EFlags bit. */
3118 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3119 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3120 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3121 !fCheckIfSet /*fJmpIfSet*/);
3122
3123 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3124 iemNativeRegFreeTmp(pReNative, idxEflReg);
3125
3126 iemNativeCondStartIfBlock(pReNative, off);
3127 return off;
3128}
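
/** @note Illustrative sketch (not part of the recompiler): the condition implemented by
 *        the four variants above, in plain C (helper name made up):
 * @code
 *    static bool iemSketchCounterNotOneAndEflBit(uint64_t uRcx, uint32_t fEfl, uint32_t fBit, bool fCheckIfSet, bool f64Bit)
 *    {
 *        uint64_t const uCounter = f64Bit ? uRcx : (uint32_t)uRcx;
 *        return uCounter != 1 && RT_BOOL(fEfl & fBit) == fCheckIfSet;
 *    }
 * @endcode
 */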
3129
3130
3131#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3132 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3133 do {
3134
3135/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3136DECL_INLINE_THROW(uint32_t)
3137iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3138{
3139 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3140
3141 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3142 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3143 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3144 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3145
3146 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3147
3148 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3149
3150 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3151
3152 iemNativeCondStartIfBlock(pReNative, off);
3153 return off;
3154}
3155
3156
3157#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3158 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3159 do {
3160
3161/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3162DECL_INLINE_THROW(uint32_t)
3163iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3164{
3165 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3166 Assert(iGReg < 16);
3167
3168 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3169 kIemNativeGstRegUse_ReadOnly);
3170
3171 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3172
3173 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3174
3175 iemNativeCondStartIfBlock(pReNative, off);
3176 return off;
3177}
3178
3179
3180
3181/*********************************************************************************************************************************
3182* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3183*********************************************************************************************************************************/
3184
3185#define IEM_MC_NOREF(a_Name) \
3186 RT_NOREF_PV(a_Name)
3187
3188#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3189 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3190
3191#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3192 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3193
3194#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3195 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3196
3197#define IEM_MC_LOCAL(a_Type, a_Name) \
3198 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3199
3200#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3201 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3202
3203#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3204 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
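
/** @note Illustrative sketch (not part of the recompiler): how these wrappers are typically
 *        used inside a recompiled MC block body (the variable names here are made up):
 * @code
 *    IEM_MC_LOCAL(uint32_t,           u32Tmp);               // plain local variable
 *    IEM_MC_LOCAL_CONST(uint8_t,      u8Shift, 0x3f);        // constant local
 *    IEM_MC_ARG(uint16_t,             u16Src,        1);     // argument slot 1 for a helper call
 *    IEM_MC_ARG_LOCAL_REF(uint32_t *, pu32Tmp, u32Tmp, 2);   // argument slot 2 referencing the local
 * @endcode
 */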
3205
3206
3207/**
3208 * Sets the host register for @a idxVar to @a idxReg.
3209 *
3210 * The register must not be allocated. Any guest register shadowing will be
3211 * implicitly dropped by this call.
3212 *
3213 * The variable must not have any register associated with it (causes
3214 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3215 * implied.
3216 *
3217 * @returns idxReg
3218 * @param pReNative The recompiler state.
3219 * @param idxVar The variable.
3220 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3221 * @param off For recording in debug info.
3222 *
3223 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
3224 */
3225DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
3226{
3227 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3228 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3229 Assert(!pVar->fRegAcquired);
3230 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3231 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
3232 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3233
3234 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3235 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3236
3237 iemNativeVarSetKindToStack(pReNative, idxVar);
3238 pVar->idxReg = idxReg;
3239
3240 return idxReg;
3241}
3242
3243
3244/**
3245 * A convenient helper function.
3246 */
3247DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
3248 uint8_t idxReg, uint32_t *poff)
3249{
3250 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
3251 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
3252 return idxReg;
3253}
3254
3255
3256/**
3257 * This is called by IEM_MC_END() to clean up all variables.
3258 */
3259DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
3260{
3261 uint32_t const bmVars = pReNative->Core.bmVars;
3262 if (bmVars != 0)
3263 iemNativeVarFreeAllSlow(pReNative, bmVars);
3264 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
3265 Assert(pReNative->Core.bmStack == 0);
3266}
3267
3268
3269#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
3270
3271/**
3272 * This is called by IEM_MC_FREE_LOCAL.
3273 */
3274DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3275{
3276 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3277 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
3278 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3279}
3280
3281
3282#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
3283
3284/**
3285 * This is called by IEM_MC_FREE_ARG.
3286 */
3287DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3288{
3289 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3290 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
3291 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3292}
3293
3294
3295#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
3296
3297/**
3298 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
3299 */
3300DECL_INLINE_THROW(uint32_t)
3301iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
3302{
3303 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
3304 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
3305 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3306 Assert( pVarDst->cbVar == sizeof(uint16_t)
3307 || pVarDst->cbVar == sizeof(uint32_t));
3308
3309 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
3310 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
3311 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
3312 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
3313 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3314
3315 Assert(pVarDst->cbVar < pVarSrc->cbVar);
3316
3317 /*
3318 * Special case for immediates.
3319 */
3320 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
3321 {
3322 switch (pVarDst->cbVar)
3323 {
3324 case sizeof(uint16_t):
3325 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
3326 break;
3327 case sizeof(uint32_t):
3328 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
3329 break;
3330 default: AssertFailed(); break;
3331 }
3332 }
3333 else
3334 {
3335 /*
3336 * The generic solution for now.
3337 */
3338 /** @todo optimize this by having the python script make sure the source
3339 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
3340 * statement. Then we could just transfer the register assignments. */
3341 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
3342 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
3343 switch (pVarDst->cbVar)
3344 {
3345 case sizeof(uint16_t):
3346 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
3347 break;
3348 case sizeof(uint32_t):
3349 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
3350 break;
3351 default: AssertFailed(); break;
3352 }
3353 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
3354 iemNativeVarRegisterRelease(pReNative, idxVarDst);
3355 }
3356 return off;
3357}
3358
3359
3360
3361/*********************************************************************************************************************************
3362* Emitters for IEM_MC_CALL_CIMPL_XXX *
3363*********************************************************************************************************************************/
3364
3365/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
3366DECL_INLINE_THROW(uint32_t)
3367iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
3368 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
3369
3370{
3371 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
3372
3373#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3374 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
3375 when a call clobbers any of the relevant control registers. */
3376# if 1
3377 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
3378 {
3379 /* Likely as long as call+ret are done via cimpl. */
3380 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
3381 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
3382 }
3383 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
3384 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
3385 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
3386 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3387 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
3388 else
3389 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3390 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
3391 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
3392
3393# else
3394 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
3395 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
3396 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
3397 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3398 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
3399 || pfnCImpl == (uintptr_t)iemCImpl_callf
3400 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
3401 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
3402 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3403 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
3404 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
3405# endif
3406#endif
3407
3408 /*
3409 * Do all the call setup and cleanup.
3410 */
3411 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
3412
3413 /*
3414 * Load the two or three hidden arguments.
3415 */
3416#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
3417 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3418 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3419 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
3420#else
3421 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3422 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
3423#endif
3424
3425 /*
3426 * Make the call and check the return code.
3427 *
3428 * Shadow PC copies are always flushed here; other stuff depends on the flags.
3429 * Segment and general purpose registers are explicitly flushed via the
3430 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
3431 * macros.
3432 */
3433 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
3434#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
3435 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3436#endif
3437 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
3438 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
3439 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
3440 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
3441
3442 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3443}
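
/** @note Illustrative sketch (not part of the recompiler): the C-level call the code above
 *        arranges, here for a two argument cimpl worker.  On Windows/AMD64 with strict
 *        VBOXSTRICTRC an extra hidden argument pointing at the rcStrict return slot precedes these.
 * @code
 *    VBOXSTRICTRC rcStrict = pfnCImpl(pVCpu, cbInstr, uArg0, uArg1);
 *    // rcStrict is then handled by iemNativeEmitCheckCallRetAndPassUp(), and the guest
 *    // shadow copies named in fGstShwFlush (plus PC and usually EFLAGS) are flushed.
 * @endcode
 */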
3444
3445
3446#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
3447 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
3448
3449/** Emits code for IEM_MC_CALL_CIMPL_1. */
3450DECL_INLINE_THROW(uint32_t)
3451iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3452 uintptr_t pfnCImpl, uint8_t idxArg0)
3453{
3454 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3455 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
3456}
3457
3458
3459#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
3460 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
3461
3462/** Emits code for IEM_MC_CALL_CIMPL_2. */
3463DECL_INLINE_THROW(uint32_t)
3464iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3465 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
3466{
3467 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3468 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3469 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
3470}
3471
3472
3473#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
3474 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3475 (uintptr_t)a_pfnCImpl, a0, a1, a2)
3476
3477/** Emits code for IEM_MC_CALL_CIMPL_3. */
3478DECL_INLINE_THROW(uint32_t)
3479iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3480 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
3481{
3482 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3483 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3484 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3485 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
3486}
3487
3488
3489#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
3490 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3491 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
3492
3493/** Emits code for IEM_MC_CALL_CIMPL_4. */
3494DECL_INLINE_THROW(uint32_t)
3495iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3496 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
3497{
3498 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3499 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3500 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3501 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
3502 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
3503}
3504
3505
3506#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
3507 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3508 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
3509
3510/** Emits code for IEM_MC_CALL_CIMPL_5. */
3511DECL_INLINE_THROW(uint32_t)
3512iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3513 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
3514{
3515 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3516 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3517 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3518 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
3519 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
3520 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
3521}
3522
3523
3524/** Recompiler debugging: Flush guest register shadow copies. */
3525#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
3526
3527
3528
3529/*********************************************************************************************************************************
3530* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
3531*********************************************************************************************************************************/
3532
3533/**
3534 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
3535 */
3536DECL_INLINE_THROW(uint32_t)
3537iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3538 uintptr_t pfnAImpl, uint8_t cArgs)
3539{
3540 if (idxVarRc != UINT8_MAX)
3541 {
3542 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
3543 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
3544 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3545 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3546 }
3547
3548 /*
3549 * Do all the call setup and cleanup.
3550 *
3551 * It is only required to flush pending guest register writes in call volatile registers as
3552 * assembly helpers can't throw and don't access anything living in CPUMCTX; they only
3553 * access their parameters. The flushing of call volatile registers is always done by
3554 * iemNativeEmitCallCommon() regardless of the fFlushPendingWrites parameter.
3555 */
3556 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
3557
3558 /*
3559 * Make the call and update the return code variable if we've got one.
3560 */
3561 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
3562 if (idxVarRc != UINT8_MAX)
3563 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
3564
3565 return off;
3566}
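
/** @note Illustrative sketch (not part of the recompiler): the call emitted for, say,
 *        IEM_MC_CALL_AIMPL_2.  Assembly helpers cannot throw, so only call volatile
 *        registers need flushing beforehand.
 * @code
 *    uint64_t const uRc = pfnAImpl(uArg0, uArg1);
 *    // When a return variable was given, IEMNATIVE_CALL_RET_GREG is simply bound to it
 *    // via iemNativeVarRegisterSet() instead of copying the value.
 * @endcode
 */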
3567
3568
3569
3570#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
3571 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
3572
3573#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
3574 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
3575
3576/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
3577DECL_INLINE_THROW(uint32_t)
3578iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
3579{
3580 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
3581}
3582
3583
3584#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
3585 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
3586
3587#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
3588 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
3589
3590/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
3591DECL_INLINE_THROW(uint32_t)
3592iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
3593{
3594 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3595 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
3596}
3597
3598
3599#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
3600 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
3601
3602#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
3603 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
3604
3605/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
3606DECL_INLINE_THROW(uint32_t)
3607iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3608 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
3609{
3610 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3611 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3612 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
3613}
3614
3615
3616#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
3617 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
3618
3619#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
3620 IEM_MC_LOCAL(a_rcType, a_rc); \
3621 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
3622
3623/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
3624DECL_INLINE_THROW(uint32_t)
3625iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3626 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
3627{
3628 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3629 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3630 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
3631 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
3632}
3633
3634
3635#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
3636 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
3637
3638#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
3639 IEM_MC_LOCAL(a_rcType, a_rc); \
3640 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
3641
3642/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
3643DECL_INLINE_THROW(uint32_t)
3644iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3645 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
3646{
3647 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3648 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3649 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
3650 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
3651 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
3652}
3653
3654
3655
3656/*********************************************************************************************************************************
3657* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
3658*********************************************************************************************************************************/
3659
3660#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
3661 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
3662
3663#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
3664 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
3665
3666#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
3667 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
3668
3669#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
3670 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
3671
3672
3673/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
3674 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
3675DECL_INLINE_THROW(uint32_t)
3676iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
3677{
3678 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3679 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3680 Assert(iGRegEx < 20);
3681
3682 /* Same discussion as in iemNativeEmitFetchGregU16 */
3683 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3684 kIemNativeGstRegUse_ReadOnly);
3685
3686 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3687 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3688
3689 /* The value is zero-extended to the full 64-bit host register width. */
3690 if (iGRegEx < 16)
3691 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3692 else
3693 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
3694
3695 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3696 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3697 return off;
3698}
3699
3700
3701#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
3702 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
3703
3704#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
3705 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
3706
3707#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
3708 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
3709
3710/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
3711DECL_INLINE_THROW(uint32_t)
3712iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
3713{
3714 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3715 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
3716 Assert(iGRegEx < 20);
3717
3718 /* Same discussion as in iemNativeEmitFetchGregU16 */
3719 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3720 kIemNativeGstRegUse_ReadOnly);
3721
3722 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3723 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3724
3725 if (iGRegEx < 16)
3726 {
3727 switch (cbSignExtended)
3728 {
3729 case sizeof(uint16_t):
3730 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3731 break;
3732 case sizeof(uint32_t):
3733 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3734 break;
3735 case sizeof(uint64_t):
3736 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3737 break;
3738 default: AssertFailed(); break;
3739 }
3740 }
3741 else
3742 {
3743 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
3744 switch (cbSignExtended)
3745 {
3746 case sizeof(uint16_t):
3747 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3748 break;
3749 case sizeof(uint32_t):
3750 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3751 break;
3752 case sizeof(uint64_t):
3753 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3754 break;
3755 default: AssertFailed(); break;
3756 }
3757 }
3758
3759 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3760 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3761 return off;
3762}
3763
3764
3765
3766#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
3767 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
3768
3769#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
3770 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
3771
3772#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
3773 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
3774
3775/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
3776DECL_INLINE_THROW(uint32_t)
3777iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
3778{
3779 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3780 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3781 Assert(iGReg < 16);
3782
3783 /*
3784 * We can either just load the low 16 bits of the GPR into a host register
3785 * for the variable, or we can do so via a shadow copy host register. The
3786 * latter will avoid having to reload it if it's being stored later, but
3787 * will waste a host register if it isn't touched again. Since we don't
3788 * know what's going to happen, we choose the latter for now.
3789 */
3790 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3791 kIemNativeGstRegUse_ReadOnly);
3792
3793 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3794 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3795 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3796 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3797
3798 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3799 return off;
3800}
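/* Sketch of the resulting code (an assumption, the exact encoding is up to the
   emitter helpers): on AMD64 this boils down to a zero-extending 16-bit
   register-to-register load into the variable register, e.g.
        movzx   eax, dx
   and on ARM64 to something like
        uxth    w0, w1
   leaving the read-only guest shadow register untouched. */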
3801
3802
3803#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
3804 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
3805
3806#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
3807 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
3808
3809/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
3810DECL_INLINE_THROW(uint32_t)
3811iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
3812{
3813 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3814 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
3815 Assert(iGReg < 16);
3816
3817 /*
3818 * We can either just load the low 16 bits of the GPR into a host register
3819 * for the variable, or we can do so via a shadow copy host register. The
3820 * latter will avoid having to reload it if it's being stored later, but
3821 * will waste a host register if it isn't touched again. Since we don't
3822 * know what's going to happen, we choose the latter for now.
3823 */
3824 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3825 kIemNativeGstRegUse_ReadOnly);
3826
3827 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3828 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3829 if (cbSignExtended == sizeof(uint32_t))
3830 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3831 else
3832 {
3833 Assert(cbSignExtended == sizeof(uint64_t));
3834 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3835 }
3836 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3837
3838 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3839 return off;
3840}
3841
3842
3843#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
3844 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
3845
3846#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
3847 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
3848
3849/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
3850DECL_INLINE_THROW(uint32_t)
3851iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
3852{
3853 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3854 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3855 Assert(iGReg < 16);
3856
3857 /*
3858 * We can either just load the low 32 bits of the GPR into a host register
3859 * for the variable, or we can do so via a shadow copy host register. The
3860 * latter will avoid having to reload it if it's being stored later, but
3861 * will waste a host register if it isn't touched again. Since we don't
3862 * know what's going to happen, we choose the latter for now.
3863 */
3864 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3865 kIemNativeGstRegUse_ReadOnly);
3866
3867 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3868 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3869 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
3870 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3871
3872 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3873 return off;
3874}
3875
3876
3877#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
3878 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
3879
3880/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
3881DECL_INLINE_THROW(uint32_t)
3882iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
3883{
3884 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3885 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
3886 Assert(iGReg < 16);
3887
3888 /*
3889 * We can either just load the low 32 bits of the GPR into a host register
3890 * for the variable, or we can do so via a shadow copy host register. The
3891 * latter will avoid having to reload it if it's being stored later, but
3892 * will waste a host register if it isn't touched again. Since we don't
3893 * know what's going to happen, we choose the latter for now.
3894 */
3895 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3896 kIemNativeGstRegUse_ReadOnly);
3897
3898 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3899 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3900 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
3901 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3902
3903 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3904 return off;
3905}
3906
3907
3908#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
3909 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
3910
3911#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
3912 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
3913
3914/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
3915 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
3916DECL_INLINE_THROW(uint32_t)
3917iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
3918{
3919 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3920 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
3921 Assert(iGReg < 16);
3922
3923 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3924 kIemNativeGstRegUse_ReadOnly);
3925
3926 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3927 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3928 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
3929 /** @todo name the register a shadow one already? */
3930 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3931
3932 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3933 return off;
3934}
3935
3936
3937#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3938#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
3939 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
3940
3941/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
3942DECL_INLINE_THROW(uint32_t)
3943iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
3944{
3945 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3946 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
3947 Assert(iGRegLo < 16 && iGRegHi < 16);
3948
3949 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
3950 kIemNativeGstRegUse_ReadOnly);
3951 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
3952 kIemNativeGstRegUse_ReadOnly);
3953
3954 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3955 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
3956 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
3957 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
3958
3959 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
3960 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
3961 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
3962 return off;
3963}
3964#endif
3965
3966
3967/*********************************************************************************************************************************
3968* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
3969*********************************************************************************************************************************/
3970
3971#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
3972 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
3973
3974/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
3975DECL_INLINE_THROW(uint32_t)
3976iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
3977{
3978 Assert(iGRegEx < 20);
3979 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3980 kIemNativeGstRegUse_ForUpdate);
3981#ifdef RT_ARCH_AMD64
3982 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
3983
3984 /* To the lowest byte of the register: mov r8, imm8 */
3985 if (iGRegEx < 16)
3986 {
3987 if (idxGstTmpReg >= 8)
3988 pbCodeBuf[off++] = X86_OP_REX_B;
3989 else if (idxGstTmpReg >= 4)
3990 pbCodeBuf[off++] = X86_OP_REX;
3991 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
3992 pbCodeBuf[off++] = u8Value;
3993 }
3994 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
3995 else if (idxGstTmpReg < 4)
3996 {
3997 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
3998 pbCodeBuf[off++] = u8Value;
3999 }
4000 else
4001 {
4002 /* ror reg64, 8 */
4003 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4004 pbCodeBuf[off++] = 0xc1;
4005 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4006 pbCodeBuf[off++] = 8;
4007
4008 /* mov reg8, imm8 */
4009 if (idxGstTmpReg >= 8)
4010 pbCodeBuf[off++] = X86_OP_REX_B;
4011 else if (idxGstTmpReg >= 4)
4012 pbCodeBuf[off++] = X86_OP_REX;
4013 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4014 pbCodeBuf[off++] = u8Value;
4015
4016 /* rol reg64, 8 */
4017 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4018 pbCodeBuf[off++] = 0xc1;
4019 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4020 pbCodeBuf[off++] = 8;
4021 }
4022
4023#elif defined(RT_ARCH_ARM64)
4024 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4025 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4026 if (iGRegEx < 16)
4027 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4028 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4029 else
4030 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4031 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4032 iemNativeRegFreeTmp(pReNative, idxImmReg);
4033
4034#else
4035# error "Port me!"
4036#endif
4037
4038 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4039
4040#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4041 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4042#endif
4043
4044 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4045 return off;
4046}
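/* Example of the rotate trick above (illustrative, hypothetical immediate):
   storing a constant into one of the guest high bytes (ah/ch/dh/bh) when the
   shadow host register is, say, r9 (which has no high-byte encoding) produces
        ror     r9, 8
        mov     r9b, 0x42
        rol     r9, 8
   i.e. bits 15:8 of the shadow register are replaced while all other bits are
   restored, matching the single bfi instruction used on the ARM64 path. */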
4047
4048
4049#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4050 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4051
4052/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4053DECL_INLINE_THROW(uint32_t)
4054iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4055{
4056 Assert(iGRegEx < 20);
4057 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4058
4059 /*
4060 * If it's a constant value (unlikely) we treat this as an
4061 * IEM_MC_STORE_GREG_U8_CONST statement.
4062 */
4063 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4064 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4065 { /* likely */ }
4066 else
4067 {
4068 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4069 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4070 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4071 }
4072
4073 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4074 kIemNativeGstRegUse_ForUpdate);
4075 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4076
4077#ifdef RT_ARCH_AMD64
4078 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4079 if (iGRegEx < 16)
4080 {
4081 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4082 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4083 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4084 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4085 pbCodeBuf[off++] = X86_OP_REX;
4086 pbCodeBuf[off++] = 0x8a;
4087 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4088 }
4089 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
4090 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4091 {
4092 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4093 pbCodeBuf[off++] = 0x8a;
4094 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4095 }
4096 else
4097 {
4098 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4099
4100 /* ror reg64, 8 */
4101 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4102 pbCodeBuf[off++] = 0xc1;
4103 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4104 pbCodeBuf[off++] = 8;
4105
4106 /* mov reg8, reg8(r/m) */
4107 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4108 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4109 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4110 pbCodeBuf[off++] = X86_OP_REX;
4111 pbCodeBuf[off++] = 0x8a;
4112 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4113
4114 /* rol reg64, 8 */
4115 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4116 pbCodeBuf[off++] = 0xc1;
4117 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4118 pbCodeBuf[off++] = 8;
4119 }
4120
4121#elif defined(RT_ARCH_ARM64)
4122 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4123 or
4124 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4125 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4126 if (iGRegEx < 16)
4127 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4128 else
4129 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4130
4131#else
4132# error "Port me!"
4133#endif
4134 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4135
4136 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4137
4138#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4139 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4140#endif
4141 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4142 return off;
4143}
4144
4145
4146
4147#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4148 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4149
4150/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4151DECL_INLINE_THROW(uint32_t)
4152iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4153{
4154 Assert(iGReg < 16);
4155 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4156 kIemNativeGstRegUse_ForUpdate);
4157#ifdef RT_ARCH_AMD64
4158 /* mov reg16, imm16 */
4159 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4160 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4161 if (idxGstTmpReg >= 8)
4162 pbCodeBuf[off++] = X86_OP_REX_B;
4163 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4164 pbCodeBuf[off++] = RT_BYTE1(uValue);
4165 pbCodeBuf[off++] = RT_BYTE2(uValue);
4166
4167#elif defined(RT_ARCH_ARM64)
4168 /* movk xdst, #uValue, lsl #0 */
4169 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4170 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
4171
4172#else
4173# error "Port me!"
4174#endif
4175
4176 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4177
4178#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4179 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4180#endif
4181 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4182 return off;
4183}
4184
4185
4186#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4187 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4188
4189/** Emits code for IEM_MC_STORE_GREG_U16. */
4190DECL_INLINE_THROW(uint32_t)
4191iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4192{
4193 Assert(iGReg < 16);
4194 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4195
4196 /*
4197 * If it's a constant value (unlikely) we treat this as an
4198 * IEM_MC_STORE_GREG_U16_CONST statement.
4199 */
4200 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4201 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4202 { /* likely */ }
4203 else
4204 {
4205 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4206 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4207 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
4208 }
4209
4210 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4211 kIemNativeGstRegUse_ForUpdate);
4212
4213#ifdef RT_ARCH_AMD64
4214 /* mov reg16, reg16 or [mem16] */
4215 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4216 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4217 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
4218 {
4219 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
4220 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
4221 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
4222 pbCodeBuf[off++] = 0x8b;
4223 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
4224 }
4225 else
4226 {
4227 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
4228 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
4229 if (idxGstTmpReg >= 8)
4230 pbCodeBuf[off++] = X86_OP_REX_R;
4231 pbCodeBuf[off++] = 0x8b;
4232 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
4233 }
4234
4235#elif defined(RT_ARCH_ARM64)
4236 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
4237 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4238 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4239 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
4240 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4241
4242#else
4243# error "Port me!"
4244#endif
4245
4246 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4247
4248#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4249 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4250#endif
4251 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4252 return off;
4253}
4254
4255
4256#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
4257 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
4258
4259/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
4260DECL_INLINE_THROW(uint32_t)
4261iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
4262{
4263 Assert(iGReg < 16);
4264 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4265 kIemNativeGstRegUse_ForFullWrite);
4266 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4267#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4268 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4269#endif
4270 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4271 return off;
4272}
4273
4274
4275#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
4276 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
4277
4278/** Emits code for IEM_MC_STORE_GREG_U32. */
4279DECL_INLINE_THROW(uint32_t)
4280iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4281{
4282 Assert(iGReg < 16);
4283 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4284
4285 /*
4286 * If it's a constant value (unlikely) we treat this as an
4287 * IEM_MC_STORE_GREG_U32_CONST statement.
4288 */
4289 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4290 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4291 { /* likely */ }
4292 else
4293 {
4294 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4295 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4296 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
4297 }
4298
4299 /*
4300 * For the rest we allocate a guest register for the variable and write
4301 * it to the CPUMCTX structure.
4302 */
4303 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
4304#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4305 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4306#else
4307 RT_NOREF(idxVarReg);
4308#endif
4309#ifdef VBOX_STRICT
4310 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
4311#endif
4312 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4313 return off;
4314}
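/* Note on semantics (general x86 rule, not specific to this emitter): a 32-bit
   GPR store zero-extends into the full 64-bit register, which is why the
   variable is bound to the guest register for a full write and, in strict
   builds, checked to have its top 32 bits clear. E.g. storing 0xdeadbeef to
   EBX must leave RBX = 0x00000000deadbeef. */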
4315
4316
4317#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
4318 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
4319
4320/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
4321DECL_INLINE_THROW(uint32_t)
4322iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
4323{
4324 Assert(iGReg < 16);
4325 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4326 kIemNativeGstRegUse_ForFullWrite);
4327 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4328#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4329 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4330#endif
4331 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4332 return off;
4333}
4334
4335
4336#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
4337 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
4338
4339#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
4340 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
4341
4342/** Emits code for IEM_MC_STORE_GREG_U64. */
4343DECL_INLINE_THROW(uint32_t)
4344iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4345{
4346 Assert(iGReg < 16);
4347 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4348
4349 /*
4350 * If it's a constant value (unlikely) we treat this as an
4351 * IEM_MC_STORE_GREG_U64_CONST statement.
4352 */
4353 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4354 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4355 { /* likely */ }
4356 else
4357 {
4358 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4359 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4360 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
4361 }
4362
4363 /*
4364 * For the rest we allocate a guest register for the variable and write
4365 * it to the CPUMCTX structure.
4366 */
4367 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
4368#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4369 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4370#else
4371 RT_NOREF(idxVarReg);
4372#endif
4373 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4374 return off;
4375}
4376
4377
4378#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
4379 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
4380
4381/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
4382DECL_INLINE_THROW(uint32_t)
4383iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
4384{
4385 Assert(iGReg < 16);
4386 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4387 kIemNativeGstRegUse_ForUpdate);
4388 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
4389#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4390 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4391#endif
4392 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4393 return off;
4394}
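/* Illustrative effect: the 32-bit reg-to-reg move above relies on the host
   zero-extending 32-bit writes, so e.g. a shadow value of 0xffffffff00001234
   becomes 0x0000000000001234, which is exactly what IEM_MC_CLEAR_HIGH_GREG_U64
   asks for. */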
4395
4396
4397#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4398#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
4399 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
4400
4401/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
4402DECL_INLINE_THROW(uint32_t)
4403iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
4404{
4405 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4406 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4407 Assert(iGRegLo < 16 && iGRegHi < 16);
4408
4409 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4410 kIemNativeGstRegUse_ForFullWrite);
4411 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4412 kIemNativeGstRegUse_ForFullWrite);
4413
4414 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4415 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
4416 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
4417 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
4418
4419 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4420 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4421 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4422 return off;
4423}
4424#endif
4425
4426
4427/*********************************************************************************************************************************
4428* General purpose register manipulation (add, sub). *
4429*********************************************************************************************************************************/
4430
4431#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
4432 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
4433
4434/** Emits code for IEM_MC_ADD_GREG_U16. */
4435DECL_INLINE_THROW(uint32_t)
4436iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
4437{
4438 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4439 kIemNativeGstRegUse_ForUpdate);
4440
4441#ifdef RT_ARCH_AMD64
4442 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4443 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4444 if (idxGstTmpReg >= 8)
4445 pbCodeBuf[off++] = X86_OP_REX_B;
4446 if (uAddend == 1)
4447 {
4448 pbCodeBuf[off++] = 0xff; /* inc */
4449 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4450 }
4451 else
4452 {
4453 pbCodeBuf[off++] = 0x81;
4454 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4455 pbCodeBuf[off++] = uAddend;
4456 pbCodeBuf[off++] = 0;
4457 }
4458
4459#else
4460 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4461 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4462
4463 /* add tmp, gstgrp, uAddend */
4464 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
4465
4466 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
4467 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4468
4469 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4470#endif
4471
4472 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4473
4474#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4475 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4476#endif
4477
4478 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4479 return off;
4480}
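/* Sketch of the 16-bit add (assuming guest AX lives in host w0 and uAddend is 1,
   ARM64 path): an 'add w1, w0, #1' into a temporary followed by
   'bfi w0, w1, #0, #16' folds the sum back into bits 15:0 only, so e.g.
   AX = 0xffff wraps to 0x0000 while bits 63:16 of the guest register are
   preserved, the same result the 0x66-prefixed inc/add achieves on AMD64. */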
4481
4482
4483#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
4484 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4485
4486#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
4487 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4488
4489/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
4490DECL_INLINE_THROW(uint32_t)
4491iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
4492{
4493 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4494 kIemNativeGstRegUse_ForUpdate);
4495
4496#ifdef RT_ARCH_AMD64
4497 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4498 if (f64Bit)
4499 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4500 else if (idxGstTmpReg >= 8)
4501 pbCodeBuf[off++] = X86_OP_REX_B;
4502 if (uAddend == 1)
4503 {
4504 pbCodeBuf[off++] = 0xff; /* inc */
4505 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4506 }
4507 else if (uAddend < 128)
4508 {
4509 pbCodeBuf[off++] = 0x83; /* add */
4510 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4511 pbCodeBuf[off++] = RT_BYTE1(uAddend);
4512 }
4513 else
4514 {
4515 pbCodeBuf[off++] = 0x81; /* add */
4516 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4517 pbCodeBuf[off++] = RT_BYTE1(uAddend);
4518 pbCodeBuf[off++] = 0;
4519 pbCodeBuf[off++] = 0;
4520 pbCodeBuf[off++] = 0;
4521 }
4522
4523#else
4524 /* add tmp, gstgrp, uAddend */
4525 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4526 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
4527
4528#endif
4529
4530 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4531
4532#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4533 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4534#endif
4535
4536 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4537 return off;
4538}
4539
4540
4541
4542#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
4543 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
4544
4545/** Emits code for IEM_MC_SUB_GREG_U16. */
4546DECL_INLINE_THROW(uint32_t)
4547iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
4548{
4549 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4550 kIemNativeGstRegUse_ForUpdate);
4551
4552#ifdef RT_ARCH_AMD64
4553 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4554 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4555 if (idxGstTmpReg >= 8)
4556 pbCodeBuf[off++] = X86_OP_REX_B;
4557 if (uSubtrahend == 1)
4558 {
4559 pbCodeBuf[off++] = 0xff; /* dec */
4560 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4561 }
4562 else
4563 {
4564 pbCodeBuf[off++] = 0x81;
4565 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4566 pbCodeBuf[off++] = uSubtrahend;
4567 pbCodeBuf[off++] = 0;
4568 }
4569
4570#else
4571 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4572 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4573
4574 /* sub tmp, gstgrp, uSubtrahend */
4575 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
4576
4577 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
4578 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4579
4580 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4581#endif
4582
4583 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4584
4585#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4586 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4587#endif
4588
4589 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4590 return off;
4591}
4592
4593
4594#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
4595 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4596
4597#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
4598 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4599
4600/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
4601DECL_INLINE_THROW(uint32_t)
4602iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
4603{
4604 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4605 kIemNativeGstRegUse_ForUpdate);
4606
4607#ifdef RT_ARCH_AMD64
4608 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4609 if (f64Bit)
4610 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4611 else if (idxGstTmpReg >= 8)
4612 pbCodeBuf[off++] = X86_OP_REX_B;
4613 if (uSubtrahend == 1)
4614 {
4615 pbCodeBuf[off++] = 0xff; /* dec */
4616 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4617 }
4618 else if (uSubtrahend < 128)
4619 {
4620 pbCodeBuf[off++] = 0x83; /* sub */
4621 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4622 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4623 }
4624 else
4625 {
4626 pbCodeBuf[off++] = 0x81; /* sub */
4627 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4628 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4629 pbCodeBuf[off++] = 0;
4630 pbCodeBuf[off++] = 0;
4631 pbCodeBuf[off++] = 0;
4632 }
4633
4634#else
4635 /* sub tmp, gstgrp, uSubtrahend */
4636 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4637 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
4638
4639#endif
4640
4641 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4642
4643#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4644 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4645#endif
4646
4647 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4648 return off;
4649}
4650
4651
4652#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
4653 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
4654
4655#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
4656 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
4657
4658#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
4659 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
4660
4661#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
4662 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
4663
4664/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
4665DECL_INLINE_THROW(uint32_t)
4666iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
4667{
4668#ifdef VBOX_STRICT
4669 switch (cbMask)
4670 {
4671 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4672 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4673 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4674 case sizeof(uint64_t): break;
4675 default: AssertFailedBreak();
4676 }
4677#endif
4678
4679 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4680 kIemNativeGstRegUse_ForUpdate);
4681
4682 switch (cbMask)
4683 {
4684 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
4685 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
4686 break;
4687 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
4688 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
4689 break;
4690 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
4691 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
4692 break;
4693 case sizeof(uint64_t):
4694 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
4695 break;
4696 default: AssertFailedBreak();
4697 }
4698
4699 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4700
4701#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4702 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4703#endif
4704
4705 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4706 return off;
4707}
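/* Example of the mask widening above (hypothetical values): IEM_MC_AND_GREG_U16
   with a mask of 0x00ff turns into a 64-bit AND with 0xffffffffffff00ff, so
   bits 63:16 of the guest register survive, whereas the 32-bit case uses a
   32-bit AND and thus also clears bits 63:32, matching the architectural
   behaviour of 32-bit operand size. */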
4708
4709
4710#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
4711 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
4712
4713#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
4714 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
4715
4716#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
4717 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
4718
4719#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
4720 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
4721
4722/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
4723DECL_INLINE_THROW(uint32_t)
4724iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
4725{
4726#ifdef VBOX_STRICT
4727 switch (cbMask)
4728 {
4729 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4730 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4731 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4732 case sizeof(uint64_t): break;
4733 default: AssertFailedBreak();
4734 }
4735#endif
4736
4737 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4738 kIemNativeGstRegUse_ForUpdate);
4739
4740 switch (cbMask)
4741 {
4742 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
4743 case sizeof(uint16_t):
4744 case sizeof(uint64_t):
4745 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
4746 break;
4747 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
4748 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
4749 break;
4750 default: AssertFailedBreak();
4751 }
4752
4753 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4754
4755#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4756 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4757#endif
4758
4759 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4760 return off;
4761}
4762
4763
4764/*********************************************************************************************************************************
4765* Local/Argument variable manipulation (add, sub, and, or). *
4766*********************************************************************************************************************************/
4767
4768#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
4769 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
4770
4771#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
4772 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
4773
4774#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
4775 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
4776
4777#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
4778 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
4779
4780
4781#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
4782 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
4783
4784#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
4785 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
4786
4787#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
4788 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
4789
4790/** Emits code for AND'ing a local and a constant value. */
4791DECL_INLINE_THROW(uint32_t)
4792iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
4793{
4794#ifdef VBOX_STRICT
4795 switch (cbMask)
4796 {
4797 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4798 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4799 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4800 case sizeof(uint64_t): break;
4801 default: AssertFailedBreak();
4802 }
4803#endif
4804
4805 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4806 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
4807
4808 if (cbMask <= sizeof(uint32_t))
4809 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
4810 else
4811 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
4812
4813 iemNativeVarRegisterRelease(pReNative, idxVar);
4814 return off;
4815}
4816
4817
4818#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
4819 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
4820
4821#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
4822 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
4823
4824#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
4825 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
4826
4827#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
4828 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
4829
4830/** Emits code for OR'ing a local and a constant value. */
4831DECL_INLINE_THROW(uint32_t)
4832iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
4833{
4834#ifdef VBOX_STRICT
4835 switch (cbMask)
4836 {
4837 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4838 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4839 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4840 case sizeof(uint64_t): break;
4841 default: AssertFailedBreak();
4842 }
4843#endif
4844
4845 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4846 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
4847
4848 if (cbMask <= sizeof(uint32_t))
4849 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
4850 else
4851 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
4852
4853 iemNativeVarRegisterRelease(pReNative, idxVar);
4854 return off;
4855}
4856
4857
4858#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
4859 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
4860
4861#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
4862 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
4863
4864#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
4865 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
4866
4867/** Emits code for reversing the byte order in a local value. */
4868DECL_INLINE_THROW(uint32_t)
4869iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
4870{
4871 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4872 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4873
4874 switch (cbLocal)
4875 {
4876 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
4877 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
4878 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
4879 default: AssertFailedBreak();
4880 }
4881
4882 iemNativeVarRegisterRelease(pReNative, idxVar);
4883 return off;
4884}
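/* Value-level example: a 16-bit local 0x1234 becomes 0x3412, a 32-bit local
   0xaabbccdd becomes 0xddccbbaa and a 64-bit local has all eight bytes
   mirrored; the choice of host instruction is left to the Bswap helpers. */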
4885
4886
4887#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
4888 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
4889
4890#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
4891 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
4892
4893#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
4894 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
4895
4896/** Emits code for shifting a local value left. */
4897DECL_INLINE_THROW(uint32_t)
4898iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
4899{
4900#ifdef VBOX_STRICT
4901 switch (cbLocal)
4902 {
4903 case sizeof(uint8_t): Assert(cShift < 8); break;
4904 case sizeof(uint16_t): Assert(cShift < 16); break;
4905 case sizeof(uint32_t): Assert(cShift < 32); break;
4906 case sizeof(uint64_t): Assert(cShift < 64); break;
4907 default: AssertFailedBreak();
4908 }
4909#endif
4910
4911 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4912 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4913
4914 if (cbLocal <= sizeof(uint32_t))
4915 {
4916 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
4917 if (cbLocal < sizeof(uint32_t))
4918 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
4919 cbLocal == sizeof(uint16_t)
4920 ? UINT32_C(0xffff)
4921 : UINT32_C(0xff));
4922 }
4923 else
4924 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
4925
4926 iemNativeVarRegisterRelease(pReNative, idxVar);
4927 return off;
4928}
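/* Example of the masking step (hypothetical values): a 16-bit local 0x8001
   shifted left by 1 yields 0x00010002 in the 32-bit host register, and the
   subsequent AND with 0xffff brings it back to 0x0002, i.e. the bits shifted
   out of the local's width are discarded just as a 16-bit shl would do. */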
4929
4930
4931#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
4932 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
4933
4934#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
4935 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
4936
4937#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
4938 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
4939
4940/** Emits code for arithmetically shifting a local value right. */
4941DECL_INLINE_THROW(uint32_t)
4942iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
4943{
4944#ifdef VBOX_STRICT
4945 switch (cbLocal)
4946 {
4947 case sizeof(int8_t): Assert(cShift < 8); break;
4948 case sizeof(int16_t): Assert(cShift < 16); break;
4949 case sizeof(int32_t): Assert(cShift < 32); break;
4950 case sizeof(int64_t): Assert(cShift < 64); break;
4951 default: AssertFailedBreak();
4952 }
4953#endif
4954
4955 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4956 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4957
4958 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
4959 if (cbLocal == sizeof(uint8_t))
4960 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4961 else if (cbLocal == sizeof(uint16_t))
4962 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
4963
4964 if (cbLocal <= sizeof(uint32_t))
4965 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
4966 else
4967 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
4968
4969 iemNativeVarRegisterRelease(pReNative, idxVar);
4970 return off;
4971}
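/* Example (hypothetical values): a 16-bit local 0x8000 (-32768) is first
   sign-extended to 0xffff8000 so the 32-bit arithmetic shift sees the correct
   sign; shifting right by 4 then gives 0xfffff800, whose low 16 bits 0xf800
   are the expected -2048. */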
4972
4973
4974#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
4975 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
4976
4977#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
4978 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
4979
4980#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
4981 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
4982
4983/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
4984DECL_INLINE_THROW(uint32_t)
4985iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
4986{
4987 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
4988 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
4989 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4990 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
4991
4992 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4993 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
4994
4995 /* Need to sign extend the value. */
4996 if (cbLocal <= sizeof(uint32_t))
4997 {
4998/** @todo ARM64: In case of boredom, the extended add instruction can do the
4999 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
5000 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5001
5002 switch (cbLocal)
5003 {
5004 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
5005 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
5006 default: AssertFailed();
5007 }
5008
5009 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
5010 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5011 }
5012 else
5013 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
5014
5015 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
5016 iemNativeVarRegisterRelease(pReNative, idxVar);
5017 return off;
5018}
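/*
 * Illustrative sketch (plain host C, not emitter output): the sign extension
 * done before the add above makes a negative 16/32-bit local move the 64-bit
 * effective address backwards instead of adding a large zero-extended value:
 *
 *      static uint64_t addLocalS16ToEffAddrSketch(uint64_t uEffAddr, int16_t i16Local)
 *      {
 *          return uEffAddr + (uint64_t)(int64_t)i16Local;  // e.g. i16Local == -2 subtracts 2
 *          // zero extending -2 to 0xfffe would instead add 65534.
 *      }
 */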
5019
5020
5021
5022/*********************************************************************************************************************************
5023* EFLAGS *
5024*********************************************************************************************************************************/
5025
5026#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
5027# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
5028#else
5029# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
5030 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
5031
5032DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
5033{
5034 if (fEflOutput)
5035 {
5036 PVMCPUCC const pVCpu = pReNative->pVCpu;
5037# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5038 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5039 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5040 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5041# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5042 if (fEflOutput & (a_fEfl)) \
5043 { \
5044 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5045 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5046 else \
5047 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5048 } else do { } while (0)
5049# else
5050 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
5051 IEMLIVENESSBIT const LivenessClobbered =
5052 {
5053 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5054 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5055 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5056 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5057 };
5058 IEMLIVENESSBIT const LivenessDelayable =
5059 {
5060 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5061 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5062 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5063 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5064 };
5065# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5066 if (fEflOutput & (a_fEfl)) \
5067 { \
5068 if (LivenessClobbered.a_fLivenessMember) \
5069 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5070 else if (LivenessDelayable.a_fLivenessMember) \
5071 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5072 else \
5073 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5074 } else do { } while (0)
5075# endif
5076 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5077 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5078 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5079 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5080 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5081 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5082 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5083# undef CHECK_FLAG_AND_UPDATE_STATS
5084 }
5085 RT_NOREF(fEflInput);
5086}
5087#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
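/*
 * Illustrative sketch (host C, not emitter output): the bit math used by the
 * extended-layout statistics above to classify a flag.  The mask names are
 * stand-ins for the IEMLIVENESS_BIT_* bitmaps of one liveness entry:
 *
 *      // fWrite, fRead, fXcpt, fOther: 64-bit liveness bitmaps for one call.
 *      uint64_t const fClobbered = fWrite & ~(fRead | fXcpt | fOther);  // -> Skippable
 *      uint64_t const fDelayable = fWrite &  fXcpt & ~(fRead | fOther); // -> Delayable
 *      // any other written flag counts as Required.
 */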
5088
5089#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5090#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5091 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
5092
5093/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5094DECL_INLINE_THROW(uint32_t)
5095iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
5096 uint32_t fEflInput, uint32_t fEflOutput)
5097{
5098 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5099 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5100 RT_NOREF(fEflInput, fEflOutput);
5101
5102#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5103# ifdef VBOX_STRICT
5104 if ( pReNative->idxCurCall != 0
5105 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
5106 {
5107 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5108 uint32_t const fBoth = fEflInput | fEflOutput;
5109# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
5110 AssertMsg( !(fBoth & (a_fElfConst)) \
5111 || (!(fEflInput & (a_fElfConst)) \
5112 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5113 : !(fEflOutput & (a_fElfConst)) \
5114 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5115 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5116 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5117 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5118 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5119 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5120 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5121 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5122 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5123 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5124# undef ASSERT_ONE_EFL
5125 }
5126# endif
5127#endif
5128
5129 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5130
5131 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
5132 * the existing shadow copy. */
5133 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
5134 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5135 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5136 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5137 return off;
5138}
5139
5140
5141
5142/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5143 * start using it with custom native code emission (inlining assembly
5144 * instruction helpers). */
5145#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5146#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5147 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5148 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
5149
5150#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5151#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5152 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5153 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
5154
5155/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
5156DECL_INLINE_THROW(uint32_t)
5157iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
5158 bool fUpdateSkipping)
5159{
5160 RT_NOREF(fEflOutput);
5161 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
5162 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5163
5164#ifdef VBOX_STRICT
5165 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
5166 uint32_t offFixup = off;
5167 off = iemNativeEmitJnzToFixed(pReNative, off, off);
5168 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
5169 iemNativeFixupFixedJump(pReNative, offFixup, off);
5170
5171 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
5172 offFixup = off;
5173 off = iemNativeEmitJzToFixed(pReNative, off, off);
5174 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
5175 iemNativeFixupFixedJump(pReNative, offFixup, off);
5176
5177 /** @todo validate that only bits in the fEflOutput mask changed. */
5178#endif
5179
5180#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5181 if (fUpdateSkipping)
5182 {
5183 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5184 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5185 else
5186 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5187 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5188 }
5189#else
5190 RT_NOREF_PV(fUpdateSkipping);
5191#endif
5192
5193 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5194 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
5195 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5196 return off;
5197}
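/*
 * Illustrative sketch (host C, not emitter output) of the invariants the
 * strict-build checks above enforce before committing EFLAGS; the emitted
 * code breaks into the debugger (0x2001/0x2002) instead of returning false:
 *
 *      static bool isCommittableEflagsSketch(uint32_t fEfl)
 *      {
 *          if (!(fEfl & X86_EFL_RA1_MASK))         // always-one bit(s) must be set
 *              return false;                       // -> brk 0x2001
 *          if (fEfl & (X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32))
 *              return false;                       // reserved bits set -> brk 0x2002
 *          return true;
 *      }
 */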
5198
5199
5200typedef enum IEMNATIVEMITEFLOP
5201{
5202 kIemNativeEmitEflOp_Invalid = 0,
5203 kIemNativeEmitEflOp_Set,
5204 kIemNativeEmitEflOp_Clear,
5205 kIemNativeEmitEflOp_Flip
5206} IEMNATIVEMITEFLOP;
5207
5208#define IEM_MC_SET_EFL_BIT(a_fBit) \
5209 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Set);
5210
5211#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
5212 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Clear);
5213
5214#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
5215 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Flip);
5216
5217/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
5218DECL_INLINE_THROW(uint32_t) iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflBit, IEMNATIVEMITEFLOP enmOp)
5219{
5220 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5221 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/);
5222
5223 switch (enmOp)
5224 {
5225 case kIemNativeEmitEflOp_Set:
5226 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5227 break;
5228 case kIemNativeEmitEflOp_Clear:
5229 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~fEflBit);
5230 break;
5231 case kIemNativeEmitEflOp_Flip:
5232 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5233 break;
5234 default:
5235 AssertFailed();
5236 break;
5237 }
5238
5239 /** @todo No delayed writeback for EFLAGS right now. */
5240 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
5241
5242 /* Free but don't flush the EFLAGS register. */
5243 iemNativeRegFreeTmp(pReNative, idxEflReg);
5244
5245 return off;
5246}
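/*
 * Illustrative sketch (host C, not emitter output) of the three operations
 * emitted above, shown for a_fBit == X86_EFL_CF:
 *
 *      fEfl |=  X86_EFL_CF;    // kIemNativeEmitEflOp_Set   (IEM_MC_SET_EFL_BIT)
 *      fEfl &= ~X86_EFL_CF;    // kIemNativeEmitEflOp_Clear (IEM_MC_CLEAR_EFL_BIT)
 *      fEfl ^=  X86_EFL_CF;    // kIemNativeEmitEflOp_Flip  (IEM_MC_FLIP_EFL_BIT)
 */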
5247
5248
5249/*********************************************************************************************************************************
5250* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
5251*********************************************************************************************************************************/
5252
5253#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
5254 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
5255
5256#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
5257 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
5258
5259#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
5260 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
5261
5262
5263/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
5264 * IEM_MC_FETCH_SREG_ZX_U64. */
5265DECL_INLINE_THROW(uint32_t)
5266iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
5267{
5268 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5269 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
5270 Assert(iSReg < X86_SREG_COUNT);
5271
5272 /*
5273 * For now, we will not create a shadow copy of a selector. The rationale
5274 * is that since we do not recompile the popping and loading of segment
5275 * registers, and since the IEM_MC_FETCH_SREG_U* MCs are only used for
5276 * pushing and moving to registers, there is only a small chance that the
5277 * shadow copy will be accessed again before the register is reloaded. One
5278 * scenario would be nested calls in 16-bit code, but I doubt it's worth
5279 * the extra register pressure atm.
5280 *
5281 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
5282 * and iemNativeVarRegisterAcquire for a load scenario. We only got the
5283 * store scenario covered at present (r160730).
5284 */
5285 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5286 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
5287 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
5288 iemNativeVarRegisterRelease(pReNative, idxDstVar);
5289 return off;
5290}
5291
5292
5293
5294/*********************************************************************************************************************************
5295* Register references. *
5296*********************************************************************************************************************************/
5297
5298#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
5299 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
5300
5301#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
5302 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
5303
5304/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
5305DECL_INLINE_THROW(uint32_t)
5306iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
5307{
5308 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
5309 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5310 Assert(iGRegEx < 20);
5311
5312 if (iGRegEx < 16)
5313 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
5314 else
5315 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
5316
5317 /* If we've delayed writing back the register value, flush it now. */
5318 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
5319
5320 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5321 if (!fConst)
5322 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
5323
5324 return off;
5325}
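/*
 * Illustrative note (an assumption about the threaded-function convention, not
 * taken from this file): iGRegEx values 0..15 are assumed to name the low byte
 * registers (AL..R15B) and 16..19 the legacy high bytes (AH, CH, DH, BH); both
 * ranges map onto GPR number (iGRegEx & 15), only the reference kind differs:
 *
 *      uint8_t const iGpr      = iGRegEx & 15;     // e.g. 17 -> GPR 1 (rCX)
 *      bool    const fHighByte = iGRegEx >= 16;    // -> kIemNativeGstRegRef_GprHighByte
 */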
5326
5327#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
5328 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
5329
5330#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
5331 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
5332
5333#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
5334 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
5335
5336#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
5337 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
5338
5339#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
5340 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
5341
5342#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
5343 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
5344
5345#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
5346 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
5347
5348#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
5349 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
5350
5351#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
5352 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
5353
5354#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
5355 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
5356
5357/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
5358DECL_INLINE_THROW(uint32_t)
5359iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
5360{
5361 Assert(iGReg < 16);
5362 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
5363 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5364
5365 /* If we've delayed writing back the register value, flush it now. */
5366 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
5367
5368 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5369 if (!fConst)
5370 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
5371
5372 return off;
5373}
5374
5375
5376#undef IEM_MC_REF_EFLAGS /* should not be used. */
5377#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
5378 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5379 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
5380
5381/** Handles IEM_MC_REF_EFLAGS. */
5382DECL_INLINE_THROW(uint32_t)
5383iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
5384{
5385 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
5386 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5387
5388#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5389 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5390
5391 /* Updating the skipping according to the outputs is a little early, but
5392 we don't have any other hooks for references atm. */
5393 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5394 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5395 else if (fEflOutput & X86_EFL_STATUS_BITS)
5396 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5397 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5398#else
5399 RT_NOREF(fEflInput, fEflOutput);
5400#endif
5401
5402 /* If we've delayed writing back the register value, flush it now. */
5403 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
5404
5405 /* If there is a shadow copy of guest EFLAGS, flush it now. */
5406 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
5407
5408 return off;
5409}
5410
5411
5412/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
5413 * different code from the threaded recompiler, maybe it would be helpful. For now
5414 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
5415#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
5416
5417
5418#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
5419 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
5420
5421#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
5422 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
5423
5424#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
5425 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
5426
5427#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5428/* Just being paranoid here. */
5429# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
5430AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
5431AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
5432AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
5433AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
5434# endif
5435AssertCompileMemberOffset(X86XMMREG, au64, 0);
5436AssertCompileMemberOffset(X86XMMREG, au32, 0);
5437AssertCompileMemberOffset(X86XMMREG, ar64, 0);
5438AssertCompileMemberOffset(X86XMMREG, ar32, 0);
5439
5440# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
5441 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
5442# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
5443 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
5444# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
5445 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
5446# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
5447 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
5448#endif
5449
5450/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
5451DECL_INLINE_THROW(uint32_t)
5452iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
5453{
5454 Assert(iXReg < 16);
5455 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
5456 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5457
5458 /* If we've delayed writing back the register value, flush it now. */
5459 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
5460
5461#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5462 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5463 if (!fConst)
5464 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
5465#else
5466 RT_NOREF(fConst);
5467#endif
5468
5469 return off;
5470}
5471
5472
5473
5474/*********************************************************************************************************************************
5475* Effective Address Calculation *
5476*********************************************************************************************************************************/
5477#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
5478 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
5479
5480/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
5481 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
5482DECL_INLINE_THROW(uint32_t)
5483iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5484 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
5485{
5486 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5487
5488 /*
5489 * Handle the disp16 form with no registers first.
5490 *
5491 * Convert to an immediate value, as that'll delay the register allocation
5492 * and assignment till the memory access / call / whatever and we can use
5493 * a more appropriate register (or none at all).
5494 */
5495 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
5496 {
5497 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
5498 return off;
5499 }
5500
5501 /* Determine the displacement. */
5502 uint16_t u16EffAddr;
5503 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5504 {
5505 case 0: u16EffAddr = 0; break;
5506 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
5507 case 2: u16EffAddr = u16Disp; break;
5508 default: AssertFailedStmt(u16EffAddr = 0);
5509 }
5510
5511 /* Determine the registers involved. */
5512 uint8_t idxGstRegBase;
5513 uint8_t idxGstRegIndex;
5514 switch (bRm & X86_MODRM_RM_MASK)
5515 {
5516 case 0:
5517 idxGstRegBase = X86_GREG_xBX;
5518 idxGstRegIndex = X86_GREG_xSI;
5519 break;
5520 case 1:
5521 idxGstRegBase = X86_GREG_xBX;
5522 idxGstRegIndex = X86_GREG_xDI;
5523 break;
5524 case 2:
5525 idxGstRegBase = X86_GREG_xBP;
5526 idxGstRegIndex = X86_GREG_xSI;
5527 break;
5528 case 3:
5529 idxGstRegBase = X86_GREG_xBP;
5530 idxGstRegIndex = X86_GREG_xDI;
5531 break;
5532 case 4:
5533 idxGstRegBase = X86_GREG_xSI;
5534 idxGstRegIndex = UINT8_MAX;
5535 break;
5536 case 5:
5537 idxGstRegBase = X86_GREG_xDI;
5538 idxGstRegIndex = UINT8_MAX;
5539 break;
5540 case 6:
5541 idxGstRegBase = X86_GREG_xBP;
5542 idxGstRegIndex = UINT8_MAX;
5543 break;
5544#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
5545 default:
5546#endif
5547 case 7:
5548 idxGstRegBase = X86_GREG_xBX;
5549 idxGstRegIndex = UINT8_MAX;
5550 break;
5551 }
5552
5553 /*
5554 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
5555 */
5556 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5557 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5558 kIemNativeGstRegUse_ReadOnly);
5559 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
5560 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5561 kIemNativeGstRegUse_ReadOnly)
5562 : UINT8_MAX;
5563#ifdef RT_ARCH_AMD64
5564 if (idxRegIndex == UINT8_MAX)
5565 {
5566 if (u16EffAddr == 0)
5567 {
5568 /* movxz ret, base */
5569 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
5570 }
5571 else
5572 {
5573 /* lea ret32, [base64 + disp32] */
5574 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5575 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5576 if (idxRegRet >= 8 || idxRegBase >= 8)
5577 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
5578 pbCodeBuf[off++] = 0x8d;
5579 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5580 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
5581 else
5582 {
5583 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
5584 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5585 }
5586 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
5587 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
5588 pbCodeBuf[off++] = 0;
5589 pbCodeBuf[off++] = 0;
5590 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5591
5592 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
5593 }
5594 }
5595 else
5596 {
5597 /* lea ret32, [index64 + base64 (+ disp32)] */
5598 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5599 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5600 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5601 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5602 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5603 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5604 pbCodeBuf[off++] = 0x8d;
5605 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
5606 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5607 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
5608 if (bMod == X86_MOD_MEM4)
5609 {
5610 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
5611 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
5612 pbCodeBuf[off++] = 0;
5613 pbCodeBuf[off++] = 0;
5614 }
5615 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5616 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
5617 }
5618
5619#elif defined(RT_ARCH_ARM64)
5620 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5621 if (u16EffAddr == 0)
5622 {
5623 if (idxRegIndex == UINT8_MAX)
5624 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
5625 else
5626 {
5627 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
5628 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
5629 }
5630 }
5631 else
5632 {
5633 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
5634 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
5635 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
5636 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
5637 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
5638 else
5639 {
5640 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
5641 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
5642 }
5643 if (idxRegIndex != UINT8_MAX)
5644 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
5645 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
5646 }
5647
5648#else
5649# error "port me"
5650#endif
5651
5652 if (idxRegIndex != UINT8_MAX)
5653 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5654 iemNativeRegFreeTmp(pReNative, idxRegBase);
5655 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5656 return off;
5657}
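/*
 * Illustrative sketch (plain host C, not emitter output) of the value the
 * 16-bit addressing code above produces, including the 64K wrap-around that
 * the final 16-bit truncation (movzx/uxth) provides:
 *
 *      // e.g. bRm mod=1 rm=2 -> [bp + si + disp8]
 *      static uint16_t calcEffAddr16Sketch(uint16_t uBp, uint16_t uSi, int8_t i8Disp)
 *      {
 *          return (uint16_t)(uBp + uSi + (uint16_t)(int16_t)i8Disp); // wraps at 0x10000
 *      }
 */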
5658
5659
5660#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
5661 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
5662
5663/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
5664 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
5665DECL_INLINE_THROW(uint32_t)
5666iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5667 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
5668{
5669 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5670
5671 /*
5672 * Handle the disp32 form with no registers first.
5673 *
5674 * Convert to an immediate value, as that'll delay the register allocation
5675 * and assignment till the memory access / call / whatever and we can use
5676 * a more appropriate register (or none at all).
5677 */
5678 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
5679 {
5680 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
5681 return off;
5682 }
5683
5684 /* Calculate the fixed displacement (more on SIB.B=4 and SIB.B=5 further down). */
5685 uint32_t u32EffAddr = 0;
5686 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5687 {
5688 case 0: break;
5689 case 1: u32EffAddr = (int8_t)u32Disp; break;
5690 case 2: u32EffAddr = u32Disp; break;
5691 default: AssertFailed();
5692 }
5693
5694 /* Get the register (or SIB) value. */
5695 uint8_t idxGstRegBase = UINT8_MAX;
5696 uint8_t idxGstRegIndex = UINT8_MAX;
5697 uint8_t cShiftIndex = 0;
5698 switch (bRm & X86_MODRM_RM_MASK)
5699 {
5700 case 0: idxGstRegBase = X86_GREG_xAX; break;
5701 case 1: idxGstRegBase = X86_GREG_xCX; break;
5702 case 2: idxGstRegBase = X86_GREG_xDX; break;
5703 case 3: idxGstRegBase = X86_GREG_xBX; break;
5704 case 4: /* SIB */
5705 {
5706 /* index w/ scaling. */
5707 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
5708 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
5709 {
5710 case 0: idxGstRegIndex = X86_GREG_xAX; break;
5711 case 1: idxGstRegIndex = X86_GREG_xCX; break;
5712 case 2: idxGstRegIndex = X86_GREG_xDX; break;
5713 case 3: idxGstRegIndex = X86_GREG_xBX; break;
5714 case 4: cShiftIndex = 0; /*no index*/ break;
5715 case 5: idxGstRegIndex = X86_GREG_xBP; break;
5716 case 6: idxGstRegIndex = X86_GREG_xSI; break;
5717 case 7: idxGstRegIndex = X86_GREG_xDI; break;
5718 }
5719
5720 /* base */
5721 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
5722 {
5723 case 0: idxGstRegBase = X86_GREG_xAX; break;
5724 case 1: idxGstRegBase = X86_GREG_xCX; break;
5725 case 2: idxGstRegBase = X86_GREG_xDX; break;
5726 case 3: idxGstRegBase = X86_GREG_xBX; break;
5727 case 4:
5728 idxGstRegBase = X86_GREG_xSP;
5729 u32EffAddr += uSibAndRspOffset >> 8;
5730 break;
5731 case 5:
5732 if ((bRm & X86_MODRM_MOD_MASK) != 0)
5733 idxGstRegBase = X86_GREG_xBP;
5734 else
5735 {
5736 Assert(u32EffAddr == 0);
5737 u32EffAddr = u32Disp;
5738 }
5739 break;
5740 case 6: idxGstRegBase = X86_GREG_xSI; break;
5741 case 7: idxGstRegBase = X86_GREG_xDI; break;
5742 }
5743 break;
5744 }
5745 case 5: idxGstRegBase = X86_GREG_xBP; break;
5746 case 6: idxGstRegBase = X86_GREG_xSI; break;
5747 case 7: idxGstRegBase = X86_GREG_xDI; break;
5748 }
5749
5750 /*
5751 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
5752 * the start of the function.
5753 */
5754 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
5755 {
5756 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
5757 return off;
5758 }
5759
5760 /*
5761 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
5762 */
5763 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5764 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
5765 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5766 kIemNativeGstRegUse_ReadOnly);
5767 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
5768 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5769 kIemNativeGstRegUse_ReadOnly);
5770
5771 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
5772 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
5773 {
5774 idxRegBase = idxRegIndex;
5775 idxRegIndex = UINT8_MAX;
5776 }
5777
5778#ifdef RT_ARCH_AMD64
5779 if (idxRegIndex == UINT8_MAX)
5780 {
5781 if (u32EffAddr == 0)
5782 {
5783 /* mov ret, base */
5784 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5785 }
5786 else
5787 {
5788 /* lea ret32, [base64 + disp32] */
5789 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5790 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5791 if (idxRegRet >= 8 || idxRegBase >= 8)
5792 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
5793 pbCodeBuf[off++] = 0x8d;
5794 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5795 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5796 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
5797 else
5798 {
5799 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5800 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5801 }
5802 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5803 if (bMod == X86_MOD_MEM4)
5804 {
5805 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5806 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5807 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5808 }
5809 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5810 }
5811 }
5812 else
5813 {
5814 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5815 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5816 if (idxRegBase == UINT8_MAX)
5817 {
5818 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
5819 if (idxRegRet >= 8 || idxRegIndex >= 8)
5820 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5821 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5822 pbCodeBuf[off++] = 0x8d;
5823 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
5824 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
5825 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5826 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5827 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5828 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5829 }
5830 else
5831 {
5832 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
5833 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5834 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5835 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5836 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5837 pbCodeBuf[off++] = 0x8d;
5838 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
5839 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5840 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5841 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
5842 if (bMod != X86_MOD_MEM0)
5843 {
5844 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5845 if (bMod == X86_MOD_MEM4)
5846 {
5847 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5848 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5849 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5850 }
5851 }
5852 }
5853 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5854 }
5855
5856#elif defined(RT_ARCH_ARM64)
5857 if (u32EffAddr == 0)
5858 {
5859 if (idxRegIndex == UINT8_MAX)
5860 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5861 else if (idxRegBase == UINT8_MAX)
5862 {
5863 if (cShiftIndex == 0)
5864 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
5865 else
5866 {
5867 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5868 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
5869 }
5870 }
5871 else
5872 {
5873 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5874 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
5875 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
5876 }
5877 }
5878 else
5879 {
5880 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
5881 {
5882 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5883 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
5884 }
5885 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
5886 {
5887 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5888 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
5889 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
5890 }
5891 else
5892 {
5893 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
5894 if (idxRegBase != UINT8_MAX)
5895 {
5896 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5897 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
5898 }
5899 }
5900 if (idxRegIndex != UINT8_MAX)
5901 {
5902 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5903 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
5904 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
5905 }
5906 }
5907
5908#else
5909# error "port me"
5910#endif
5911
5912 if (idxRegIndex != UINT8_MAX)
5913 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5914 if (idxRegBase != UINT8_MAX)
5915 iemNativeRegFreeTmp(pReNative, idxRegBase);
5916 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5917 return off;
5918}
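/*
 * Illustrative sketch (plain host C, not emitter output) of what the 32-bit
 * path above computes once base, index and scale have been picked apart from
 * the ModRM/SIB bytes:
 *
 *      static uint32_t calcEffAddr32Sketch(uint32_t uBase, uint32_t uIndex,
 *                                          uint8_t cShiftIndex, uint32_t u32Disp)
 *      {
 *          return uBase + (uIndex << cShiftIndex) + u32Disp;   // truncated to 32 bits
 *      }
 */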
5919
5920
5921#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5922 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5923 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
5924
5925#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5926 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5927 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
5928
5929#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
5930 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
5931 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
5932
5933/**
5934 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
5935 *
5936 * @returns New off.
5937 * @param pReNative The native recompiler state.
5938 * @param off The current offset into the native instruction buffer.
5939 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
5940 * bit 4 to REX.X. The two bits are part of the
5941 * REG sub-field, which isn't needed in this
5942 * function.
5943 * @param uSibAndRspOffset Two parts:
5944 * - The first 8 bits make up the SIB byte.
5945 * - The next 8 bits are the fixed RSP/ESP offset
5946 * in case of a pop [xSP].
5947 * @param u32Disp The displacement byte/word/dword, if any.
5948 * @param cbInstr The size of the fully decoded instruction. Used
5949 * for RIP relative addressing.
5950 * @param idxVarRet The result variable number.
5951 * @param f64Bit Whether to use a 64-bit or 32-bit address size
5952 * when calculating the address.
5953 *
5954 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
5955 */
5956DECL_INLINE_THROW(uint32_t)
5957iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
5958 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
5959{
5960 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5961
5962 /*
5963 * Special case the rip + disp32 form first.
5964 */
5965 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
5966 {
5967#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5968 /* Need to take the current PC offset into account for the displacement. No need to flush here,
5969 * as the PC is only read and no branching or helper calls are involved. */
5970 u32Disp += pReNative->Core.offPc;
5971#endif
5972
5973 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5974 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
5975 kIemNativeGstRegUse_ReadOnly);
5976#ifdef RT_ARCH_AMD64
5977 if (f64Bit)
5978 {
5979 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
5980 if ((int32_t)offFinalDisp == offFinalDisp)
5981 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
5982 else
5983 {
5984 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
5985 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
5986 }
5987 }
5988 else
5989 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
5990
5991#elif defined(RT_ARCH_ARM64)
5992 if (f64Bit)
5993 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
5994 (int64_t)(int32_t)u32Disp + cbInstr);
5995 else
5996 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
5997 (int32_t)u32Disp + cbInstr);
5998
5999#else
6000# error "Port me!"
6001#endif
6002 iemNativeRegFreeTmp(pReNative, idxRegPc);
6003 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6004 return off;
6005 }
6006
6007 /* Calculate the fixed displacement (more on SIB.B=4 and SIB.B=5 further down). */
6008 int64_t i64EffAddr = 0;
6009 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6010 {
6011 case 0: break;
6012 case 1: i64EffAddr = (int8_t)u32Disp; break;
6013 case 2: i64EffAddr = (int32_t)u32Disp; break;
6014 default: AssertFailed();
6015 }
6016
6017 /* Get the register (or SIB) value. */
6018 uint8_t idxGstRegBase = UINT8_MAX;
6019 uint8_t idxGstRegIndex = UINT8_MAX;
6020 uint8_t cShiftIndex = 0;
6021 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6022 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6023 else /* SIB: */
6024 {
6025 /* index w/ scaling. */
6026 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6027 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6028 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6029 if (idxGstRegIndex == 4)
6030 {
6031 /* no index */
6032 cShiftIndex = 0;
6033 idxGstRegIndex = UINT8_MAX;
6034 }
6035
6036 /* base */
6037 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6038 if (idxGstRegBase == 4)
6039 {
6040 /* pop [rsp] hack */
6041 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
6042 }
6043 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6044 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6045 {
6046 /* mod=0 and base=5 -> disp32, no base reg. */
6047 Assert(i64EffAddr == 0);
6048 i64EffAddr = (int32_t)u32Disp;
6049 idxGstRegBase = UINT8_MAX;
6050 }
6051 }
6052
6053 /*
6054 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6055 * the start of the function.
6056 */
6057 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6058 {
6059 if (f64Bit)
6060 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6061 else
6062 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6063 return off;
6064 }
6065
6066 /*
6067 * Now emit code that calculates:
6068 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6069 * or if !f64Bit:
6070 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6071 */
6072 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6073 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6074 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6075 kIemNativeGstRegUse_ReadOnly);
6076 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6077 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6078 kIemNativeGstRegUse_ReadOnly);
6079
6080 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6081 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6082 {
6083 idxRegBase = idxRegIndex;
6084 idxRegIndex = UINT8_MAX;
6085 }
6086
6087#ifdef RT_ARCH_AMD64
6088 uint8_t bFinalAdj;
6089 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6090 bFinalAdj = 0; /* likely */
6091 else
6092 {
6093 /* pop [rsp] with a problematic disp32 value. Split out the
6094 RSP offset and add it separately afterwards (bFinalAdj). */
6095 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
6096 Assert(idxGstRegBase == X86_GREG_xSP);
6097 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6098 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6099 Assert(bFinalAdj != 0);
6100 i64EffAddr -= bFinalAdj;
6101 Assert((int32_t)i64EffAddr == i64EffAddr);
6102 }
6103 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6104//pReNative->pInstrBuf[off++] = 0xcc;
6105
6106 if (idxRegIndex == UINT8_MAX)
6107 {
6108 if (u32EffAddr == 0)
6109 {
6110 /* mov ret, base */
6111 if (f64Bit)
6112 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6113 else
6114 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6115 }
6116 else
6117 {
6118 /* lea ret, [base + disp32] */
6119 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6120 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6121 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
6122 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6123 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6124 | (f64Bit ? X86_OP_REX_W : 0);
6125 pbCodeBuf[off++] = 0x8d;
6126 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6127 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6128 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6129 else
6130 {
6131 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6132 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6133 }
6134 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6135 if (bMod == X86_MOD_MEM4)
6136 {
6137 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6138 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6139 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6140 }
6141 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6142 }
6143 }
6144 else
6145 {
6146 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6147 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6148 if (idxRegBase == UINT8_MAX)
6149 {
6150 /* lea ret, [(index64 << cShiftIndex) + disp32] */
6151 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
6152 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6153 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6154 | (f64Bit ? X86_OP_REX_W : 0);
6155 pbCodeBuf[off++] = 0x8d;
6156 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6157 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6158 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6159 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6160 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6161 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6162 }
6163 else
6164 {
6165 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6166 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6167 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6168 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6169 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6170 | (f64Bit ? X86_OP_REX_W : 0);
6171 pbCodeBuf[off++] = 0x8d;
6172 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6173 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6174 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6175 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6176 if (bMod != X86_MOD_MEM0)
6177 {
6178 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6179 if (bMod == X86_MOD_MEM4)
6180 {
6181 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6182 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6183 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6184 }
6185 }
6186 }
6187 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6188 }
6189
6190 if (!bFinalAdj)
6191 { /* likely */ }
6192 else
6193 {
6194 Assert(f64Bit);
6195 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
6196 }
6197
6198#elif defined(RT_ARCH_ARM64)
6199 if (i64EffAddr == 0)
6200 {
6201 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6202 if (idxRegIndex == UINT8_MAX)
6203 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
6204 else if (idxRegBase != UINT8_MAX)
6205 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6206 f64Bit, false /*fSetFlags*/, cShiftIndex);
6207 else
6208 {
6209 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
6210 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
6211 }
6212 }
6213 else
6214 {
6215 if (f64Bit)
6216 { /* likely */ }
6217 else
6218 i64EffAddr = (int32_t)i64EffAddr;
6219
6220 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
6221 {
6222 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6223 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
6224 }
6225 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
6226 {
6227 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6228 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
6229 }
6230 else
6231 {
6232 if (f64Bit)
6233 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
6234 else
6235 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
6236 if (idxRegBase != UINT8_MAX)
6237 {
6238 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6239 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
6240 }
6241 }
6242 if (idxRegIndex != UINT8_MAX)
6243 {
6244 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6245 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6246 f64Bit, false /*fSetFlags*/, cShiftIndex);
6247 }
6248 }
6249
6250#else
6251# error "port me"
6252#endif
6253
6254 if (idxRegIndex != UINT8_MAX)
6255 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6256 if (idxRegBase != UINT8_MAX)
6257 iemNativeRegFreeTmp(pReNative, idxRegBase);
6258 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6259 return off;
6260}
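/*
 * Illustrative sketch (plain host C, not emitter output) of the RIP-relative
 * special case handled first in the function above: the displacement is
 * relative to the end of the fully decoded instruction, hence the cbInstr
 * addend on top of the guest PC of the current instruction:
 *
 *      static uint64_t calcRipRelativeSketch(uint64_t uRipOfInstr, uint8_t cbInstr, int32_t i32Disp)
 *      {
 *          return uRipOfInstr + cbInstr + (int64_t)i32Disp;    // 64-bit address size
 *      }
 */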
6261
6262
6263/*********************************************************************************************************************************
6264* Memory fetches and stores common *
6265*********************************************************************************************************************************/
6266
6267typedef enum IEMNATIVEMITMEMOP
6268{
6269 kIemNativeEmitMemOp_Store = 0,
6270 kIemNativeEmitMemOp_Fetch,
6271 kIemNativeEmitMemOp_Fetch_Zx_U16,
6272 kIemNativeEmitMemOp_Fetch_Zx_U32,
6273 kIemNativeEmitMemOp_Fetch_Zx_U64,
6274 kIemNativeEmitMemOp_Fetch_Sx_U16,
6275 kIemNativeEmitMemOp_Fetch_Sx_U32,
6276 kIemNativeEmitMemOp_Fetch_Sx_U64
6277} IEMNATIVEMITMEMOP;
6278
6279/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
6280 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
6281 * (with iSegReg = UINT8_MAX). */
6282DECL_INLINE_THROW(uint32_t)
6283iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
6284 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
6285 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
6286{
6287 /*
6288 * Assert sanity.
6289 */
6290 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
6291 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
6292 Assert( enmOp != kIemNativeEmitMemOp_Store
6293 || pVarValue->enmKind == kIemNativeVarKind_Immediate
6294 || pVarValue->enmKind == kIemNativeVarKind_Stack);
6295 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
6296 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
6297 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
6298 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
6299 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6300 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
6301#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6302 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
6303 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
6304#else
6305 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
6306#endif
6307 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6308#ifdef VBOX_STRICT
6309 if (iSegReg == UINT8_MAX)
6310 {
6311 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6312 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6313 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6314 switch (cbMem)
6315 {
6316 case 1:
6317 Assert( pfnFunction
6318 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
6319 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6320 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6321 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6322 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6323 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
6324 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
6325 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
6326 : UINT64_C(0xc000b000a0009000) ));
6327 break;
6328 case 2:
6329 Assert( pfnFunction
6330 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
6331 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6332 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6333 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6334 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
6335 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
6336 : UINT64_C(0xc000b000a0009000) ));
6337 break;
6338 case 4:
6339 Assert( pfnFunction
6340 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
6341 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
6342 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
6343 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
6344 : UINT64_C(0xc000b000a0009000) ));
6345 break;
6346 case 8:
6347 Assert( pfnFunction
6348 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
6349 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
6350 : UINT64_C(0xc000b000a0009000) ));
6351 break;
6352#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6353 case sizeof(RTUINT128U):
6354 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6355 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
6356 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
6357 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
6358 || ( enmOp == kIemNativeEmitMemOp_Store
6359 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
6360 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
6361 break;
6362 case sizeof(RTUINT256U):
6363 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6364 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
6365 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
6366 || ( enmOp == kIemNativeEmitMemOp_Store
6367 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
6368 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
6369 break;
6370#endif
6371 }
6372 }
6373 else
6374 {
6375 Assert(iSegReg < 6);
6376 switch (cbMem)
6377 {
6378 case 1:
6379 Assert( pfnFunction
6380 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
6381 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
6382 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6383 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6384 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6385 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
6386 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
6387 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
6388 : UINT64_C(0xc000b000a0009000) ));
6389 break;
6390 case 2:
6391 Assert( pfnFunction
6392 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
6393 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
6394 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
6395 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
6396 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
6397 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
6398 : UINT64_C(0xc000b000a0009000) ));
6399 break;
6400 case 4:
6401 Assert( pfnFunction
6402 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
6403 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
6404 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
6405 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
6406 : UINT64_C(0xc000b000a0009000) ));
6407 break;
6408 case 8:
6409 Assert( pfnFunction
6410 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
6411 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
6412 : UINT64_C(0xc000b000a0009000) ));
6413 break;
6414#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6415 case sizeof(RTUINT128U):
6416 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6417 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
6418 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
6419 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
6420 || ( enmOp == kIemNativeEmitMemOp_Store
6421 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
6422 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
6423 break;
6424 case sizeof(RTUINT256U):
6425 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6426 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
6427 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
6428 || ( enmOp == kIemNativeEmitMemOp_Store
6429 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
6430 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
6431 break;
6432#endif
6433 }
6434 }
6435#endif
6436
6437#ifdef VBOX_STRICT
6438 /*
6439 * Check that the fExec flags we've got make sense.
6440 */
6441 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6442#endif
6443
6444 /*
6445 * To keep things simple we have to commit any pending writes first as we
6446 * may end up making calls.
6447 */
6448 /** @todo we could postpone this till we make the call and reload the
6449 * registers after returning from the call. Not sure if that's sensible or
6450 * not, though. */
6451#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6452 off = iemNativeRegFlushPendingWrites(pReNative, off);
6453#else
6454 /* The program counter is treated differently for now. */
6455 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
6456#endif
6457
6458#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6459 /*
6460 * Move/spill/flush stuff out of call-volatile registers.
6461 * This is the easy way out. We could contain this to the tlb-miss branch
6462 * by saving and restoring active stuff here.
6463 */
6464 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
6465#endif
6466
6467 /*
6468 * Define labels and allocate the result register (trying for the return
6469 * register if we can).
6470 */
6471 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6472#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6473 uint8_t idxRegValueFetch = UINT8_MAX;
6474
6475 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6476 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6477 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
6478 else
6479 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6480 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6481 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
6482 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
6483#else
6484 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6485 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6486 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
6487 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
6488#endif
6489 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
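    /* Note: TlbState.fSkip means no inline TLB lookup code will be emitted (e.g. when the lookup is not
       compiled in, see the IEMNATIVE_WITH_TLB_LOOKUP block below); everything then funnels through the
       TlbMiss helper-call path and the lookup/done labels are never created. */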
6490
6491#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6492 uint8_t idxRegValueStore = UINT8_MAX;
6493
6494 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6495 idxRegValueStore = !TlbState.fSkip
6496 && enmOp == kIemNativeEmitMemOp_Store
6497 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6498 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6499 : UINT8_MAX;
6500 else
6501 idxRegValueStore = !TlbState.fSkip
6502 && enmOp == kIemNativeEmitMemOp_Store
6503 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6504 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6505 : UINT8_MAX;
6506
6507#else
6508 uint8_t const idxRegValueStore = !TlbState.fSkip
6509 && enmOp == kIemNativeEmitMemOp_Store
6510 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6511 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6512 : UINT8_MAX;
6513#endif
6514 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
6515 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6516 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6517 : UINT32_MAX;
6518
6519 /*
6520 * Jump to the TLB lookup code.
6521 */
6522 if (!TlbState.fSkip)
6523 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
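    /* The emitted code from here on is laid out roughly as follows (sketch, not literal output):
     *       jmp   TlbLookup
     *   TlbMiss:
     *       <stats/PC bookkeeping, save volatiles, call pfnFunction, restore>
     *       jmp   TlbDone
     *   TlbLookup:
     *       <inline TLB probe - jumps to TlbMiss on miss, otherwise leaves the host address in idxRegMemResult>
     *       <inline load/store via idxRegMemResult>
     *   TlbDone:
     */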
6524
6525 /*
6526 * TlbMiss:
6527 *
6528 * Call helper to do the fetching.
6529 * We flush all guest register shadow copies here.
6530 */
6531 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
6532
6533#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6534 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6535#else
6536 RT_NOREF(idxInstr);
6537#endif
6538
6539#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6540 if (pReNative->Core.offPc)
6541 {
6542 /*
6543 * Update the program counter but restore it at the end of the TlbMiss branch.
6544         * This should allow delaying more program counter updates for the TlbLookup and hit paths,
6545         * which are hopefully much more frequent, reducing the number of memory accesses.
6546 */
6547 /* Allocate a temporary PC register. */
6548 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6549
6550 /* Perform the addition and store the result. */
6551 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6552 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6553
6554 /* Free and flush the PC register. */
6555 iemNativeRegFreeTmp(pReNative, idxPcReg);
6556 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
6557 }
6558#endif
6559
6560#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6561 /* Save variables in volatile registers. */
6562 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6563 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
6564 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
6565 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6566#endif
6567
6568 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
6569 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
6570#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6571 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6572 {
6573 /*
6574 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
6575 *
6576         * @note A host register was assigned to the variable for the TlbLookup case above and it must not
6577         *       be freed here, or the value loaded into that register will not be synced back to the stack
6578         *       slot further down the road, because the variable would no longer know it has a register assigned.
6579 *
6580 * @note For loads it is not required to sync what is in the assigned register with the stack slot
6581 * as it will be overwritten anyway.
6582 */
6583 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
6584 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
6585 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
6586 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
6587 }
6588 else
6589#endif
6590 if (enmOp == kIemNativeEmitMemOp_Store)
6591 {
6592 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
6593 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
6594#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6595 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6596#else
6597 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
6598 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
6599#endif
6600 }
6601
6602 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
6603 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
6604#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6605 fVolGregMask);
6606#else
6607 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
6608#endif
6609
6610 if (iSegReg != UINT8_MAX)
6611 {
6612 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
6613 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
6614 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
6615 }
6616
6617 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6618 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6619
6620 /* Done setting up parameters, make the call. */
6621 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
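    /* The call made above thus corresponds roughly to one of the following helper shapes (inferred from
     * the argument registers set up above, for illustration only):
     *      iemNativeHlpMemFetchDataUxx(pVCpu, GCPtrMem, iSegReg)          - segmented fetch
     *      iemNativeHlpMemStoreDataUxx(pVCpu, GCPtrMem, iSegReg, uValue)  - segmented store
     *      iemNativeHlpMemFlatFetchDataUxx(pVCpu, GCPtrMem)               - flat fetch
     *      iemNativeHlpMemFlatStoreDataUxx(pVCpu, GCPtrMem, uValue)       - flat store
     */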
6622
6623 /*
6624 * Put the result in the right register if this is a fetch.
6625 */
6626 if (enmOp != kIemNativeEmitMemOp_Store)
6627 {
6628#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6629 if ( cbMem == sizeof(RTUINT128U)
6630 || cbMem == sizeof(RTUINT256U))
6631 {
6632 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
6633
6634 /* Sync the value on the stack with the host register assigned to the variable. */
6635 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
6636 }
6637 else
6638#endif
6639 {
6640 Assert(idxRegValueFetch == pVarValue->idxReg);
6641 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
6642 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
6643 }
6644 }
6645
6646#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6647 /* Restore variables and guest shadow registers to volatile registers. */
6648 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6649 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6650#endif
6651
6652#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6653 if (pReNative->Core.offPc)
6654 {
6655 /*
6656 * Time to restore the program counter to its original value.
6657 */
6658 /* Allocate a temporary PC register. */
6659 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6660
6661 /* Restore the original value. */
6662 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6663 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6664
6665 /* Free and flush the PC register. */
6666 iemNativeRegFreeTmp(pReNative, idxPcReg);
6667 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
6668 }
6669#endif
6670
6671#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6672 if (!TlbState.fSkip)
6673 {
6674 /* end of TlbMiss - Jump to the done label. */
6675 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6676 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6677
6678 /*
6679 * TlbLookup:
6680 */
6681 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
6682 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
6683 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
6684
6685 /*
6686 * Emit code to do the actual storing / fetching.
6687 */
6688 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
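        /* On the TLB-hit path the access is done inline: idxRegMemResult holds the translated host address
           here, so the switch below emits a single host load/store (or immediate store) against it instead
           of calling a helper. */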
6689# ifdef VBOX_WITH_STATISTICS
6690 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6691 enmOp == kIemNativeEmitMemOp_Store
6692                                                  ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
6693                                                  : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
6694# endif
6695 switch (enmOp)
6696 {
6697 case kIemNativeEmitMemOp_Store:
6698 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
6699 {
6700 switch (cbMem)
6701 {
6702 case 1:
6703 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6704 break;
6705 case 2:
6706 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6707 break;
6708 case 4:
6709 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6710 break;
6711 case 8:
6712 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6713 break;
6714#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6715 case sizeof(RTUINT128U):
6716 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6717 break;
6718 case sizeof(RTUINT256U):
6719 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6720 break;
6721#endif
6722 default:
6723 AssertFailed();
6724 }
6725 }
6726 else
6727 {
6728 switch (cbMem)
6729 {
6730 case 1:
6731 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
6732 idxRegMemResult, TlbState.idxReg1);
6733 break;
6734 case 2:
6735 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
6736 idxRegMemResult, TlbState.idxReg1);
6737 break;
6738 case 4:
6739 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
6740 idxRegMemResult, TlbState.idxReg1);
6741 break;
6742 case 8:
6743 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
6744 idxRegMemResult, TlbState.idxReg1);
6745 break;
6746 default:
6747 AssertFailed();
6748 }
6749 }
6750 break;
6751
6752 case kIemNativeEmitMemOp_Fetch:
6753 case kIemNativeEmitMemOp_Fetch_Zx_U16:
6754 case kIemNativeEmitMemOp_Fetch_Zx_U32:
6755 case kIemNativeEmitMemOp_Fetch_Zx_U64:
6756 switch (cbMem)
6757 {
6758 case 1:
6759 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6760 break;
6761 case 2:
6762 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6763 break;
6764 case 4:
6765 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6766 break;
6767 case 8:
6768 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6769 break;
6770#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6771 case sizeof(RTUINT128U):
6772 /*
6773 * No need to sync back the register with the stack, this is done by the generic variable handling
6774 * code if there is a register assigned to a variable and the stack must be accessed.
6775 */
6776 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6777 break;
6778 case sizeof(RTUINT256U):
6779 /*
6780 * No need to sync back the register with the stack, this is done by the generic variable handling
6781 * code if there is a register assigned to a variable and the stack must be accessed.
6782 */
6783 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6784 break;
6785#endif
6786 default:
6787 AssertFailed();
6788 }
6789 break;
6790
6791 case kIemNativeEmitMemOp_Fetch_Sx_U16:
6792 Assert(cbMem == 1);
6793 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6794 break;
6795
6796 case kIemNativeEmitMemOp_Fetch_Sx_U32:
6797 Assert(cbMem == 1 || cbMem == 2);
6798 if (cbMem == 1)
6799 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6800 else
6801 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6802 break;
6803
6804 case kIemNativeEmitMemOp_Fetch_Sx_U64:
6805 switch (cbMem)
6806 {
6807 case 1:
6808 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6809 break;
6810 case 2:
6811 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6812 break;
6813 case 4:
6814 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6815 break;
6816 default:
6817 AssertFailed();
6818 }
6819 break;
6820
6821 default:
6822 AssertFailed();
6823 }
6824
6825 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6826
6827 /*
6828 * TlbDone:
6829 */
6830 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6831
6832 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
6833
6834# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6835 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
6836 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6837# endif
6838 }
6839#else
6840 RT_NOREF(fAlignMask, idxLabelTlbMiss);
6841#endif
6842
6843 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
6844 iemNativeVarRegisterRelease(pReNative, idxVarValue);
6845 return off;
6846}
6847
6848
6849
6850/*********************************************************************************************************************************
6851* Memory fetches (IEM_MEM_FETCH_XXX). *
6852*********************************************************************************************************************************/
6853
6854/* 8-bit segmented: */
6855#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
6856 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
6857 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
6858 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6859
6860#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6861 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6862 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
6863 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6864
6865#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6866 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6867 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6868 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6869
6870#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6871 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6872 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6873 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
6874
6875#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6876 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6877 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
6878 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
6879
6880#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6881 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6882 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6883 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
6884
6885#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6886 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6887 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6888 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
6889
6890/* 16-bit segmented: */
6891#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
6892 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6893 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6894 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6895
6896#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
6897 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
6898 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6899 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
6900
6901#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6902 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6903 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6904 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6905
6906#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6907 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6908 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6909 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6910
6911#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6912 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6913 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
6914 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
6915
6916#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6917 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6918 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6919 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
6920
6921
6922/* 32-bit segmented: */
6923#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
6924 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6925 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6926 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6927
6928#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
6929 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
6930 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6931 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
6932
6933#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6934 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6935 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6936 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6937
6938#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6939 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6940 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
6941 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
6942
6943#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
6944 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
6945 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
6946                                               (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
6947
6948#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
6949 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
6950 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
6951 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6952
6953#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
6954 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
6955 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
6956 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
6957
6958AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
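/* Given the size equality asserted above, the R32 fetch below simply reuses the raw U32 fetch helper and
   emitters; the bits are copied as-is into the destination without any floating-point interpretation. */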
6959#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
6960 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
6961 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
6962 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
6963
6964
6965/* 64-bit segmented: */
6966#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
6967 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
6968 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
6969 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
6970
6971AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
6972#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
6973 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
6974 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
6975 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
6976
6977
6978/* 8-bit flat: */
6979#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
6980 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
6981 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
6982 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
6983
6984#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
6985 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
6986 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
6987 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
6988
6989#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
6990 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
6991 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
6992 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
6993
6994#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
6995 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
6996 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
6997 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
6998
6999#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
7000 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7001 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7002 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7003
7004#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
7005 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7006 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7007 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7008
7009#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7010 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7011 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7012 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7013
7014
7015/* 16-bit flat: */
7016#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7017 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7018 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7019 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7020
7021#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7022 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7023 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7024 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7025
7026#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
7027 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7028 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7029 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7030
7031#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
7032 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7033 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7034 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7035
7036#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
7037 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7038 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7039 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7040
7041#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
7042 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7043 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7044 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7045
7046/* 32-bit flat: */
7047#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7048 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7049 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7050 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7051
7052#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
7053 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7054 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7055 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7056
7057#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
7058 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7059 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7060 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7061
7062#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
7063 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7064 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7065 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7066
7067#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
7068 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7069 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7070                                               (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7071
7072#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
7073 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7074 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7075 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7076
7077#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
7078 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
7079 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7080 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7081
7082#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
7083 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
7084 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7085 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7086
7087
7088/* 64-bit flat: */
7089#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7090 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7091 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7092 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7093
7094#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
7095 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
7096 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7097 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7098
7099#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7100/* 128-bit segmented: */
7101#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
7102 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7103 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7104 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
7105
7106#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
7107 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7108 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7109 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7110
7111AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7112#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
7113 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, \
7114 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7115 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7116
7117#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7118 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7119 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7120 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7121
7122/* 128-bit flat: */
7123#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
7124 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7125 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7126 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
7127
7128#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
7129 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7130 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7131 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7132
7133#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
7134 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
7135 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7136 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7137
7138#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
7139 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7140 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7141 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7142
7143/* 256-bit segmented: */
7144#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
7145 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7146 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7147 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7148
7149#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7150 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7151 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7152 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7153
7154#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
7155 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7156 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7157 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7158
7159
7160/* 256-bit flat: */
7161#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
7162 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7163 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7164 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7165
7166#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
7167 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7168 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7169 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7170
7171#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
7172 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7173 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7174 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7175#endif
7176
7177
7178/*********************************************************************************************************************************
7179* Memory stores (IEM_MEM_STORE_XXX). *
7180*********************************************************************************************************************************/
7181
7182#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
7183 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
7184 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
7185 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7186
7187#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
7188 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
7189 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7190 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7191
7192#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
7193 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
7194 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
7195 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7196
7197#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
7198 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
7199 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
7200 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7201
7202
7203#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
7204 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
7205 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
7206 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7207
7208#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
7209 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
7210 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7211 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7212
7213#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
7214 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
7215 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
7216 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7217
7218#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
7219 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
7220 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
7221 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7222
7223
7224#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
7225 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
7226 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7227
7228#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
7229 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7230 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7231
7232#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
7233 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7234 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7235
7236#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
7237 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7238 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7239
7240
7241#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
7242 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7243 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7244
7245#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
7246 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7247 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7248
7249#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
7250 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7251 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7252
7253#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
7254 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7255 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7256
7257/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
7258 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
7259DECL_INLINE_THROW(uint32_t)
7260iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
7261 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
7262{
7263 /*
7264 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
7265 * to do the grunt work.
7266 */
7267 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
7268 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
7269 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
7270 pfnFunction, idxInstr);
7271 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
7272 return off;
7273}
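/* Note: since the constant is materialised as an immediate-kind variable, a TLB hit inside the common
   worker takes the iemNativeEmitStoreImm*ByGprEx path above, so these constant stores need no extra
   value register on the hit path. */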
7274
7275
7276#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7277# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
7278 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
7279 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7280 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
7281
7282# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
7283 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
7284 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7285 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
7286
7287# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
7288 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
7289 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7290 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
7291
7292# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
7293 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
7294 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7295 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
7296
7297
7298# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
7299 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
7300 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7301 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
7302
7303# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
7304 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
7305 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7306 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
7307
7308# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
7309 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
7310 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7311 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
7312
7313# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
7314 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
7315 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7316 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
7317#endif
7318
7319
7320
7321/*********************************************************************************************************************************
7322* Stack Accesses. *
7323*********************************************************************************************************************************/
7324/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
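/* The packed parameter is decoded by iemNativeEmitStackPush below: byte 0 is the operand width in bits,
   byte 1 the flat stack width in bits (0 = segmented, SS-relative push) and byte 2 the segment-register
   push flag.  E.g. RT_MAKE_U32_FROM_U8(16, 64, 0, 0) describes a 16-bit push onto a flat 64-bit stack. */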
7325#define IEM_MC_PUSH_U16(a_u16Value) \
7326 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
7327 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
7328#define IEM_MC_PUSH_U32(a_u32Value) \
7329 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
7330 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
7331#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
7332 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
7333 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
7334#define IEM_MC_PUSH_U64(a_u64Value) \
7335 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
7336 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
7337
7338#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
7339 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
7340 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
7341#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
7342 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
7343 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
7344#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
7345 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
7346 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
7347
7348#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
7349 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
7350 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
7351#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
7352 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
7353 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
7354
7355
7356/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
7357DECL_INLINE_THROW(uint32_t)
7358iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
7359 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
7360{
7361 /*
7362 * Assert sanity.
7363 */
7364 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7365 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7366#ifdef VBOX_STRICT
7367 if (RT_BYTE2(cBitsVarAndFlat) != 0)
7368 {
7369 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7370 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7371 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7372 Assert( pfnFunction
7373 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
7374 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
7375 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
7376 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
7377 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
7378 : UINT64_C(0xc000b000a0009000) ));
7379 }
7380 else
7381 Assert( pfnFunction
7382 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
7383 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
7384 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
7385 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
7386 : UINT64_C(0xc000b000a0009000) ));
7387#endif
7388
7389#ifdef VBOX_STRICT
7390 /*
7391 * Check that the fExec flags we've got make sense.
7392 */
7393 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7394#endif
7395
7396 /*
7397 * To keep things simple we have to commit any pending writes first as we
7398 * may end up making calls.
7399 */
7400 /** @todo we could postpone this till we make the call and reload the
7401 * registers after returning from the call. Not sure if that's sensible or
7402 * not, though. */
7403 off = iemNativeRegFlushPendingWrites(pReNative, off);
7404
7405 /*
7406 * First we calculate the new RSP and the effective stack pointer value.
7407 * For 64-bit mode and flat 32-bit these two are the same.
7408 * (Code structure is very similar to that of PUSH)
7409 */
7410 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
7411 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
7412 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
7413 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
7414 ? cbMem : sizeof(uint16_t);
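    /* I.e. on Intel CPUs a segment register push in 32-bit/64-bit code only writes the low 16 bits of the
       stack slot while the stack pointer is still adjusted by the full operand size, which is why
       cbMemAccess is clamped to 16 bits here while cbMem keeps driving the RSP update below. */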
7415 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
7416 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
7417 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
7418 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
7419 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
7420 if (cBitsFlat != 0)
7421 {
7422 Assert(idxRegEffSp == idxRegRsp);
7423 Assert(cBitsFlat == 32 || cBitsFlat == 64);
7424 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
7425 if (cBitsFlat == 64)
7426 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
7427 else
7428 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
7429 }
7430 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
7431 {
7432 Assert(idxRegEffSp != idxRegRsp);
7433 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
7434 kIemNativeGstRegUse_ReadOnly);
7435#ifdef RT_ARCH_AMD64
7436 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7437#else
7438 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7439#endif
7440 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
7441 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
7442 offFixupJumpToUseOtherBitSp = off;
7443 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7444 {
7445 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
7446 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7447 }
7448 else
7449 {
7450 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
7451 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7452 }
7453 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7454 }
7455 /* SpUpdateEnd: */
7456 uint32_t const offLabelSpUpdateEnd = off;
7457
7458 /*
7459 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
7460 * we're skipping lookup).
7461 */
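    /*
     * Rough layout of what gets emitted for the push (with TLB lookup enabled):
     *
     *      <RSP/SP update for the current stack width>
     *   SpUpdateEnd:
     *      jmp   TlbLookup                ; or jmp TlbMiss when the lookup is skipped
     *   Use16BitSp:                       ; out-of-line update for the other stack width
     *      <SP/ESP update>
     *      jmp   SpUpdateEnd
     *   TlbMiss:
     *      <save volatiles, marshal arguments, call pfnFunction, restore>
     *      jmp   TlbDone
     *   TlbLookup:
     *      <inline TLB lookup and store of the pushed value>
     *   TlbDone:
     *      <free registers / write back the new RSP>
     */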
7462 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
7463 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
7464 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7465 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
7466 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7467 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7468 : UINT32_MAX;
7469 uint8_t const idxRegValue = !TlbState.fSkip
7470 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7471 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
7472 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
7473 : UINT8_MAX;
7474 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7475
7476
7477 if (!TlbState.fSkip)
7478 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7479 else
7480 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
7481
7482 /*
7483 * Use16BitSp:
7484 */
7485 if (cBitsFlat == 0)
7486 {
7487#ifdef RT_ARCH_AMD64
7488 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7489#else
7490 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7491#endif
7492 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
7493 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7494 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7495 else
7496 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7497 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
7498 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7499 }
7500
7501 /*
7502 * TlbMiss:
7503 *
7504 * Call helper to do the pushing.
7505 */
7506 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
7507
7508#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7509 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7510#else
7511 RT_NOREF(idxInstr);
7512#endif
7513
7514 /* Save variables in volatile registers. */
7515 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7516 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
7517 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
7518 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
7519 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7520
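    /* The stack helper is called with pVCpu in ARG0, the effective stack address
       in ARG1 and the value in ARG2.  The three cases below only differ in how
       they avoid clobbering a source that already lives in one of the destination
       registers: when the value sits in ARG1 and the effective SP in ARG2 they
       are rotated through ARG0; otherwise the loads are simply ordered so that
       no source is overwritten before it has been consumed. */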
7521 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
7522 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
7523 {
7524 /* Swap them using ARG0 as temp register: */
7525 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
7526 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
7527 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
7528 }
7529 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
7530 {
7531 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
7532 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
7533 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7534
7535 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
7536 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
7537 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7538 }
7539 else
7540 {
7541 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
7542 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7543
7544 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
7545 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
7546 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
7547 }
7548
7549 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7550 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7551
7552 /* Done setting up parameters, make the call. */
7553 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7554
7555 /* Restore variables and guest shadow registers to volatile registers. */
7556 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7557 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7558
7559#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7560 if (!TlbState.fSkip)
7561 {
7562 /* end of TlbMiss - Jump to the done label. */
7563 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7564 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7565
7566 /*
7567 * TlbLookup:
7568 */
7569 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
7570 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7571
7572 /*
7573 * Emit code to do the actual storing / fetching.
7574 */
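        /* At this point idxRegMemResult holds the host address produced by the
           TLB lookup above; the value (from a register or as an immediate) is
           stored directly to it below, using cbMemAccess as the access size. */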
7575 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7576# ifdef VBOX_WITH_STATISTICS
7577 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7578 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
7579# endif
7580 if (idxRegValue != UINT8_MAX)
7581 {
7582 switch (cbMemAccess)
7583 {
7584 case 2:
7585 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7586 break;
7587 case 4:
7588 if (!fIsIntelSeg)
7589 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7590 else
7591 {
7592                        /* Intel real mode segment push: the 10890XE adds the 2nd half of EFLAGS to a
7593                           PUSH FS in real mode, so we have to try to emulate that here.
7594                           We borrow the now unused idxReg1 from the TLB lookup code here. */
7595 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
7596 kIemNativeGstReg_EFlags);
7597 if (idxRegEfl != UINT8_MAX)
7598 {
7599#ifdef RT_ARCH_AMD64
7600 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
7601 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
7602 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7603#else
7604 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
7605 off, TlbState.idxReg1, idxRegEfl,
7606 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7607#endif
7608 iemNativeRegFreeTmp(pReNative, idxRegEfl);
7609 }
7610 else
7611 {
7612 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
7613 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
7614 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
7615 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7616 }
7617 /* ASSUMES the upper half of idxRegValue is ZERO. */
7618 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
7619 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
7620 }
7621 break;
7622 case 8:
7623 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7624 break;
7625 default:
7626 AssertFailed();
7627 }
7628 }
7629 else
7630 {
7631 switch (cbMemAccess)
7632 {
7633 case 2:
7634 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7635 idxRegMemResult, TlbState.idxReg1);
7636 break;
7637 case 4:
7638 Assert(!fIsSegReg);
7639 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7640 idxRegMemResult, TlbState.idxReg1);
7641 break;
7642 case 8:
7643 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
7644 break;
7645 default:
7646 AssertFailed();
7647 }
7648 }
7649
7650 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7651 TlbState.freeRegsAndReleaseVars(pReNative);
7652
7653 /*
7654 * TlbDone:
7655 *
7656 * Commit the new RSP value.
7657 */
7658 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7659 }
7660#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
7661
7662#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7663 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
7664#endif
7665 iemNativeRegFreeTmp(pReNative, idxRegRsp);
7666 if (idxRegEffSp != idxRegRsp)
7667 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
7668
7669    /* The value variable is implicitly flushed. */
7670 if (idxRegValue != UINT8_MAX)
7671 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7672 iemNativeVarFreeLocal(pReNative, idxVarValue);
7673
7674 return off;
7675}
7676
7677
7678
7679/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
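/* The pop wrappers use the same RT_MAKE_U32_FROM_U8 packing as the push ones:
   byte 0 is the width in bits of the value being popped and byte 1 the flat
   stack width (0 when not flat); bytes 2 and 3 are unused here.  E.g.
   IEM_MC_FLAT64_POP_GREG_U16 below passes RT_MAKE_U32_FROM_U8(16, 64, 0, 0),
   i.e. a 16-bit pop on a flat 64-bit stack. */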
7680#define IEM_MC_POP_GREG_U16(a_iGReg) \
7681 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
7682 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
7683#define IEM_MC_POP_GREG_U32(a_iGReg) \
7684 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
7685 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
7686#define IEM_MC_POP_GREG_U64(a_iGReg) \
7687 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
7688 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
7689
7690#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
7691 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
7692 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
7693#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
7694 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
7695 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
7696
7697#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
7698 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
7699 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
7700#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
7701 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
7702 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
7703
7704
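/**
 * Calculates the effective address and advances the stack pointer for a pop on
 * a 16-bit stack: the effective address is RSP with only bits 15:0 used, and
 * SP is incremented by cbMem modulo 64K while bits 63:16 of RSP are preserved.
 * On AMD64 a single 16-bit add suffices (it leaves the upper bits alone); on
 * ARM64 the ubfiz/add/and/bfi sequence below achieves the same, using
 * idxRegTmp as scratch.
 */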
7705DECL_FORCE_INLINE_THROW(uint32_t)
7706iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
7707 uint8_t idxRegTmp)
7708{
7709 /* Use16BitSp: */
7710#ifdef RT_ARCH_AMD64
7711 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
7712 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
7713 RT_NOREF(idxRegTmp);
7714#else
7715 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
7716 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
7717 /* add tmp, regrsp, #cbMem */
7718 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
7719 /* and tmp, tmp, #0xffff */
7720 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
7721 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
7722    /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
7723 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
7724#endif
7725 return off;
7726}
7727
7728
7729DECL_FORCE_INLINE(uint32_t)
7730iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
7731{
7732 /* Use32BitSp: */
7733 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
7734 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
7735 return off;
7736}
7737
7738
7739/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
7740DECL_INLINE_THROW(uint32_t)
7741iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
7742 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
7743{
7744 /*
7745 * Assert sanity.
7746 */
7747 Assert(idxGReg < 16);
7748#ifdef VBOX_STRICT
7749 if (RT_BYTE2(cBitsVarAndFlat) != 0)
7750 {
7751 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7752 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7753 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7754 Assert( pfnFunction
7755 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
7756 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
7757 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
7758 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
7759 : UINT64_C(0xc000b000a0009000) ));
7760 }
7761 else
7762 Assert( pfnFunction
7763 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
7764 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
7765 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
7766 : UINT64_C(0xc000b000a0009000) ));
7767#endif
7768
7769#ifdef VBOX_STRICT
7770 /*
7771 * Check that the fExec flags we've got make sense.
7772 */
7773 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7774#endif
7775
7776 /*
7777 * To keep things simple we have to commit any pending writes first as we
7778 * may end up making calls.
7779 */
7780 off = iemNativeRegFlushPendingWrites(pReNative, off);
7781
7782 /*
7783     * Determine the effective stack pointer; for non-FLAT modes we also update RSP.
7784 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
7785 * directly as the effective stack pointer.
7786 * (Code structure is very similar to that of PUSH)
7787 */
7788 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
7789 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
7790 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
7791 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
7792 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
7793 /** @todo can do a better job picking the register here. For cbMem >= 4 this
7794 * will be the resulting register value. */
7795 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
7796
7797 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
7798 if (cBitsFlat != 0)
7799 {
7800 Assert(idxRegEffSp == idxRegRsp);
7801 Assert(cBitsFlat == 32 || cBitsFlat == 64);
7802 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
7803 }
7804 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
7805 {
7806 Assert(idxRegEffSp != idxRegRsp);
7807 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
7808 kIemNativeGstRegUse_ReadOnly);
7809#ifdef RT_ARCH_AMD64
7810 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7811#else
7812 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7813#endif
7814 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
7815 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
7816 offFixupJumpToUseOtherBitSp = off;
7817 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7818 {
7819/** @todo can skip idxRegRsp updating when popping ESP. */
7820 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
7821 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7822 }
7823 else
7824 {
7825 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
7826 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
7827 }
7828 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7829 }
7830 /* SpUpdateEnd: */
7831 uint32_t const offLabelSpUpdateEnd = off;
7832
7833 /*
7834 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
7835 * we're skipping lookup).
7836 */
7837 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
7838 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
7839 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7840 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
7841 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7842 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7843 : UINT32_MAX;
7844
7845 if (!TlbState.fSkip)
7846 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7847 else
7848 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
7849
7850 /*
7851 * Use16BitSp:
7852 */
7853 if (cBitsFlat == 0)
7854 {
7855#ifdef RT_ARCH_AMD64
7856 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7857#else
7858 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7859#endif
7860 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
7861 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7862 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
7863 else
7864 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7865 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
7866 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7867 }
7868
7869 /*
7870 * TlbMiss:
7871 *
7872     * Call helper to do the popping.
7873 */
7874 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
7875
7876#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7877 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7878#else
7879 RT_NOREF(idxInstr);
7880#endif
7881
7882 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7883 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
7884 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
7885 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7886
7887
7888 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
7889 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
7890 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7891
7892 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7893 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7894
7895 /* Done setting up parameters, make the call. */
7896 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7897
7898 /* Move the return register content to idxRegMemResult. */
7899 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
7900 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
7901
7902 /* Restore variables and guest shadow registers to volatile registers. */
7903 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7904 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7905
7906#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7907 if (!TlbState.fSkip)
7908 {
7909 /* end of TlbMiss - Jump to the done label. */
7910 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7911 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7912
7913 /*
7914 * TlbLookup:
7915 */
7916 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
7917 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7918
7919 /*
7920         * Emit code to load the value (the host address in idxRegMemResult is replaced by the value read from it).
7921 */
7922 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7923# ifdef VBOX_WITH_STATISTICS
7924 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7925 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
7926# endif
7927 switch (cbMem)
7928 {
7929 case 2:
7930 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
7931 break;
7932 case 4:
7933 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
7934 break;
7935 case 8:
7936 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
7937 break;
7938 default:
7939 AssertFailed();
7940 }
7941
7942 TlbState.freeRegsAndReleaseVars(pReNative);
7943
7944 /*
7945 * TlbDone:
7946 *
7947 * Set the new RSP value (FLAT accesses needs to calculate it first) and
7948 * commit the popped register value.
7949 */
7950 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7951 }
7952#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
7953
7954 if (idxGReg != X86_GREG_xSP)
7955 {
7956 /* Set the register. */
7957 if (cbMem >= sizeof(uint32_t))
7958 {
7959#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
7960 AssertMsg( pReNative->idxCurCall == 0
7961 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
7962 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
7963#endif
7964 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
7965#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7966 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
7967#endif
7968#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7969 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
7970 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
7971#endif
7972 }
7973 else
7974 {
7975 Assert(cbMem == sizeof(uint16_t));
7976 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
7977 kIemNativeGstRegUse_ForUpdate);
7978 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
7979#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7980 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
7981#endif
7982 iemNativeRegFreeTmp(pReNative, idxRegDst);
7983 }
7984
7985 /* Complete RSP calculation for FLAT mode. */
7986 if (idxRegEffSp == idxRegRsp)
7987 {
7988 if (cBitsFlat == 64)
7989 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
7990 else
7991 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
7992 }
7993 }
7994 else
7995 {
7996        /* We're popping RSP, ESP or SP. Only the 16-bit SP case needs a bit of extra work, of course. */
7997 if (cbMem == sizeof(uint64_t))
7998 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
7999 else if (cbMem == sizeof(uint32_t))
8000 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
8001 else
8002 {
8003 if (idxRegEffSp == idxRegRsp)
8004 {
8005 if (cBitsFlat == 64)
8006 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, sizeof(uint64_t));
8007 else
8008 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, sizeof(uint32_t));
8009 }
8010 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
8011 }
8012 }
8013
8014#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8015 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
8016#endif
8017
8018 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8019 if (idxRegEffSp != idxRegRsp)
8020 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8021 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8022
8023 return off;
8024}
8025
8026
8027
8028/*********************************************************************************************************************************
8029* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
8030*********************************************************************************************************************************/
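/*
 * All of the wrappers below funnel into iemNativeEmitMemMapCommon with the same
 * argument pattern: the mapped-pointer and bUnmapInfo variables, the segment
 * register (UINT8_MAX for the FLAT variants), the guest address variable, the
 * access size, the IEM_ACCESS_DATA_XXX access mode, an alignment mask (size - 1
 * in general, 0 for bytes), the matching iemNativeHlpMem[Flat]MapDataXXX helper
 * and the instruction index used for instruction counting in the TLB-miss path.
 */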
8031
8032#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8033 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8034 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
8035 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
8036
8037#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8038 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8039 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
8040 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
8041
8042#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8043 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8044 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
8045 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
8046
8047#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8048 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8049 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
8050 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
8051
8052
8053#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8054 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8055 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8056 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
8057
8058#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8059 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8060 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8061 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
8062
8063#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8064 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8065 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8066 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8067
8068#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8069 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8070 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8071 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
8072
8073#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8074 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
8075 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8076 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8077
8078
8079#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8080 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8081 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8082 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
8083
8084#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8085 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8086 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8087 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
8088
8089#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8090 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8091 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8092 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8093
8094#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8095 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8096 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8097 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
8098
8099#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8100 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
8101 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8102 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8103
8104
8105#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8106 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8107 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8108 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
8109
8110#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8111 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8112 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8113 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
8114#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8115 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8116 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8117 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8118
8119#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8120 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8121 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8122 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
8123
8124#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8125 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
8126 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8127 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8128
8129
8130#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8131 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8132 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8133 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
8134
8135#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8136 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8137 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
8138 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
8139
8140
8141#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8142 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8143 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
8144 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
8145
8146#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8147 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8148 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
8149 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
8150
8151#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8152 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8153 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
8154 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
8155
8156#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8157 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8158 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
8159 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
8160
8161
8162
8163#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8164 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8165 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
8166 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
8167
8168#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8169 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8170 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
8171 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
8172
8173#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8174 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8175 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
8176 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
8177
8178#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8179 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8180 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
8181 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
8182
8183
8184#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8185 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8186 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8187 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
8188
8189#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8190 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8191 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8192 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
8193
8194#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8195 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8196 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8197 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
8198
8199#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8200 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8201 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8202 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
8203
8204#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
8205 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
8206 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
8207 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
8208
8209
8210#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8211 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8212 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8213 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
8214
8215#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8216 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8217 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8218 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
8219
8220#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8221 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8222 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8223 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
8224
8225#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8226 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8227 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8228 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
8229
8230#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
8231 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
8232 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
8233 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
8234
8235
8236#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8237 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8238 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8239 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
8240
8241#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8242 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8243 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8244 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
8245
8246#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8247 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8248 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8249 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
8250
8251#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8252 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8253 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8254 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
8255
8256#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
8257 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
8258 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8259 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
8260
8261
8262#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
8263 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
8264 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
8265 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
8266
8267#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
8268 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
8269 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
8270 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
8271
8272
8273#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8274 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8275 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
8276 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
8277
8278#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8279 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8280 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
8281 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
8282
8283#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8284 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8285 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
8286 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
8287
8288#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8289 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8290 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
8291 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
8292
8293
8294DECL_INLINE_THROW(uint32_t)
8295iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
8296 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
8297 uintptr_t pfnFunction, uint8_t idxInstr)
8298{
8299 /*
8300 * Assert sanity.
8301 */
8302 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
8303 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
8304 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
8305 && pVarMem->cbVar == sizeof(void *),
8306 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8307
8308 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
8309 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
8310 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
8311 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
8312 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8313
8314 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
8315 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
8316 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
8317 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
8318 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8319
8320 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
8321
8322 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
8323
8324#ifdef VBOX_STRICT
8325# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
8326 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
8327 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
8328 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
8329 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
8330# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
8331 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
8332 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
8333 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
8334
8335 if (iSegReg == UINT8_MAX)
8336 {
8337 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8338 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8339 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8340 switch (cbMem)
8341 {
8342 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
8343 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
8344 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
8345 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
8346 case 10:
8347 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
8348 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
8349 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
8350 break;
8351 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
8352# if 0
8353 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
8354 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
8355# endif
8356 default: AssertFailed(); break;
8357 }
8358 }
8359 else
8360 {
8361 Assert(iSegReg < 6);
8362 switch (cbMem)
8363 {
8364 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
8365 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
8366 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
8367 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
8368 case 10:
8369 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
8370 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
8371 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
8372 break;
8373 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
8374# if 0
8375 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
8376 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
8377# endif
8378 default: AssertFailed(); break;
8379 }
8380 }
8381# undef IEM_MAP_HLP_FN
8382# undef IEM_MAP_HLP_FN_NO_AT
8383#endif
8384
8385#ifdef VBOX_STRICT
8386 /*
8387 * Check that the fExec flags we've got make sense.
8388 */
8389 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8390#endif
8391
8392 /*
8393 * To keep things simple we have to commit any pending writes first as we
8394 * may end up making calls.
8395 */
8396 off = iemNativeRegFlushPendingWrites(pReNative, off);
8397
8398#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8399 /*
8400 * Move/spill/flush stuff out of call-volatile registers.
8401 * This is the easy way out. We could contain this to the tlb-miss branch
8402 * by saving and restoring active stuff here.
8403 */
8404 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
8405 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
8406#endif
8407
8408 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
8409 while the tlb-miss codepath will temporarily put it on the stack.
8410       Set the type to stack here so we don't need to do it twice below. */
8411 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
8412 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
8413 /** @todo use a tmp register from TlbState, since they'll be free after tlb
8414 * lookup is done. */
8415
8416 /*
8417 * Define labels and allocate the result register (trying for the return
8418 * register if we can).
8419 */
8420 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8421 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
8422 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
8423 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
8424 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
8425 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8426 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8427 : UINT32_MAX;
8428//off=iemNativeEmitBrk(pReNative, off, 0);
8429 /*
8430 * Jump to the TLB lookup code.
8431 */
8432 if (!TlbState.fSkip)
8433 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8434
8435 /*
8436 * TlbMiss:
8437 *
8438 * Call helper to do the fetching.
8439 * We flush all guest register shadow copies here.
8440 */
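    /* The mapping helper is invoked with pVCpu in ARG0, the address of the
       bUnmapInfo stack slot in ARG1, the guest address in ARG2 and, for non-flat
       accesses, the segment register in ARG3.  The returned host pointer is moved
       into idxRegMemResult after the call, and bUnmapInfo is then reloaded from
       its stack slot into idxRegUnmapInfo. */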
8441 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
8442
8443#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8444 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8445#else
8446 RT_NOREF(idxInstr);
8447#endif
8448
8449#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8450 /* Save variables in volatile registers. */
8451 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
8452 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8453#endif
8454
8455 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
8456 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
8457#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8458 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
8459#else
8460 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8461#endif
8462
8463 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
8464 if (iSegReg != UINT8_MAX)
8465 {
8466 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
8467 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
8468 }
8469
8470 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
8471 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
8472 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
8473
8474 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8475 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8476
8477 /* Done setting up parameters, make the call. */
8478 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8479
8480 /*
8481 * Put the output in the right registers.
8482 */
8483 Assert(idxRegMemResult == pVarMem->idxReg);
8484 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8485 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8486
8487#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8488 /* Restore variables and guest shadow registers to volatile registers. */
8489 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8490 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8491#endif
8492
8493 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
8494 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
8495
8496#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8497 if (!TlbState.fSkip)
8498 {
8499        /* end of TlbMiss - Jump to the done label. */
8500 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8501 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8502
8503 /*
8504 * TlbLookup:
8505 */
8506 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
8507 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8508# ifdef VBOX_WITH_STATISTICS
8509 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
8510 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
8511# endif
8512
8513 /* [idxVarUnmapInfo] = 0; */
8514 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
8515
8516 /*
8517 * TlbDone:
8518 */
8519 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8520
8521 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
8522
8523# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8524 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
8525 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8526# endif
8527 }
8528#else
8529 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
8530#endif
8531
8532 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
8533 iemNativeVarRegisterRelease(pReNative, idxVarMem);
8534
8535 return off;
8536}
8537
8538
8539#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
8540 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
8541 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
8542
8543#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
8544 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
8545 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
8546
8547#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
8548 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
8549 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
8550
8551#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
8552 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
8553 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
8554
8555DECL_INLINE_THROW(uint32_t)
8556iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
8557 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
8558{
8559 /*
8560 * Assert sanity.
8561 */
8562 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
8563#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
8564 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
8565#endif
8566 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
8567 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
8568 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
8569#ifdef VBOX_STRICT
8570 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
8571 {
8572 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
8573 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
8574 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
8575 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
8576 case IEM_ACCESS_TYPE_WRITE:
8577 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
8578 case IEM_ACCESS_TYPE_READ:
8579 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
8580 default: AssertFailed();
8581 }
8582#else
8583 RT_NOREF(fAccess);
8584#endif
8585
8586 /*
8587     * To keep things simple, we have to commit any pending writes first, as we
8588     * may end up making calls (there shouldn't be any at this point, so this
8589     * is just for consistency).
8590 */
8591 /** @todo we could postpone this till we make the call and reload the
8592 * registers after returning from the call. Not sure if that's sensible or
8593 * not, though. */
8594 off = iemNativeRegFlushPendingWrites(pReNative, off);
8595
8596 /*
8597 * Move/spill/flush stuff out of call-volatile registers.
8598 *
8599 * We exclude any register holding the bUnmapInfo variable, as we'll be
8600 * checking it after returning from the call and will free it afterwards.
8601 */
8602 /** @todo save+restore active registers and maybe guest shadows in miss
8603 * scenario. */
8604 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
8605 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
8606
8607 /*
8608 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
8609 * to call the unmap helper function.
8610 *
8611     * The likelihood of it being zero is higher than that of a TLB hit when doing
8612     * the mapping, as a TLB miss for a well-aligned and unproblematic memory
8613     * access should also end up with a mapping that won't need special unmapping.
8614 */
8615 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
8616 * should speed up things for the pure interpreter as well when TLBs
8617 * are enabled. */
8618#ifdef RT_ARCH_AMD64
8619 if (pVarUnmapInfo->idxReg == UINT8_MAX)
8620 {
8621 /* test byte [rbp - xxx], 0ffh */
8622 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
8623 pbCodeBuf[off++] = 0xf6;
8624 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
8625 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
8626 pbCodeBuf[off++] = 0xff;
8627 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8628 }
8629 else
8630#endif
8631 {
8632 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
8633 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
8634 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
8635 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
8636 }
8637 uint32_t const offJmpFixup = off;
8638    off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
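    /* The generated sequence is roughly the following (AMD64 flavour, shown for illustration only):
     *      test    byte [rbp - disp], 0ffh    ; or: test <reg8>, 0ffh when the variable is in a register
     *      jz      .done                      ; bUnmapInfo == 0, nothing to unmap
     *      ... load arguments and call the unmap helper ...
     *  .done:
     */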
8639
8640 /*
8641 * Call the unmap helper function.
8642 */
8643#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
8644 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8645#else
8646 RT_NOREF(idxInstr);
8647#endif
8648
8649 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
8650 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
8651 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8652
8653 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8654 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8655
8656 /* Done setting up parameters, make the call. */
8657 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8658
8659    /* The bUnmapInfo variable is implicitly freed by these MCs. */
8660 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
8661
8662 /*
8663 * Done, just fixup the jump for the non-call case.
8664 */
8665 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
8666
8667 return off;
8668}
8669
8670
8671
8672/*********************************************************************************************************************************
8673* State and Exceptions *
8674*********************************************************************************************************************************/
8675
8676#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8677#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8678
8679#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8680#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8681#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8682
8683#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8684#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8685#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
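/* Note: All the prepare/actualize MCs above currently expand to the same emitter, which is
 * still a stub (see iemNativeEmitPrepareFpuForUse below). */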
8686
8687
8688DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
8689{
8690 /** @todo this needs a lot more work later. */
8691 RT_NOREF(pReNative, fForChange);
8692 return off;
8693}
8694
8695
8696
8697/*********************************************************************************************************************************
8698* Emitters for FPU related operations. *
8699*********************************************************************************************************************************/
8700
8701#define IEM_MC_FETCH_FCW(a_u16Fcw) \
8702 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
8703
8704/** Emits code for IEM_MC_FETCH_FCW. */
8705DECL_INLINE_THROW(uint32_t)
8706iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
8707{
8708 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8709 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8710
8711 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8712
8713 /* Allocate a temporary FCW register. */
8714 /** @todo eliminate extra register */
8715 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
8716 kIemNativeGstRegUse_ReadOnly);
8717
8718 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
8719
8720 /* Free but don't flush the FCW register. */
8721 iemNativeRegFreeTmp(pReNative, idxFcwReg);
8722 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8723
8724 return off;
8725}
8726
8727
8728#define IEM_MC_FETCH_FSW(a_u16Fsw) \
8729 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
8730
8731/** Emits code for IEM_MC_FETCH_FSW. */
8732DECL_INLINE_THROW(uint32_t)
8733iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
8734{
8735 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8736 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8737
8738 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
8739 /* Allocate a temporary FSW register. */
8740 /** @todo eliminate extra register */
8741 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
8742 kIemNativeGstRegUse_ReadOnly);
8743
8744 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
8745
8746 /* Free but don't flush the FSW register. */
8747 iemNativeRegFreeTmp(pReNative, idxFswReg);
8748 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8749
8750 return off;
8751}
8752
8753
8754
8755#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8756
8757
8758/*********************************************************************************************************************************
8759* Emitters for SSE/AVX specific operations. *
8760*********************************************************************************************************************************/
8761
8762#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
8763 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
8764
8765/** Emits code for IEM_MC_COPY_XREG_U128. */
8766DECL_INLINE_THROW(uint32_t)
8767iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
8768{
8769    /* This is a nop if the source and destination registers are the same. */
8770 if (iXRegDst != iXRegSrc)
8771 {
8772 /* Allocate destination and source register. */
8773 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
8774 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
8775 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
8776 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8777
8778 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
8779
8780 /* Free but don't flush the source and destination register. */
8781 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8782 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8783 }
8784
8785 return off;
8786}
8787
8788
8789#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
8790 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
8791
8792/** Emits code for IEM_MC_FETCH_XREG_U128. */
8793DECL_INLINE_THROW(uint32_t)
8794iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
8795{
8796 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8797 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
8798
8799 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8800 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8801
8802 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
8803
8804 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
8805
8806 /* Free but don't flush the source register. */
8807 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8808 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
8809
8810 return off;
8811}
8812
8813
8814#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
8815 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
8816
8817#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
8818 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
8819
8820/** Emits code for IEM_MC_FETCH_XREG_U64. */
8821DECL_INLINE_THROW(uint32_t)
8822iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
8823{
8824 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8825 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
8826
8827 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8828 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8829
8830 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8831 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8832
8833 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
8834
8835 /* Free but don't flush the source register. */
8836 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8837 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8838
8839 return off;
8840}
8841
8842
8843#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
8844 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
8845
8846#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
8847 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
8848
8849/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
8850DECL_INLINE_THROW(uint32_t)
8851iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
8852{
8853 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8854 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
8855
8856 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8857 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8858
8859 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8860 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8861
8862 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
8863
8864 /* Free but don't flush the source register. */
8865 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8866 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8867
8868 return off;
8869}
8870
8871
8872#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
8873 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
8874
8875/** Emits code for IEM_MC_FETCH_XREG_U16. */
8876DECL_INLINE_THROW(uint32_t)
8877iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
8878{
8879 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8880 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
8881
8882 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8883 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8884
8885 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8886 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8887
8888 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
8889
8890 /* Free but don't flush the source register. */
8891 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8892 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8893
8894 return off;
8895}
8896
8897
8898#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
8899 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
8900
8901/** Emits code for IEM_MC_FETCH_XREG_U8. */
8902DECL_INLINE_THROW(uint32_t)
8903iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
8904{
8905 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8906 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
8907
8908 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8909 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8910
8911 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8912 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8913
8914 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
8915
8916 /* Free but don't flush the source register. */
8917 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8918 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8919
8920 return off;
8921}
8922
8923
8924#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
8925 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
8926
8927AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
8928#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
8929 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
8930
8931
8932/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
8933DECL_INLINE_THROW(uint32_t)
8934iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8935{
8936 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8937 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8938
8939 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8940 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
8941 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
8942
8943 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
8944
8945 /* Free but don't flush the source register. */
8946 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8947 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8948
8949 return off;
8950}
8951
8952
8953#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
8954 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
8955
8956#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
8957 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
8958
8959#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
8960 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
8961
8962#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
8963 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
8964
8965#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
8966 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
8967
8968#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
8969 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
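/* The element index is given in units of the stored width: a_iQWord is 0..1, a_iDWord 0..3,
 * a_iWord 0..7 and a_iByte 0..15, while the R32/R64 variants always target element 0. */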
8970
8971/** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8. */
8972DECL_INLINE_THROW(uint32_t)
8973iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
8974 uint8_t cbLocal, uint8_t iElem)
8975{
8976 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8977 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
8978
8979#ifdef VBOX_STRICT
8980 switch (cbLocal)
8981 {
8982 case sizeof(uint64_t): Assert(iElem < 2); break;
8983 case sizeof(uint32_t): Assert(iElem < 4); break;
8984 case sizeof(uint16_t): Assert(iElem < 8); break;
8985 case sizeof(uint8_t): Assert(iElem < 16); break;
8986 default: AssertFailed();
8987 }
8988#endif
8989
8990 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8991 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
8992 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
8993
8994 switch (cbLocal)
8995 {
8996 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
8997 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
8998 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
8999 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9000 default: AssertFailed();
9001 }
9002
9003 /* Free but don't flush the source register. */
9004 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9005 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9006
9007 return off;
9008}
9009
9010
9011#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
9012 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
9013
9014/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
9015DECL_INLINE_THROW(uint32_t)
9016iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
9017{
9018 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9019 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9020
9021 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9022 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
9023 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9024
9025    /* Zero the vector register first, then store the 64-bit value into the low 64 bits. */
9026 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
9027 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
9028
9029 /* Free but don't flush the source register. */
9030 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9031 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9032
9033 return off;
9034}
9035
9036
9037#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
9038 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
9039
9040/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
9041DECL_INLINE_THROW(uint32_t)
9042iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
9043{
9044 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9045 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9046
9047 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9048 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
9049 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9050
9051 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
9052 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
9053 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
9054
9055 /* Free but don't flush the source register. */
9056 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9057 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9058
9059 return off;
9060}
9061
9062
9063#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
9064 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
9065
9066/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
9067DECL_INLINE_THROW(uint32_t)
9068iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
9069 uint8_t idxSrcVar, uint8_t iDwSrc)
9070{
9071 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9072 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9073
9074 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9075 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
9076 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9077
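    /* Copy dword iDwSrc of the source value into dword iDwDst of the XMM register,
       going through the fixed temporary GPR. */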
9078 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
9079 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
9080
9081 /* Free but don't flush the destination register. */
9082 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9083 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9084
9085 return off;
9086}
9087
9088
9089#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
9090 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
9091
9092/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
9093DECL_INLINE_THROW(uint32_t)
9094iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
9095{
9096 /*
9097     * The iYRegSrc == iYRegDst case needs to be treated differently here, because if iYRegDst gets allocated first for the full write
9098     * it won't load the actual value from CPUMCTX. When iYRegSrc is allocated afterwards, it gets duplicated from the already
9099     * allocated host register for iYRegDst, which contains garbage. This would be caught by the guest register value checking in debug builds.
9100 */
9101 if (iYRegDst != iYRegSrc)
9102 {
9103 /* Allocate destination and source register. */
9104 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9105 kIemNativeGstSimdRegLdStSz_256,
9106 kIemNativeGstRegUse_ForFullWrite);
9107 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9108 kIemNativeGstSimdRegLdStSz_Low128,
9109 kIemNativeGstRegUse_ReadOnly);
9110
9111 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9112 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9113
9114 /* Free but don't flush the source and destination register. */
9115 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9116 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9117 }
9118 else
9119 {
9120        /* This effectively only clears the upper 128 bits of the register. */
9121 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9122 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
9123
9124 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
9125
9126 /* Free but don't flush the destination register. */
9127 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
9128 }
9129
9130 return off;
9131}
9132
9133
9134#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
9135 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
9136
9137/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
9138DECL_INLINE_THROW(uint32_t)
9139iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
9140{
9141 /*
9142     * The iYRegSrc == iYRegDst case needs to be treated differently here, because if iYRegDst gets allocated first for the full write
9143     * it won't load the actual value from CPUMCTX. When iYRegSrc is allocated afterwards, it gets duplicated from the already
9144     * allocated host register for iYRegDst, which contains garbage. This would be caught by the guest register value checking in debug builds.
9145     * Here, iYRegSrc == iYRegDst would effectively only clear the upper 256 bits of a ZMM register, which we don't support yet, so this is just a nop.
9146 */
9147 if (iYRegDst != iYRegSrc)
9148 {
9149 /* Allocate destination and source register. */
9150 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9151 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
9152 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9153 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9154
9155 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9156
9157 /* Free but don't flush the source and destination register. */
9158 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9159 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9160 }
9161
9162 return off;
9163}
9164
9165
9166#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
9167 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
9168
9169/** Emits code for IEM_MC_FETCH_YREG_U128. */
9170DECL_INLINE_THROW(uint32_t)
9171iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
9172{
9173 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9174 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9175
9176 Assert(iDQWord <= 1);
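    /* iDQWord selects the 128-bit half: 0 is the low (XMM) half, 1 is the upper half of the YMM register. */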
9177 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9178 iDQWord == 1
9179 ? kIemNativeGstSimdRegLdStSz_High128
9180 : kIemNativeGstSimdRegLdStSz_Low128,
9181 kIemNativeGstRegUse_ReadOnly);
9182
9183 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9184 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9185
9186 if (iDQWord == 1)
9187 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9188 else
9189 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9190
9191 /* Free but don't flush the source register. */
9192 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9193 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9194
9195 return off;
9196}
9197
9198
9199#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
9200 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
9201
9202/** Emits code for IEM_MC_FETCH_YREG_U64. */
9203DECL_INLINE_THROW(uint32_t)
9204iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
9205{
9206 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9207 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9208
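    /* iQWord indexes the four 64-bit lanes of the YMM register; lanes 2 and 3 live in the upper 128-bit half. */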
9209 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9210 iQWord >= 2
9211 ? kIemNativeGstSimdRegLdStSz_High128
9212 : kIemNativeGstSimdRegLdStSz_Low128,
9213 kIemNativeGstRegUse_ReadOnly);
9214
9215 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9216 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9217
9218 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9219
9220 /* Free but don't flush the source register. */
9221 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9222 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9223
9224 return off;
9225}
9226
9227
9228#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
9229 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
9230
9231/** Emits code for IEM_MC_FETCH_YREG_U32. */
9232DECL_INLINE_THROW(uint32_t)
9233iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
9234{
9235 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9236 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9237
9238 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9239 iDWord >= 4
9240 ? kIemNativeGstSimdRegLdStSz_High128
9241 : kIemNativeGstSimdRegLdStSz_Low128,
9242 kIemNativeGstRegUse_ReadOnly);
9243
9244 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9245 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9246
9247 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9248
9249 /* Free but don't flush the source register. */
9250 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9251 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9252
9253 return off;
9254}
9255
9256
9257#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
9258 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
9259
9260/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
9261DECL_INLINE_THROW(uint32_t)
9262iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
9263{
9264 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9265 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
9266
9267 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
9268
9269 /* Free but don't flush the register. */
9270 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
9271
9272 return off;
9273}
9274
9275
9276#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
9277 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
9278
9279/** Emits code for IEM_MC_STORE_YREG_U128. */
9280DECL_INLINE_THROW(uint32_t)
9281iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
9282{
9283 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9284 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9285
9286 Assert(iDQword <= 1);
9287 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9288 iDQword == 0
9289 ? kIemNativeGstSimdRegLdStSz_Low128
9290 : kIemNativeGstSimdRegLdStSz_High128,
9291 kIemNativeGstRegUse_ForFullWrite);
9292
9293 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9294
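    /* iDQword 0 writes the low (XMM) half, 1 writes the upper half; the other half is left untouched. */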
9295 if (iDQword == 0)
9296 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9297 else
9298 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
9299
9300 /* Free but don't flush the source register. */
9301 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9302 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9303
9304 return off;
9305}
9306
9307
9308#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
9309 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
9310
9311/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
9312DECL_INLINE_THROW(uint32_t)
9313iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9314{
9315 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9316 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9317
9318 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9319 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9320
9321 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9322
9323 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9324 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9325
9326 /* Free but don't flush the source register. */
9327 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9328 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9329
9330 return off;
9331}
9332
9333
9334#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
9335 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
9336
9337/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
9338DECL_INLINE_THROW(uint32_t)
9339iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9340{
9341 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9342 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
9343
9344 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9345 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9346
9347 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9348
9349 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9350 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9351
9352 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9353 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9354
9355 return off;
9356}
9357
9358
9359#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
9360 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
9361
9362/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
9363DECL_INLINE_THROW(uint32_t)
9364iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9365{
9366 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9367 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
9368
9369 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9370 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9371
9372 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9373
9374 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9375 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9376
9377 /* Free but don't flush the source register. */
9378 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9379 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9380
9381 return off;
9382}
9383
9384
9385#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
9386 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
9387
9388/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
9389DECL_INLINE_THROW(uint32_t)
9390iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9391{
9392 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9393 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9394
9395 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9396 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9397
9398 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9399
9400 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9401 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9402
9403 /* Free but don't flush the source register. */
9404 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9405 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9406
9407 return off;
9408}
9409
9410
9411#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
9412 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
9413
9414/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
9415DECL_INLINE_THROW(uint32_t)
9416iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9417{
9418 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9419 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9420
9421 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9422 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9423
9424 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9425
9426 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9427 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9428
9429 /* Free but don't flush the source register. */
9430 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9431 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9432
9433 return off;
9434}
9435
9436
9437#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
9438 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
9439
9440/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
9441DECL_INLINE_THROW(uint32_t)
9442iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9443{
9444 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9445 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
9446
9447 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9448 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9449
9450 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9451
9452 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9453
9454 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9455 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9456
9457 return off;
9458}
9459
9460
9461#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
9462 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
9463
9464/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
9465DECL_INLINE_THROW(uint32_t)
9466iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9467{
9468 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9469 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
9470
9471 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9472 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9473
9474 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9475
9476 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9477
9478 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9479 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9480
9481 return off;
9482}
9483
9484
9485#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
9486 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
9487
9488/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
9489DECL_INLINE_THROW(uint32_t)
9490iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9491{
9492 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9493 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9494
9495 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9496 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9497
9498 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9499
9500 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9501
9502 /* Free but don't flush the source register. */
9503 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9504 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9505
9506 return off;
9507}
9508
9509
9510#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
9511 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
9512
9513/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
9514DECL_INLINE_THROW(uint32_t)
9515iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9516{
9517 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9518 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9519
9520 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9521 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9522
9523 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9524
9525 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9526
9527 /* Free but don't flush the source register. */
9528 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9529 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9530
9531 return off;
9532}
9533
9534
9535#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
9536 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
9537
9538/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
9539DECL_INLINE_THROW(uint32_t)
9540iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9541{
9542 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9543 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9544
9545 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9546 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9547
9548 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9549
9550 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
9551
9552 /* Free but don't flush the source register. */
9553 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9554 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9555
9556 return off;
9557}
9558
9559
9560#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
9561 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
9562
9563/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
9564DECL_INLINE_THROW(uint32_t)
9565iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9566{
9567 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9568 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9569
9570 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9571 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9572
9573 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9574
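    /* Zero the whole 256-bit register first, then write the 32-bit value into dword 0,
       i.e. the value ends up zero extended to the full vector width. */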
9575 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
9576 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
9577
9578 /* Free but don't flush the source register. */
9579 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9580 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9581
9582 return off;
9583}
9584
9585
9586#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
9587 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
9588
9589/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
9590DECL_INLINE_THROW(uint32_t)
9591iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9592{
9593 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9594 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9595
9596 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9597 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9598
9599 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9600
9601 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
9602 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
9603
9604 /* Free but don't flush the source register. */
9605 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9606 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9607
9608 return off;
9609}
9610
9611
9612#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
9613 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
9614
9615/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
9616DECL_INLINE_THROW(uint32_t)
9617iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
9618{
9619 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9620 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9621
9622 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9623 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9624 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
9625 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9626 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9627
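    /* Resulting layout: dst[63:0] = u64Local, dst[127:64] = SrcHx[127:64], dst[255:128] = 0. */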
9628 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
9629 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
9630 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9631
9632 /* Free but don't flush the source and destination registers. */
9633 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
9634 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9635 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9636
9637 return off;
9638}
9639
9640
9641#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
9642 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
9643
9644/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
9645DECL_INLINE_THROW(uint32_t)
9646iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
9647{
9648 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9649 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9650
9651 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9652 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9653 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
9654 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9655 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9656
9657 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
9658 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
9659 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9660
9661 /* Free but don't flush the source and destination registers. */
9662 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
9663 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9664 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9665
9666 return off;
9667}
9668
9669
9670#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
9671 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
9672
9673
9674/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
9675DECL_INLINE_THROW(uint32_t)
9676iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
9677{
9678 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9679 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
9680
9681 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
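    /* Each bit set in bImm8Mask (bits 0..3) zeroes the corresponding dword; e.g. a mask of 0x3
       clears the low 64 bits while leaving the upper quadword of the XMM register untouched. */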
9682 if (bImm8Mask & RT_BIT(0))
9683 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
9684 if (bImm8Mask & RT_BIT(1))
9685 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
9686 if (bImm8Mask & RT_BIT(2))
9687 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
9688 if (bImm8Mask & RT_BIT(3))
9689 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
9690
9691 /* Free but don't flush the destination register. */
9692 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9693
9694 return off;
9695}
9696
9697
9698#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
9699 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
9700
9701
9702/** Emits code for IEM_MC_FETCH_YREG_U256. */
9703DECL_INLINE_THROW(uint32_t)
9704iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
9705{
9706 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9707 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
9708
9709 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9710 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
9711 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9712
9713 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
9714
9715 /* Free but don't flush the source register. */
9716 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9717 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9718
9719 return off;
9720}
9721
9722
9723#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
9724 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
9725
9726
9727/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX. */
9728DECL_INLINE_THROW(uint32_t)
9729iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
9730{
9731 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9732 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
9733
9734 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9735 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
9736    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9737
9738 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
9739
9740 /* Free but don't flush the source register. */
9741 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9742 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9743
9744 return off;
9745}
9746
9747
9748#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
9749 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
9750
9751
9752/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
9753DECL_INLINE_THROW(uint32_t)
9754iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
9755 uint8_t idxSrcVar, uint8_t iDwSrc)
9756{
9757 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9758 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
9759
9760 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9761 iDwDst < 4
9762 ? kIemNativeGstSimdRegLdStSz_Low128
9763 : kIemNativeGstSimdRegLdStSz_High128,
9764 kIemNativeGstRegUse_ForUpdate);
9765    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9766 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
9767
9768 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
9769 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
9770
9771 /* Free but don't flush the source register. */
9772 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9773 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9774 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9775
9776 return off;
9777}
9778
9779
9780#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
9781 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
9782
9783
9784/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
9785DECL_INLINE_THROW(uint32_t)
9786iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
9787 uint8_t idxSrcVar, uint8_t iQwSrc)
9788{
9789 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9790 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
9791
9792 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9793 iQwDst < 2
9794 ? kIemNativeGstSimdRegLdStSz_Low128
9795 : kIemNativeGstSimdRegLdStSz_High128,
9796 kIemNativeGstRegUse_ForUpdate);
9797    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9798 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
9799
9800 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
9801 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
9802
9803    /* Free but don't flush the destination register; free the temporary GPR and release the source variable register. */
9804 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9805 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9806 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9807
9808 return off;
9809}
9810
9811
9812#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
9813 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
9814
9815
9816/** Emits code for IEM_MC_STORE_YREG_U64. */
9817DECL_INLINE_THROW(uint32_t)
9818iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
9819{
9820 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9821 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9822
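    /* Same low/high 128-bit half split as above; the 64-bit source variable is acquired in a GPR and stored straight into the selected qword lane, so no temporary register is needed. */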
9823 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9824 iQwDst < 2
9825 ? kIemNativeGstSimdRegLdStSz_Low128
9826 : kIemNativeGstSimdRegLdStSz_High128,
9827 kIemNativeGstRegUse_ForUpdate);
9828
9829 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9830
9831 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
9832
9833    /* Free but don't flush the destination register, and release the source variable register. */
9834 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9835 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9836
9837 return off;
9838}
9839
9840
9841#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
9842 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
9843
9844/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
9845DECL_INLINE_THROW(uint32_t)
9846iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
9847{
9848 RT_NOREF(pReNative, iYReg);
9849 /** @todo Needs to be implemented when support for AVX-512 is added. */
9850 return off;
9851}
9852
9853
9854
9855/*********************************************************************************************************************************
9856* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
9857*********************************************************************************************************************************/
9858
9859/**
9860 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
9861 */
9862DECL_INLINE_THROW(uint32_t)
9863iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs)
9864{
9865    /* Grab the MXCSR register; it must not be call-volatile or we would end up freeing it when setting up the call below. */
9866 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
9867 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
9868 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
9869
9870 /*
9871 * Need to do the FPU preparation.
9872 */
9873 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
9874
9875 /*
9876 * Do all the call setup and cleanup.
9877 */
9878 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS, false /*fFlushPendingWrites*/);
9879
9880 /*
9881 * Load the MXCSR register into the first argument and mask out the current exception flags.
9882 */
9883 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
9884 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
9885
9886 /*
9887 * Make the call.
9888 */
9889 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
9890
9891 /*
9892 * The updated MXCSR is in the return register.
9893 */
9894 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegMxCsr, IEMNATIVE_CALL_RET_GREG);
9895
9896#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9897    /* Write back the MXCSR register value (there is no delayed write-back for such registers at the moment). */
9898 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
9899#endif
9900 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9901
9902 return off;
9903}
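/*
 * Note: The sequence above implies that the SSE/AVX assembly helpers take the guest MXCSR
 *       (with the exception flags masked out) as their first argument and return the updated
 *       MXCSR value in the ordinary return register.  As an illustrative sketch only - the
 *       prototype shape and the iemAImpl_someSseWorker name are assumptions, not taken from
 *       this file - a two-operand worker would look roughly like:
 *
 *          uint32_t iemAImpl_someSseWorker(uint32_t fMxCsrIn, PX86XMMREG puDst, PCX86XMMREG puSrc);
 *
 * This hidden MXCSR argument is presumably what IEM_SSE_AIMPL_HIDDEN_ARGS and
 * IEM_AVX_AIMPL_HIDDEN_ARGS account for in the argument index asserts below.
 */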
9904
9905
9906#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
9907 off = iemNativeEmitCallSseAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
9908
9909/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
9910DECL_INLINE_THROW(uint32_t)
9911iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
9912{
9913 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
9914 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
9915 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2);
9916}
9917
9918
9919#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
9920 off = iemNativeEmitCallSseAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
9921
9922/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
9923DECL_INLINE_THROW(uint32_t)
9924iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
9925{
9926 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
9927 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
9928 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
9929 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3);
9930}
9931
9932
9933/*********************************************************************************************************************************
9934* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
9935*********************************************************************************************************************************/
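/*
 * These reuse iemNativeEmitCallSseAvxAImplCommon above; only the hidden argument count used in
 * the argument index asserts (IEM_AVX_AIMPL_HIDDEN_ARGS vs IEM_SSE_AIMPL_HIDDEN_ARGS) differs
 * from the SSE variants.
 */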
9936
9937#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
9938 off = iemNativeEmitCallAvxAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
9939
9940/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
9941DECL_INLINE_THROW(uint32_t)
9942iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
9943{
9944 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
9945 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
9946 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2);
9947}
9948
9949
9950#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
9951 off = iemNativeEmitCallAvxAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
9952
9953/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
9954DECL_INLINE_THROW(uint32_t)
9955iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
9956{
9957 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
9958 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
9959 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
9960 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3);
9961}
9962#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
9963
9964
9965/*********************************************************************************************************************************
9966* Include instruction emitters. *
9967*********************************************************************************************************************************/
9968#include "target-x86/IEMAllN8veEmit-x86.h"
9969