VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@105834

Last change on this file since 105834 was 105818, checked in by vboxsync, 3 months ago

VMM/IEM: Record whether any of the branches of an IEM_MC_IF_XXX/IEM_MC_ELSE/IEM_MC_ENDIF block exits the TB and make use of that to skip the recompiler state consolidation in the IEM_MC_ENDIF handler (iemNativeEmitEndIf). This is a preparatory step for addressing todo 4. bugref:10720 bugref:10373

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 509.9 KB
1/* $Id: IEMAllN8veRecompFuncs.h 105818 2024-08-22 14:19:39Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down the configs here to avoid wasting time on unused ones.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81#if defined(IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS) && !defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
82# error "IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS requires IEMNATIVE_WITH_SIMD_REG_ALLOCATOR"
83#endif
84
85
86/*********************************************************************************************************************************
87* Code emitters for flushing pending guest register writes and sanity checks *
88*********************************************************************************************************************************/
89
90#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
91# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
92DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
93{
94 /* Compare the shadow with the context value; they should match. */
95 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
96 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
97 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
98 return off;
99}
100# endif
101#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
102
103/**
104 * Flushes delayed write of a specific guest register.
105 *
106 * This must be called prior to calling CImpl functions and any helpers that use
107 * the guest state (e.g. for raising exceptions).
108 *
109 * This optimization has not yet been implemented. The first target would be
110 * RIP updates, since these are the most common ones.
111 */
112DECL_INLINE_THROW(uint32_t)
113iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
114{
115#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
116 /* If it is ever possible to reference the PC register here, the writeback needs to be done first. */
117#endif
118
119#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
120#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
121 if ( enmClass == kIemNativeGstRegRef_EFlags
122 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
123 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
124#else
125 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
126#endif
127
128 if ( enmClass == kIemNativeGstRegRef_Gpr
129 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
130 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
131#endif
132
133#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
134 if ( enmClass == kIemNativeGstRegRef_XReg
135 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
136 {
137 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
138 /* Flush the shadows as the register needs to be reloaded (right now there is no guarantee that the referenced register doesn't change). */
139 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
140
141 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
142 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
143 }
144#endif
145 RT_NOREF(pReNative, enmClass, idxReg);
146 return off;
147}
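/* Illustrative sketch only (not part of the emitter set): how a caller could use the
   helper above to make sure a delayed write to a specific GPR reaches CPUMCTX before
   invoking code that reads the guest register directly.  The wrapper name is made up
   for this example; the register-reference class and GPR index are the real ones. */
#if 0
DECL_INLINE_THROW(uint32_t)
iemNativeExampleFlushRaxBeforeHelperCall(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    /* Flush a pending shadow-register write to RAX (GPR 0), if any. */
    off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, X86_GREG_xAX);
    return off;
}
#endif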
148
149
150
151/*********************************************************************************************************************************
152* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
153*********************************************************************************************************************************/
154
155#undef IEM_MC_BEGIN /* unused */
156#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
157 { \
158 Assert(pReNative->Core.bmVars == 0); \
159 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
160 Assert(pReNative->Core.bmStack == 0); \
161 pReNative->fMc = (a_fMcFlags); \
162 pReNative->fCImpl = (a_fCImplFlags); \
163 pReNative->cArgsX = (a_cArgsIncludingHidden)
164
165/** We have to get to the end in recompilation mode, as otherwise we won't
166 * generate code for all the IEM_MC_IF_XXX branches. */
167#define IEM_MC_END() \
168 iemNativeVarFreeAll(pReNative); \
169 } return off
170
171
172
173/*********************************************************************************************************************************
174* Native Emitter Support. *
175*********************************************************************************************************************************/
176
177#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
178
179#define IEM_MC_NATIVE_ELSE() } else {
180
181#define IEM_MC_NATIVE_ENDIF() } ((void)0)
182
183
184#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
185 off = a_fnEmitter(pReNative, off)
186
187#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
188 off = a_fnEmitter(pReNative, off, (a0))
189
190#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
191 off = a_fnEmitter(pReNative, off, (a0), (a1))
192
193#define IEM_MC_NATIVE_EMIT_2_EX(a_fnEmitter, a0, a1) \
194 off = a_fnEmitter(pReNative, off, pCallEntry->idxInstr, (a0), (a1))
195
196#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
197 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
198
199#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
200 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
201
202#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
203 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
204
205#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
206 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
207
208#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
209 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
210
211#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
212 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
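/* Illustrative sketch only: the shape in which an instruction body in the generated
   IEM_MC code would typically use the native-emitter block above.  The emitter function
   and fallback assembly helper named here are hypothetical; the RT_ARCH_VAL_XXX host
   masks come from iprt/cdefs.h. */
#if 0
        IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
            IEM_MC_NATIVE_EMIT_2(iemNativeEmitExampleAddWorker, u32Dst, u32Src);
        IEM_MC_NATIVE_ELSE()
            IEM_MC_CALL_VOID_AIMPL_2(iemAImpl_example_add_u32, pu32Dst, u32Src);
        IEM_MC_NATIVE_ENDIF();
#endif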
213
214
215#ifndef RT_ARCH_AMD64
216# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
217#else
218/** @note This is a naive approach that ASSUMES that the register isn't
219 * allocated, so it only works safely for the first allocation(s) in
220 * a MC block. */
221# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
222 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
223
224DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off);
225
226DECL_INLINE_THROW(uint32_t)
227iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
228{
229 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
230 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
231 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
232
233# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
234 /* Must flush the register if it holds pending writes. */
235 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
236 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
237 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
238# endif
239
240 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off);
241 return off;
242}
243
244#endif /* RT_ARCH_AMD64 */
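/* Illustrative sketch only: pinning a local to a specific AMD64 host register right
   after declaring it, e.g. a shift count that a subsequent hand-written emitter wants
   in CL/RCX.  Per the note above this is only safe very early in the MC block, before
   the allocator may have handed out the register.  The variable name is made up. */
#if 0
        IEM_MC_LOCAL(uint8_t, cShift);
        IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(cShift, X86_GREG_xCX);
#endif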
245
246
247
248/*********************************************************************************************************************************
249* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
250*********************************************************************************************************************************/
251
252#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
253 pReNative->fMc = 0; \
254 pReNative->fCImpl = (a_fFlags); \
255 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, \
256 a_cbInstr) /** @todo not used ... */
257
258
259#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
260 pReNative->fMc = 0; \
261 pReNative->fCImpl = (a_fFlags); \
262 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
263
264DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
265 uint8_t idxInstr, uint64_t a_fGstShwFlush,
266 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
267{
268 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
269}
270
271
272#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
273 pReNative->fMc = 0; \
274 pReNative->fCImpl = (a_fFlags); \
275 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
276 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
277
278DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
279 uint8_t idxInstr, uint64_t a_fGstShwFlush,
280 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
281{
282 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
283}
284
285
286#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
287 pReNative->fMc = 0; \
288 pReNative->fCImpl = (a_fFlags); \
289 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
290 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
291
292DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
293 uint8_t idxInstr, uint64_t a_fGstShwFlush,
294 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
295 uint64_t uArg2)
296{
297 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
298}
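/* Illustrative sketch only: how a threaded-function body would typically expand one of
   the deferral macros above, handing the whole instruction to a C implementation.  The
   CImpl worker, flag and flush mask shown here are hypothetical and for orientation only. */
#if 0
        IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(cbInstr, IEM_CIMPL_F_STATUS_FLAGS, RT_BIT_64(kIemNativeGstReg_EFlags),
                                             iemCImpl_example_worker, uImmArg);
#endif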
299
300
301
302/*********************************************************************************************************************************
303* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
304*********************************************************************************************************************************/
305
306/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
307 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
308DECL_INLINE_THROW(uint32_t)
309iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
310{
311 /*
312 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
313 * return with a special status code and make the execution loop deal with
314 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
315 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
316 * could continue w/o interruption, it probably will drop into the
317 * debugger, so it is not worth the effort of trying to service it here
318 * and we just lump it in with the handling of the others.
319 *
320 * To simplify the code and the register state management even more (wrt
321 * the immediate in the AND operation), we always update the flags and
322 * skip the conditional jump associated with the extra check.
323 */
324 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
325 <= UINT32_MAX);
326#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
327 AssertMsg( pReNative->idxCurCall == 0
328 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
329 IEMLIVENESSBIT_IDX_EFL_OTHER)),
330 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
331 IEMLIVENESSBIT_IDX_EFL_OTHER)));
332#endif
333
334 /*
335 * As this code can break out of the execution loop when jumping to the ReturnWithFlags
336 * label, any pending register writes must be flushed.
337 */
338 off = iemNativeRegFlushPendingWrites(pReNative, off);
339
340 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
341 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
342 true /*fSkipLivenessAssert*/);
343 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxEflReg,
344 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
345 kIemNativeLabelType_ReturnWithFlags);
346 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
347 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
348
349 /* Free but don't flush the EFLAGS register. */
350 iemNativeRegFreeTmp(pReNative, idxEflReg);
351
352 return off;
353}
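/* Rough shape of what the above emits on AMD64 (illustrative only; the exact encodings
   and register choice come from the emitter helpers and the register allocator):
       mov     reg, [pVCpu + offsetof(VMCPU, cpum.GstCtx.eflags)]   ; allocate EFLAGS shadow
       test    reg, X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK
       jnz     ReturnWithFlags                                      ; TB exit path
       and     reg, ~(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW)
       mov     [pVCpu + offsetof(VMCPU, cpum.GstCtx.eflags)], reg
*/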
354
355
356/** Helper for iemNativeEmitFinishInstructionWithStatus. */
357DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
358{
359 unsigned const offOpcodes = pCallEntry->offOpcode;
360 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
361 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
362 {
363 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
364 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
365 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
366 }
367 AssertFailedReturn(NIL_RTGCPHYS);
368}
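/* Worked example for the lookup above (made-up TB layout): with
   aRanges[0] = { .offOpcodes = 0, .cbOpcodes = 0x20, .offPhysPage = 0xff0 } and a call
   entry whose offOpcode is 0x12, the entry falls into range 0 and the function returns
   iemTbGetRangePhysPageAddr(pTb, 0) + 0x12 + 0xff0. */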
369
370
371/** The VINF_SUCCESS dummy. */
372template<int const a_rcNormal, bool const a_fIsJump>
373DECL_FORCE_INLINE_THROW(uint32_t)
374iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
375 int32_t const offJump)
376{
377 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
378 if (a_rcNormal != VINF_SUCCESS)
379 {
380#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
381 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
382#else
383 RT_NOREF_PV(pCallEntry);
384#endif
385
386 /* As this code returns from the TB, any pending register writes must be flushed. */
387 off = iemNativeRegFlushPendingWrites(pReNative, off);
388
389 /*
390 * If we're in a conditional, mark the current branch as exiting so we
391 * can disregard its state when we hit the IEM_MC_ENDIF.
392 */
393 uint8_t idxCondDepth = pReNative->cCondDepth;
394 if (idxCondDepth)
395 {
396 idxCondDepth--;
397 if (pReNative->aCondStack[idxCondDepth].fInElse)
398 pReNative->aCondStack[idxCondDepth].fElseExitTb = true;
399 else
400 pReNative->aCondStack[idxCondDepth].fIfExitTb = true;
401 }
402
403 /*
404 * Use the lookup table for getting to the next TB quickly.
405 * Note! In this code path there can only be one entry at present.
406 */
407 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
408 PCIEMTB const pTbOrg = pReNative->pTbOrg;
409 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
410 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
411
412#if 0
413 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
414 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
415 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
416 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
417 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
418
419 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreak);
420
421#else
422 /* Load the index as argument #1 for the helper call at the given label. */
423 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
424
425 /*
426 * Figure out the physical address of the current instruction and see
427 * whether the next instruction we're about to execute is in the same
428 * page, so we can optimistically skip TLB loading.
429 *
430 * - This is safe for all cases in FLAT mode.
431 * - In segmented modes it is complicated, given that a negative
432 * jump may underflow EIP and a forward jump may overflow or run into
433 * CS.LIM, triggering a #GP. The only thing we can get away with
434 * now at compile time is forward jumps w/o CS.LIM checks, since the
435 * lack of CS.LIM checks means we're good for the entire physical page
436 * we're executing on and another 15 bytes before we run into CS.LIM.
437 */
438 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
439# if 0 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. See also iemNativeHlpReturnBreakViaLookup. */
440 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS)
441# endif
442 )
443 {
444 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
445 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
446 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
447 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
448
449 {
450 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
451 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
452
453 /* Load the key lookup flags into the 2nd argument for the helper call.
454 - This is safe wrt CS limit checking since we're only here for FLAT modes.
455 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
456 interrupt shadow.
457 - The NMI inhibiting is more questionable, though... */
458 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
459 * Should we copy it into fExec to simplify this? OTOH, it's just a
460 * couple of extra instructions if EFLAGS are already in a register. */
461 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
462 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
463
464 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
465 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookup);
466 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithIrq);
467 }
468 }
469 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
470 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlb);
471 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq);
472#endif
473 }
474 return off;
475}
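/* Worked example for the same-page check above (made-up addresses): with
   GCPhysPcCurrent = 0x12345ff0, cbOpcode = 2 and offJump = +5, GCPhysPcNext is
   0x12345ff7.  Both shifted right by GUEST_PAGE_SHIFT give 0x12345, and the current
   instruction fits in the 0x10 bytes left in the page, so one of the TLB-less
   ReturnBreakViaLookup exits can be taken. */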
476
477
478#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
479 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
480 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
481
482#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
483 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
484 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
485 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
486
487/** Same as iemRegAddToRip64AndFinishingNoFlags. */
488DECL_INLINE_THROW(uint32_t)
489iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
490{
491#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
492# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
493 if (!pReNative->Core.offPc)
494 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
495# endif
496
497 /* Allocate a temporary PC register. */
498 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
499
500 /* Perform the addition and store the result. */
501 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
502 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
503
504 /* Free but don't flush the PC register. */
505 iemNativeRegFreeTmp(pReNative, idxPcReg);
506#endif
507
508#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
509 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
510
511 pReNative->Core.offPc += cbInstr;
512# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
513 off = iemNativePcAdjustCheck(pReNative, off);
514# endif
515 if (pReNative->cCondDepth)
516 off = iemNativeEmitPcWriteback(pReNative, off);
517 else
518 pReNative->Core.cInstrPcUpdateSkipped++;
519#endif
520
521 return off;
522}
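/* Illustrative behaviour of the above with IEMNATIVE_WITH_DELAYED_PC_UPDATING and no
   open conditional: three straight-line instructions of 3, 2 and 5 bytes merely raise
   Core.offPc to 10 and bump cInstrPcUpdateSkipped; the actual RIP store is emitted
   later by iemNativeEmitPcWriteback or by the pending-write flush on a TB exit. */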
523
524
525#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
526 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
527 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
528
529#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
530 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
531 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
532 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
533
534/** Same as iemRegAddToEip32AndFinishingNoFlags. */
535DECL_INLINE_THROW(uint32_t)
536iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
537{
538#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
539# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
540 if (!pReNative->Core.offPc)
541 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
542# endif
543
544 /* Allocate a temporary PC register. */
545 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
546
547 /* Perform the addition and store the result. */
548 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
549 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
550
551 /* Free but don't flush the PC register. */
552 iemNativeRegFreeTmp(pReNative, idxPcReg);
553#endif
554
555#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
556 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
557
558 pReNative->Core.offPc += cbInstr;
559# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
560 off = iemNativePcAdjustCheck(pReNative, off);
561# endif
562 if (pReNative->cCondDepth)
563 off = iemNativeEmitPcWriteback(pReNative, off);
564 else
565 pReNative->Core.cInstrPcUpdateSkipped++;
566#endif
567
568 return off;
569}
570
571
572#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
573 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
574 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
575
576#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
577 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
578 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
579 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
580
581/** Same as iemRegAddToIp16AndFinishingNoFlags. */
582DECL_INLINE_THROW(uint32_t)
583iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
584{
585#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
586# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
587 if (!pReNative->Core.offPc)
588 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
589# endif
590
591 /* Allocate a temporary PC register. */
592 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
593
594 /* Perform the addition and store the result. */
595 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
596 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
597 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
598
599 /* Free but don't flush the PC register. */
600 iemNativeRegFreeTmp(pReNative, idxPcReg);
601#endif
602
603#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
604 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
605
606 pReNative->Core.offPc += cbInstr;
607# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
608 off = iemNativePcAdjustCheck(pReNative, off);
609# endif
610 if (pReNative->cCondDepth)
611 off = iemNativeEmitPcWriteback(pReNative, off);
612 else
613 pReNative->Core.cInstrPcUpdateSkipped++;
614#endif
615
616 return off;
617}
618
619
620
621/*********************************************************************************************************************************
622* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
623*********************************************************************************************************************************/
624
625#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
626 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
627 (a_enmEffOpSize), pCallEntry->idxInstr); \
628 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
629
630#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
631 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
632 (a_enmEffOpSize), pCallEntry->idxInstr); \
633 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
634 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
635
636#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
637 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
638 IEMMODE_16BIT, pCallEntry->idxInstr); \
639 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
640
641#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
642 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
643 IEMMODE_16BIT, pCallEntry->idxInstr); \
644 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
645 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
646
647#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
648 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
649 IEMMODE_64BIT, pCallEntry->idxInstr); \
650 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
651
652#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
653 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
654 IEMMODE_64BIT, pCallEntry->idxInstr); \
655 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
656 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
657
658
659#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
660 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
661 (a_enmEffOpSize), pCallEntry->idxInstr); \
662 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
663
664#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
665 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
666 (a_enmEffOpSize), pCallEntry->idxInstr); \
667 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
668 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
669
670#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG(a_i16, a_cbInstr, a_rcNormal) \
671 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
672 IEMMODE_16BIT, pCallEntry->idxInstr); \
673 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
674
675#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
676 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
677 IEMMODE_16BIT, pCallEntry->idxInstr); \
678 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
679 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
680
681#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG(a_i32, a_cbInstr, a_rcNormal) \
682 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
683 IEMMODE_64BIT, pCallEntry->idxInstr); \
684 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
685
686#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
687 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
688 IEMMODE_64BIT, pCallEntry->idxInstr); \
689 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
690 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
691
692/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
693 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
694 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
695template<bool const a_fWithinPage>
696DECL_INLINE_THROW(uint32_t)
697iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
698 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
699{
700 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
701
702 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
703 off = iemNativeRegFlushPendingWrites(pReNative, off);
704
705#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
706 Assert(pReNative->Core.offPc == 0);
707
708 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
709#endif
710
711 /* Allocate a temporary PC register. */
712 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
713
714 /* Perform the addition. */
715 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
716
717 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
718 {
719 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't.
720 We can skip this if the target is within the same page. */
721 if (!a_fWithinPage)
722 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
723 }
724 else
725 {
726 /* Just truncate the result to 16-bit IP. */
727 Assert(enmEffOpSize == IEMMODE_16BIT);
728 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
729 }
730 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
731
732 /* Free but don't flush the PC register. */
733 iemNativeRegFreeTmp(pReNative, idxPcReg);
734
735 return off;
736}
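/* Worked example for the 16-bit operand-size path above (made-up values): with
   RIP = 0x1234, cbInstr = 2 and offDisp = -6, the addition yields 0x1230 and the
   iemNativeEmitClear16UpGpr call keeps only IP's low 16 bits before the store; no
   canonical check is emitted on that path. */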
737
738
739#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
740 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
741 (a_enmEffOpSize), pCallEntry->idxInstr); \
742 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
743
744#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
745 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
746 (a_enmEffOpSize), pCallEntry->idxInstr); \
747 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
748 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
749
750#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
751 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
752 IEMMODE_16BIT, pCallEntry->idxInstr); \
753 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
754
755#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
756 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
757 IEMMODE_16BIT, pCallEntry->idxInstr); \
758 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
759 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
760
761#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
762 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
763 IEMMODE_32BIT, pCallEntry->idxInstr); \
764 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
765
766#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
767 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
768 IEMMODE_32BIT, pCallEntry->idxInstr); \
769 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
770 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
771
772
773#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
774 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
775 (a_enmEffOpSize), pCallEntry->idxInstr); \
776 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
777
778#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
779 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
780 (a_enmEffOpSize), pCallEntry->idxInstr); \
781 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
782 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
783
784#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT(a_i16, a_cbInstr, a_rcNormal) \
785 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
786 IEMMODE_16BIT, pCallEntry->idxInstr); \
787 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
788
789#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
790 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
791 IEMMODE_16BIT, pCallEntry->idxInstr); \
792 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
793 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
794
795#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT(a_i32, a_cbInstr, a_rcNormal) \
796 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
797 IEMMODE_32BIT, pCallEntry->idxInstr); \
798 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
799
800#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
801 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
802 IEMMODE_32BIT, pCallEntry->idxInstr); \
803 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
804 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
805
806/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
807 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
808 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
809template<bool const a_fFlat>
810DECL_INLINE_THROW(uint32_t)
811iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
812 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
813{
814 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
815
816 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
817 off = iemNativeRegFlushPendingWrites(pReNative, off);
818
819#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
820 Assert(pReNative->Core.offPc == 0);
821
822 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
823#endif
824
825 /* Allocate a temporary PC register. */
826 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
827
828 /* Perform the addition. */
829 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
830
831 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
832 if (enmEffOpSize == IEMMODE_16BIT)
833 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
834
835 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
836 if (!a_fFlat)
837 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
838
839 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
840
841 /* Free but don't flush the PC register. */
842 iemNativeRegFreeTmp(pReNative, idxPcReg);
843
844 return off;
845}
846
847
848#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
849 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
850 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
851
852#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
853 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
854 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
855 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
856
857#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
858 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
859 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
860
861#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
862 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
863 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
864 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
865
866#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
867 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
868 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
869
870#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
871 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
872 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
873 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
874
875/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
876DECL_INLINE_THROW(uint32_t)
877iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
878 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
879{
880 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
881 off = iemNativeRegFlushPendingWrites(pReNative, off);
882
883#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
884 Assert(pReNative->Core.offPc == 0);
885
886 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
887#endif
888
889 /* Allocate a temporary PC register. */
890 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
891
892 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
893 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
894 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
895 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
896 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
897
898 /* Free but don't flush the PC register. */
899 iemNativeRegFreeTmp(pReNative, idxPcReg);
900
901 return off;
902}
903
904
905
906/*********************************************************************************************************************************
907* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
908*********************************************************************************************************************************/
909
910/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
911#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
912 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
913
914/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
915#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
916 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
917
918/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
919#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
920 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
921
922/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
923 * clears flags. */
924#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
925 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
926 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
927
928/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
929 * clears flags. */
930#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
931 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
932 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
933
934/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
935 * clears flags. */
936#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
937 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
938 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
939
940#undef IEM_MC_SET_RIP_U16_AND_FINISH
941
942
943/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
944#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
945 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
946
947/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
948#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
949 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
950
951/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
952 * clears flags. */
953#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
954 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
955 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
956
957/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
958 * and clears flags. */
959#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
960 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
961 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
962
963#undef IEM_MC_SET_RIP_U32_AND_FINISH
964
965
966/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
967#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
968 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
969
970/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
971 * and clears flags. */
972#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
973 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
974 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
975
976#undef IEM_MC_SET_RIP_U64_AND_FINISH
977
978
979/** Same as iemRegRipJumpU16AndFinishNoFlags,
980 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
981DECL_INLINE_THROW(uint32_t)
982iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
983 uint8_t idxInstr, uint8_t cbVar)
984{
985 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
986 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
987
988 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
989 off = iemNativeRegFlushPendingWrites(pReNative, off);
990
991#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
992 Assert(pReNative->Core.offPc == 0);
993
994 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
995#endif
996
997 /* Get a register with the new PC loaded from idxVarPc.
998 Note! This ASSUMES that the high bits of the GPR are zeroed. */
999 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
1000
1001 /* Check limit (may #GP(0) + exit TB). */
1002 if (!f64Bit)
1003/** @todo we can skip this test in FLAT 32-bit mode. */
1004 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1005 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1006 else if (cbVar > sizeof(uint32_t))
1007 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1008
1009 /* Store the result. */
1010 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1011
1012 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1013 /** @todo implicitly free the variable? */
1014
1015 return off;
1016}
1017
1018
1019
1020/*********************************************************************************************************************************
1021* Emitters for changing PC/RIP/EIP/IP with an indirect call jump (IEM_MC_IND_CALL_UXX_AND_FINISH) (requires stack emitters). *
1022*********************************************************************************************************************************/
1023
1024/** @todo These helpers naturally belong to the stack push API, but we already need them up here (we could of course
1025 * move this below the stack emitters, but then it would not be close to the rest of the PC/RIP handling...). */
1026DECL_FORCE_INLINE_THROW(uint32_t)
1027iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1028{
1029 /* Use16BitSp: */
1030#ifdef RT_ARCH_AMD64
1031 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1032 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1033#else
1034 /* sub regeff, regrsp, #cbMem */
1035 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
1036 /* and regeff, regeff, #0xffff */
1037 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1038 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
1039 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp into idxRegRsp bits 15:0. */
1040 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
1041#endif
1042 return off;
1043}
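/* Worked example for the 16-bit stack pointer path above (made-up values): with
   RSP bits 15:0 = 0x0002 and cbMem = 4, the subtraction wraps to 0xfffe; only bits
   15:0 of idxRegRsp are updated and idxRegEffSp ends up as 0x0000fffe for the
   subsequent TLB lookup / store. */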
1044
1045
1046DECL_FORCE_INLINE(uint32_t)
1047iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1048{
1049 /* Use32BitSp: */
1050 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
1051 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1052 return off;
1053}
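/* Informal reading of the cBitsVarAndFlat parameter used by the function below, based
   on the RT_BYTEx() accesses and strict asserts it contains: the first byte is the width
   of the pushed value in bits, the second byte is the flat-mode stack width in bits
   (0 = not flat), and the third byte is non-zero for segment-register pushes.  E.g.
   RT_MAKE_U32_FROM_U8(16, 64, 0, 0) means "push a 16-bit value with a flat 64-bit stack". */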
1054
1055
1056DECL_INLINE_THROW(uint32_t)
1057iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
1058 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
1059{
1060 /*
1061 * Assert sanity.
1062 */
1063#ifdef VBOX_STRICT
1064 if (RT_BYTE2(cBitsVarAndFlat) != 0)
1065 {
1066 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
1067 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
1068 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
1069 Assert( pfnFunction
1070 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1071 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
1072 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1073 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
1074 : UINT64_C(0xc000b000a0009000) ));
1075 }
1076 else
1077 Assert( pfnFunction
1078 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
1079 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
1080 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
1081 : UINT64_C(0xc000b000a0009000) ));
1082#endif
1083
1084#ifdef VBOX_STRICT
1085 /*
1086 * Check that the fExec flags we've got make sense.
1087 */
1088 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1089#endif
1090
1091 /*
1092 * To keep things simple we have to commit any pending writes first as we
1093 * may end up making calls.
1094 */
1095 /** @todo we could postpone this till we make the call and reload the
1096 * registers after returning from the call. Not sure if that's sensible or
1097 * not, though. */
1098 off = iemNativeRegFlushPendingWrites(pReNative, off);
1099
1100 /*
1101 * First we calculate the new RSP and the effective stack pointer value.
1102 * For 64-bit mode and flat 32-bit these two are the same.
1103 * (Code structure is very similar to that of PUSH)
1104 */
1105 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
1106 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
1107 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
1108 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
1109 ? cbMem : sizeof(uint16_t);
1110 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
1111 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1112 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1113 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1114 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1115 if (cBitsFlat != 0)
1116 {
1117 Assert(idxRegEffSp == idxRegRsp);
1118 Assert(cBitsFlat == 32 || cBitsFlat == 64);
1119 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1120 if (cBitsFlat == 64)
1121 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1122 else
1123 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1124 }
1125 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1126 {
1127 Assert(idxRegEffSp != idxRegRsp);
1128 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1129 kIemNativeGstRegUse_ReadOnly);
1130#ifdef RT_ARCH_AMD64
1131 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1132#else
1133 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1134#endif
1135 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1136 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1137 offFixupJumpToUseOtherBitSp = off;
1138 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1139 {
1140 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1141 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1142 }
1143 else
1144 {
1145 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1146 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1147 }
1148 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1149 }
1150 /* SpUpdateEnd: */
1151 uint32_t const offLabelSpUpdateEnd = off;
1152
1153 /*
1154 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1155 * we're skipping lookup).
1156 */
1157 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1158 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
1159 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1160 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1161 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1162 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1163 : UINT32_MAX;
1164 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1165
1166
1167 if (!TlbState.fSkip)
1168 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1169 else
1170 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1171
1172 /*
1173 * Use16BitSp:
1174 */
1175 if (cBitsFlat == 0)
1176 {
1177#ifdef RT_ARCH_AMD64
1178 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1179#else
1180 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1181#endif
1182 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1183 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1184 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1185 else
1186 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1187 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1188 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1189 }
1190
1191 /*
1192 * TlbMiss:
1193 *
1194 * Call helper to do the pushing.
1195 */
1196 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1197
1198#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1199 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1200#else
1201 RT_NOREF(idxInstr);
1202#endif
1203
1204 /* Save variables in volatile registers. */
1205 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1206 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1207 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1208 | (RT_BIT_32(idxRegPc));
1209 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1210
1211 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1212 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1213 {
1214 /* Swap them using ARG0 as temp register: */
1215 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1216 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1217 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1218 }
1219 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1220 {
1221 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1222 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1223
1224 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1225 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1226 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1227 }
1228 else
1229 {
1230 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1231 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1232
1233 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1234 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1235 }
1236
1237 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1238 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1239
1240 /* Done setting up parameters, make the call. */
1241 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1242
1243 /* Restore variables and guest shadow registers to volatile registers. */
1244 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1245 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1246
1247#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1248 if (!TlbState.fSkip)
1249 {
1250 /* end of TlbMiss - Jump to the done label. */
1251 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1252 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1253
1254 /*
1255 * TlbLookup:
1256 */
1257 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
1258 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1259
1260 /*
1261 * Emit code to do the actual storing / fetching.
1262 */
1263 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1264# ifdef IEM_WITH_TLB_STATISTICS
1265 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1266 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1267# endif
1268 switch (cbMemAccess)
1269 {
1270 case 2:
1271 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1272 break;
1273 case 4:
1274 if (!fIsIntelSeg)
1275 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1276 else
1277 {
1278                /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
1279                   PUSH FS in real mode, so we have to try to emulate that here.
1280 We borrow the now unused idxReg1 from the TLB lookup code here. */
1281 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
1282 kIemNativeGstReg_EFlags);
1283 if (idxRegEfl != UINT8_MAX)
1284 {
1285#ifdef RT_ARCH_AMD64
1286 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
1287 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1288 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1289#else
1290 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
1291 off, TlbState.idxReg1, idxRegEfl,
1292 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1293#endif
1294 iemNativeRegFreeTmp(pReNative, idxRegEfl);
1295 }
1296 else
1297 {
1298 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
1299 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
1300 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1301 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1302 }
1303 /* ASSUMES the upper half of idxRegPc is ZERO. */
1304 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegPc);
1305 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
1306 }
1307 break;
1308 case 8:
1309 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1310 break;
1311 default:
1312 AssertFailed();
1313 }
1314
1315 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1316 TlbState.freeRegsAndReleaseVars(pReNative);
1317
1318 /*
1319 * TlbDone:
1320 *
1321 * Commit the new RSP value.
1322 */
1323 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1324 }
1325#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1326
1327#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1328 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
1329#endif
1330 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1331 if (idxRegEffSp != idxRegRsp)
1332 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1333
1334 return off;
1335}
1336
1337
1338/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1339#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1340 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1341
1342/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1343 * clears flags. */
1344#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1345 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1346 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1347
1348/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1349#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1350 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1351
1352/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1353 * clears flags. */
1354#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1355 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1356 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1357
1358#undef IEM_MC_IND_CALL_U16_AND_FINISH
1359
1360
1361/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1362#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1363 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1364
1365/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1366 * clears flags. */
1367#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1368 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1369 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1370
1371#undef IEM_MC_IND_CALL_U32_AND_FINISH
1372
1373
1374/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1375 * an extra parameter, for use in 64-bit code. */
1376#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1377 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1378
1379
1380/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1381 * an extra parameter, for use in 64-bit code and we need to check and clear
1382 * flags. */
1383#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1384 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1385 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1386
1387#undef IEM_MC_IND_CALL_U64_AND_FINISH
1388
1389/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1390 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1391DECL_INLINE_THROW(uint32_t)
1392iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1393 uint8_t idxInstr, uint8_t cbVar)
1394{
1395 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1396 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1397
1398 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1399 off = iemNativeRegFlushPendingWrites(pReNative, off);
1400
1401#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1402 Assert(pReNative->Core.offPc == 0);
1403
1404 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1405#endif
1406
1407 /* Get a register with the new PC loaded from idxVarPc.
1408       Note! This ASSUMES that the high bits of the GPR are zeroed. */
1409 uint8_t const idxNewPcReg = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1410
1411 /* Check limit (may #GP(0) + exit TB). */
1412 if (!f64Bit)
1413/** @todo we can skip this test in FLAT 32-bit mode. */
1414 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1415 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1416 else if (cbVar > sizeof(uint32_t))
1417 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1418
1419#if 1
1420 /* Allocate a temporary PC register, we don't want it shadowed. */
1421 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1422 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1423#else
1424 /* Allocate a temporary PC register. */
1425 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1426 true /*fNoVolatileRegs*/);
1427#endif
1428
1429 /* Perform the addition and push the variable to the guest stack. */
1430 /** @todo Flat variants for PC32 variants. */
1431 switch (cbVar)
1432 {
1433 case sizeof(uint16_t):
1434 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1435 /* Truncate the result to 16-bit IP. */
1436 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1437 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1438 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1439 break;
1440 case sizeof(uint32_t):
1441 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1442 /** @todo In FLAT mode we can use the flat variant. */
1443 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1444 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1445 break;
1446 case sizeof(uint64_t):
1447 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1448 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1449 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1450 break;
1451 default:
1452 AssertFailed();
1453 }
1454
1455 /* RSP got changed, so do this again. */
1456 off = iemNativeRegFlushPendingWrites(pReNative, off);
1457
1458 /* Store the result. */
1459 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1460
1461#if 1
1462 /* Need to transfer the shadow information to the new RIP register. */
1463 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxNewPcReg, kIemNativeGstReg_Pc, off);
1464#else
1465 /* Sync the new PC. */
1466 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxNewPcReg);
1467#endif
1468 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1469 iemNativeRegFreeTmp(pReNative, idxPcReg);
1470    /** @todo implicitly free the variable? */
1471
1472 return off;
1473}
1474
1475
1476/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1477 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1478#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1479 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1480
1481/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1482 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
1483 * flags. */
1484#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
1485 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
1486 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1487
1488/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1489 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1490#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
1491 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1492
1493/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1494 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1495 * flags. */
1496#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
1497 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
1498 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1499
1500/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1501 * an extra parameter, for use in 64-bit code. */
1502#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
1503 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1504
1505/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1506 * an extra parameter, for use in 64-bit code and we need to check and clear
1507 * flags. */
1508#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
1509 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
1510 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1511
1512#undef IEM_MC_REL_CALL_S16_AND_FINISH
1513
1514/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1515 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1516DECL_INLINE_THROW(uint32_t)
1517iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
1518 uint8_t idxInstr)
1519{
1520 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1521 off = iemNativeRegFlushPendingWrites(pReNative, off);
1522
1523#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1524 Assert(pReNative->Core.offPc == 0);
1525
1526 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1527#endif
1528
1529 /* Allocate a temporary PC register. */
1530 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1531 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1532 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1533
1534 /* Calculate the new RIP. */
1535 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
1536 /* Truncate the result to 16-bit IP. */
1537 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
1538 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
1539 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
1540
1541 /* Truncate the result to 16-bit IP. */
1542 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
1543
1544 /* Check limit (may #GP(0) + exit TB). */
1545 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1546
1547    /* Push the return address to the guest stack. */
1548 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1549 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1550
1551 /* RSP got changed, so flush again. */
1552 off = iemNativeRegFlushPendingWrites(pReNative, off);
1553
1554 /* Store the result. */
1555 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1556
1557 /* Need to transfer the shadow information to the new RIP register. */
1558 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1559 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1560 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1561
1562 return off;
1563}
1564
1565
1566/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
1567 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1568#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
1569 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
1570
1571/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
1572 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1573 * flags. */
1574#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
1575 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
1576 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1577
1578#undef IEM_MC_REL_CALL_S32_AND_FINISH
1579
1580/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1581 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1582DECL_INLINE_THROW(uint32_t)
1583iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
1584 uint8_t idxInstr)
1585{
1586 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1587 off = iemNativeRegFlushPendingWrites(pReNative, off);
1588
1589#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1590 Assert(pReNative->Core.offPc == 0);
1591
1592 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1593#endif
1594
1595 /* Allocate a temporary PC register. */
1596 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1597 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1598 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1599
1600 /* Update the EIP to get the return address. */
1601 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
1602
1603    /* Load the address, add the displacement and check it against the CS segment limit, raising #GP(0) + exit TB if it's beyond the limit. */
1604 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
1605 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
1606 /** @todo we can skip this test in FLAT 32-bit mode. */
1607 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1608
1609    /* Push the return address to the guest stack. */
1610 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
1611 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1612 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1613
1614 /* RSP got changed, so do this again. */
1615 off = iemNativeRegFlushPendingWrites(pReNative, off);
1616
1617 /* Store the result. */
1618 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1619
1620 /* Need to transfer the shadow information to the new RIP register. */
1621 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1622 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1623 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1624
1625 return off;
1626}
1627
1628
1629/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
1630 * an extra parameter, for use in 64-bit code. */
1631#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
1632 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
1633
1634/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
1635 * an extra parameter, for use in 64-bit code and we need to check and clear
1636 * flags. */
1637#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
1638 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
1639 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1640
1641#undef IEM_MC_REL_CALL_S64_AND_FINISH
1642
1643/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1644 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1645DECL_INLINE_THROW(uint32_t)
1646iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
1647 uint8_t idxInstr)
1648{
1649 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1650 off = iemNativeRegFlushPendingWrites(pReNative, off);
1651
1652#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1653 Assert(pReNative->Core.offPc == 0);
1654
1655 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1656#endif
1657
1658 /* Allocate a temporary PC register. */
1659 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1660 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1661 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
1662
1663 /* Update the RIP to get the return address. */
1664 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
1665
1666 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1667 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
1668 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
1669 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
1670
1671    /* Push the return address to the guest stack. */
1672 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1673 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1674
1675 /* RSP got changed, so do this again. */
1676 off = iemNativeRegFlushPendingWrites(pReNative, off);
1677
1678 /* Store the result. */
1679 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1680
1681 /* Need to transfer the shadow information to the new RIP register. */
1682 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
1683 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
1684 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
1685
1686 return off;
1687}
1688
1689
1690/*********************************************************************************************************************************
1691* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters).                      *
1692*********************************************************************************************************************************/
1693
1694DECL_FORCE_INLINE_THROW(uint32_t)
1695iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
1696 uint16_t cbPopAdd, uint8_t idxRegTmp)
1697{
1698 /* Use16BitSp: */
1699#ifdef RT_ARCH_AMD64
1700 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1701 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1702 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
1703 RT_NOREF(idxRegTmp);
1704#elif defined(RT_ARCH_ARM64)
1705 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
1706 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
1707    /* add tmp, regrsp, #(cbMem + cbPopAdd) */
1708 uint16_t const cbCombined = cbMem + cbPopAdd;
1709 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
1710 if (cbCombined >= RT_BIT_32(12))
1711 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
1712 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
1713 /* and tmp, tmp, #0xffff */
1714 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1715 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
1716    /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
1717 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
1718#else
1719# error "Port me"
1720#endif
1721 return off;
1722}
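
/*
 * A quick worked example of the 16-bit SP pop helper above, with values chosen
 * purely for illustration: for a 'retn 4' (cbMem = 2, cbPopAdd = 4) starting
 * from RSP = 0x000000008765fffc, the return address is read at effective SP
 * 0xfffc and the new low word is (0xfffc + 2 + 4) & 0xffff = 0x0002, so RSP
 * becomes 0x0000000087650002 with bits 63:16 preserved.
 */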
1723
1724
1725DECL_FORCE_INLINE_THROW(uint32_t)
1726iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
1727 uint16_t cbPopAdd)
1728{
1729 /* Use32BitSp: */
1730 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1731 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
1732 return off;
1733}
1734
1735
1736/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
1737#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr) \
1738 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, IEMMODE_16BIT, pCallEntry->idxInstr)
1739
1740/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
1741#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1742 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
1743
1744/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
1745#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1746 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), true /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
1747
1748/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
1749 * clears flags. */
1750#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16Pop, a_cbInstr) \
1751 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr); \
1752 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1753
1754/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
1755 * clears flags. */
1756#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1757 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
1758 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1759
1760/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
1761 * clears flags. */
1762#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
1763 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
1764 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1765
1766/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
1767DECL_INLINE_THROW(uint32_t)
1768iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPop, bool f64Bit,
1769 IEMMODE enmEffOpSize, uint8_t idxInstr)
1770{
1771 RT_NOREF(cbInstr);
1772
1773#ifdef VBOX_STRICT
1774 /*
1775 * Check that the fExec flags we've got make sense.
1776 */
1777 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1778#endif
1779
1780 /*
1781 * To keep things simple we have to commit any pending writes first as we
1782 * may end up making calls.
1783 */
1784 off = iemNativeRegFlushPendingWrites(pReNative, off);
1785
1786 /*
1787     * Determine the effective stack pointer; for non-FLAT modes we also update RSP.
1788 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
1789 * directly as the effective stack pointer.
1790 * (Code structure is very similar to that of PUSH)
1791 *
1792 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
1793 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
1794 * aren't commonly used (or useful) and thus not in need of optimizing.
1795 *
1796     * Note! For non-flat modes the guest RSP is not allocated for update but rather for calculation,
1797     *       as the shadowed register would otherwise remain modified even if the return address raises
1798     *       a \#GP(0) for being outside the CS limit, leaving a wrong stack pointer value in the guest
1799     *       (see the near return testcase in bs3-cpu-basic-2). If no exception is thrown, the shadowing
1800     *       is transferred to the new register returned by iemNativeRegAllocTmpForGuestReg() at the end.
1801 */
1802 uint8_t const cbMem = enmEffOpSize == IEMMODE_64BIT
1803 ? sizeof(uint64_t)
1804 : enmEffOpSize == IEMMODE_32BIT
1805 ? sizeof(uint32_t)
1806 : sizeof(uint16_t);
1807 bool const fFlat = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) && enmEffOpSize != IEMMODE_16BIT; /* see note */
1808 uintptr_t const pfnFunction = fFlat
1809 ? enmEffOpSize == IEMMODE_64BIT
1810 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
1811 : (uintptr_t)iemNativeHlpStackFlatFetchU32
1812 : enmEffOpSize == IEMMODE_32BIT
1813 ? (uintptr_t)iemNativeHlpStackFetchU32
1814 : (uintptr_t)iemNativeHlpStackFetchU16;
1815 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1816 fFlat ? kIemNativeGstRegUse_ForUpdate
1817 : kIemNativeGstRegUse_Calculation,
1818 true /*fNoVolatileRegs*/);
1819 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1820 /** @todo can do a better job picking the register here. For cbMem >= 4 this
1821 * will be the resulting register value. */
1822 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
1823
1824 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1825 if (fFlat)
1826 Assert(idxRegEffSp == idxRegRsp);
1827 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1828 {
1829 Assert(idxRegEffSp != idxRegRsp);
1830 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1831 kIemNativeGstRegUse_ReadOnly);
1832#ifdef RT_ARCH_AMD64
1833 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1834#else
1835 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1836#endif
1837 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1838 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1839 offFixupJumpToUseOtherBitSp = off;
1840 if (enmEffOpSize == IEMMODE_32BIT)
1841 {
1842 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1843 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
1844 }
1845 else
1846 {
1847 Assert(enmEffOpSize == IEMMODE_16BIT);
1848 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1849 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
1850 idxRegMemResult);
1851 }
1852 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1853 }
1854 /* SpUpdateEnd: */
1855 uint32_t const offLabelSpUpdateEnd = off;
1856
1857 /*
1858 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1859 * we're skipping lookup).
1860 */
1861 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
1862 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
1863 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1864 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1865 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1866 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1867 : UINT32_MAX;
1868
1869 if (!TlbState.fSkip)
1870 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1871 else
1872 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1873
1874 /*
1875 * Use16BitSp:
1876 */
1877 if (!fFlat)
1878 {
1879#ifdef RT_ARCH_AMD64
1880 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1881#else
1882 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
1883#endif
1884 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1885 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1886 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
1887 idxRegMemResult);
1888 else
1889 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
1890 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1891 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1892 }
1893
1894 /*
1895 * TlbMiss:
1896 *
1897     * Call helper to do the popping.
1898 */
1899 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1900
1901#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1902 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1903#else
1904 RT_NOREF(idxInstr);
1905#endif
1906
1907 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1908 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1909 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
1910 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1911
1912
1913 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
1914 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1915 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1916
1917 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1918 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1919
1920 /* Done setting up parameters, make the call. */
1921 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1922
1923 /* Move the return register content to idxRegMemResult. */
1924 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
1925 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
1926
1927 /* Restore variables and guest shadow registers to volatile registers. */
1928 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1929 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1930
1931#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1932 if (!TlbState.fSkip)
1933 {
1934 /* end of TlbMiss - Jump to the done label. */
1935 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1936 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1937
1938 /*
1939 * TlbLookup:
1940 */
1941 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
1942 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1943
1944 /*
1945     * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
1946 */
1947 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1948# ifdef IEM_WITH_TLB_STATISTICS
1949 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1950 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1951# endif
1952 switch (cbMem)
1953 {
1954 case 2:
1955 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1956 break;
1957 case 4:
1958 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1959 break;
1960 case 8:
1961 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
1962 break;
1963 default:
1964 AssertFailed();
1965 }
1966
1967 TlbState.freeRegsAndReleaseVars(pReNative);
1968
1969 /*
1970 * TlbDone:
1971 *
1972     * Set the new RSP value (FLAT accesses need to calculate it first) and
1973 * commit the popped register value.
1974 */
1975 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1976 }
1977#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1978
1979 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
1980 if (!f64Bit)
1981/** @todo we can skip this test in FLAT 32-bit mode. */
1982 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
1983 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1984 else if (enmEffOpSize == IEMMODE_64BIT)
1985 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
1986
1987 /* Complete RSP calculation for FLAT mode. */
1988 if (idxRegEffSp == idxRegRsp)
1989 {
1990 if (enmEffOpSize == IEMMODE_64BIT)
1991 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPop);
1992 else
1993 {
1994 Assert(enmEffOpSize == IEMMODE_32BIT);
1995 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPop);
1996 }
1997 }
1998
1999 /* Commit the result and clear any current guest shadows for RIP. */
2000 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
2001 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2002 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
2003
2004 /* Need to transfer the shadowing information to the host register containing the updated value now. */
2005 if (!fFlat)
2006 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
2007
2008 iemNativeRegFreeTmp(pReNative, idxRegRsp);
2009 if (idxRegEffSp != idxRegRsp)
2010 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
2011 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
2012 return off;
2013}
2014
2015
2016/*********************************************************************************************************************************
2017* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
2018*********************************************************************************************************************************/
2019
2020#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
2021 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2022
2023/**
2024 * Emits code to check if a \#NM exception should be raised.
2025 *
2026 * @returns New code buffer offset, UINT32_MAX on failure.
2027 * @param pReNative The native recompile state.
2028 * @param off The code buffer offset.
2029 * @param idxInstr The current instruction.
2030 */
2031DECL_INLINE_THROW(uint32_t)
2032iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2033{
2034#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2035 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
2036
2037 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
2038 {
2039#endif
2040 /*
2041 * Make sure we don't have any outstanding guest register writes as we may
2042         * raise an #NM and all guest registers must be up to date in CPUMCTX.
2043 */
2044 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2045 off = iemNativeRegFlushPendingWrites(pReNative, off);
2046
2047#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2048 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2049#else
2050 RT_NOREF(idxInstr);
2051#endif
2052
2053 /* Allocate a temporary CR0 register. */
2054 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2055 kIemNativeGstRegUse_ReadOnly);
2056
2057 /*
2058         * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
2059 * return raisexcpt();
2060 */
2061 /* Test and jump. */
2062 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS,
2063 kIemNativeLabelType_RaiseNm);
2064
2065 /* Free but don't flush the CR0 register. */
2066 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2067
2068#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2069 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
2070 }
2071 else
2072 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
2073#endif
2074
2075 return off;
2076}
2077
2078
2079#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
2080 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2081
2082/**
2083 * Emits code to check if a \#NM exception should be raised.
2084 *
2085 * @returns New code buffer offset, UINT32_MAX on failure.
2086 * @param pReNative The native recompile state.
2087 * @param off The code buffer offset.
2088 * @param idxInstr The current instruction.
2089 */
2090DECL_INLINE_THROW(uint32_t)
2091iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2092{
2093#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2094 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
2095
2096 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
2097 {
2098#endif
2099 /*
2100 * Make sure we don't have any outstanding guest register writes as we may
2101         * raise an #NM and all guest registers must be up to date in CPUMCTX.
2102 */
2103 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2104 off = iemNativeRegFlushPendingWrites(pReNative, off);
2105
2106#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2107 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2108#else
2109 RT_NOREF(idxInstr);
2110#endif
2111
2112 /* Allocate a temporary CR0 register. */
2113 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2114 kIemNativeGstRegUse_Calculation);
2115
2116 /*
2117         * if ((cr0 & (X86_CR0_MP | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))
2118 * return raisexcpt();
2119 */
2120 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2121 /* Test and jump. */
2122 off = iemNativeEmitTestIfGpr32EqualsImmAndTbExit(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS,
2123 kIemNativeLabelType_RaiseNm);
2124
2125 /* Free the CR0 register. */
2126 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2127
2128#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2129 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2130 }
2131 else
2132 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2133#endif
2134
2135 return off;
2136}
2137
2138
2139#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2140 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2141
2142/**
2143 * Emits code to check if a \#MF exception should be raised.
2144 *
2145 * @returns New code buffer offset, UINT32_MAX on failure.
2146 * @param pReNative The native recompile state.
2147 * @param off The code buffer offset.
2148 * @param idxInstr The current instruction.
2149 */
2150DECL_INLINE_THROW(uint32_t)
2151iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2152{
2153 /*
2154 * Make sure we don't have any outstanding guest register writes as we may
2155     * raise an #MF and all guest registers must be up to date in CPUMCTX.
2156 */
2157 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2158 off = iemNativeRegFlushPendingWrites(pReNative, off);
2159
2160#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2161 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2162#else
2163 RT_NOREF(idxInstr);
2164#endif
2165
2166 /* Allocate a temporary FSW register. */
2167 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
2168 kIemNativeGstRegUse_ReadOnly);
2169
2170 /*
2171     * if ((FSW & X86_FSW_ES) != 0)
2172 * return raisexcpt();
2173 */
2174 /* Test and jump. */
2175 off = iemNativeEmitTestBitInGprAndTbExitIfSet(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT, kIemNativeLabelType_RaiseMf);
2176
2177 /* Free but don't flush the FSW register. */
2178 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2179
2180 return off;
2181}
2182
2183
2184#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2185 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2186
2187/**
2188 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2189 *
2190 * @returns New code buffer offset, UINT32_MAX on failure.
2191 * @param pReNative The native recompile state.
2192 * @param off The code buffer offset.
2193 * @param idxInstr The current instruction.
2194 */
2195DECL_INLINE_THROW(uint32_t)
2196iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2197{
2198#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2199 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2200
2201 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2202 {
2203#endif
2204 /*
2205 * Make sure we don't have any outstanding guest register writes as we may
2206         * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2207 */
2208 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2209 off = iemNativeRegFlushPendingWrites(pReNative, off);
2210
2211#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2212 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2213#else
2214 RT_NOREF(idxInstr);
2215#endif
2216
2217 /* Allocate a temporary CR0 and CR4 register. */
2218 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2219 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2220 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2221
2222 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2223#ifdef RT_ARCH_AMD64
2224 /*
2225 * We do a modified test here:
2226         * if (!((((cr4 & X86_CR4_OSFXSR) | cr0) & (X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR)) ^ X86_CR4_OSFXSR)) { likely }
2227 * else { goto RaiseSseRelated; }
2228         * This ASSUMES that CR0[bit 9] is always zero. This is the case on
2229         * all targets except the 386, which doesn't support SSE, so this
2230         * should be a safe assumption.
2231 */
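        /*
         * A worked instance of the check emitted below, with illustrative values:
         * CR4.OSFXSR=1, CR0.EM=0, CR0.TS=0 leaves exactly X86_CR4_OSFXSR after the
         * AND, the XOR then yields zero and the 'jne' falls through (likely path);
         * with CR0.TS=1 the XOR leaves X86_CR0_TS set and we exit via RaiseSseRelated.
         */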
2232 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
2233 //pCodeBuf[off++] = 0xcc;
2234 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2235 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2236 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2237 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2238 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2239 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseSseRelated, kIemNativeInstrCond_ne);
2240
2241#elif defined(RT_ARCH_ARM64)
2242 /*
2243 * We do a modified test here:
2244 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2245 * else { goto RaiseSseRelated; }
2246 */
2247 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
2248 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2249 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2250 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2251 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2252 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2253 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2254 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2255 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2256 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2257 kIemNativeLabelType_RaiseSseRelated);
2258
2259#else
2260# error "Port me!"
2261#endif
2262
2263 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2264 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2265 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2266 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2267
2268#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2269 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2270 }
2271 else
2272 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2273#endif
2274
2275 return off;
2276}
2277
2278
2279#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2280 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2281
2282/**
2283 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2284 *
2285 * @returns New code buffer offset, UINT32_MAX on failure.
2286 * @param pReNative The native recompile state.
2287 * @param off The code buffer offset.
2288 * @param idxInstr The current instruction.
2289 */
2290DECL_INLINE_THROW(uint32_t)
2291iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2292{
2293#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2294 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2295
2296 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2297 {
2298#endif
2299 /*
2300 * Make sure we don't have any outstanding guest register writes as we may
2301         * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2302 */
2303 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2304 off = iemNativeRegFlushPendingWrites(pReNative, off);
2305
2306#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2307 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2308#else
2309 RT_NOREF(idxInstr);
2310#endif
2311
2312 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2313 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2314 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2315 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2316 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2317
2318 /*
2319 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2320 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2321 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2322 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2323 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2324 * { likely }
2325 * else { goto RaiseAvxRelated; }
2326 */
2327#ifdef RT_ARCH_AMD64
2328 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2329             | (((cr4 >> X86_CR4_OSXSAVE_BIT) & 1) << 1)
2330 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2331 ^ 0x1a) ) { likely }
2332 else { goto RaiseAvxRelated; } */
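    /* Note: the XOR constant 0x1a equals ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2, i.e. the bit
       positions of YMM, SSE and CR4.OSXSAVE after the two rotate-through-carry steps below, so
       the result is zero exactly when those three bits are all set and CR0.TS is clear. */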
2333 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
2334 //pCodeBuf[off++] = 0xcc;
2335 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2336 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2337 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2338 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2339 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2340 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2341 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2342 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2343 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2344 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2345 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseAvxRelated, kIemNativeInstrCond_ne);
2346
2347#elif defined(RT_ARCH_ARM64)
2348 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSXSAVE_BIT) & 1)) ^ 7) << 1)
2349 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
2350 else { goto RaiseAvxRelated; } */
2351 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
2352 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2353 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2354 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2355 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2356 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2357 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2358 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2359 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2360 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2361 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2362 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2363 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2364 kIemNativeLabelType_RaiseAvxRelated);
2365
2366#else
2367# error "Port me!"
2368#endif
2369
2370 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2371 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2372 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2373 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2374#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2375 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2376 }
2377 else
2378 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2379#endif
2380
2381 return off;
2382}
2383
2384
2385#define IEM_MC_RAISE_DIVIDE_ERROR() \
2386 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
2387
2388/**
2389 * Emits code to raise a \#DE.
2390 *
2391 * @returns New code buffer offset, UINT32_MAX on failure.
2392 * @param pReNative The native recompile state.
2393 * @param off The code buffer offset.
2394 * @param idxInstr The current instruction.
2395 */
2396DECL_INLINE_THROW(uint32_t)
2397iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2398{
2399 /*
2400 * Make sure we don't have any outstanding guest register writes as we may raise a \#DE and all guest registers must be up to date in CPUMCTX.
2401 */
2402 off = iemNativeRegFlushPendingWrites(pReNative, off);
2403
2404#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2405 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2406#else
2407 RT_NOREF(idxInstr);
2408#endif
2409
2410 /* raise \#DE exception unconditionally. */
2411 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseDe);
2412}
2413
2414
2415#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2416 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2417
2418/**
2419 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2420 *
2421 * @returns New code buffer offset, UINT32_MAX on failure.
2422 * @param pReNative The native recompile state.
2423 * @param off The code buffer offset.
2424 * @param idxInstr The current instruction.
2425 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2426 * @param cbAlign The alignment in bytes to check against.
2427 */
2428DECL_INLINE_THROW(uint32_t)
2429iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
2430 uint8_t idxVarEffAddr, uint8_t cbAlign)
2431{
2432 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2433 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2434
2435 /*
2436 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2437 */
2438 off = iemNativeRegFlushPendingWrites(pReNative, off);
2439
2440#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2441 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2442#else
2443 RT_NOREF(idxInstr);
2444#endif
2445
2446 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2447
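    /* An effective address is cbAlign aligned exactly when (EffAddr & (cbAlign - 1)) == 0, cbAlign
       being a power of two, so any set bit under that mask means we must take the RaiseGp0 exit. */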
2448 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxVarReg, cbAlign - 1,
2449 kIemNativeLabelType_RaiseGp0);
2450
2451 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2452 return off;
2453}
2454
2455
2456/*********************************************************************************************************************************
2457* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2458*********************************************************************************************************************************/
2459
2460/**
2461 * Pushes an IEM_MC_IF_XXX onto the condition stack.
2462 *
2463 * @returns Pointer to the condition stack entry on success; throws
2464 * VERR_IEM_COND_TOO_DEEPLY_NESTED if the conditions are nested too deeply.
2465 */
2466DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
2467{
2468#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2469 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
2470#endif
2471
2472 uint32_t const idxStack = pReNative->cCondDepth;
2473 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
2474
2475 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
2476 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
2477
2478 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
2479 pEntry->fInElse = false;
2480 pEntry->fIfExitTb = false;
2481 pEntry->fElseExitTb = false;
2482 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
2483 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
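    /* Both labels are created undefined (offWhere == UINT32_MAX); iemNativeEmitElse() and
       iemNativeEmitEndIf() define them once the corresponding code has been emitted. */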
2484
2485 return pEntry;
2486}
2487
2488
2489/**
2490 * Start of the if-block, snapshotting the register and variable state.
2491 */
2492DECL_INLINE_THROW(void)
2493iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
2494{
2495 Assert(offIfBlock != UINT32_MAX);
2496 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2497 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2498 Assert(!pEntry->fInElse);
2499
2500 /* Define the start of the IF block if requested or for disassembly purposes. */
2501 if (idxLabelIf != UINT32_MAX)
2502 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
2503#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2504 else
2505 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
2506#else
2507 RT_NOREF(offIfBlock);
2508#endif
2509
2510#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2511 Assert(pReNative->Core.offPc == 0);
2512#endif
2513
2514 /* Copy the initial state so we can restore it in the 'else' block. */
2515 pEntry->InitialState = pReNative->Core;
2516}
2517
2518
2519#define IEM_MC_ELSE() } while (0); \
2520 off = iemNativeEmitElse(pReNative, off); \
2521 do {
2522
2523/** Emits code related to IEM_MC_ELSE. */
2524DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2525{
2526 /* Check sanity and get the conditional stack entry. */
2527 Assert(off != UINT32_MAX);
2528 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2529 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2530 Assert(!pEntry->fInElse);
2531
2532#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2533 /* Writeback any dirty shadow registers. */
2534 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
2535 * in one of the branches and leave guest registers already dirty before the start of the if
2536 * block alone. */
2537 off = iemNativeRegFlushDirtyGuest(pReNative, off);
2538#endif
2539
2540 /* Jump to the endif */
2541 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
2542
2543 /* Define the else label and enter the else part of the condition. */
2544 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2545 pEntry->fInElse = true;
2546
2547#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2548 Assert(pReNative->Core.offPc == 0);
2549#endif
2550
2551 /* Snapshot the core state so we can do a merge at the endif and restore
2552 the snapshot we took at the start of the if-block. */
2553 pEntry->IfFinalState = pReNative->Core;
2554 pReNative->Core = pEntry->InitialState;
2555
2556 return off;
2557}
2558
2559
2560#define IEM_MC_ENDIF() } while (0); \
2561 off = iemNativeEmitEndIf(pReNative, off)
2562
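/* To illustrate with a hypothetical MC block (not lifted from any particular instruction):
 *
 *      IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
 *          ...
 *      } IEM_MC_ELSE() {
 *          ...
 *      } IEM_MC_ENDIF();
 *
 * iemNativeEmitIfEflagsBitSet() opens the condition, iemNativeEmitElse() ends the if-branch
 * and restores the initial core state, and iemNativeEmitEndIf() below consolidates the two
 * register allocator states again; the do {} while (0) pairs in the macros merely keep the
 * braces balanced in the C source. */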
2563/** Emits code related to IEM_MC_ENDIF. */
2564DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2565{
2566 /* Check sanity and get the conditional stack entry. */
2567 Assert(off != UINT32_MAX);
2568 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
2569 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
2570#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2571 Assert(pReNative->Core.offPc == 0);
2572#endif
2573
2574 /*
2575 * If either of the branches exited the TB, we can take the state from the
2576 * other branch and skip all the merging headache.
2577 */
2578 if (pEntry->fElseExitTb || pEntry->fIfExitTb)
2579 {
2580#ifdef VBOX_STRICT
2581 Assert(pReNative->cCondDepth == 1); /* Assuming this only happens in simple conditional structures. */
2582 Assert(pEntry->fElseExitTb != pEntry->fIfExitTb); /* Assuming we don't have any code where both branches exit. */
2583 PCIEMNATIVECORESTATE const pExitCoreState = pEntry->fIfExitTb && pEntry->fInElse
2584 ? &pEntry->IfFinalState : &pReNative->Core;
2585# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2586 Assert(pExitCoreState->bmGstRegShadowDirty == 0);
2587# endif
2588# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2589 Assert(pExitCoreState->offPc == 0);
2590# endif
2591 RT_NOREF(pExitCoreState);
2592#endif
2593
2594 if (!pEntry->fIfExitTb)
2595 {
2596 Assert(pEntry->fInElse);
2597 pReNative->Core = pEntry->IfFinalState;
2598 }
2599 }
2600 else
2601 {
2602#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2603 /* Writeback any dirty shadow registers (else branch). */
2604 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
2605 * in one of the branches and leave guest registers already dirty before the start of the if
2606 * block alone. */
2607 off = iemNativeRegFlushDirtyGuest(pReNative, off);
2608#endif
2609
2610 /*
2611 * Now we have to find common ground with the core state of the other
2612 * branch. Use the smallest common denominator and just drop anything
2613 * that isn't the same in both states.
2614 */
2615 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
2616 * which is why we're doing this at the end of the else-block.
2617 * But we'd need more info about the future for that to be worth the effort. */
2618 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
2619#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2620 Assert( pOther->bmGstRegShadowDirty == 0
2621 && pReNative->Core.bmGstRegShadowDirty == 0);
2622#endif
2623
2624 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
2625 {
2626 /* shadow guest stuff first. */
2627 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
2628 if (fGstRegs)
2629 {
2630 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
2631 do
2632 {
2633 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
2634 fGstRegs &= ~RT_BIT_64(idxGstReg);
2635
2636 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
2637 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
2638 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
2639 {
2640 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
2641 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
2642
2643#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2644 /* Writeback any dirty shadow registers we are about to unshadow. */
2645 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxHstReg);
2646#endif
2647 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
2648 }
2649 } while (fGstRegs);
2650 }
2651 else
2652 {
2653 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
2654#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2655 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
2656#endif
2657 }
2658
2659 /* Check variables next. For now we must require them to be identical
2660 or stuff we can recreate. */
2661 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
2662 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
2663 if (fVars)
2664 {
2665 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
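                /* The XOR marks variables that exist in only one of the two states; of these, the
                   ones still present in the current state cannot be reconciled and are dropped below. */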
2666 do
2667 {
2668 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
2669 fVars &= ~RT_BIT_32(idxVar);
2670
2671 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
2672 {
2673 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
2674 continue;
2675 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
2676 {
2677 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
2678 if (idxHstReg != UINT8_MAX)
2679 {
2680 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2681 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2682 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
2683 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2684 }
2685 continue;
2686 }
2687 }
2688 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
2689 continue;
2690
2691 /* Irreconcilable, so drop it. */
2692 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
2693 if (idxHstReg != UINT8_MAX)
2694 {
2695 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
2696 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
2697 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
2698 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2699 }
2700 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
2701 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
2702 } while (fVars);
2703 }
2704
2705 /* Finally, check that the host register allocations match. */
2706 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
2707 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
2708 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
2709 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
2710 }
2711 }
2712
2713 /*
2714 * Define the endif label and maybe the else one if we're still in the 'if' part.
2715 */
2716 if (!pEntry->fInElse)
2717 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
2718 else
2719 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
2720 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
2721
2722 /* Pop the conditional stack. */
2723 pReNative->cCondDepth -= 1;
2724
2725 return off;
2726}
2727
2728
2729#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
2730 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
2731 do {
2732
2733/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
2734DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
2735{
2736 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
2737 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2738
2739 /* Get the eflags. */
2740 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2741 kIemNativeGstRegUse_ReadOnly);
2742
2743 /* Test and jump. */
2744 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
2745
2746 /* Free but don't flush the EFlags register. */
2747 iemNativeRegFreeTmp(pReNative, idxEflReg);
2748
2749 /* Make a copy of the core state now as we start the if-block. */
2750 iemNativeCondStartIfBlock(pReNative, off);
2751
2752 return off;
2753}
2754
2755
2756#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
2757 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
2758 do {
2759
2760/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
2761DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
2762{
2763 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
2764 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2765
2766 /* Get the eflags. */
2767 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2768 kIemNativeGstRegUse_ReadOnly);
2769
2770 /* Test and jump. */
2771 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
2772
2773 /* Free but don't flush the EFlags register. */
2774 iemNativeRegFreeTmp(pReNative, idxEflReg);
2775
2776 /* Make a copy of the core state now as we start the if-block. */
2777 iemNativeCondStartIfBlock(pReNative, off);
2778
2779 return off;
2780}
2781
2782
2783#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
2784 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
2785 do {
2786
2787/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
2788DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
2789{
2790 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2791 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2792
2793 /* Get the eflags. */
2794 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2795 kIemNativeGstRegUse_ReadOnly);
2796
2797 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2798 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2799
2800 /* Test and jump. */
2801 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2802
2803 /* Free but don't flush the EFlags register. */
2804 iemNativeRegFreeTmp(pReNative, idxEflReg);
2805
2806 /* Make a copy of the core state now as we start the if-block. */
2807 iemNativeCondStartIfBlock(pReNative, off);
2808
2809 return off;
2810}
2811
2812
2813#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
2814 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
2815 do {
2816
2817/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
2818DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
2819{
2820 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
2821 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2822
2823 /* Get the eflags. */
2824 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2825 kIemNativeGstRegUse_ReadOnly);
2826
2827 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2828 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2829
2830 /* Test and jump. */
2831 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2832
2833 /* Free but don't flush the EFlags register. */
2834 iemNativeRegFreeTmp(pReNative, idxEflReg);
2835
2836 /* Make a copy of the core state now as we start the if-block. */
2837 iemNativeCondStartIfBlock(pReNative, off);
2838
2839 return off;
2840}
2841
2842
2843#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
2844 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
2845 do {
2846
2847#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
2848 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
2849 do {
2850
2851/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
2852DECL_INLINE_THROW(uint32_t)
2853iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
2854 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
2855{
2856 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
2857 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2858
2859 /* Get the eflags. */
2860 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2861 kIemNativeGstRegUse_ReadOnly);
2862
2863 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
2864 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
2865
2866 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
2867 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
2868 Assert(iBitNo1 != iBitNo2);
2869
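    /* Strategy on both architectures: isolate the first flag, shift it to the second flag's bit
       position and XOR it with EFLAGS; bit iBitNo2 of the result is then set exactly when the
       two flags differ. */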
2870#ifdef RT_ARCH_AMD64
2871 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
2872
2873 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2874 if (iBitNo1 > iBitNo2)
2875 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
2876 else
2877 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
2878 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2879
2880#elif defined(RT_ARCH_ARM64)
2881 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2882 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2883
2884 /* and tmpreg, eflreg, #1<<iBitNo1 */
2885 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
2886
2887 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
2888 if (iBitNo1 > iBitNo2)
2889 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2890 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
2891 else
2892 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2893 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
2894
2895 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2896
2897#else
2898# error "Port me"
2899#endif
2900
2901 /* Test (the iBitNo2 bit is set in tmpreg if the two flags are not equal) and jump. */
2902 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
2903 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
2904
2905 /* Free but don't flush the EFlags and tmp registers. */
2906 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2907 iemNativeRegFreeTmp(pReNative, idxEflReg);
2908
2909 /* Make a copy of the core state now as we start the if-block. */
2910 iemNativeCondStartIfBlock(pReNative, off);
2911
2912 return off;
2913}
2914
2915
2916#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
2917 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
2918 do {
2919
2920#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
2921 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
2922 do {
2923
2924/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
2925 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
2926DECL_INLINE_THROW(uint32_t)
2927iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
2928 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
2929{
2930 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
2931 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2932
2933 /* We need an if-block label for the inverted variant, as it jumps straight to the if-block when the lone flag is set. */
2934 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
2935 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
2936
2937 /* Get the eflags. */
2938 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
2939 kIemNativeGstRegUse_ReadOnly);
2940
2941 /* Translate the flag masks to bit numbers. */
2942 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
2943 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
2944
2945 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
2946 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
2947 Assert(iBitNo1 != iBitNo);
2948
2949 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
2950 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
2951 Assert(iBitNo2 != iBitNo);
2952 Assert(iBitNo2 != iBitNo1);
2953
2954#ifdef RT_ARCH_AMD64
2955 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
2956#elif defined(RT_ARCH_ARM64)
2957 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2958#endif
2959
2960 /* Check for the lone bit first. */
2961 if (!fInverted)
2962 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
2963 else
2964 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
2965
2966 /* Then extract and compare the other two bits. */
2967#ifdef RT_ARCH_AMD64
2968 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2969 if (iBitNo1 > iBitNo2)
2970 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
2971 else
2972 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
2973 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
2974
2975#elif defined(RT_ARCH_ARM64)
2976 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2977
2978 /* and tmpreg, eflreg, #1<<iBitNo1 */
2979 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
2980
2981 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
2982 if (iBitNo1 > iBitNo2)
2983 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2984 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
2985 else
2986 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
2987 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
2988
2989 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2990
2991#else
2992# error "Port me"
2993#endif
2994
2995 /* Test (the iBitNo2 bit is set in tmpreg if the two flags are not equal) and jump. */
2996 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
2997 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
2998
2999 /* Free but don't flush the EFlags and tmp registers. */
3000 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3001 iemNativeRegFreeTmp(pReNative, idxEflReg);
3002
3003 /* Make a copy of the core state now as we start the if-block. */
3004 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
3005
3006 return off;
3007}
3008
3009
3010#define IEM_MC_IF_CX_IS_NZ() \
3011 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
3012 do {
3013
3014/** Emits code for IEM_MC_IF_CX_IS_NZ. */
3015DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3016{
3017 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3018
3019 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3020 kIemNativeGstRegUse_ReadOnly);
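    /* Only the low 16 bits (CX) are relevant here, hence the UINT16_MAX mask in the test below. */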
3021 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
3022 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3023
3024 iemNativeCondStartIfBlock(pReNative, off);
3025 return off;
3026}
3027
3028
3029#define IEM_MC_IF_ECX_IS_NZ() \
3030 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
3031 do {
3032
3033#define IEM_MC_IF_RCX_IS_NZ() \
3034 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
3035 do {
3036
3037/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
3038DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3039{
3040 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3041
3042 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3043 kIemNativeGstRegUse_ReadOnly);
3044 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
3045 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3046
3047 iemNativeCondStartIfBlock(pReNative, off);
3048 return off;
3049}
3050
3051
3052#define IEM_MC_IF_CX_IS_NOT_ONE() \
3053 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
3054 do {
3055
3056/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
3057DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3058{
3059 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3060
3061 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3062 kIemNativeGstRegUse_ReadOnly);
3063#ifdef RT_ARCH_AMD64
3064 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3065#else
3066 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3067 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3068 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3069#endif
3070 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3071
3072 iemNativeCondStartIfBlock(pReNative, off);
3073 return off;
3074}
3075
3076
3077#define IEM_MC_IF_ECX_IS_NOT_ONE() \
3078 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
3079 do {
3080
3081#define IEM_MC_IF_RCX_IS_NOT_ONE() \
3082 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
3083 do {
3084
3085/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
3086DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3087{
3088 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3089
3090 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3091 kIemNativeGstRegUse_ReadOnly);
3092 if (f64Bit)
3093 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3094 else
3095 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3096 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3097
3098 iemNativeCondStartIfBlock(pReNative, off);
3099 return off;
3100}
3101
3102
3103#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3104 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
3105 do {
3106
3107#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3108 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
3109 do {
3110
3111/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
3112 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3113DECL_INLINE_THROW(uint32_t)
3114iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
3115{
3116 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3117 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3118
3119 /* We have to load both RCX and EFLAGS before we can start branching;
3120 otherwise we'll end up in the else-block with an inconsistent
3121 register allocator state.
3122 Doing EFLAGS first as it's more likely to be loaded, right? */
3123 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3124 kIemNativeGstRegUse_ReadOnly);
3125 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3126 kIemNativeGstRegUse_ReadOnly);
3127
3128 /** @todo we could reduce this to a single branch instruction by spending a
3129 * temporary register and some setnz stuff. Not sure if loops are
3130 * worth it. */
3131 /* Check CX. */
3132#ifdef RT_ARCH_AMD64
3133 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3134#else
3135 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3136 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3137 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3138#endif
3139
3140 /* Check the EFlags bit. */
3141 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3142 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3143 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3144 !fCheckIfSet /*fJmpIfSet*/);
3145
3146 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3147 iemNativeRegFreeTmp(pReNative, idxEflReg);
3148
3149 iemNativeCondStartIfBlock(pReNative, off);
3150 return off;
3151}
3152
3153
3154#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3155 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
3156 do {
3157
3158#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3159 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
3160 do {
3161
3162#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3163 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
3164 do {
3165
3166#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3167 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
3168 do {
3169
3170/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3171 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3172 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3173 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3174DECL_INLINE_THROW(uint32_t)
3175iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3176 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
3177{
3178 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3179 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3180
3181 /* We have to load both RCX and EFLAGS before we can start branching;
3182 otherwise we'll end up in the else-block with an inconsistent
3183 register allocator state.
3184 Doing EFLAGS first as it's more likely to be loaded, right? */
3185 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3186 kIemNativeGstRegUse_ReadOnly);
3187 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3188 kIemNativeGstRegUse_ReadOnly);
3189
3190 /** @todo we could reduce this to a single branch instruction by spending a
3191 * temporary register and some setnz stuff. Not sure if loops are
3192 * worth it. */
3193 /* Check RCX/ECX. */
3194 if (f64Bit)
3195 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3196 else
3197 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3198
3199 /* Check the EFlags bit. */
3200 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3201 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3202 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3203 !fCheckIfSet /*fJmpIfSet*/);
3204
3205 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3206 iemNativeRegFreeTmp(pReNative, idxEflReg);
3207
3208 iemNativeCondStartIfBlock(pReNative, off);
3209 return off;
3210}
3211
3212
3213#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3214 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3215 do {
3216
3217/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3218DECL_INLINE_THROW(uint32_t)
3219iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3220{
3221 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3222
3223 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3224 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3225 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3226 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3227
3228 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3229
3230 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3231
3232 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3233
3234 iemNativeCondStartIfBlock(pReNative, off);
3235 return off;
3236}
3237
3238
3239#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3240 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3241 do {
3242
3243/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3244DECL_INLINE_THROW(uint32_t)
3245iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3246{
3247 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
3248 Assert(iGReg < 16);
3249
3250 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3251 kIemNativeGstRegUse_ReadOnly);
3252
3253 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3254
3255 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3256
3257 iemNativeCondStartIfBlock(pReNative, off);
3258 return off;
3259}
3260
3261
3262
3263/*********************************************************************************************************************************
3264* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3265*********************************************************************************************************************************/
3266
3267#define IEM_MC_NOREF(a_Name) \
3268 RT_NOREF_PV(a_Name)
3269
3270#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3271 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3272
3273#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3274 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3275
3276#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3277 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3278
3279#define IEM_MC_LOCAL(a_Type, a_Name) \
3280 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3281
3282#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3283 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3284
3285#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3286 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
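/* For example, IEM_MC_LOCAL(uint32_t, u32Tmp) in a threaded function body simply becomes
 * 'uint8_t const u32Tmp = iemNativeVarAlloc(pReNative, sizeof(uint32_t))', i.e. a variable
 * index; a host register is only associated with it later, when an emitter acquires the
 * variable. (u32Tmp is just an illustrative name.) */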
3287
3288
3289/**
3290 * Sets the host register for @a idxVar to @a idxReg.
3291 *
3292 * The register must not be allocated. Any guest register shadowing will be
3293 * implicitly dropped by this call.
3294 *
3295 * The variable must not have any register associated with it (causes
3296 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3297 * implied.
3298 *
3299 * @returns idxReg
3300 * @param pReNative The recompiler state.
3301 * @param idxVar The variable.
3302 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3303 * @param off For recording in debug info.
3304 *
3305 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
3306 */
3307DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
3308{
3309 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3310 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3311 Assert(!pVar->fRegAcquired);
3312 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3313 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
3314 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3315
3316 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3317 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3318
3319 iemNativeVarSetKindToStack(pReNative, idxVar);
3320 pVar->idxReg = idxReg;
3321
3322 return idxReg;
3323}
3324
3325
3326/**
3327 * A convenient helper function.
3328 */
3329DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
3330 uint8_t idxReg, uint32_t *poff)
3331{
3332 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
3333 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
3334 return idxReg;
3335}
3336
3337
3338/**
3339 * This is called by IEM_MC_END() to clean up all variables.
3340 */
3341DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
3342{
3343 uint32_t const bmVars = pReNative->Core.bmVars;
3344 if (bmVars != 0)
3345 iemNativeVarFreeAllSlow(pReNative, bmVars);
3346 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
3347 Assert(pReNative->Core.bmStack == 0);
3348}
3349
3350
3351#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
3352
3353/**
3354 * This is called by IEM_MC_FREE_LOCAL.
3355 */
3356DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3357{
3358 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3359 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
3360 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3361}
3362
3363
3364#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
3365
3366/**
3367 * This is called by IEM_MC_FREE_ARG.
3368 */
3369DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3370{
3371 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3372 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
3373 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3374}
3375
3376
3377#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
3378
3379/**
3380 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
3381 */
3382DECL_INLINE_THROW(uint32_t)
3383iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
3384{
3385 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
3386 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
3387 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3388 Assert( pVarDst->cbVar == sizeof(uint16_t)
3389 || pVarDst->cbVar == sizeof(uint32_t));
3390
3391 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
3392 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
3393 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
3394 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
3395 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3396
3397 Assert(pVarDst->cbVar < pVarSrc->cbVar);
3398
3399 /*
3400 * Special case for immediates.
3401 */
3402 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
3403 {
3404 switch (pVarDst->cbVar)
3405 {
3406 case sizeof(uint16_t):
3407 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
3408 break;
3409 case sizeof(uint32_t):
3410 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
3411 break;
3412 default: AssertFailed(); break;
3413 }
3414 }
3415 else
3416 {
3417 /*
3418 * The generic solution for now.
3419 */
3420 /** @todo optimize this by having the python script make sure the source
3421 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
3422 * statement. Then we could just transfer the register assignments. */
3423 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
3424 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
3425 switch (pVarDst->cbVar)
3426 {
3427 case sizeof(uint16_t):
3428 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
3429 break;
3430 case sizeof(uint32_t):
3431 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
3432 break;
3433 default: AssertFailed(); break;
3434 }
3435 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
3436 iemNativeVarRegisterRelease(pReNative, idxVarDst);
3437 }
3438 return off;
3439}
3440
3441
3442
3443/*********************************************************************************************************************************
3444* Emitters for IEM_MC_CALL_CIMPL_XXX *
3445*********************************************************************************************************************************/
3446
3447/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
3448DECL_INLINE_THROW(uint32_t)
3449iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
3450 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
3451
3452{
3453 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
3454
3455#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3456 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
3457 when a call clobbers any of the relevant control registers. */
3458# if 1
3459 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
3460 {
3461 /* Likely as long as call+ret are done via cimpl. */
3462 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
3463 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
3464 }
3465 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
3466 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
3467 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
3468 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3469 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
3470 else
3471 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3472 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
3473 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
3474
3475# else
3476 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
3477 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
3478 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
3479 pReNative->fSimdRaiseXcptChecksEmitted = 0;
3480 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
3481 || pfnCImpl == (uintptr_t)iemCImpl_callf
3482 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
3483 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
3484 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
3485 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
3486 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
3487# endif
3488
3489# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
3490 /* Mark the host floating point control register as not synced if MXCSR is modified. */
3491 if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_MxCsr))
3492 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
3493# endif
3494#endif
3495
3496 /*
3497 * Do all the call setup and cleanup.
3498 */
3499 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
3500
3501 /*
3502 * Load the two or three hidden arguments.
3503 */
3504#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
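    /* Note: VBOXSTRICTRC is a class in strict builds, which the Windows AMD64 calling convention
       returns via a hidden buffer whose address is passed as the first argument; hence rcStrict
       occupies ARG0 here and the result is reloaded from the shadow slot after the call. */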
3505 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
3506 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3507 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
3508#else
3509 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
3510 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
3511#endif
3512
3513 /*
3514 * Make the call and check the return code.
3515 *
3516 * Shadow PC copies are always flushed here, other stuff depends on flags.
3517 * Segment and general purpose registers are explicitly flushed via the
3518 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
3519 * macros.
3520 */
3521 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
3522#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
3523 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
3524#endif
3525 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
3526 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
3527 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
3528 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
3529
3530 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
3531}
3532
3533
3534#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
3535 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
3536
3537/** Emits code for IEM_MC_CALL_CIMPL_1. */
3538DECL_INLINE_THROW(uint32_t)
3539iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3540 uintptr_t pfnCImpl, uint8_t idxArg0)
3541{
3542 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3543 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
3544}
3545
3546
3547#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
3548 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
3549
3550/** Emits code for IEM_MC_CALL_CIMPL_2. */
3551DECL_INLINE_THROW(uint32_t)
3552iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3553 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
3554{
3555 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3556 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3557 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
3558}
3559
3560
3561#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
3562 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3563 (uintptr_t)a_pfnCImpl, a0, a1, a2)
3564
3565/** Emits code for IEM_MC_CALL_CIMPL_3. */
3566DECL_INLINE_THROW(uint32_t)
3567iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3568 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
3569{
3570 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3571 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3572 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3573 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
3574}
3575
3576
3577#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
3578 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3579 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
3580
3581/** Emits code for IEM_MC_CALL_CIMPL_4. */
3582DECL_INLINE_THROW(uint32_t)
3583iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3584 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
3585{
3586 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3587 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3588 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3589 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
3590 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
3591}
3592
3593
3594#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
3595 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
3596 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
3597
3598/** Emits code for IEM_MC_CALL_CIMPL_5. */
3599DECL_INLINE_THROW(uint32_t)
3600iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
3601 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
3602{
3603 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
3604 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
3605 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
3606 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
3607 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
3608 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
3609}
3610
3611
3612/** Recompiler debugging: Flush guest register shadow copies. */
3613#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
3614
3615
3616
3617/*********************************************************************************************************************************
3618* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
3619*********************************************************************************************************************************/
3620
3621/**
3622 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
3623 */
3624DECL_INLINE_THROW(uint32_t)
3625iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3626 uintptr_t pfnAImpl, uint8_t cArgs)
3627{
3628 if (idxVarRc != UINT8_MAX)
3629 {
3630 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
3631 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
3632 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3633 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3634 }
3635
3636 /*
3637 * Do all the call setup and cleanup.
3638 *
3639     * Only pending guest register writes held in call volatile registers need flushing
3640     * here, as assembly helpers can't throw and don't access anything living in CPUMCTX;
3641     * they only access their parameters. The call volatile registers are always flushed
3642     * by iemNativeEmitCallCommon() regardless of the fFlushPendingWrites parameter.
3643 */
3644 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
3645
3646 /*
3647 * Make the call and update the return code variable if we've got one.
3648 */
3649 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
3650 if (idxVarRc != UINT8_MAX)
3651 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
3652
3653 return off;
3654}
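/*
 * Usage sketch for the IEM_MC_CALL_*_AIMPL_N wrappers below; the helper and
 * variable names are purely illustrative and not an actual IEM assembly helper:
 *
 *      IEM_MC_ARG(uint64_t *, pu64Dst, 0);
 *      IEM_MC_ARG(uint64_t,   u64Src,  1);
 *      IEM_MC_ARG(uint32_t *, pEFlags, 2);
 *      IEM_MC_CALL_VOID_AIMPL_3(iemAImpl_example_u64, pu64Dst, u64Src, pEFlags);
 *
 * iemNativeEmitCallCommon() moves the argument variables into the host calling
 * convention registers (flushing any call volatile registers still in use),
 * iemNativeEmitCallImm() emits the actual call, and for the value returning
 * IEM_MC_CALL_AIMPL_N forms the IEMNATIVE_CALL_RET_GREG register is afterwards
 * bound to the result variable via iemNativeVarRegisterSet().
 */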
3655
3656
3657
3658#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
3659 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
3660
3661#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
3662 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
3663
3664/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
3665DECL_INLINE_THROW(uint32_t)
3666iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
3667{
3668 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
3669}
3670
3671
3672#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
3673 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
3674
3675#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
3676 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
3677
3678/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
3679DECL_INLINE_THROW(uint32_t)
3680iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
3681{
3682 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3683 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
3684}
3685
3686
3687#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
3688 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
3689
3690#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
3691 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
3692
3693/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
3694DECL_INLINE_THROW(uint32_t)
3695iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3696 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
3697{
3698 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3699 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3700 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
3701}
3702
3703
3704#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
3705 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
3706
3707#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
3708 IEM_MC_LOCAL(a_rcType, a_rc); \
3709 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
3710
3711/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
3712DECL_INLINE_THROW(uint32_t)
3713iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3714 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
3715{
3716 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3717 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3718 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
3719 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
3720}
3721
3722
3723#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
3724 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
3725
3726#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
3727 IEM_MC_LOCAL(a_rcType, a_rc); \
3728 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
3729
3730/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
3731DECL_INLINE_THROW(uint32_t)
3732iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
3733 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
3734{
3735 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
3736 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
3737 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
3738 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
3739 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
3740}
3741
3742
3743
3744/*********************************************************************************************************************************
3745* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
3746*********************************************************************************************************************************/
3747
3748#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
3749 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
3750
3751#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
3752 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
3753
3754#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
3755 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
3756
3757#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
3758 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
3759
3760
3761/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
3762 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
3763DECL_INLINE_THROW(uint32_t)
3764iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
3765{
3766 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3767 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3768 Assert(iGRegEx < 20);
3769
3770 /* Same discussion as in iemNativeEmitFetchGregU16 */
3771 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3772 kIemNativeGstRegUse_ReadOnly);
3773
3774 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3775 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3776
3777 /* The value is zero-extended to the full 64-bit host register width. */
3778 if (iGRegEx < 16)
3779 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3780 else
3781 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
3782
3783 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3784 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3785 return off;
3786}
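/*
 * Note on the extended register index (inferred from the 'iGRegEx < 20' assert
 * and the 'iGRegEx & 15' masking above): values 0..15 select the low byte of
 * the corresponding GPR (AL..R15B), while 16..19 select the legacy high byte
 * registers AH, CH, DH and BH, i.e. bits 15:8 of GPRs 0..3.  Fetching AH would
 * thus look like IEM_MC_FETCH_GREG_U8_THREADED(u8Value, 16 + X86_GREG_xAX),
 * with the destination variable name being illustrative only.
 */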
3787
3788
3789#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
3790 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
3791
3792#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
3793 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
3794
3795#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
3796 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
3797
3798/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
3799DECL_INLINE_THROW(uint32_t)
3800iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
3801{
3802 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3803 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
3804 Assert(iGRegEx < 20);
3805
3806 /* Same discussion as in iemNativeEmitFetchGregU16 */
3807 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
3808 kIemNativeGstRegUse_ReadOnly);
3809
3810 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3811 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3812
3813 if (iGRegEx < 16)
3814 {
3815 switch (cbSignExtended)
3816 {
3817 case sizeof(uint16_t):
3818 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3819 break;
3820 case sizeof(uint32_t):
3821 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3822 break;
3823 case sizeof(uint64_t):
3824 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
3825 break;
3826 default: AssertFailed(); break;
3827 }
3828 }
3829 else
3830 {
3831 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
3832 switch (cbSignExtended)
3833 {
3834 case sizeof(uint16_t):
3835 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3836 break;
3837 case sizeof(uint32_t):
3838 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3839 break;
3840 case sizeof(uint64_t):
3841 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3842 break;
3843 default: AssertFailed(); break;
3844 }
3845 }
3846
3847 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3848 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3849 return off;
3850}
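/*
 * For the high byte registers (AH/CH/DH/BH) the sign extension above takes two
 * steps: iemNativeEmitLoadGprFromGpr8Hi() first moves guest bits 15:8 down into
 * bits 7:0 of the variable register, and the value is then sign-extended in
 * place from 8 bits up to the requested width.  The low byte registers can use
 * the dedicated sign-extending load emitters directly.
 */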
3851
3852
3853
3854#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
3855 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
3856
3857#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
3858 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
3859
3860#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
3861 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
3862
3863/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
3864DECL_INLINE_THROW(uint32_t)
3865iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
3866{
3867 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3868 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3869 Assert(iGReg < 16);
3870
3871 /*
3872 * We can either just load the low 16-bit of the GPR into a host register
3873 * for the variable, or we can do so via a shadow copy host register. The
3874 * latter will avoid having to reload it if it's being stored later, but
3875 * will waste a host register if it isn't touched again. Since we don't
3876     * know what's going to happen, we choose the latter for now.
3877 */
3878 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3879 kIemNativeGstRegUse_ReadOnly);
3880
3881 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3882 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3883 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3884 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3885
3886 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3887 return off;
3888}
3889
3890#define IEM_MC_FETCH_GREG_I16(a_i16Dst, a_iGReg) \
3891 off = iemNativeEmitFetchGregI16(pReNative, off, a_i16Dst, a_iGReg)
3892
3893/** Emits code for IEM_MC_FETCH_GREG_I16. */
3894DECL_INLINE_THROW(uint32_t)
3895iemNativeEmitFetchGregI16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
3896{
3897 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3898 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(int16_t));
3899 Assert(iGReg < 16);
3900
3901 /*
3902 * We can either just load the low 16-bit of the GPR into a host register
3903 * for the variable, or we can do so via a shadow copy host register. The
3904 * latter will avoid having to reload it if it's being stored later, but
3905 * will waste a host register if it isn't touched again. Since we don't
3906     * know what's going to happen, we choose the latter for now.
3907 */
3908 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3909 kIemNativeGstRegUse_ReadOnly);
3910
3911 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3912 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3913#ifdef RT_ARCH_AMD64
3914 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3915#elif defined(RT_ARCH_ARM64) /* Note! There are no 16-bit registers on ARM, we emulate that through 32-bit registers which requires sign extension. */
3916 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3917#endif
3918 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3919
3920 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3921 return off;
3922}
3923
3924
3925#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
3926 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
3927
3928#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
3929 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
3930
3931/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
3932DECL_INLINE_THROW(uint32_t)
3933iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
3934{
3935 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3936 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
3937 Assert(iGReg < 16);
3938
3939 /*
3940 * We can either just load the low 16-bit of the GPR into a host register
3941 * for the variable, or we can do so via a shadow copy host register. The
3942 * latter will avoid having to reload it if it's being stored later, but
3943 * will waste a host register if it isn't touched again. Since we don't
3944     * know what's going to happen, we choose the latter for now.
3945 */
3946 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3947 kIemNativeGstRegUse_ReadOnly);
3948
3949 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3950 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3951 if (cbSignExtended == sizeof(uint32_t))
3952 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3953 else
3954 {
3955 Assert(cbSignExtended == sizeof(uint64_t));
3956 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
3957 }
3958 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3959
3960 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3961 return off;
3962}
3963
3964
3965#define IEM_MC_FETCH_GREG_I32(a_i32Dst, a_iGReg) \
3966 off = iemNativeEmitFetchGregU32(pReNative, off, a_i32Dst, a_iGReg, sizeof(uint32_t))
3967
3968#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
3969 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
3970
3971#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
3972 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
3973
3974/** Emits code for IEM_MC_FETCH_GREG_U32, IEM_MC_FETCH_GREG_I32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
3975DECL_INLINE_THROW(uint32_t)
3976iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
3977{
3978 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3979 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
3980 Assert(iGReg < 16);
3981
3982 /*
3983     * We can either just load the low 32-bit of the GPR into a host register
3984 * for the variable, or we can do so via a shadow copy host register. The
3985 * latter will avoid having to reload it if it's being stored later, but
3986 * will waste a host register if it isn't touched again. Since we don't
3987     * know what's going to happen, we choose the latter for now.
3988 */
3989 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3990 kIemNativeGstRegUse_ReadOnly);
3991
3992 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3993 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
3994 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
3995 iemNativeVarRegisterRelease(pReNative, idxDstVar);
3996
3997 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3998 return off;
3999}
4000
4001
4002#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
4003 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
4004
4005/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
4006DECL_INLINE_THROW(uint32_t)
4007iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4008{
4009 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4010 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4011 Assert(iGReg < 16);
4012
4013 /*
4014 * We can either just load the low 32-bit of the GPR into a host register
4015 * for the variable, or we can do so via a shadow copy host register. The
4016 * latter will avoid having to reload it if it's being stored later, but
4017 * will waste a host register if it isn't touched again. Since we don't
4018     * know what's going to happen, we choose the latter for now.
4019 */
4020 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4021 kIemNativeGstRegUse_ReadOnly);
4022
4023 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4024 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4025 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4026 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4027
4028 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4029 return off;
4030}
4031
4032
4033#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
4034 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4035
4036#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
4037 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4038
4039/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
4040 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
4041DECL_INLINE_THROW(uint32_t)
4042iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4043{
4044 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4045 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4046 Assert(iGReg < 16);
4047
4048 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4049 kIemNativeGstRegUse_ReadOnly);
4050
4051 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4052 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4053 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
4054 /** @todo name the register a shadow one already? */
4055 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4056
4057 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4058 return off;
4059}
4060
4061
4062#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4063#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
4064 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
4065
4066/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
4067DECL_INLINE_THROW(uint32_t)
4068iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
4069{
4070 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4071 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4072 Assert(iGRegLo < 16 && iGRegHi < 16);
4073
4074 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4075 kIemNativeGstRegUse_ReadOnly);
4076 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4077 kIemNativeGstRegUse_ReadOnly);
4078
4079 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4080 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
4081 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
4082 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
4083
4084 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4085 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4086 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4087 return off;
4088}
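/*
 * Example (guest register choice illustrative): fetching an RDX:RAX style pair
 * into a 128-bit local,
 *      IEM_MC_FETCH_GREG_PAIR_U64(u128Dst, X86_GREG_xAX, X86_GREG_xDX);
 * places the low GPR in qword 0 and the high GPR in qword 1 of the SIMD host
 * register backing the variable.
 */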
4089#endif
4090
4091
4092/*********************************************************************************************************************************
4093* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
4094*********************************************************************************************************************************/
4095
4096#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
4097 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
4098
4099/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
4100DECL_INLINE_THROW(uint32_t)
4101iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
4102{
4103 Assert(iGRegEx < 20);
4104 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4105 kIemNativeGstRegUse_ForUpdate);
4106#ifdef RT_ARCH_AMD64
4107 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4108
4109 /* To the lowest byte of the register: mov r8, imm8 */
4110 if (iGRegEx < 16)
4111 {
4112 if (idxGstTmpReg >= 8)
4113 pbCodeBuf[off++] = X86_OP_REX_B;
4114 else if (idxGstTmpReg >= 4)
4115 pbCodeBuf[off++] = X86_OP_REX;
4116 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4117 pbCodeBuf[off++] = u8Value;
4118 }
4119    /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
4120 else if (idxGstTmpReg < 4)
4121 {
4122 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
4123 pbCodeBuf[off++] = u8Value;
4124 }
4125 else
4126 {
4127 /* ror reg64, 8 */
4128 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4129 pbCodeBuf[off++] = 0xc1;
4130 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4131 pbCodeBuf[off++] = 8;
4132
4133 /* mov reg8, imm8 */
4134 if (idxGstTmpReg >= 8)
4135 pbCodeBuf[off++] = X86_OP_REX_B;
4136 else if (idxGstTmpReg >= 4)
4137 pbCodeBuf[off++] = X86_OP_REX;
4138 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4139 pbCodeBuf[off++] = u8Value;
4140
4141 /* rol reg64, 8 */
4142 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4143 pbCodeBuf[off++] = 0xc1;
4144 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4145 pbCodeBuf[off++] = 8;
4146 }
4147
4148#elif defined(RT_ARCH_ARM64)
4149 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4150 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4151 if (iGRegEx < 16)
4152 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4153 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4154 else
4155 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4156 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4157 iemNativeRegFreeTmp(pReNative, idxImmReg);
4158
4159#else
4160# error "Port me!"
4161#endif
4162
4163 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4164
4165#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4166 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4167#endif
4168
4169 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4170 return off;
4171}
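/*
 * Worked example for the AMD64 high byte path above, with the host register
 * picked purely for illustration: storing 0x42 into guest AH when the RAX
 * shadow lives in r10 (whose bits 15:8 cannot be addressed as a byte register)
 * emits:
 *      ror r10, 8          ; rotate guest AH down into bits 7:0
 *      mov r10b, 0x42      ; overwrite it
 *      rol r10, 8          ; rotate everything back into place
 * The ARM64 path needs no such trick: the immediate is loaded into a temporary
 * register and a single BFI inserts it into bits 15:8 of the shadow register.
 */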
4172
4173
4174#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4175 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4176
4177/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4178DECL_INLINE_THROW(uint32_t)
4179iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4180{
4181 Assert(iGRegEx < 20);
4182 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4183
4184 /*
4185 * If it's a constant value (unlikely) we treat this as a
4186 * IEM_MC_STORE_GREG_U8_CONST statement.
4187 */
4188 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4189 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4190 { /* likely */ }
4191 else
4192 {
4193 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4194 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4195 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4196 }
4197
4198 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4199 kIemNativeGstRegUse_ForUpdate);
4200 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4201
4202#ifdef RT_ARCH_AMD64
4203 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4204 if (iGRegEx < 16)
4205 {
4206 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4207 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4208 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4209 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4210 pbCodeBuf[off++] = X86_OP_REX;
4211 pbCodeBuf[off++] = 0x8a;
4212 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4213 }
4214    /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
4215 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4216 {
4217 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4218 pbCodeBuf[off++] = 0x8a;
4219 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4220 }
4221 else
4222 {
4223 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4224
4225 /* ror reg64, 8 */
4226 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4227 pbCodeBuf[off++] = 0xc1;
4228 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4229 pbCodeBuf[off++] = 8;
4230
4231 /* mov reg8, reg8(r/m) */
4232 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4233 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4234 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4235 pbCodeBuf[off++] = X86_OP_REX;
4236 pbCodeBuf[off++] = 0x8a;
4237 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4238
4239 /* rol reg64, 8 */
4240 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4241 pbCodeBuf[off++] = 0xc1;
4242 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4243 pbCodeBuf[off++] = 8;
4244 }
4245
4246#elif defined(RT_ARCH_ARM64)
4247 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4248 or
4249 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4250 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4251 if (iGRegEx < 16)
4252 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4253 else
4254 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4255
4256#else
4257# error "Port me!"
4258#endif
4259 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4260
4261 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4262
4263#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4264 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4265#endif
4266 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4267 return off;
4268}
4269
4270
4271
4272#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4273 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4274
4275/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4276DECL_INLINE_THROW(uint32_t)
4277iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4278{
4279 Assert(iGReg < 16);
4280 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4281 kIemNativeGstRegUse_ForUpdate);
4282#ifdef RT_ARCH_AMD64
4283 /* mov reg16, imm16 */
4284 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4285 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4286 if (idxGstTmpReg >= 8)
4287 pbCodeBuf[off++] = X86_OP_REX_B;
4288 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4289 pbCodeBuf[off++] = RT_BYTE1(uValue);
4290 pbCodeBuf[off++] = RT_BYTE2(uValue);
4291
4292#elif defined(RT_ARCH_ARM64)
4293 /* movk xdst, #uValue, lsl #0 */
4294 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4295 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
4296
4297#else
4298# error "Port me!"
4299#endif
4300
4301 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4302
4303#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4304 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4305#endif
4306 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4307 return off;
4308}
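/*
 * Both paths above leave bits 63:16 of the guest register untouched, matching
 * the x86 semantics of a 16-bit register write.  With the host register and
 * value picked purely for illustration:
 *      AMD64:  mov cx, 0x1234              ; operand size prefix, upper bits kept
 *      ARM64:  movk x3, #0x1234, lsl #0    ; replaces only half-word 0
 */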
4309
4310
4311#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4312 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4313
4314/** Emits code for IEM_MC_STORE_GREG_U16. */
4315DECL_INLINE_THROW(uint32_t)
4316iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4317{
4318 Assert(iGReg < 16);
4319 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4320
4321 /*
4322 * If it's a constant value (unlikely) we treat this as a
4323 * IEM_MC_STORE_GREG_U16_CONST statement.
4324 */
4325 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4326 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4327 { /* likely */ }
4328 else
4329 {
4330 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4331 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4332 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
4333 }
4334
4335 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4336 kIemNativeGstRegUse_ForUpdate);
4337
4338#ifdef RT_ARCH_AMD64
4339 /* mov reg16, reg16 or [mem16] */
4340 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4341 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4342 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
4343 {
4344 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
4345 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
4346 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
4347 pbCodeBuf[off++] = 0x8b;
4348 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
4349 }
4350 else
4351 {
4352 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
4353 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
4354 if (idxGstTmpReg >= 8)
4355 pbCodeBuf[off++] = X86_OP_REX_R;
4356 pbCodeBuf[off++] = 0x8b;
4357 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
4358 }
4359
4360#elif defined(RT_ARCH_ARM64)
4361 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
4362 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4363 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4364 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
4365 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4366
4367#else
4368# error "Port me!"
4369#endif
4370
4371 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4372
4373#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4374 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4375#endif
4376 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4377 return off;
4378}
4379
4380
4381#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
4382 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
4383
4384/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
4385DECL_INLINE_THROW(uint32_t)
4386iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
4387{
4388 Assert(iGReg < 16);
4389 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4390 kIemNativeGstRegUse_ForFullWrite);
4391 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4392#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4393 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4394#endif
4395 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4396 return off;
4397}
4398
4399
4400#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
4401 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
4402
4403#define IEM_MC_STORE_GREG_I32(a_iGReg, a_i32Value) \
4404 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_i32Value)
4405
4406/** Emits code for IEM_MC_STORE_GREG_U32/IEM_MC_STORE_GREG_I32. */
4407DECL_INLINE_THROW(uint32_t)
4408iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4409{
4410 Assert(iGReg < 16);
4411 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4412
4413 /*
4414 * If it's a constant value (unlikely) we treat this as a
4415 * IEM_MC_STORE_GREG_U32_CONST statement.
4416 */
4417 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4418 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4419 { /* likely */ }
4420 else
4421 {
4422 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4423 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4424 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
4425 }
4426
4427 /*
4428     * For the rest we allocate a guest register for the variable and write
4429 * it to the CPUMCTX structure.
4430 */
4431 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
4432#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4433 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4434#else
4435 RT_NOREF(idxVarReg);
4436#endif
4437#ifdef VBOX_STRICT
4438 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
4439#endif
4440 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4441 return off;
4442}
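/*
 * A 32-bit GPR write zero-extends to 64 bits on x86-64 (e.g. 'mov eax, ebx'
 * clears RAX[63:32]), so the variable's host register can simply take over as
 * the full-width guest shadow above; the VBOX_STRICT check verifies that its
 * upper half really is clear.
 */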
4443
4444
4445#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
4446 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
4447
4448/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
4449DECL_INLINE_THROW(uint32_t)
4450iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
4451{
4452 Assert(iGReg < 16);
4453 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4454 kIemNativeGstRegUse_ForFullWrite);
4455 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
4456#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4457 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4458#endif
4459 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4460 return off;
4461}
4462
4463
4464#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
4465 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
4466
4467#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
4468 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
4469
4470/** Emits code for IEM_MC_STORE_GREG_U64. */
4471DECL_INLINE_THROW(uint32_t)
4472iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4473{
4474 Assert(iGReg < 16);
4475 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4476
4477 /*
4478 * If it's a constant value (unlikely) we treat this as a
4479 * IEM_MC_STORE_GREG_U64_CONST statement.
4480 */
4481 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4482 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4483 { /* likely */ }
4484 else
4485 {
4486 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4487 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4488 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
4489 }
4490
4491 /*
4492     * For the rest we allocate a guest register for the variable and write
4493 * it to the CPUMCTX structure.
4494 */
4495 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
4496#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4497 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4498#else
4499 RT_NOREF(idxVarReg);
4500#endif
4501 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4502 return off;
4503}
4504
4505
4506#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
4507 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
4508
4509/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
4510DECL_INLINE_THROW(uint32_t)
4511iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
4512{
4513 Assert(iGReg < 16);
4514 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4515 kIemNativeGstRegUse_ForUpdate);
4516 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
4517#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4518 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4519#endif
4520 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4521 return off;
4522}
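/*
 * This is just a 32-bit move of the register onto itself, which both host
 * architectures define as zero-extending (host register illustrative):
 *      AMD64:  mov r8d, r8d
 *      ARM64:  mov w8, w8
 */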
4523
4524
4525#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4526#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
4527 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
4528
4529/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
4530DECL_INLINE_THROW(uint32_t)
4531iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
4532{
4533 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4534 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4535 Assert(iGRegLo < 16 && iGRegHi < 16);
4536
4537 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4538 kIemNativeGstRegUse_ForFullWrite);
4539 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4540 kIemNativeGstRegUse_ForFullWrite);
4541
4542 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4543 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
4544 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
4545 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
4546
4547 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4548 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4549 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4550 return off;
4551}
4552#endif
4553
4554
4555/*********************************************************************************************************************************
4556* General purpose register manipulation (add, sub). *
4557*********************************************************************************************************************************/
4558
4559#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
4560    off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
4561
4562/** Emits code for IEM_MC_ADD_GREG_U16. */
4563DECL_INLINE_THROW(uint32_t)
4564iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
4565{
4566 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4567 kIemNativeGstRegUse_ForUpdate);
4568
4569#ifdef RT_ARCH_AMD64
4570 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4571 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4572 if (idxGstTmpReg >= 8)
4573 pbCodeBuf[off++] = X86_OP_REX_B;
4574 if (uAddend == 1)
4575 {
4576 pbCodeBuf[off++] = 0xff; /* inc */
4577 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4578 }
4579 else
4580 {
4581 pbCodeBuf[off++] = 0x81;
4582 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4583 pbCodeBuf[off++] = uAddend;
4584 pbCodeBuf[off++] = 0;
4585 }
4586
4587#else
4588 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4589 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4590
4591    /* add tmp, gstgrp, uAddend */
4592 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
4593
4594    /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
4595 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4596
4597 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4598#endif
4599
4600 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4601
4602#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4603 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4604#endif
4605
4606 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4607 return off;
4608}
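/*
 * A 16-bit addition must leave bits 63:16 of the guest register untouched.  The
 * AMD64 path does that directly with an operand size prefixed inc/add on the
 * shadow register, while the ARM64 path computes the sum into a temporary and
 * inserts the low 16 bits back, roughly (host registers illustrative):
 *      add w9, w8, #3
 *      bfi w8, w9, #0, #16
 */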
4609
4610
4611#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
4612 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4613
4614#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
4615 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4616
4617/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
4618DECL_INLINE_THROW(uint32_t)
4619iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
4620{
4621 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4622 kIemNativeGstRegUse_ForUpdate);
4623
4624#ifdef RT_ARCH_AMD64
4625 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4626 if (f64Bit)
4627 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4628 else if (idxGstTmpReg >= 8)
4629 pbCodeBuf[off++] = X86_OP_REX_B;
4630 if (uAddend == 1)
4631 {
4632 pbCodeBuf[off++] = 0xff; /* inc */
4633 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4634 }
4635 else if (uAddend < 128)
4636 {
4637 pbCodeBuf[off++] = 0x83; /* add */
4638 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4639 pbCodeBuf[off++] = RT_BYTE1(uAddend);
4640 }
4641 else
4642 {
4643 pbCodeBuf[off++] = 0x81; /* add */
4644 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4645 pbCodeBuf[off++] = RT_BYTE1(uAddend);
4646 pbCodeBuf[off++] = 0;
4647 pbCodeBuf[off++] = 0;
4648 pbCodeBuf[off++] = 0;
4649 }
4650
4651#else
4652    /* add gstgrp, gstgrp, uAddend */
4653 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4654 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
4655
4656#endif
4657
4658 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4659
4660#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4661 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4662#endif
4663
4664 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4665 return off;
4666}
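/*
 * The AMD64 path above picks the shortest encoding: inc for an addend of one,
 * the sign extended imm8 form (0x83) for addends below 128, and the imm32 form
 * (0x81) otherwise.  In the 32-bit case both hosts clear bits 63:32 of the
 * guest register as a side effect, which matches the x86-64 semantics of
 * 32-bit GPR arithmetic.
 */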
4667
4668
4669
4670#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
4671 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
4672
4673/** Emits code for IEM_MC_SUB_GREG_U16. */
4674DECL_INLINE_THROW(uint32_t)
4675iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
4676{
4677 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4678 kIemNativeGstRegUse_ForUpdate);
4679
4680#ifdef RT_ARCH_AMD64
4681 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
4682 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4683 if (idxGstTmpReg >= 8)
4684 pbCodeBuf[off++] = X86_OP_REX_B;
4685 if (uSubtrahend == 1)
4686 {
4687 pbCodeBuf[off++] = 0xff; /* dec */
4688 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4689 }
4690 else
4691 {
4692 pbCodeBuf[off++] = 0x81;
4693 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4694 pbCodeBuf[off++] = uSubtrahend;
4695 pbCodeBuf[off++] = 0;
4696 }
4697
4698#else
4699 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
4700 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4701
4702 /* sub tmp, gstgrp, uSubtrahend */
4703 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
4704
4705    /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
4706 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
4707
4708 iemNativeRegFreeTmp(pReNative, idxTmpReg);
4709#endif
4710
4711 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4712
4713#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4714 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4715#endif
4716
4717 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4718 return off;
4719}
4720
4721
4722#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
4723 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
4724
4725#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
4726 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
4727
4728/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
4729DECL_INLINE_THROW(uint32_t)
4730iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
4731{
4732 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4733 kIemNativeGstRegUse_ForUpdate);
4734
4735#ifdef RT_ARCH_AMD64
4736 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
4737 if (f64Bit)
4738 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
4739 else if (idxGstTmpReg >= 8)
4740 pbCodeBuf[off++] = X86_OP_REX_B;
4741 if (uSubtrahend == 1)
4742 {
4743 pbCodeBuf[off++] = 0xff; /* dec */
4744 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4745 }
4746 else if (uSubtrahend < 128)
4747 {
4748 pbCodeBuf[off++] = 0x83; /* sub */
4749 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4750 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4751 }
4752 else
4753 {
4754 pbCodeBuf[off++] = 0x81; /* sub */
4755 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
4756 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
4757 pbCodeBuf[off++] = 0;
4758 pbCodeBuf[off++] = 0;
4759 pbCodeBuf[off++] = 0;
4760 }
4761
4762#else
4763    /* sub gstgrp, gstgrp, uSubtrahend */
4764 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4765 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
4766
4767#endif
4768
4769 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4770
4771#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4772 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4773#endif
4774
4775 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4776 return off;
4777}
4778
4779
4780#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
4781 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
4782
4783#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
4784 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
4785
4786#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
4787 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
4788
4789#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
4790 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
4791
4792/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
4793DECL_INLINE_THROW(uint32_t)
4794iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
4795{
4796#ifdef VBOX_STRICT
4797 switch (cbMask)
4798 {
4799 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4800 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4801 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4802 case sizeof(uint64_t): break;
4803 default: AssertFailedBreak();
4804 }
4805#endif
4806
4807 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4808 kIemNativeGstRegUse_ForUpdate);
4809
4810 switch (cbMask)
4811 {
4812 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
4813 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
4814 break;
4815 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
4816 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
4817 break;
4818 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
4819 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
4820 break;
4821 case sizeof(uint64_t):
4822 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
4823 break;
4824 default: AssertFailedBreak();
4825 }
4826
4827 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4828
4829#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4830 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4831#endif
4832
4833 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4834 return off;
4835}
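/*
 * Example of the mask widening above (guest register choice illustrative): for
 * IEM_MC_AND_GREG_U16(X86_GREG_xDX, 0xfff0) the emitted 64-bit AND uses the
 * immediate 0xfffffffffffffff0, so bits 63:16 of the guest register pass
 * through unchanged, while the 32-bit variant deliberately uses a 32-bit AND so
 * that the upper half is zeroed as x86-64 semantics require.
 */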
4836
4837
4838#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
4839 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
4840
4841#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
4842 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
4843
4844#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
4845 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
4846
4847#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
4848 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
4849
4850/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
4851DECL_INLINE_THROW(uint32_t)
4852iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
4853{
4854#ifdef VBOX_STRICT
4855 switch (cbMask)
4856 {
4857 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4858 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4859 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4860 case sizeof(uint64_t): break;
4861 default: AssertFailedBreak();
4862 }
4863#endif
4864
4865 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4866 kIemNativeGstRegUse_ForUpdate);
4867
4868 switch (cbMask)
4869 {
4870 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
4871 case sizeof(uint16_t):
4872 case sizeof(uint64_t):
4873 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
4874 break;
4875 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
4876 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
4877 break;
4878 default: AssertFailedBreak();
4879 }
4880
4881 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4882
4883#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4884 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4885#endif
4886
4887 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4888 return off;
4889}
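/*
 * Unlike the AND case no mask widening is needed here: OR-ing with a zero
 * extended 8, 16 or 64-bit mask cannot alter the bits that must stay untouched.
 * Only the 32-bit variant differs, using a 32-bit OR so that bits 63:32 are
 * cleared as x86-64 semantics demand.
 */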
4890
4891
4892/*********************************************************************************************************************************
4893* Local/Argument variable manipulation (add, sub, and, or). *
4894*********************************************************************************************************************************/
4895
4896#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
4897 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
4898
4899#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
4900 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
4901
4902#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
4903 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
4904
4905#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
4906 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
4907
4908
4909#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
4910 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
4911
4912#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
4913 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
4914
4915#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
4916 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
4917
4918/** Emits code for AND'ing a local and a constant value. */
4919DECL_INLINE_THROW(uint32_t)
4920iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
4921{
4922#ifdef VBOX_STRICT
4923 switch (cbMask)
4924 {
4925 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4926 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4927 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4928 case sizeof(uint64_t): break;
4929 default: AssertFailedBreak();
4930 }
4931#endif
4932
4933 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4934 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
4935
4936 if (cbMask <= sizeof(uint32_t))
4937 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
4938 else
4939 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
4940
4941 iemNativeVarRegisterRelease(pReNative, idxVar);
4942 return off;
4943}
4944
4945
4946#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
4947 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
4948
4949#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
4950 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
4951
4952#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
4953 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
4954
4955#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
4956 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
4957
4958/** Emits code for OR'ing a local and a constant value. */
4959DECL_INLINE_THROW(uint32_t)
4960iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
4961{
4962#ifdef VBOX_STRICT
4963 switch (cbMask)
4964 {
4965 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
4966 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
4967 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
4968 case sizeof(uint64_t): break;
4969 default: AssertFailedBreak();
4970 }
4971#endif
4972
4973 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
4974 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
4975
4976 if (cbMask <= sizeof(uint32_t))
4977 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
4978 else
4979 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
4980
4981 iemNativeVarRegisterRelease(pReNative, idxVar);
4982 return off;
4983}
4984
4985
4986#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
4987 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
4988
4989#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
4990 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
4991
4992#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
4993 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
4994
4995/** Emits code for reversing the byte order in a local value. */
4996DECL_INLINE_THROW(uint32_t)
4997iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
4998{
4999 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5000 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5001
5002 switch (cbLocal)
5003 {
5004 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
5005 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
5006 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
5007 default: AssertFailedBreak();
5008 }
5009
5010 iemNativeVarRegisterRelease(pReNative, idxVar);
5011 return off;
5012}
5013
5014
5015#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
5016 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5017
5018#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
5019 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5020
5021#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
5022 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5023
5024/** Emits code for shifting a local value left. */
5025DECL_INLINE_THROW(uint32_t)
5026iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5027{
5028#ifdef VBOX_STRICT
5029 switch (cbLocal)
5030 {
5031 case sizeof(uint8_t): Assert(cShift < 8); break;
5032 case sizeof(uint16_t): Assert(cShift < 16); break;
5033 case sizeof(uint32_t): Assert(cShift < 32); break;
5034 case sizeof(uint64_t): Assert(cShift < 64); break;
5035 default: AssertFailedBreak();
5036 }
5037#endif
5038
5039 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5040 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5041
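    /* For 8/16-bit locals the shift happens in a 32-bit host register, so the
       bits pushed above the local's width are masked off again below; e.g. a
       16-bit local 0x8001 shifted left by 1 becomes 0x00010002 in the host
       register and the AND with 0xffff trims it back to 0x0002. */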
5042 if (cbLocal <= sizeof(uint32_t))
5043 {
5044 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
5045 if (cbLocal < sizeof(uint32_t))
5046 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
5047 cbLocal == sizeof(uint16_t)
5048 ? UINT32_C(0xffff)
5049 : UINT32_C(0xff));
5050 }
5051 else
5052 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
5053
5054 iemNativeVarRegisterRelease(pReNative, idxVar);
5055 return off;
5056}
5057
5058
5059#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
5060 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5061
5062#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
5063 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5064
5065#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
5066 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5067
5068/** Emits code for arithmetically shifting a local value right. */
5069DECL_INLINE_THROW(uint32_t)
5070iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5071{
5072#ifdef VBOX_STRICT
5073 switch (cbLocal)
5074 {
5075 case sizeof(int8_t): Assert(cShift < 8); break;
5076 case sizeof(int16_t): Assert(cShift < 16); break;
5077 case sizeof(int32_t): Assert(cShift < 32); break;
5078 case sizeof(int64_t): Assert(cShift < 64); break;
5079 default: AssertFailedBreak();
5080 }
5081#endif
5082
5083 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5084 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5085
5086 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
5087 if (cbLocal == sizeof(uint8_t))
5088 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
5089 else if (cbLocal == sizeof(uint16_t))
5090 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
5091
5092 if (cbLocal <= sizeof(uint32_t))
5093 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
5094 else
5095 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
5096
5097 iemNativeVarRegisterRelease(pReNative, idxVar);
5098 return off;
5099}
5100
5101
5102#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
5103 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
5104
5105#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
5106 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
5107
5108#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
5109 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
5110
5111/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
5112DECL_INLINE_THROW(uint32_t)
5113iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
5114{
5115 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
5116 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
5117 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5118 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5119
5120 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5121 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
5122
5123 /* Need to sign extend the value. */
5124 if (cbLocal <= sizeof(uint32_t))
5125 {
5126/** @todo ARM64: In case of boredom, the extended add instruction can do the
5127 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
5128 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5129
5130 switch (cbLocal)
5131 {
5132 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
5133 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
5134 default: AssertFailed();
5135 }
5136
5137 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
5138 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5139 }
5140 else
5141 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
5142
5143 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
5144 iemNativeVarRegisterRelease(pReNative, idxVar);
5145 return off;
5146}
5147
5148
5149
5150/*********************************************************************************************************************************
5151* EFLAGS *
5152*********************************************************************************************************************************/
5153
5154#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
5155# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
5156#else
5157# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
5158 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
5159
5160DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
5161{
5162 if (fEflOutput)
5163 {
5164 PVMCPUCC const pVCpu = pReNative->pVCpu;
5165# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5166 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5167 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5168 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5169# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5170 if (fEflOutput & (a_fEfl)) \
5171 { \
5172 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5173 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5174 else \
5175 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5176 } else do { } while (0)
5177# else
5178 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
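        /* Rough reading of the two masks below (derived from the bit
           expressions, not from any separate spec): a flag counts as
           "clobbered" when this point only writes it (no read, no other
           access, not potentially needed by an exception/call), and as
           "delayable" when it is written and only potentially needed across
           an exception/call boundary. */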
5179 IEMLIVENESSBIT const LivenessClobbered =
5180 {
5181 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5182 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5183 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5184 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5185 };
5186 IEMLIVENESSBIT const LivenessDelayable =
5187 {
5188 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5189 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5190 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5191 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5192 };
5193# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5194 if (fEflOutput & (a_fEfl)) \
5195 { \
5196 if (LivenessClobbered.a_fLivenessMember) \
5197 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5198 else if (LivenessDelayable.a_fLivenessMember) \
5199 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5200 else \
5201 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5202 } else do { } while (0)
5203# endif
5204 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5205 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5206 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5207 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5208 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5209 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5210 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5211# undef CHECK_FLAG_AND_UPDATE_STATS
5212 }
5213 RT_NOREF(fEflInput);
5214}
5215#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5216
5217#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5218#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5219 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
5220
5221/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5222DECL_INLINE_THROW(uint32_t)
5223iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
5224 uint32_t fEflInput, uint32_t fEflOutput)
5225{
5226 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5227 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5228 RT_NOREF(fEflInput, fEflOutput);
5229
5230#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5231# ifdef VBOX_STRICT
5232 if ( pReNative->idxCurCall != 0
5233 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
5234 {
5235 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5236 uint32_t const fBoth = fEflInput | fEflOutput;
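        /* Summary of the assertion below: the recorded liveness state for each
           flag must match how this MC uses it - output-only flags should be in
           a clobber-expected state, input-only flags in an input-expected
           state, and flags that are both read and written in a modify-expected
           state. */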
5237# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
5238 AssertMsg( !(fBoth & (a_fElfConst)) \
5239 || (!(fEflInput & (a_fElfConst)) \
5240 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5241 : !(fEflOutput & (a_fElfConst)) \
5242 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5243 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5244 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5245 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5246 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5247 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5248 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5249 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5250 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5251 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5252# undef ASSERT_ONE_EFL
5253 }
5254# endif
5255#endif
5256
5257 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5258
5259 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
5260 * the existing shadow copy. */
5261 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
5262 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5263 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5264 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5265 return off;
5266}
5267
5268
5269
5270/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5271 * start using it with custom native code emission (inlining assembly
5272 * instruction helpers). */
5273#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5274#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5275 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5276 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
5277
5278#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5279#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5280 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5281 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
5282
5283/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
5284DECL_INLINE_THROW(uint32_t)
5285iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
5286 bool fUpdateSkipping)
5287{
5288 RT_NOREF(fEflOutput);
5289 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
5290 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5291
5292#ifdef VBOX_STRICT
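    /* Strict builds sanity check the value being committed: the reserved
       always-one bit(s) (X86_EFL_RA1_MASK) must be set and the reserved
       always-zero bits must be clear, otherwise a breakpoint (0x2001/0x2002)
       is emitted so a bogus EFLAGS commit is caught at runtime. */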
5293 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
5294 uint32_t offFixup = off;
5295 off = iemNativeEmitJnzToFixed(pReNative, off, off);
5296 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
5297 iemNativeFixupFixedJump(pReNative, offFixup, off);
5298
5299 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
5300 offFixup = off;
5301 off = iemNativeEmitJzToFixed(pReNative, off, off);
5302 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
5303 iemNativeFixupFixedJump(pReNative, offFixup, off);
5304
5305 /** @todo validate that only bits in the fEflOutput mask changed. */
5306#endif
5307
5308#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5309 if (fUpdateSkipping)
5310 {
5311 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5312 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5313 else
5314 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5315 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5316 }
5317#else
5318 RT_NOREF_PV(fUpdateSkipping);
5319#endif
5320
5321 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5322 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
5323 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5324 return off;
5325}
5326
5327
5328typedef enum IEMNATIVEMITEFLOP
5329{
5330 kIemNativeEmitEflOp_Invalid = 0,
5331 kIemNativeEmitEflOp_Set,
5332 kIemNativeEmitEflOp_Clear,
5333 kIemNativeEmitEflOp_Flip
5334} IEMNATIVEMITEFLOP;
5335
5336#define IEM_MC_SET_EFL_BIT(a_fBit) \
5337 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Set);
5338
5339#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
5340 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Clear);
5341
5342#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
5343 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Flip);
5344
5345/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
5346DECL_INLINE_THROW(uint32_t)
5347iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflBit, IEMNATIVEMITEFLOP enmOp)
5348{
5349 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5350 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/);
5351
5352 switch (enmOp)
5353 {
5354 case kIemNativeEmitEflOp_Set:
5355 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5356 break;
5357 case kIemNativeEmitEflOp_Clear:
5358 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~fEflBit);
5359 break;
5360 case kIemNativeEmitEflOp_Flip:
5361 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5362 break;
5363 default:
5364 AssertFailed();
5365 break;
5366 }
5367
5368 /** @todo No delayed writeback for EFLAGS right now. */
5369 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
5370
5371 /* Free but don't flush the EFLAGS register. */
5372 iemNativeRegFreeTmp(pReNative, idxEflReg);
5373
5374 return off;
5375}
5376
5377
5378/*********************************************************************************************************************************
5379* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
5380*********************************************************************************************************************************/
5381
5382#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
5383 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
5384
5385#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
5386 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
5387
5388#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
5389 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
5390
5391
5392/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
5393 * IEM_MC_FETCH_SREG_ZX_U64. */
5394DECL_INLINE_THROW(uint32_t)
5395iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
5396{
5397 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5398 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
5399 Assert(iSReg < X86_SREG_COUNT);
5400
5401 /*
5402 * For now, we will not create a shadow copy of a selector. The rationale
5403 * is that since we do not recompile the popping and loading of segment
5404 * registers, and since the IEM_MC_FETCH_SREG_U* MCs are only used for
5405 * pushing and moving to registers, there is only a small chance that the
5406 * shadow copy will be accessed again before the register is reloaded. One
5407 * scenario would be nested calls in 16-bit code, but I doubt it's worth
5408 * the extra register pressure atm.
5409 *
5410 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
5411 * and iemNativeVarRegisterAcquire for a load scenario. We only have the
5412 * store scenario covered at present (r160730).
5413 */
5414 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5415 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
5416 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
5417 iemNativeVarRegisterRelease(pReNative, idxDstVar);
5418 return off;
5419}
5420
5421
5422
5423/*********************************************************************************************************************************
5424* Register references. *
5425*********************************************************************************************************************************/
5426
5427#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
5428 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
5429
5430#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
5431 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
5432
5433/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
5434DECL_INLINE_THROW(uint32_t)
5435iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
5436{
5437 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
5438 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5439 Assert(iGRegEx < 20);
5440
5441 if (iGRegEx < 16)
5442 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
5443 else
5444 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
5445
5446 /* If we've delayed writing back the register value, flush it now. */
5447 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
5448
5449 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5450 if (!fConst)
5451 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
5452
5453 return off;
5454}
5455
5456#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
5457 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
5458
5459#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
5460 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
5461
5462#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
5463 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
5464
5465#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
5466 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
5467
5468#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
5469 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
5470
5471#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
5472 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
5473
5474#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
5475 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
5476
5477#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
5478 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
5479
5480#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
5481 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
5482
5483#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
5484 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
5485
5486/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
5487DECL_INLINE_THROW(uint32_t)
5488iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
5489{
5490 Assert(iGReg < 16);
5491 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
5492 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5493
5494 /* If we've delayed writing back the register value, flush it now. */
5495 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
5496
5497 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5498 if (!fConst)
5499 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
5500
5501 return off;
5502}
5503
5504
5505#undef IEM_MC_REF_EFLAGS /* should not be used. */
5506#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
5507 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5508 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
5509
5510/** Handles IEM_MC_REF_EFLAGS. */
5511DECL_INLINE_THROW(uint32_t)
5512iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
5513{
5514 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
5515 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5516
5517#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5518 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5519
5520 /* Updating the skipping according to the outputs is a little early, but
5521 we don't have any other hooks for references atm. */
5522 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5523 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5524 else if (fEflOutput & X86_EFL_STATUS_BITS)
5525 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5526 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5527#else
5528 RT_NOREF(fEflInput, fEflOutput);
5529#endif
5530
5531 /* If we've delayed writing back the register value, flush it now. */
5532 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
5533
5534 /* If there is a shadow copy of guest EFLAGS, flush it now. */
5535 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
5536
5537 return off;
5538}
5539
5540
5541/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
5542 * different code from the threaded recompiler, it might be helpful. For now
5543 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
5544#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
5545
5546
5547#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
5548 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
5549
5550#define IEM_MC_REF_XREG_XMM(a_puXmmDst, a_iXReg) \
5551 off = iemNativeEmitRefXregXxx(pReNative, off, a_puXmmDst, a_iXReg, false /*fConst*/)
5552
5553#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
5554 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
5555
5556#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
5557 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
5558
5559#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5560/* Just being paranoid here. */
5561# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
5562AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
5563AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
5564AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
5565AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
5566# endif
5567AssertCompileMemberOffset(X86XMMREG, au64, 0);
5568AssertCompileMemberOffset(X86XMMREG, au32, 0);
5569AssertCompileMemberOffset(X86XMMREG, ar64, 0);
5570AssertCompileMemberOffset(X86XMMREG, ar32, 0);
5571
5572# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
5573 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
5574# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
5575 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
5576# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
5577 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
5578# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
5579 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
5580#endif
5581
5582/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
5583DECL_INLINE_THROW(uint32_t)
5584iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
5585{
5586 Assert(iXReg < 16);
5587 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
5588 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
5589
5590 /* If we've delayed writing back the register value, flush it now. */
5591 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
5592
5593#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5594 /* If it's not a const reference we need to flush the shadow copy of the register now. */
5595 if (!fConst)
5596 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
5597#else
5598 RT_NOREF(fConst);
5599#endif
5600
5601 return off;
5602}
5603
5604
5605
5606/*********************************************************************************************************************************
5607* Effective Address Calculation *
5608*********************************************************************************************************************************/
5609#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
5610 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
5611
5612/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
5613 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
5614DECL_INLINE_THROW(uint32_t)
5615iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5616 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
5617{
5618 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5619
5620 /*
5621 * Handle the disp16 form with no registers first.
5622 *
5623 * Convert to an immediate value, as that'll delay the register allocation
5624 * and assignment till the memory access / call / whatever and we can use
5625 * a more appropriate register (or none at all).
5626 */
5627 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
5628 {
5629 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
5630 return off;
5631 }
5632
5633 /* Determine the displacement. */
5634 uint16_t u16EffAddr;
5635 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5636 {
5637 case 0: u16EffAddr = 0; break;
5638 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
5639 case 2: u16EffAddr = u16Disp; break;
5640 default: AssertFailedStmt(u16EffAddr = 0);
5641 }
5642
5643 /* Determine the registers involved. */
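    /* This is the classic 16-bit ModR/M addressing table (mod != 3):
           rm=0: [BX+SI]   rm=1: [BX+DI]   rm=2: [BP+SI]   rm=3: [BP+DI]
           rm=4: [SI]      rm=5: [DI]      rm=6: [BP]*     rm=7: [BX]
       (*) rm=6 with mod=0 is the pure disp16 form handled at the top. */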
5644 uint8_t idxGstRegBase;
5645 uint8_t idxGstRegIndex;
5646 switch (bRm & X86_MODRM_RM_MASK)
5647 {
5648 case 0:
5649 idxGstRegBase = X86_GREG_xBX;
5650 idxGstRegIndex = X86_GREG_xSI;
5651 break;
5652 case 1:
5653 idxGstRegBase = X86_GREG_xBX;
5654 idxGstRegIndex = X86_GREG_xDI;
5655 break;
5656 case 2:
5657 idxGstRegBase = X86_GREG_xBP;
5658 idxGstRegIndex = X86_GREG_xSI;
5659 break;
5660 case 3:
5661 idxGstRegBase = X86_GREG_xBP;
5662 idxGstRegIndex = X86_GREG_xDI;
5663 break;
5664 case 4:
5665 idxGstRegBase = X86_GREG_xSI;
5666 idxGstRegIndex = UINT8_MAX;
5667 break;
5668 case 5:
5669 idxGstRegBase = X86_GREG_xDI;
5670 idxGstRegIndex = UINT8_MAX;
5671 break;
5672 case 6:
5673 idxGstRegBase = X86_GREG_xBP;
5674 idxGstRegIndex = UINT8_MAX;
5675 break;
5676#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
5677 default:
5678#endif
5679 case 7:
5680 idxGstRegBase = X86_GREG_xBX;
5681 idxGstRegIndex = UINT8_MAX;
5682 break;
5683 }
5684
5685 /*
5686 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
5687 */
5688 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5689 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5690 kIemNativeGstRegUse_ReadOnly);
5691 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
5692 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5693 kIemNativeGstRegUse_ReadOnly)
5694 : UINT8_MAX;
5695#ifdef RT_ARCH_AMD64
5696 if (idxRegIndex == UINT8_MAX)
5697 {
5698 if (u16EffAddr == 0)
5699 {
5700 /* movzx ret, base */
5701 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
5702 }
5703 else
5704 {
5705 /* lea ret32, [base64 + disp32] */
5706 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
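            /* Encoding note: a base of r12 shares its low three ModR/M bits
               with rsp (the SIB escape), so it needs an explicit SIB byte with
               "no index"; that is what the X86_GREG_x12 check below is for. */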
5707 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5708 if (idxRegRet >= 8 || idxRegBase >= 8)
5709 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
5710 pbCodeBuf[off++] = 0x8d;
5711 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5712 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
5713 else
5714 {
5715 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
5716 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5717 }
5718 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
5719 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
5720 pbCodeBuf[off++] = 0;
5721 pbCodeBuf[off++] = 0;
5722 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5723
5724 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
5725 }
5726 }
5727 else
5728 {
5729 /* lea ret32, [index64 + base64 (+ disp32)] */
5730 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5731 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5732 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5733 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5734 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5735 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5736 pbCodeBuf[off++] = 0x8d;
5737 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
5738 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5739 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
5740 if (bMod == X86_MOD_MEM4)
5741 {
5742 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
5743 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
5744 pbCodeBuf[off++] = 0;
5745 pbCodeBuf[off++] = 0;
5746 }
5747 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5748 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
5749 }
5750
5751#elif defined(RT_ARCH_ARM64)
5752 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
5753 if (u16EffAddr == 0)
5754 {
5755 if (idxRegIndex == UINT8_MAX)
5756 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
5757 else
5758 {
5759 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
5760 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
5761 }
5762 }
5763 else
5764 {
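        /* ARM64 ADD/SUB with an immediate only takes a 12-bit unsigned value
           (0..4095), hence the three-way split: small positive displacements
           use ADD #imm12, small negative ones use SUB #imm12, and everything
           else is materialized with MOVZ first and then added. */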
5765 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
5766 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
5767 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
5768 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
5769 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
5770 else
5771 {
5772 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
5773 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
5774 }
5775 if (idxRegIndex != UINT8_MAX)
5776 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
5777 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
5778 }
5779
5780#else
5781# error "port me"
5782#endif
5783
5784 if (idxRegIndex != UINT8_MAX)
5785 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5786 iemNativeRegFreeTmp(pReNative, idxRegBase);
5787 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5788 return off;
5789}
5790
5791
5792#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
5793 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
5794
5795/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
5796 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
5797DECL_INLINE_THROW(uint32_t)
5798iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
5799 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
5800{
5801 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
5802
5803 /*
5804 * Handle the disp32 form with no registers first.
5805 *
5806 * Convert to an immediate value, as that'll delay the register allocation
5807 * and assignment till the memory access / call / whatever and we can use
5808 * a more appropriate register (or none at all).
5809 */
5810 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
5811 {
5812 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
5813 return off;
5814 }
5815
5816 /* Calculate the fixed displacement (more on this further down for SIB.B=4 and SIB.B=5). */
5817 uint32_t u32EffAddr = 0;
5818 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
5819 {
5820 case 0: break;
5821 case 1: u32EffAddr = (int8_t)u32Disp; break;
5822 case 2: u32EffAddr = u32Disp; break;
5823 default: AssertFailed();
5824 }
5825
5826 /* Get the register (or SIB) value. */
5827 uint8_t idxGstRegBase = UINT8_MAX;
5828 uint8_t idxGstRegIndex = UINT8_MAX;
5829 uint8_t cShiftIndex = 0;
5830 switch (bRm & X86_MODRM_RM_MASK)
5831 {
5832 case 0: idxGstRegBase = X86_GREG_xAX; break;
5833 case 1: idxGstRegBase = X86_GREG_xCX; break;
5834 case 2: idxGstRegBase = X86_GREG_xDX; break;
5835 case 3: idxGstRegBase = X86_GREG_xBX; break;
5836 case 4: /* SIB */
5837 {
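            /* SIB byte layout: scale in bits 7:6, index in bits 5:3, base in
               bits 2:0.  An index of 4 means "no index", and base 5 with mod=0
               means disp32 without a base register (see the base switch below). */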
5838 /* index w/ scaling. */
5839 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
5840 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
5841 {
5842 case 0: idxGstRegIndex = X86_GREG_xAX; break;
5843 case 1: idxGstRegIndex = X86_GREG_xCX; break;
5844 case 2: idxGstRegIndex = X86_GREG_xDX; break;
5845 case 3: idxGstRegIndex = X86_GREG_xBX; break;
5846 case 4: cShiftIndex = 0; /*no index*/ break;
5847 case 5: idxGstRegIndex = X86_GREG_xBP; break;
5848 case 6: idxGstRegIndex = X86_GREG_xSI; break;
5849 case 7: idxGstRegIndex = X86_GREG_xDI; break;
5850 }
5851
5852 /* base */
5853 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
5854 {
5855 case 0: idxGstRegBase = X86_GREG_xAX; break;
5856 case 1: idxGstRegBase = X86_GREG_xCX; break;
5857 case 2: idxGstRegBase = X86_GREG_xDX; break;
5858 case 3: idxGstRegBase = X86_GREG_xBX; break;
5859 case 4:
5860 idxGstRegBase = X86_GREG_xSP;
5861 u32EffAddr += uSibAndRspOffset >> 8;
5862 break;
5863 case 5:
5864 if ((bRm & X86_MODRM_MOD_MASK) != 0)
5865 idxGstRegBase = X86_GREG_xBP;
5866 else
5867 {
5868 Assert(u32EffAddr == 0);
5869 u32EffAddr = u32Disp;
5870 }
5871 break;
5872 case 6: idxGstRegBase = X86_GREG_xSI; break;
5873 case 7: idxGstRegBase = X86_GREG_xDI; break;
5874 }
5875 break;
5876 }
5877 case 5: idxGstRegBase = X86_GREG_xBP; break;
5878 case 6: idxGstRegBase = X86_GREG_xSI; break;
5879 case 7: idxGstRegBase = X86_GREG_xDI; break;
5880 }
5881
5882 /*
5883 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
5884 * the start of the function.
5885 */
5886 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
5887 {
5888 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
5889 return off;
5890 }
5891
5892 /*
5893 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
5894 */
5895 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
5896 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
5897 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
5898 kIemNativeGstRegUse_ReadOnly);
5899 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
5900 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
5901 kIemNativeGstRegUse_ReadOnly);
5902
5903 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
5904 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
5905 {
5906 idxRegBase = idxRegIndex;
5907 idxRegIndex = UINT8_MAX;
5908 }
5909
5910#ifdef RT_ARCH_AMD64
5911 if (idxRegIndex == UINT8_MAX)
5912 {
5913 if (u32EffAddr == 0)
5914 {
5915 /* mov ret, base */
5916 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5917 }
5918 else
5919 {
5920 /* lea ret32, [base64 + disp32] */
5921 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
5922 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5923 if (idxRegRet >= 8 || idxRegBase >= 8)
5924 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
5925 pbCodeBuf[off++] = 0x8d;
5926 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5927 if (idxRegBase != X86_GREG_x12 /*SIB*/)
5928 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
5929 else
5930 {
5931 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5932 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
5933 }
5934 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5935 if (bMod == X86_MOD_MEM4)
5936 {
5937 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5938 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5939 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5940 }
5941 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5942 }
5943 }
5944 else
5945 {
5946 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5947 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5948 if (idxRegBase == UINT8_MAX)
5949 {
5950 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
5951 if (idxRegRet >= 8 || idxRegIndex >= 8)
5952 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5953 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5954 pbCodeBuf[off++] = 0x8d;
5955 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
5956 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
5957 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5958 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5959 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5960 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5961 }
5962 else
5963 {
5964 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
5965 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5966 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5967 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5968 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
5969 pbCodeBuf[off++] = 0x8d;
5970 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
5971 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5972 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5973 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
5974 if (bMod != X86_MOD_MEM0)
5975 {
5976 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5977 if (bMod == X86_MOD_MEM4)
5978 {
5979 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5980 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5981 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5982 }
5983 }
5984 }
5985 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5986 }
5987
5988#elif defined(RT_ARCH_ARM64)
5989 if (u32EffAddr == 0)
5990 {
5991 if (idxRegIndex == UINT8_MAX)
5992 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
5993 else if (idxRegBase == UINT8_MAX)
5994 {
5995 if (cShiftIndex == 0)
5996 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
5997 else
5998 {
5999 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6000 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
6001 }
6002 }
6003 else
6004 {
6005 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6006 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6007 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6008 }
6009 }
6010 else
6011 {
6012 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
6013 {
6014 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6015 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
6016 }
6017 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
6018 {
6019 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6020 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6021 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
6022 }
6023 else
6024 {
6025 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
6026 if (idxRegBase != UINT8_MAX)
6027 {
6028 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6029 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6030 }
6031 }
6032 if (idxRegIndex != UINT8_MAX)
6033 {
6034 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6035 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6036 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6037 }
6038 }
6039
6040#else
6041# error "port me"
6042#endif
6043
6044 if (idxRegIndex != UINT8_MAX)
6045 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6046 if (idxRegBase != UINT8_MAX)
6047 iemNativeRegFreeTmp(pReNative, idxRegBase);
6048 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6049 return off;
6050}
6051
6052
6053#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6054 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6055 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6056
6057#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6058 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6059 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6060
6061#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6062 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6063 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
6064
6065/**
6066 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
6067 *
6068 * @returns New off.
6069 * @param pReNative The native recompiler state.
6070 * @param off The current code buffer offset.
6071 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
6072 * bit 4 to REX.X. The two bits are part of the
6073 * REG sub-field, which isn't needed in this
6074 * function.
6075 * @param uSibAndRspOffset Two parts:
6076 * - The first 8 bits make up the SIB byte.
6077 * - The next 8 bits are the fixed RSP/ESP offset
6078 * in case of a pop [xSP].
6079 * @param u32Disp The displacement byte/word/dword, if any.
6080 * @param cbInstr The size of the fully decoded instruction. Used
6081 * for RIP relative addressing.
6082 * @param idxVarRet The result variable number.
6083 * @param f64Bit Whether to use a 64-bit or 32-bit address size
6084 * when calculating the address.
6085 *
6086 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
6087 */
6088DECL_INLINE_THROW(uint32_t)
6089iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
6090 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
6091{
6092 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6093
6094 /*
6095 * Special case the rip + disp32 form first.
6096 */
6097 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6098 {
6099#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6100 /* Need to take the current PC offset into account for the displacement; no need to flush here
6101 * as the PC is only read and no branching or helper calls are involved. */
6102 u32Disp += pReNative->Core.offPc;
6103#endif
6104
6105 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6106 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
6107 kIemNativeGstRegUse_ReadOnly);
6108#ifdef RT_ARCH_AMD64
6109 if (f64Bit)
6110 {
6111 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
6112 if ((int32_t)offFinalDisp == offFinalDisp)
6113 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
6114 else
6115 {
6116 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
6117 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
6118 }
6119 }
6120 else
6121 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
6122
6123#elif defined(RT_ARCH_ARM64)
6124 if (f64Bit)
6125 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
6126 (int64_t)(int32_t)u32Disp + cbInstr);
6127 else
6128 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
6129 (int32_t)u32Disp + cbInstr);
6130
6131#else
6132# error "Port me!"
6133#endif
6134 iemNativeRegFreeTmp(pReNative, idxRegPc);
6135 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6136 return off;
6137 }
6138
6139 /* Calculate the fixed displacement (more on this further down for SIB.B=4 and SIB.B=5). */
6140 int64_t i64EffAddr = 0;
6141 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6142 {
6143 case 0: break;
6144 case 1: i64EffAddr = (int8_t)u32Disp; break;
6145 case 2: i64EffAddr = (int32_t)u32Disp; break;
6146 default: AssertFailed();
6147 }
6148
6149 /* Get the register (or SIB) value. */
6150 uint8_t idxGstRegBase = UINT8_MAX;
6151 uint8_t idxGstRegIndex = UINT8_MAX;
6152 uint8_t cShiftIndex = 0;
6153 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6154 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6155 else /* SIB: */
6156 {
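        /* Reminder: uSibAndRspOffset carries the raw SIB byte in bits 7:0 and,
           for the 'pop [xSP+...]' case, the fixed RSP/ESP bias in bits 15:8
           (presumably +8 for a 64-bit pop, as POP uses the incremented RSP
           when computing the destination address). */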
6157 /* index w/ scaling. */
6158 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6159 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6160 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6161 if (idxGstRegIndex == 4)
6162 {
6163 /* no index */
6164 cShiftIndex = 0;
6165 idxGstRegIndex = UINT8_MAX;
6166 }
6167
6168 /* base */
6169 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6170 if (idxGstRegBase == 4)
6171 {
6172 /* pop [rsp] hack */
6173 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
6174 }
6175 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6176 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6177 {
6178 /* mod=0 and base=5 -> disp32, no base reg. */
6179 Assert(i64EffAddr == 0);
6180 i64EffAddr = (int32_t)u32Disp;
6181 idxGstRegBase = UINT8_MAX;
6182 }
6183 }
6184
6185 /*
6186 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6187 * the start of the function.
6188 */
6189 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6190 {
6191 if (f64Bit)
6192 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6193 else
6194 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6195 return off;
6196 }
6197
6198 /*
6199 * Now emit code that calculates:
6200 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6201 * or if !f64Bit:
6202 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6203 */
6204 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6205 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6206 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6207 kIemNativeGstRegUse_ReadOnly);
6208 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6209 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6210 kIemNativeGstRegUse_ReadOnly);
6211
6212 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6213 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6214 {
6215 idxRegBase = idxRegIndex;
6216 idxRegIndex = UINT8_MAX;
6217 }
6218
6219#ifdef RT_ARCH_AMD64
6220 uint8_t bFinalAdj;
6221 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6222 bFinalAdj = 0; /* likely */
6223 else
6224 {
6225 /* pop [rsp] with a problematic disp32 value. Split out the
6226 RSP offset and add it separately afterwards (bFinalAdj). */
6227 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
6228 Assert(idxGstRegBase == X86_GREG_xSP);
6229 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6230 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6231 Assert(bFinalAdj != 0);
6232 i64EffAddr -= bFinalAdj;
6233 Assert((int32_t)i64EffAddr == i64EffAddr);
6234 }
6235 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6236//pReNative->pInstrBuf[off++] = 0xcc;
6237
6238 if (idxRegIndex == UINT8_MAX)
6239 {
6240 if (u32EffAddr == 0)
6241 {
6242 /* mov ret, base */
6243 if (f64Bit)
6244 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6245 else
6246 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6247 }
6248 else
6249 {
6250 /* lea ret, [base + disp32] */
6251 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6252 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6253 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
6254 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6255 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6256 | (f64Bit ? X86_OP_REX_W : 0);
6257 pbCodeBuf[off++] = 0x8d;
6258 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6259 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6260 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6261 else
6262 {
6263 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6264 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6265 }
6266 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6267 if (bMod == X86_MOD_MEM4)
6268 {
6269 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6270 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6271 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6272 }
6273 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6274 }
6275 }
6276 else
6277 {
6278 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6279 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6280 if (idxRegBase == UINT8_MAX)
6281 {
6282 /* lea ret, [(index64 << cShiftIndex) + disp32] */
6283 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
6284 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6285 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6286 | (f64Bit ? X86_OP_REX_W : 0);
6287 pbCodeBuf[off++] = 0x8d;
6288 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6289 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6290 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6291 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6292 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6293 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6294 }
6295 else
6296 {
6297 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6298 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6299 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6300 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6301 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6302 | (f64Bit ? X86_OP_REX_W : 0);
6303 pbCodeBuf[off++] = 0x8d;
6304 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6305 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6306 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6307 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6308 if (bMod != X86_MOD_MEM0)
6309 {
6310 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6311 if (bMod == X86_MOD_MEM4)
6312 {
6313 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6314 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6315 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6316 }
6317 }
6318 }
6319 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6320 }
6321
6322 if (!bFinalAdj)
6323 { /* likely */ }
6324 else
6325 {
6326 Assert(f64Bit);
6327 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
6328 }
6329
6330#elif defined(RT_ARCH_ARM64)
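    /* The ARM64 variant picks the cheapest encoding: a single MOV/ADD(+shifted index)/LSL when the
       displacement is zero, one ADD/SUB when the displacement fits the 12-bit immediate, and
       otherwise a full immediate load followed by register additions. */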
6331 if (i64EffAddr == 0)
6332 {
6333 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6334 if (idxRegIndex == UINT8_MAX)
6335 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
6336 else if (idxRegBase != UINT8_MAX)
6337 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6338 f64Bit, false /*fSetFlags*/, cShiftIndex);
6339 else
6340 {
6341 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
6342 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
6343 }
6344 }
6345 else
6346 {
6347 if (f64Bit)
6348 { /* likely */ }
6349 else
6350 i64EffAddr = (int32_t)i64EffAddr;
6351
6352 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
6353 {
6354 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6355 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
6356 }
6357 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
6358 {
6359 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6360 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
6361 }
6362 else
6363 {
6364 if (f64Bit)
6365 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
6366 else
6367 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
6368 if (idxRegBase != UINT8_MAX)
6369 {
6370 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6371 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
6372 }
6373 }
6374 if (idxRegIndex != UINT8_MAX)
6375 {
6376 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6377 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6378 f64Bit, false /*fSetFlags*/, cShiftIndex);
6379 }
6380 }
6381
6382#else
6383# error "port me"
6384#endif
6385
6386 if (idxRegIndex != UINT8_MAX)
6387 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6388 if (idxRegBase != UINT8_MAX)
6389 iemNativeRegFreeTmp(pReNative, idxRegBase);
6390 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6391 return off;
6392}
6393
6394
6395/*********************************************************************************************************************************
6396* Memory fetches and stores common *
6397*********************************************************************************************************************************/
6398
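/** Memory access kinds handled by iemNativeEmitMemFetchStoreDataCommon: a plain store, a plain
 * fetch, and fetch variants that zero-extend (Zx) or sign-extend (Sx) the loaded value to the
 * indicated destination width. */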
6399typedef enum IEMNATIVEMITMEMOP
6400{
6401 kIemNativeEmitMemOp_Store = 0,
6402 kIemNativeEmitMemOp_Fetch,
6403 kIemNativeEmitMemOp_Fetch_Zx_U16,
6404 kIemNativeEmitMemOp_Fetch_Zx_U32,
6405 kIemNativeEmitMemOp_Fetch_Zx_U64,
6406 kIemNativeEmitMemOp_Fetch_Sx_U16,
6407 kIemNativeEmitMemOp_Fetch_Sx_U32,
6408 kIemNativeEmitMemOp_Fetch_Sx_U64
6409} IEMNATIVEMITMEMOP;
6410
6411/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
6412 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
6413 * (with iSegReg = UINT8_MAX), as well as the 128-bit and 256-bit SIMD register allocator variants. */
6414DECL_INLINE_THROW(uint32_t)
6415iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
6416 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAlignMaskAndCtl, IEMNATIVEMITMEMOP enmOp,
6417 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
6418{
6419 /*
6420 * Assert sanity.
6421 */
6422 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
6423 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
6424 Assert( enmOp != kIemNativeEmitMemOp_Store
6425 || pVarValue->enmKind == kIemNativeVarKind_Immediate
6426 || pVarValue->enmKind == kIemNativeVarKind_Stack);
6427 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
6428 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
6429 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
6430 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
6431 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
6432 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
6433#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6434 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
6435 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
6436#else
6437 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
6438#endif
6439 Assert(!(fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)));
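    /* The low 8 bits of fAlignMaskAndCtl are the alignment mask (typically cbMem - 1), while the
       IEM_MEMMAP_F_ALIGN_GP and IEM_MEMMAP_F_ALIGN_SSE bits select the stricter alignment checking
       used by the aligned SSE/AVX accessors (see the strict-build asserts below). */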
6440 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6441#ifdef VBOX_STRICT
6442 if (iSegReg == UINT8_MAX)
6443 {
6444 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6445 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6446 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6447 switch (cbMem)
6448 {
6449 case 1:
6450 Assert( pfnFunction
6451 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
6452 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6453 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6454 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6455 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
6456 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
6457 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
6458 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
6459 : UINT64_C(0xc000b000a0009000) ));
6460 Assert(!fAlignMaskAndCtl);
6461 break;
6462 case 2:
6463 Assert( pfnFunction
6464 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
6465 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6466 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6467 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
6468 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
6469 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
6470 : UINT64_C(0xc000b000a0009000) ));
6471 Assert(fAlignMaskAndCtl <= 1);
6472 break;
6473 case 4:
6474 Assert( pfnFunction
6475 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
6476 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
6477 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
6478 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
6479 : UINT64_C(0xc000b000a0009000) ));
6480 Assert(fAlignMaskAndCtl <= 3);
6481 break;
6482 case 8:
6483 Assert( pfnFunction
6484 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
6485 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
6486 : UINT64_C(0xc000b000a0009000) ));
6487 Assert(fAlignMaskAndCtl <= 7);
6488 break;
6489#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6490 case sizeof(RTUINT128U):
6491 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6492 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
6493 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
6494 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
6495 || ( enmOp == kIemNativeEmitMemOp_Store
6496 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
6497 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
6498 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
6499 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
6500 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
6501 : fAlignMaskAndCtl <= 15);
6502 break;
6503 case sizeof(RTUINT256U):
6504 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6505 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
6506 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
6507 || ( enmOp == kIemNativeEmitMemOp_Store
6508 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
6509 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
6510 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx
6511 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx
6512 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
6513 : fAlignMaskAndCtl <= 31);
6514 break;
6515#endif
6516 }
6517 }
6518 else
6519 {
6520 Assert(iSegReg < 6);
6521 switch (cbMem)
6522 {
6523 case 1:
6524 Assert( pfnFunction
6525 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
6526 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
6527 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6528 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6529 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
6530 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
6531 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
6532 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
6533 : UINT64_C(0xc000b000a0009000) ));
6534 Assert(!fAlignMaskAndCtl);
6535 break;
6536 case 2:
6537 Assert( pfnFunction
6538 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
6539 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
6540 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
6541 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
6542 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
6543 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
6544 : UINT64_C(0xc000b000a0009000) ));
6545 Assert(fAlignMaskAndCtl <= 1);
6546 break;
6547 case 4:
6548 Assert( pfnFunction
6549 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
6550 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
6551 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
6552 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
6553 : UINT64_C(0xc000b000a0009000) ));
6554 Assert(fAlignMaskAndCtl <= 3);
6555 break;
6556 case 8:
6557 Assert( pfnFunction
6558 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
6559 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
6560 : UINT64_C(0xc000b000a0009000) ));
6561 Assert(fAlignMaskAndCtl <= 7);
6562 break;
6563#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6564 case sizeof(RTUINT128U):
6565 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6566 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
6567 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
6568 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
6569 || ( enmOp == kIemNativeEmitMemOp_Store
6570 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
6571 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
6572 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
6573 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
6574 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
6575 : fAlignMaskAndCtl <= 15);
6576 break;
6577 case sizeof(RTUINT256U):
6578 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
6579 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
6580 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
6581 || ( enmOp == kIemNativeEmitMemOp_Store
6582 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
6583 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
6584 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx
6585 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx
6586 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
6587 : fAlignMaskAndCtl <= 31);
6588 break;
6589#endif
6590 }
6591 }
6592#endif
6593
6594#ifdef VBOX_STRICT
6595 /*
6596 * Check that the fExec flags we've got make sense.
6597 */
6598 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6599#endif
6600
6601 /*
6602 * To keep things simple we have to commit any pending writes first as we
6603 * may end up making calls.
6604 */
6605 /** @todo we could postpone this till we make the call and reload the
6606 * registers after returning from the call. Not sure if that's sensible or
6607 * not, though. */
6608#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6609 off = iemNativeRegFlushPendingWrites(pReNative, off);
6610#else
6611 /* The program counter is treated differently for now. */
6612 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
6613#endif
6614
6615#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6616 /*
6617 * Move/spill/flush stuff out of call-volatile registers.
6618 * This is the easy way out. We could contain this to the tlb-miss branch
6619 * by saving and restoring active stuff here.
6620 */
6621 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
6622#endif
6623
6624 /*
6625 * Define labels and allocate the result register (trying for the return
6626 * register if we can).
6627 */
6628 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6629#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6630 uint8_t idxRegValueFetch = UINT8_MAX;
6631
6632 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6633 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6634 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
6635 else
6636 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6637 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6638 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
6639 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
6640#else
6641 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
6642 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
6643 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
6644 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
6645#endif
6646 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
6647
6648#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6649 uint8_t idxRegValueStore = UINT8_MAX;
6650
6651 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6652 idxRegValueStore = !TlbState.fSkip
6653 && enmOp == kIemNativeEmitMemOp_Store
6654 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6655 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6656 : UINT8_MAX;
6657 else
6658 idxRegValueStore = !TlbState.fSkip
6659 && enmOp == kIemNativeEmitMemOp_Store
6660 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6661 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6662 : UINT8_MAX;
6663
6664#else
6665 uint8_t const idxRegValueStore = !TlbState.fSkip
6666 && enmOp == kIemNativeEmitMemOp_Store
6667 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6668 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
6669 : UINT8_MAX;
6670#endif
6671 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
6672 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6673 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6674 : UINT32_MAX;
6675
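    /*
     * The emitted code is laid out as: a jump to the TlbLookup label, the TlbMiss code (helper
     * call) ending with a jump to TlbDone, the TlbLookup code doing the inline lookup and the
     * actual load/store, and finally the TlbDone label.  When TlbState.fSkip is set, only the
     * helper-call path is emitted and execution simply falls through.
     */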
6676 /*
6677 * Jump to the TLB lookup code.
6678 */
6679 if (!TlbState.fSkip)
6680 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6681
6682 /*
6683 * TlbMiss:
6684 *
6685 * Call helper to do the fetching.
6686 * We flush all guest register shadow copies here.
6687 */
6688 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
6689
6690#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6691 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6692#else
6693 RT_NOREF(idxInstr);
6694#endif
6695
6696#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6697 if (pReNative->Core.offPc)
6698 {
6699 /*
6700 * Update the program counter but restore it at the end of the TlbMiss branch.
6701         * This should allow delaying more program counter updates for the TlbLookup and hit paths,
6702         * which are hopefully much more frequent, reducing the number of memory accesses.
6703 */
6704 /* Allocate a temporary PC register. */
6705 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6706
6707 /* Perform the addition and store the result. */
6708 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6709 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6710
6711 /* Free and flush the PC register. */
6712 iemNativeRegFreeTmp(pReNative, idxPcReg);
6713 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
6714 }
6715#endif
6716
6717#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6718 /* Save variables in volatile registers. */
6719 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6720 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
6721 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
6722 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6723#endif
6724
6725 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
6726 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
6727#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6728 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
6729 {
6730 /*
6731 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
6732 *
6733      * @note A register was assigned to the variable for the TlbLookup case above and it must not
6734      *       be freed here, or the value currently in that register will not be synced into the stack
6735      *       slot further down the road because the variable would no longer know it has a register assigned.
6736 *
6737 * @note For loads it is not required to sync what is in the assigned register with the stack slot
6738 * as it will be overwritten anyway.
6739 */
6740 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
6741 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
6742 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
6743 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
6744 }
6745 else
6746#endif
6747 if (enmOp == kIemNativeEmitMemOp_Store)
6748 {
6749 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
6750 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
6751#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6752 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6753#else
6754 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
6755 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
6756#endif
6757 }
6758
6759 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
6760 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
6761#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6762 fVolGregMask);
6763#else
6764 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
6765#endif
6766
6767 if (iSegReg != UINT8_MAX)
6768 {
6769 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
6770 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
6771 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
6772 }
6773
6774 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6775 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6776
6777 /* Done setting up parameters, make the call. */
6778 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6779
6780 /*
6781 * Put the result in the right register if this is a fetch.
6782 */
6783 if (enmOp != kIemNativeEmitMemOp_Store)
6784 {
6785#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6786 if ( cbMem == sizeof(RTUINT128U)
6787 || cbMem == sizeof(RTUINT256U))
6788 {
6789 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
6790
6791 /* Sync the value on the stack with the host register assigned to the variable. */
6792 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
6793 }
6794 else
6795#endif
6796 {
6797 Assert(idxRegValueFetch == pVarValue->idxReg);
6798 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
6799 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
6800 }
6801 }
6802
6803#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6804 /* Restore variables and guest shadow registers to volatile registers. */
6805 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6806 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6807#endif
6808
6809#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6810 if (pReNative->Core.offPc)
6811 {
6812 /*
6813 * Time to restore the program counter to its original value.
6814 */
6815 /* Allocate a temporary PC register. */
6816 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
6817 kIemNativeGstRegUse_ForUpdate);
6818
6819 /* Restore the original value. */
6820 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6821 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
6822
6823 /* Free and flush the PC register. */
6824 iemNativeRegFreeTmp(pReNative, idxPcReg);
6825 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
6826 }
6827#endif
6828
6829#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6830 if (!TlbState.fSkip)
6831 {
6832 /* end of TlbMiss - Jump to the done label. */
6833 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6834 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6835
6836 /*
6837 * TlbLookup:
6838 */
6839 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl,
6840 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
6841 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
6842
6843 /*
6844 * Emit code to do the actual storing / fetching.
6845 */
6846 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
6847# ifdef IEM_WITH_TLB_STATISTICS
6848 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6849                                              enmOp == kIemNativeEmitMemOp_Store
6850                                              ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
6851                                              : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
6852# endif
6853 switch (enmOp)
6854 {
6855 case kIemNativeEmitMemOp_Store:
6856 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
6857 {
6858 switch (cbMem)
6859 {
6860 case 1:
6861 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6862 break;
6863 case 2:
6864 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6865 break;
6866 case 4:
6867 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6868 break;
6869 case 8:
6870 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6871 break;
6872#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6873 case sizeof(RTUINT128U):
6874 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6875 break;
6876 case sizeof(RTUINT256U):
6877 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
6878 break;
6879#endif
6880 default:
6881 AssertFailed();
6882 }
6883 }
6884 else
6885 {
6886 switch (cbMem)
6887 {
6888 case 1:
6889 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
6890 idxRegMemResult, TlbState.idxReg1);
6891 break;
6892 case 2:
6893 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
6894 idxRegMemResult, TlbState.idxReg1);
6895 break;
6896 case 4:
6897 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
6898 idxRegMemResult, TlbState.idxReg1);
6899 break;
6900 case 8:
6901 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
6902 idxRegMemResult, TlbState.idxReg1);
6903 break;
6904 default:
6905 AssertFailed();
6906 }
6907 }
6908 break;
6909
6910 case kIemNativeEmitMemOp_Fetch:
6911 case kIemNativeEmitMemOp_Fetch_Zx_U16:
6912 case kIemNativeEmitMemOp_Fetch_Zx_U32:
6913 case kIemNativeEmitMemOp_Fetch_Zx_U64:
6914 switch (cbMem)
6915 {
6916 case 1:
6917 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6918 break;
6919 case 2:
6920 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6921 break;
6922 case 4:
6923 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6924 break;
6925 case 8:
6926 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6927 break;
6928#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6929 case sizeof(RTUINT128U):
6930 /*
6931 * No need to sync back the register with the stack, this is done by the generic variable handling
6932 * code if there is a register assigned to a variable and the stack must be accessed.
6933 */
6934 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6935 break;
6936 case sizeof(RTUINT256U):
6937 /*
6938 * No need to sync back the register with the stack, this is done by the generic variable handling
6939 * code if there is a register assigned to a variable and the stack must be accessed.
6940 */
6941 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6942 break;
6943#endif
6944 default:
6945 AssertFailed();
6946 }
6947 break;
6948
6949 case kIemNativeEmitMemOp_Fetch_Sx_U16:
6950 Assert(cbMem == 1);
6951 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6952 break;
6953
6954 case kIemNativeEmitMemOp_Fetch_Sx_U32:
6955 Assert(cbMem == 1 || cbMem == 2);
6956 if (cbMem == 1)
6957 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6958 else
6959 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6960 break;
6961
6962 case kIemNativeEmitMemOp_Fetch_Sx_U64:
6963 switch (cbMem)
6964 {
6965 case 1:
6966 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6967 break;
6968 case 2:
6969 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6970 break;
6971 case 4:
6972 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
6973 break;
6974 default:
6975 AssertFailed();
6976 }
6977 break;
6978
6979 default:
6980 AssertFailed();
6981 }
6982
6983 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6984
6985 /*
6986 * TlbDone:
6987 */
6988 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6989
6990 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
6991
6992# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
6993 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
6994 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6995# endif
6996 }
6997#else
6998 RT_NOREF(fAlignMaskAndCtl, idxLabelTlbMiss);
6999#endif
7000
7001 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
7002 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7003 return off;
7004}
7005
7006
7007
7008/*********************************************************************************************************************************
7009* Memory fetches (IEM_MEM_FETCH_XXX). *
7010*********************************************************************************************************************************/
7011
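/*
 * Each IEM_MC_FETCH_MEM_XXX statement below simply forwards to iemNativeEmitMemFetchStoreDataCommon
 * with the access size, the alignment mask/control (size - 1 for naturally aligned accesses, 0 for
 * bytes), the fetch kind (plain, zero- or sign-extending) and the matching TLB-miss helper; the
 * _FLAT_ variants pass UINT8_MAX instead of a segment register.
 */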
7012/* 8-bit segmented: */
7013#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
7014 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
7015 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7016 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7017
7018#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7019 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7020 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7021 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7022
7023#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7024 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7025 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7026 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7027
7028#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7029 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7030 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7031 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7032
7033#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7034 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7035 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7036 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7037
7038#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7039 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7040 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7041 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7042
7043#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7044 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7045 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7046 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7047
7048/* 16-bit segmented: */
7049#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7050 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7051 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7052 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7053
7054#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7055 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7056 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7057 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7058
7059#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7060 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7061 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7062 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7063
7064#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7065 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7066 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7067 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7068
7069#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7070 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7071 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7072 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7073
7074#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7075 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7076 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7077 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7078
7079
7080/* 32-bit segmented: */
7081#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7082 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7083 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7084 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7085
7086#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7087 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7088 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7089 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7090
7091#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7092 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7093 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7094 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7095
7096#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7097 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7098 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7099 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7100
7101#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
7102 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
7103 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7104 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7105
7106#define IEM_MC_FETCH_MEM_I16_DISP(a_i16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7107 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
7108 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7109 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7110
7111#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
7112 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
7113 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7114 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7115
7116#define IEM_MC_FETCH_MEM_I32_DISP(a_i32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7117 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
7118 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7119 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7120
7121#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
7122 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
7123 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7124 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7125
7126AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
7127#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
7128 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
7129 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7130 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7131
7132
7133/* 64-bit segmented: */
7134#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7135 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7136 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7137 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7138
7139AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
7140#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
7141 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
7142 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7143 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7144
7145
7146/* 8-bit flat: */
7147#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
7148 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
7149 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7150 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7151
7152#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
7153 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7154 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7155 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7156
7157#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
7158 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7159 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7160 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7161
7162#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
7163 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7164 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7165 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7166
7167#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
7168 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7169 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7170 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7171
7172#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
7173 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7174 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7175 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7176
7177#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7178 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7179 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7180 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7181
7182
7183/* 16-bit flat: */
7184#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7185 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7186 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7187 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7188
7189#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7190 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7191 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7192 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7193
7194#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
7195 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7196 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7197 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7198
7199#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
7200 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7201 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7202 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7203
7204#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
7205 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7206 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7207 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7208
7209#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
7210 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7211 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7212 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7213
7214/* 32-bit flat: */
7215#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7216 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7217 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7218 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7219
7220#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
7221 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7222 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7223 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7224
7225#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
7226 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7227 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7228 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7229
7230#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
7231 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7232 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7233 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7234
7235#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
7236 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7237 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7238 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7239
7240#define IEM_MC_FETCH_MEM_FLAT_I16_DISP(a_i16Dst, a_GCPtrMem, a_offDisp) \
7241 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7242 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7243 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7244
7245#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
7246 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7247 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7248 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7249
7250#define IEM_MC_FETCH_MEM_FLAT_I32_DISP(a_i32Dst, a_GCPtrMem, a_offDisp) \
7251 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7252 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7253 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7254
7255#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
7256 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
7257 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7258 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7259
7260#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
7261 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
7262 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7263 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7264
7265
7266/* 64-bit flat: */
7267#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7268 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7269 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7270 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7271
7272#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
7273 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
7274 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7275 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7276
7277#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
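/*
 * SSE/AVX sized accesses: the _ALIGN_SSE variants pass IEM_MEMMAP_F_ALIGN_GP and IEM_MEMMAP_F_ALIGN_SSE
 * on top of the 16-byte alignment mask (sizeof - 1), the _ALIGN_AVX variants pass IEM_MEMMAP_F_ALIGN_GP
 * with a 32-byte alignment mask, and the _NO_AC variants use the plain size - 1 mask, matching the
 * strict-build checks in iemNativeEmitMemFetchStoreDataCommon.
 */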
7278/* 128-bit segmented: */
7279#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
7280 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7281 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7282 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
7283
7284#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
7285 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7286 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7287 kIemNativeEmitMemOp_Fetch, \
7288 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7289
7290AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7291#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
7292 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, sizeof(X86XMMREG), \
7293 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7294 kIemNativeEmitMemOp_Fetch, \
7295 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7296
7297#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7298 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7299 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7300 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7301
7302#define IEM_MC_FETCH_MEM_XMM_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7303 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7304 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7305 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7306
7307
7308/* 128-bit flat: */
7309#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
7310 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7311 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7312 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
7313
7314#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
7315 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7316 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7317 kIemNativeEmitMemOp_Fetch, \
7318 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7319
7320#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
7321 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, sizeof(X86XMMREG), \
7322 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7323 kIemNativeEmitMemOp_Fetch, \
7324 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7325
7326#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
7327 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7328 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7329 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7330
7331#define IEM_MC_FETCH_MEM_FLAT_XMM_NO_AC(a_uXmmDst, a_GCPtrMem) \
7332 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
7333 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7334 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7335
7336/* 256-bit segmented: */
7337#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
7338 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7339 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7340 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7341
7342#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7343 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7344 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7345 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7346
7347#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
7348 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
7349 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7350 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7351
7352#define IEM_MC_FETCH_MEM_YMM_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7353 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7354 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7355 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7356
7357
7358/* 256-bit flat: */
7359#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
7360 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7361 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7362 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7363
7364#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
7365 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7366 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7367 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7368
7369#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
7370 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
7371 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7372 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7373
7374#define IEM_MC_FETCH_MEM_FLAT_YMM_NO_AC(a_uYmmDst, a_GCPtrMem) \
7375 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uYmmDst, UINT8_MAX, a_GCPtrMem, \
7376 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7377 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7378
7379#endif
7380
7381
7382/*********************************************************************************************************************************
7383* Memory stores (IEM_MEM_STORE_XXX). *
7384*********************************************************************************************************************************/
7385
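/*
 * The store statements reuse iemNativeEmitMemFetchStoreDataCommon with kIemNativeEmitMemOp_Store,
 * while the _CONST variants go through iemNativeEmitMemStoreConstDataCommon below, which wraps the
 * constant in a temporary const variable before invoking the same worker.
 */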
7386#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
7387 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
7388 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
7389 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7390
7391#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
7392 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
7393 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7394 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7395
7396#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
7397 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
7398 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
7399 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7400
7401#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
7402 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
7403 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
7404 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7405
7406
7407#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
7408 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
7409 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
7410 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7411
7412#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
7413 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
7414 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7415 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7416
7417#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
7418 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
7419 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
7420 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7421
7422#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
7423 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
7424 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
7425 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7426
7427
7428#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
7429 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
7430 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7431
7432#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
7433 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
7434 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
7435
7436#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
7437 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
7438 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
7439
7440#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
7441 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7442 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
7443
7444
7445#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
7446 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7447 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
7448
7449#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
7450 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7451 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
7452
7453#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
7454 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7455 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
7456
7457#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
7458 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7459 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
7460
7461/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
7462 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
7463DECL_INLINE_THROW(uint32_t)
7464iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
7465 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
7466{
7467 /*
7468 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
7469 * to do the grunt work.
7470 */
7471 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
7472 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
7473 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
7474 pfnFunction, idxInstr);
7475 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
7476 return off;
7477}
7478
7479
7480#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7481# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
7482 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7483 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7484 kIemNativeEmitMemOp_Store, \
7485 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
7486
7487# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
7488 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
7489 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7490 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
7491
7492# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
7493 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
7494 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7495 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
7496
7497# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
7498 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
7499 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
7500 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
7501
7502
7503# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
7504 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7505 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7506 kIemNativeEmitMemOp_Store, \
7507 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
7508
7509# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
7510 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
7511 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
7512 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
7513
7514# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
7515 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
7516 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
7517 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
7518
7519# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
7520 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
7521 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
7522 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
7523#endif
7524
7525
7526
7527/*********************************************************************************************************************************
7528* Stack Accesses. *
7529*********************************************************************************************************************************/
 7530/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) - byte 0: value width in bits; byte 1: flat-mode stack width (0 for segmented stacks); byte 2: segment register push flag. */
7531#define IEM_MC_PUSH_U16(a_u16Value) \
7532 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
7533 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
7534#define IEM_MC_PUSH_U32(a_u32Value) \
7535 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
7536 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
7537#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
7538 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
7539 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
7540#define IEM_MC_PUSH_U64(a_u64Value) \
7541 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
7542 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
7543
7544#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
7545 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
7546 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
7547#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
7548 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
7549 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
7550#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
7551 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
7552 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
7553
7554#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
7555 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
7556 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
7557#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
7558 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
7559 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
7560
7561
7562/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
7563DECL_INLINE_THROW(uint32_t)
7564iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
7565 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
7566{
7567 /*
7568 * Assert sanity.
7569 */
7570 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7571 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7572#ifdef VBOX_STRICT
7573 if (RT_BYTE2(cBitsVarAndFlat) != 0)
7574 {
7575 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7576 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7577 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7578 Assert( pfnFunction
7579 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
7580 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
7581 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
7582 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
7583 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
7584 : UINT64_C(0xc000b000a0009000) ));
7585 }
7586 else
7587 Assert( pfnFunction
7588 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
7589 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
7590 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
7591 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
7592 : UINT64_C(0xc000b000a0009000) ));
7593#endif
7594
7595#ifdef VBOX_STRICT
7596 /*
7597 * Check that the fExec flags we've got make sense.
7598 */
7599 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7600#endif
7601
7602 /*
7603 * To keep things simple we have to commit any pending writes first as we
7604 * may end up making calls.
7605 */
7606 /** @todo we could postpone this till we make the call and reload the
7607 * registers after returning from the call. Not sure if that's sensible or
7608 * not, though. */
7609 off = iemNativeRegFlushPendingWrites(pReNative, off);
7610
7611 /*
7612 * First we calculate the new RSP and the effective stack pointer value.
7613 * For 64-bit mode and flat 32-bit these two are the same.
7614 * (Code structure is very similar to that of PUSH)
7615 */
7616 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
7617 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
7618 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
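    /* Note: for Intel guests a segment register push outside 16-bit mode only
             does a 16-bit write, so cbMemAccess below can be narrower than the
             stack pointer adjustment (cbMem). */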
7619 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
7620 ? cbMem : sizeof(uint16_t);
7621 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
7622 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
7623 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
7624 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
7625 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
7626 if (cBitsFlat != 0)
7627 {
7628 Assert(idxRegEffSp == idxRegRsp);
7629 Assert(cBitsFlat == 32 || cBitsFlat == 64);
7630 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
7631 if (cBitsFlat == 64)
7632 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
7633 else
7634 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
7635 }
7636 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
7637 {
7638 Assert(idxRegEffSp != idxRegRsp);
7639 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
7640 kIemNativeGstRegUse_ReadOnly);
7641#ifdef RT_ARCH_AMD64
7642 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7643#else
7644 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7645#endif
7646 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
7647 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
7648 offFixupJumpToUseOtherBitSp = off;
7649 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7650 {
7651 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
7652 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7653 }
7654 else
7655 {
7656 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
7657 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7658 }
7659 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7660 }
7661 /* SpUpdateEnd: */
7662 uint32_t const offLabelSpUpdateEnd = off;
7663
7664 /*
7665 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
7666 * we're skipping lookup).
7667 */
7668 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
7669 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
7670 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7671 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
7672 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7673 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7674 : UINT32_MAX;
7675 uint8_t const idxRegValue = !TlbState.fSkip
7676 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7677 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
7678 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
7679 : UINT8_MAX;
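    /* idxRegMemResult receives the host address from the TLB lookup and is the base register for the inline stores below. */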
7680 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7681
7682
7683 if (!TlbState.fSkip)
7684 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7685 else
7686 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
7687
7688 /*
7689 * Use16BitSp:
7690 */
7691 if (cBitsFlat == 0)
7692 {
7693#ifdef RT_ARCH_AMD64
7694 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
7695#else
7696 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7697#endif
7698 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
7699 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
7700 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7701 else
7702 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
7703 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
7704 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7705 }
7706
7707 /*
7708 * TlbMiss:
7709 *
7710 * Call helper to do the pushing.
7711 */
7712 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
7713
7714#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7715 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7716#else
7717 RT_NOREF(idxInstr);
7718#endif
7719
7720 /* Save variables in volatile registers. */
7721 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7722 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
7723 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
7724 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
7725 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7726
7727 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
7728 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
7729 {
7730 /* Swap them using ARG0 as temp register: */
7731 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
7732 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
7733 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
7734 }
7735 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
7736 {
7737 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
7738 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
7739 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7740
7741 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
7742 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
7743 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7744 }
7745 else
7746 {
7747 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
7748 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
7749
7750 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
7751 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
7752 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
7753 }
7754
7755 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7756 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7757
7758 /* Done setting up parameters, make the call. */
7759 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7760
7761 /* Restore variables and guest shadow registers to volatile registers. */
7762 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7763 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7764
7765#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7766 if (!TlbState.fSkip)
7767 {
7768 /* end of TlbMiss - Jump to the done label. */
7769 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7770 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7771
7772 /*
7773 * TlbLookup:
7774 */
7775 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
7776 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7777
7778 /*
7779 * Emit code to do the actual storing / fetching.
7780 */
7781 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7782# ifdef IEM_WITH_TLB_STATISTICS
7783 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7784 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
7785# endif
7786 if (idxRegValue != UINT8_MAX)
7787 {
7788 switch (cbMemAccess)
7789 {
7790 case 2:
7791 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7792 break;
7793 case 4:
7794 if (!fIsIntelSeg)
7795 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7796 else
7797 {
 7798                         /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
 7799                            PUSH FS in real mode, so we have to try to emulate that here.
7800 We borrow the now unused idxReg1 from the TLB lookup code here. */
7801 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
7802 kIemNativeGstReg_EFlags);
7803 if (idxRegEfl != UINT8_MAX)
7804 {
 7805#ifdef RT_ARCH_AMD64
7806 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
7807 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
7808 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7809#else
7810 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
7811 off, TlbState.idxReg1, idxRegEfl,
7812 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7813#endif
7814 iemNativeRegFreeTmp(pReNative, idxRegEfl);
7815 }
7816 else
7817 {
7818 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
7819 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
7820 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
7821 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
7822 }
7823 /* ASSUMES the upper half of idxRegValue is ZERO. */
7824 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
7825 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
7826 }
7827 break;
7828 case 8:
7829 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
7830 break;
7831 default:
7832 AssertFailed();
7833 }
7834 }
7835 else
7836 {
7837 switch (cbMemAccess)
7838 {
7839 case 2:
7840 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7841 idxRegMemResult, TlbState.idxReg1);
7842 break;
7843 case 4:
7844 Assert(!fIsSegReg);
7845 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7846 idxRegMemResult, TlbState.idxReg1);
7847 break;
7848 case 8:
7849 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
7850 break;
7851 default:
7852 AssertFailed();
7853 }
7854 }
7855
7856 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7857 TlbState.freeRegsAndReleaseVars(pReNative);
7858
7859 /*
7860 * TlbDone:
7861 *
7862 * Commit the new RSP value.
7863 */
7864 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7865 }
7866#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
7867
7868#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
7869 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
7870#endif
7871 iemNativeRegFreeTmp(pReNative, idxRegRsp);
7872 if (idxRegEffSp != idxRegRsp)
7873 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
7874
 7875    /* The value variable is implicitly flushed. */
7876 if (idxRegValue != UINT8_MAX)
7877 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7878 iemNativeVarFreeLocal(pReNative, idxVarValue);
7879
7880 return off;
7881}
7882
7883
7884
 7885/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) - byte 0: value width in bits; byte 1: flat-mode stack width (0 for segmented stacks). */
7886#define IEM_MC_POP_GREG_U16(a_iGReg) \
7887 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
7888 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
7889#define IEM_MC_POP_GREG_U32(a_iGReg) \
7890 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
7891 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
7892#define IEM_MC_POP_GREG_U64(a_iGReg) \
7893 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
7894 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
7895
7896#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
7897 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
7898 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
7899#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
7900 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
7901 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
7902
7903#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
7904 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
7905 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
7906#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
7907 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
7908 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
7909
7910
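/** Emits the 16-bit SP flavour of the POP stack pointer update: copies the
 *  current 16-bit SP into @a idxRegEffSp (the address to read from) and adds
 *  @a cbMem to bits 15:0 of @a idxRegRsp, leaving bits 63:16 untouched.
 *  @a idxRegTmp is only needed by the ARM64 code. */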
7911DECL_FORCE_INLINE_THROW(uint32_t)
7912iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
7913 uint8_t idxRegTmp)
7914{
7915 /* Use16BitSp: */
7916#ifdef RT_ARCH_AMD64
7917 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
7918 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
7919 RT_NOREF(idxRegTmp);
7920#else
7921 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
7922 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
7923 /* add tmp, regrsp, #cbMem */
7924 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
7925 /* and tmp, tmp, #0xffff */
7926 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
7927 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
 7928    /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
7929 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
7930#endif
7931 return off;
7932}
7933
7934
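/** Emits the 32-bit SP flavour of the POP stack pointer update: copies ESP into
 *  @a idxRegEffSp and adds @a cbMem to the 32-bit @a idxRegRsp. */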
7935DECL_FORCE_INLINE(uint32_t)
7936iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
7937{
7938 /* Use32BitSp: */
7939 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
7940 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
7941 return off;
7942}
7943
7944
7945/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
7946DECL_INLINE_THROW(uint32_t)
7947iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
7948 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
7949{
7950 /*
7951 * Assert sanity.
7952 */
7953 Assert(idxGReg < 16);
7954#ifdef VBOX_STRICT
7955 if (RT_BYTE2(cBitsVarAndFlat) != 0)
7956 {
7957 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7958 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7959 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7960 Assert( pfnFunction
7961 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
7962 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
7963 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
7964 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
7965 : UINT64_C(0xc000b000a0009000) ));
7966 }
7967 else
7968 Assert( pfnFunction
7969 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
7970 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
7971 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
7972 : UINT64_C(0xc000b000a0009000) ));
7973#endif
7974
7975#ifdef VBOX_STRICT
7976 /*
7977 * Check that the fExec flags we've got make sense.
7978 */
7979 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7980#endif
7981
7982 /*
7983 * To keep things simple we have to commit any pending writes first as we
7984 * may end up making calls.
7985 */
7986 off = iemNativeRegFlushPendingWrites(pReNative, off);
7987
7988 /*
7989 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
7990 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
7991 * directly as the effective stack pointer.
7992 * (Code structure is very similar to that of PUSH)
7993 */
7994 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
7995 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
7996 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
7997 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
7998 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
7999 /** @todo can do a better job picking the register here. For cbMem >= 4 this
8000 * will be the resulting register value. */
 8001    uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* Holds the memory pointer first, then the popped value; doubles as the arm64 SP += 2/4 helper's temporary. */
8002
8003 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8004 if (cBitsFlat != 0)
8005 {
8006 Assert(idxRegEffSp == idxRegRsp);
8007 Assert(cBitsFlat == 32 || cBitsFlat == 64);
8008 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8009 }
8010 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8011 {
8012 Assert(idxRegEffSp != idxRegRsp);
8013 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8014 kIemNativeGstRegUse_ReadOnly);
8015#ifdef RT_ARCH_AMD64
8016 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8017#else
8018 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8019#endif
8020 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8021 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8022 offFixupJumpToUseOtherBitSp = off;
8023 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8024 {
8025/** @todo can skip idxRegRsp updating when popping ESP. */
8026 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8027 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8028 }
8029 else
8030 {
8031 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8032 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8033 }
8034 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8035 }
8036 /* SpUpdateEnd: */
8037 uint32_t const offLabelSpUpdateEnd = off;
8038
8039 /*
8040 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
8041 * we're skipping lookup).
8042 */
8043 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8044 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
8045 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8046 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8047 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8048 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8049 : UINT32_MAX;
8050
8051 if (!TlbState.fSkip)
8052 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8053 else
8054 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8055
8056 /*
8057 * Use16BitSp:
8058 */
8059 if (cBitsFlat == 0)
8060 {
8061#ifdef RT_ARCH_AMD64
8062 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8063#else
8064 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8065#endif
8066 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8067 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8068 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8069 else
8070 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8071 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8072 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8073 }
8074
8075 /*
8076 * TlbMiss:
8077 *
 8078     * Call helper to do the popping.
8079 */
8080 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8081
8082#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8083 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8084#else
8085 RT_NOREF(idxInstr);
8086#endif
8087
8088 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8089 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8090 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
8091 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8092
8093
8094 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
8095 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8096 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8097
8098 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8099 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8100
8101 /* Done setting up parameters, make the call. */
8102 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8103
8104 /* Move the return register content to idxRegMemResult. */
8105 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8106 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8107
8108 /* Restore variables and guest shadow registers to volatile registers. */
8109 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8110 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8111
8112#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8113 if (!TlbState.fSkip)
8114 {
8115 /* end of TlbMiss - Jump to the done label. */
8116 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8117 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8118
8119 /*
8120 * TlbLookup:
8121 */
8122 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
8123 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8124
8125 /*
 8126         * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
8127 */
8128 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8129# ifdef IEM_WITH_TLB_STATISTICS
8130 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8131 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8132# endif
8133 switch (cbMem)
8134 {
8135 case 2:
8136 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8137 break;
8138 case 4:
8139 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8140 break;
8141 case 8:
8142 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8143 break;
8144 default:
8145 AssertFailed();
8146 }
8147
8148 TlbState.freeRegsAndReleaseVars(pReNative);
8149
8150 /*
8151 * TlbDone:
8152 *
 8153         * Set the new RSP value (FLAT accesses need to calculate it first) and
8154 * commit the popped register value.
8155 */
8156 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8157 }
8158#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8159
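    /* Commit the result: for any destination other than xSP the popped value
       goes into the destination GPR and, in FLAT modes, RSP is advanced
       afterwards; when popping into xSP the popped value itself becomes the
       new stack pointer (merged into the low 16 bits for 16-bit pops). */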
8160 if (idxGReg != X86_GREG_xSP)
8161 {
8162 /* Set the register. */
8163 if (cbMem >= sizeof(uint32_t))
8164 {
8165#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8166 AssertMsg( pReNative->idxCurCall == 0
8167 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
8168 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName,
8169 iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
8170#endif
8171 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
8172#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8173 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
8174#endif
8175#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8176 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
8177 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8178#endif
8179 }
8180 else
8181 {
8182 Assert(cbMem == sizeof(uint16_t));
8183 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
8184 kIemNativeGstRegUse_ForUpdate);
8185 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
8186#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8187 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8188#endif
8189 iemNativeRegFreeTmp(pReNative, idxRegDst);
8190 }
8191
8192 /* Complete RSP calculation for FLAT mode. */
8193 if (idxRegEffSp == idxRegRsp)
8194 {
8195 if (cBitsFlat == 64)
8196 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8197 else
8198 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8199 }
8200 }
8201 else
8202 {
 8203        /* We're popping RSP, ESP or SP. Only the latter takes a bit of extra work, of course. */
8204 if (cbMem == sizeof(uint64_t))
8205 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
8206 else if (cbMem == sizeof(uint32_t))
8207 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
8208 else
8209 {
8210 if (idxRegEffSp == idxRegRsp)
8211 {
8212 if (cBitsFlat == 64)
8213 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8214 else
8215 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8216 }
8217 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
8218 }
8219 }
8220
8221#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8222 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
8223#endif
8224
8225 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8226 if (idxRegEffSp != idxRegRsp)
8227 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8228 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8229
8230 return off;
8231}
8232
8233
8234
8235/*********************************************************************************************************************************
8236* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
8237*********************************************************************************************************************************/
8238
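/* These emit code that maps the guest memory and returns a host pointer in the
   a_pXxxMem variable together with an unmap token in a_bUnmapInfo, which the
   corresponding IEM_MC_MEM_COMMIT_AND_UNMAP_XXX statement consumes later on. */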
8239#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8240 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8241 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8242 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
8243
8244#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8245 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8246 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8247 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
8248
8249#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8250 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8251 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8252 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
8253
8254#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8255 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8256 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8257 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
8258
8259
8260#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8261 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8262 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8263 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
8264
8265#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8266 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8267 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8268 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
8269
8270#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8271 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8272 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8273 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8274
8275#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8276 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8277 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8278 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
8279
8280#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8281 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
8282 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8283 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8284
8285
8286#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8287 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8288 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8289 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
8290
8291#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8292 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8293 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8294 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
8295
8296#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8297 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8298 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8299 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8300
8301#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8302 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8303 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8304 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
8305
8306#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8307 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
8308 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8309 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8310
8311
8312#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8313 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8314 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8315 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
8316
8317#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8318 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8319 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8320 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
8321#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8322 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8323 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8324 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8325
8326#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8327 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8328 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8329 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
8330
8331#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8332 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
8333 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8334 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8335
8336
8337#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8338 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8339 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8340 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
8341
8342#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8343 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8344 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
8345 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
8346
8347
8348#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8349 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8350 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8351 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
8352
8353#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8354 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8355 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8356 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
8357
8358#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8359 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8360 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8361 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
8362
8363#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8364 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8365 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8366 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
8367
8368
8369
8370#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8371 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8372 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8373 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
8374
8375#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8376 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8377 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8378 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
8379
8380#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8381 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8382 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8383 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
8384
8385#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8386 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8387 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8388 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
8389
8390
8391#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8392 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8393 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8394 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
8395
8396#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8397 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8398 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8399 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
8400
8401#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8402 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8403 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8404 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
8405
8406#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8407 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8408 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8409 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
8410
8411#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
8412 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
8413 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8414 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
8415
8416
8417#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8418 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8419 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8420 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
8421
8422#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8423 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8424 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8425 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
8426
8427#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8428 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8429 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8430 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
8431
8432#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
8433 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8434 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8435 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
8436
8437#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
8438 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
8439 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8440 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
8441
8442
8443#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8444 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8445 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8446 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
8447
8448#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8449 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8450 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8451 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
8452
8453#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8454 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8455 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8456 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
8457
8458#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
8459 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8460 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8461 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
8462
8463#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
8464 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
8465 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8466 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
8467
8468
8469#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
8470 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
8471 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8472 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
8473
8474#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
8475 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
8476 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
8477 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
8478
8479
8480#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8481 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8482 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8483 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
8484
8485#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8486 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8487 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8488 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
8489
8490#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8491 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8492 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8493 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
8494
8495#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
8496 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8497 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8498 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
8499
8500
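/** Emits code for IEM_MC_MEM_MAP_XXX and IEM_MC_MEM_FLAT_MAP_XXX (with
 *  iSegReg = UINT8_MAX for the flat variants). */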
8501DECL_INLINE_THROW(uint32_t)
8502iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
8503 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint32_t fAlignMaskAndCtl,
8504 uintptr_t pfnFunction, uint8_t idxInstr)
8505{
8506 /*
8507 * Assert sanity.
8508 */
8509 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
8510 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
8511 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
8512 && pVarMem->cbVar == sizeof(void *),
8513 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8514
8515 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
8516 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
8517 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
8518 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
8519 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8520
8521 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
8522 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
8523 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
8524 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
8525 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
8526
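    /* Note: iSegReg == UINT8_MAX is how the IEM_MC_MEM_FLAT_MAP_XXX variants
       above signal flat addressing (no segment register involved); any other
       value is expected to be a regular segment register index (0..5). */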
8527 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
8528
8529 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
8530
8531#ifdef VBOX_STRICT
8532# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
8533 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
8534 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
8535 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
8536 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
8537# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
8538 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
8539 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
8540 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
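/* Worked example (illustrative): for fAccess = IEM_ACCESS_DATA_RW (read+write,
   not atomic) and a_fnBase = iemNativeHlpMemFlatMapDataU32, IEM_MAP_HLP_FN
   expands to (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, which is exactly what
   the 4-byte case below asserts pfnFunction to be. */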
8541
8542 if (iSegReg == UINT8_MAX)
8543 {
8544 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8545 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8546 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8547 switch (cbMem)
8548 {
8549 case 1:
8550 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8));
8551 Assert(!fAlignMaskAndCtl);
8552 break;
8553 case 2:
8554 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16));
8555 Assert(fAlignMaskAndCtl < 2);
8556 break;
8557 case 4:
8558 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32));
8559 Assert(fAlignMaskAndCtl < 4);
8560 break;
8561 case 8:
8562 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64));
8563 Assert(fAlignMaskAndCtl < 8);
8564 break;
8565 case 10:
8566 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
8567 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
8568 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
8569 Assert(fAlignMaskAndCtl < 8);
8570 break;
8571 case 16:
8572 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128));
8573 Assert(fAlignMaskAndCtl < 16);
8574 break;
8575# if 0
8576 case 32:
8577 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256));
8578 Assert(fAlignMaskAndCtl < 32);
8579 break;
8580 case 64:
8581 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512));
8582 Assert(fAlignMaskAndCtl < 64);
8583 break;
8584# endif
8585 default: AssertFailed(); break;
8586 }
8587 }
8588 else
8589 {
8590 Assert(iSegReg < 6);
8591 switch (cbMem)
8592 {
8593 case 1:
8594 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8));
8595 Assert(!fAlignMaskAndCtl);
8596 break;
8597 case 2:
8598 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16));
8599 Assert(fAlignMaskAndCtl < 2);
8600 break;
8601 case 4:
8602 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32));
8603 Assert(fAlignMaskAndCtl < 4);
8604 break;
8605 case 8:
8606 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64));
8607 Assert(fAlignMaskAndCtl < 8);
8608 break;
8609 case 10:
8610 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
8611 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
8612 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
8613 Assert(fAlignMaskAndCtl < 8);
8614 break;
8615 case 16:
8616 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128));
8617 Assert(fAlignMaskAndCtl < 16);
8618 break;
8619# if 0
8620 case 32:
8621 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256));
8622 Assert(fAlignMaskAndCtl < 32);
8623 break;
8624 case 64:
8625 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512));
8626 Assert(fAlignMaskAndCtl < 64);
8627 break;
8628# endif
8629 default: AssertFailed(); break;
8630 }
8631 }
8632# undef IEM_MAP_HLP_FN
8633# undef IEM_MAP_HLP_FN_NO_AT
8634#endif
8635
8636#ifdef VBOX_STRICT
8637 /*
8638 * Check that the fExec flags we've got make sense.
8639 */
8640 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8641#endif
8642
8643 /*
8644 * To keep things simple we have to commit any pending writes first as we
8645 * may end up making calls.
8646 */
8647 off = iemNativeRegFlushPendingWrites(pReNative, off);
8648
8649#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8650 /*
8651 * Move/spill/flush stuff out of call-volatile registers.
8652 * This is the easy way out. We could contain this to the tlb-miss branch
8653 * by saving and restoring active stuff here.
8654 */
8655 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
8656 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
8657#endif
8658
8659     /* The bUnmapInfo variable will get a register in the tlb-hit code path,
8660        while the tlb-miss code path will temporarily put it on the stack.
8661        Set the type to stack here so we don't need to do it twice below. */
8662 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
8663 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
8664 /** @todo use a tmp register from TlbState, since they'll be free after tlb
8665 * lookup is done. */
8666
8667 /*
8668 * Define labels and allocate the result register (trying for the return
8669 * register if we can).
8670 */
8671 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8672 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
8673 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
8674 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
8675 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
8676 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8677 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8678 : UINT32_MAX;
8679
8680 /*
8681 * Jump to the TLB lookup code.
8682 */
8683 if (!TlbState.fSkip)
8684 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
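    /* Rough shape of what is emitted from here on (a sketch, not the exact
     * instruction sequence):
     *          jmp     TlbLookup               ; emitted above unless skipped
     *  TlbMiss:
     *          ; record idxInstr / save volatile regs (config dependent)
     *          call    pfnFunction(pVCpu, &bUnmapInfo, GCPtrMem[, iSegReg])
     *          ; copy the returned pointer into idxRegMemResult, restore regs
     *          jmp     TlbDone
     *  TlbLookup:
     *          ; inline TLB lookup, branching to TlbMiss on failure
     *          mov     bUnmapInfo, 0           ; a TLB hit needs no unmapping
     *  TlbDone:
     */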
8685
8686 /*
8687 * TlbMiss:
8688 *
8689 * Call helper to do the fetching.
8690 * We flush all guest register shadow copies here.
8691 */
8692 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
8693
8694#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8695 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8696#else
8697 RT_NOREF(idxInstr);
8698#endif
8699
8700#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8701 /* Save variables in volatile registers. */
8702 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
8703 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8704#endif
8705
8706 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
8707 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
8708#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8709 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
8710#else
8711 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8712#endif
8713
8714 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
8715 if (iSegReg != UINT8_MAX)
8716 {
8717 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
8718 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
8719 }
8720
8721 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
8722 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
8723 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
8724
8725 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8726 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8727
8728 /* Done setting up parameters, make the call. */
8729 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8730
8731 /*
8732 * Put the output in the right registers.
8733 */
8734 Assert(idxRegMemResult == pVarMem->idxReg);
8735 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8736 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8737
8738#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8739 /* Restore variables and guest shadow registers to volatile registers. */
8740 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8741 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8742#endif
8743
8744 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
8745 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
8746
8747#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8748 if (!TlbState.fSkip)
8749 {
8750         /* End of TlbMiss - jump to the done label. */
8751 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8752 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8753
8754 /*
8755 * TlbLookup:
8756 */
8757 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl, fAccess,
8758 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8759# ifdef IEM_WITH_TLB_STATISTICS
8760 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
8761 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
8762# endif
8763
8764 /* [idxVarUnmapInfo] = 0; */
8765 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
8766
8767 /*
8768 * TlbDone:
8769 */
8770 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8771
8772 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
8773
8774# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
8775 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
8776 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8777# endif
8778 }
8779#else
8780 RT_NOREF(fAccess, fAlignMaskAndCtl, idxLabelTlbMiss);
8781#endif
8782
8783 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
8784 iemNativeVarRegisterRelease(pReNative, idxVarMem);
8785
8786 return off;
8787}
8788
8789
8790#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
8791 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
8792 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
8793
8794#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
8795 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
8796 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
8797
8798#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
8799 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
8800 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
8801
8802#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
8803 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
8804 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
8805
8806DECL_INLINE_THROW(uint32_t)
8807iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
8808 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
8809{
8810 /*
8811 * Assert sanity.
8812 */
8813 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
8814#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
8815 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
8816#endif
8817 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
8818 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
8819 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
8820#ifdef VBOX_STRICT
8821 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
8822 {
8823 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
8824 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
8825 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
8826 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
8827 case IEM_ACCESS_TYPE_WRITE:
8828 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
8829 case IEM_ACCESS_TYPE_READ:
8830 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
8831 default: AssertFailed();
8832 }
8833#else
8834 RT_NOREF(fAccess);
8835#endif
8836
8837 /*
8838 * To keep things simple we have to commit any pending writes first as we
8839 * may end up making calls (there shouldn't be any at this point, so this
8840 * is just for consistency).
8841 */
8842 /** @todo we could postpone this till we make the call and reload the
8843 * registers after returning from the call. Not sure if that's sensible or
8844 * not, though. */
8845 off = iemNativeRegFlushPendingWrites(pReNative, off);
8846
8847 /*
8848 * Move/spill/flush stuff out of call-volatile registers.
8849 *
8850 * We exclude any register holding the bUnmapInfo variable, as we'll be
8851 * checking it after returning from the call and will free it afterwards.
8852 */
8853 /** @todo save+restore active registers and maybe guest shadows in miss
8854 * scenario. */
8855 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
8856 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
8857
8858 /*
8859 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
8860 * to call the unmap helper function.
8861 *
8862      * The likelihood of it being zero is higher than the TLB hit rate when doing
8863      * the mapping, as a TLB miss for a well-aligned and unproblematic memory
8864      * access should also end up with a mapping that won't need special unmapping.
8865 */
8866 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
8867 * should speed up things for the pure interpreter as well when TLBs
8868 * are enabled. */
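    /* What is emitted below, roughly (illustrative only):
     *          test    bUnmapInfo, 0ffh
     *          jz      .done                   ; nothing to commit/unmap
     *          ; record idxInstr (config dependent)
     *          call    pfnFunction(pVCpu, bUnmapInfo)
     *      .done:
     */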
8869#ifdef RT_ARCH_AMD64
8870 if (pVarUnmapInfo->idxReg == UINT8_MAX)
8871 {
8872 /* test byte [rbp - xxx], 0ffh */
8873 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
8874 pbCodeBuf[off++] = 0xf6;
8875 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
8876 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
8877 pbCodeBuf[off++] = 0xff;
8878 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8879 }
8880 else
8881#endif
8882 {
8883 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
8884 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
8885 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
8886 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
8887 }
8888 uint32_t const offJmpFixup = off;
8889 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
8890
8891 /*
8892 * Call the unmap helper function.
8893 */
8894#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
8895 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8896#else
8897 RT_NOREF(idxInstr);
8898#endif
8899
8900 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
8901 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
8902 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8903
8904 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8905 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8906
8907 /* Done setting up parameters, make the call. */
8908 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8909
8910     /* The bUnmapInfo variable is implicitly freed by these MCs. */
8911 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
8912
8913 /*
8914 * Done, just fixup the jump for the non-call case.
8915 */
8916 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
8917
8918 return off;
8919}
8920
8921
8922
8923/*********************************************************************************************************************************
8924* State and Exceptions *
8925*********************************************************************************************************************************/
8926
8927#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8928#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8929
8930#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8931#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8932#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8933
8934#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8935#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
8936#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
8937
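/* All of the IEM_MC_ACTUALIZE/IEM_MC_PREPARE macros above funnel into the
 * emitter below. With IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS enabled, only the
 * fForChange=true variants emit anything: a one-time save of the host MXCSR
 * (or FPCR on arm64) followed by syncing the guest MXCSR into the host FP
 * control register. */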
8938
8939DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
8940{
8941#ifndef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
8942 RT_NOREF(pReNative, fForChange);
8943#else
8944 if ( !(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED)
8945 && fForChange)
8946 {
8947# ifdef RT_ARCH_AMD64
8948
8949 /* Need to save the host MXCSR the first time, and clear the exception flags. */
8950 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
8951 {
8952 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
8953
8954 /* stmxcsr */
8955 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
8956 pbCodeBuf[off++] = X86_OP_REX_B;
8957 pbCodeBuf[off++] = 0x0f;
8958 pbCodeBuf[off++] = 0xae;
8959 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 3, IEMNATIVE_REG_FIXED_PVMCPU & 7);
8960 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
8961 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
8962 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
8963 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
8964 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8965
8966 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
8967 }
8968
8969 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
8970 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
8971
8972 /*
8973          * Mask all exceptions and clear the exception status, then load the result
8974          * into MXCSR, taking a detour through memory here because ldmxcsr/stmxcsr
8975          * don't support a register source/target (sigh).
8976 */
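        /* For example (values only, nothing extra is emitted): a guest MXCSR of
         * 0x0005 (IE and ZE status flags set, no exceptions masked) becomes 0x1f85
         * after OR-ing in X86_MXCSR_XCPT_MASK (0x1f80) and 0x1f80 after AND-ing out
         * X86_MXCSR_XCPT_FLAGS (0x003f); that is the value handed to ldmxcsr below. */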
8977 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr);
8978 off = iemNativeEmitOrGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, X86_MXCSR_XCPT_MASK);
8979 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, ~X86_MXCSR_XCPT_FLAGS);
8980 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
8981
8982 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
8983
8984 /* ldmxcsr */
8985 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
8986 pbCodeBuf[off++] = X86_OP_REX_B;
8987 pbCodeBuf[off++] = 0x0f;
8988 pbCodeBuf[off++] = 0xae;
8989 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 2, IEMNATIVE_REG_FIXED_PVMCPU & 7);
8990 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
8991 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
8992 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
8993 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
8994 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8995
8996 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
8997 iemNativeRegFreeTmp(pReNative, idxRegTmp);
8998
8999# elif defined(RT_ARCH_ARM64)
9000 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
9001
9002 /* Need to save the host floating point control register the first time, clear FPSR. */
9003 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9004 {
9005 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9006 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(ARMV8_A64_REG_XZR, ARMV8_AARCH64_SYSREG_FPSR);
9007 pu32CodeBuf[off++] = Armv8A64MkInstrMrs(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9008 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9009 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9010 }
9011
9012 /*
9013 * Translate MXCSR to FPCR.
9014 *
9015 * Unfortunately we can't emulate the exact behavior of MXCSR as we can't take
9016          * FEAT_AFP on arm64 for granted (my M2 MacBook doesn't have it). So we can't map
9017          * MXCSR.DAZ to FPCR.FIZ and MXCSR.FZ to FPCR.FZ with FPCR.AH being set.
9018          * We can only use FPCR.FZ, which flushes both input _and_ output denormals to zero.
9019 */
9020 /** @todo Check the host supported flags (needs additional work to get the host features from CPUM)
9021 * and implement alternate handling if FEAT_AFP is present. */
9022 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
9023
9024 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
9025
9026         /* First make sure that nothing is set in the upper 16 bits (X86_MXCSR_MM, which we don't emulate right now). */
9027 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegTmp, idxRegMxCsr);
9028
9029         /* If either MXCSR.FZ or MXCSR.DAZ is set, FPCR.FZ will be set. */
9030 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(IEMNATIVE_REG_FIXED_TMP0, idxRegTmp, X86_MXCSR_DAZ_BIT, 1);
9031 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegTmp, X86_MXCSR_FZ_BIT);
9032 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(idxRegTmp, idxRegTmp, IEMNATIVE_REG_FIXED_TMP0);
9033 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegTmp, idxRegTmp, ARMV8_FPCR_FZ_BIT);
9034
9035 /*
9036          * Init the rounding mode; the layout differs between MXCSR.RC[14:13] and FPCR.RMode[23:22]:
9037 *
9038 * Value MXCSR FPCR
9039 * 0 RN RN
9040 * 1 R- R+
9041 * 2 R+ R-
9042 * 3 RZ RZ
9043 *
9044          * Conversion can be achieved by swapping the two bit positions.
9045 */
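        /* E.g. MXCSR.RC = 01b (round down) has to become FPCR.RMode = 10b (round
         * towards minus infinity), while 00b and 11b map onto themselves, so
         * swapping the two bits of the field is sufficient. */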
9046 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT);
9047 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 14, 1);
9048 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT + 1);
9049 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 13, 1);
9050
9051 /* Write the value to FPCR. */
9052 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9053
9054 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9055 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9056 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9057# else
9058# error "Port me"
9059# endif
9060 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
9061 }
9062#endif
9063 return off;
9064}
9065
9066
9067
9068/*********************************************************************************************************************************
9069* Emitters for FPU related operations. *
9070*********************************************************************************************************************************/
9071
9072#define IEM_MC_FETCH_FCW(a_u16Fcw) \
9073 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
9074
9075/** Emits code for IEM_MC_FETCH_FCW. */
9076DECL_INLINE_THROW(uint32_t)
9077iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9078{
9079 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9080 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9081
9082 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9083
9084 /* Allocate a temporary FCW register. */
9085 /** @todo eliminate extra register */
9086 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
9087 kIemNativeGstRegUse_ReadOnly);
9088
9089 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
9090
9091 /* Free but don't flush the FCW register. */
9092 iemNativeRegFreeTmp(pReNative, idxFcwReg);
9093 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9094
9095 return off;
9096}
9097
9098
9099#define IEM_MC_FETCH_FSW(a_u16Fsw) \
9100 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
9101
9102/** Emits code for IEM_MC_FETCH_FSW. */
9103DECL_INLINE_THROW(uint32_t)
9104iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9105{
9106 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9107 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9108
9109 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
9110 /* Allocate a temporary FSW register. */
9111 /** @todo eliminate extra register */
9112 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
9113 kIemNativeGstRegUse_ReadOnly);
9114
9115 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
9116
9117 /* Free but don't flush the FSW register. */
9118 iemNativeRegFreeTmp(pReNative, idxFswReg);
9119 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9120
9121 return off;
9122}
9123
9124
9125
9126#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9127
9128
9129/*********************************************************************************************************************************
9130* Emitters for SSE/AVX specific operations. *
9131*********************************************************************************************************************************/
9132
9133#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
9134 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
9135
9136/** Emits code for IEM_MC_COPY_XREG_U128. */
9137DECL_INLINE_THROW(uint32_t)
9138iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
9139{
9140     /* This is a nop if the source and destination registers are the same. */
9141 if (iXRegDst != iXRegSrc)
9142 {
9143 /* Allocate destination and source register. */
9144 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
9145 kIemNativeGstSimdRegLdStSz_Low128,
9146 kIemNativeGstRegUse_ForFullWrite);
9147 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
9148 kIemNativeGstSimdRegLdStSz_Low128,
9149 kIemNativeGstRegUse_ReadOnly);
9150
9151 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9152
9153 /* Free but don't flush the source and destination register. */
9154 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9155 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9156 }
9157
9158 return off;
9159}
9160
9161
9162#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
9163 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
9164
9165/** Emits code for IEM_MC_FETCH_XREG_U128. */
9166DECL_INLINE_THROW(uint32_t)
9167iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
9168{
9169 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9170 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9171
9172 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9173 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9174
9175 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9176
9177 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9178
9179 /* Free but don't flush the source register. */
9180 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9181 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9182
9183 return off;
9184}
9185
9186
9187#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
9188 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
9189
9190#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
9191 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
9192
9193 /** Emits code for IEM_MC_FETCH_XREG_U64/IEM_MC_FETCH_XREG_R64. */
9194DECL_INLINE_THROW(uint32_t)
9195iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
9196{
9197 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9198 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9199
9200 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9201 kIemNativeGstSimdRegLdStSz_Low128,
9202 kIemNativeGstRegUse_ReadOnly);
9203
9204 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9205 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9206
9207 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9208
9209 /* Free but don't flush the source register. */
9210 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9211 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9212
9213 return off;
9214}
9215
9216
9217#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
9218 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
9219
9220#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
9221 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
9222
9223/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
9224DECL_INLINE_THROW(uint32_t)
9225iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
9226{
9227 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9228 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9229
9230 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9231 kIemNativeGstSimdRegLdStSz_Low128,
9232 kIemNativeGstRegUse_ReadOnly);
9233
9234 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9235 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9236
9237 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9238
9239 /* Free but don't flush the source register. */
9240 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9241 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9242
9243 return off;
9244}
9245
9246
9247#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
9248 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
9249
9250/** Emits code for IEM_MC_FETCH_XREG_U16. */
9251DECL_INLINE_THROW(uint32_t)
9252iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
9253{
9254 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9255 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9256
9257 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9258 kIemNativeGstSimdRegLdStSz_Low128,
9259 kIemNativeGstRegUse_ReadOnly);
9260
9261 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9262 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9263
9264 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
9265
9266 /* Free but don't flush the source register. */
9267 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9268 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9269
9270 return off;
9271}
9272
9273
9274#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
9275 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
9276
9277/** Emits code for IEM_MC_FETCH_XREG_U8. */
9278DECL_INLINE_THROW(uint32_t)
9279iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
9280{
9281 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9282 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
9283
9284 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9285 kIemNativeGstSimdRegLdStSz_Low128,
9286 kIemNativeGstRegUse_ReadOnly);
9287
9288 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9289 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9290
9291 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
9292
9293 /* Free but don't flush the source register. */
9294 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9295 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9296
9297 return off;
9298}
9299
9300
9301#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
9302 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
9303
9304AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
9305#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
9306 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
9307
9308
9309/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
9310DECL_INLINE_THROW(uint32_t)
9311iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9312{
9313 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9314 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9315
9316 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9317 kIemNativeGstSimdRegLdStSz_Low128,
9318 kIemNativeGstRegUse_ForFullWrite);
9319 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9320
9321 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9322
9323 /* Free but don't flush the source register. */
9324 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9325 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9326
9327 return off;
9328}
9329
9330
9331#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
9332 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
9333
9334#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
9335 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
9336
9337#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
9338 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
9339
9340#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
9341 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
9342
9343#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
9344 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
9345
9346#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
9347 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
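/* Illustrative only: IEM_MC_STORE_XREG_U32(a_iXReg, 2, a_u32Value) - the '2'
 * being a_iDWord - replaces just dword element #2 of the XMM register and
 * leaves the other three dwords untouched, which is why the emitter below
 * allocates the guest SIMD register ForUpdate rather than ForFullWrite. */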
9348
9349 /** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8/IEM_MC_STORE_XREG_R32/IEM_MC_STORE_XREG_R64. */
9350DECL_INLINE_THROW(uint32_t)
9351iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
9352 uint8_t cbLocal, uint8_t iElem)
9353{
9354 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9355 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
9356
9357#ifdef VBOX_STRICT
9358 switch (cbLocal)
9359 {
9360 case sizeof(uint64_t): Assert(iElem < 2); break;
9361 case sizeof(uint32_t): Assert(iElem < 4); break;
9362 case sizeof(uint16_t): Assert(iElem < 8); break;
9363 case sizeof(uint8_t): Assert(iElem < 16); break;
9364 default: AssertFailed();
9365 }
9366#endif
9367
9368 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9369 kIemNativeGstSimdRegLdStSz_Low128,
9370 kIemNativeGstRegUse_ForUpdate);
9371 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9372
9373 switch (cbLocal)
9374 {
9375 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9376 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9377 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9378 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9379 default: AssertFailed();
9380 }
9381
9382 /* Free but don't flush the source register. */
9383 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9384 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9385
9386 return off;
9387}
9388
9389
9390#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
9391 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
9392
9393/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
9394DECL_INLINE_THROW(uint32_t)
9395iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
9396{
9397 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9398 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9399
9400 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9401 kIemNativeGstSimdRegLdStSz_Low128,
9402 kIemNativeGstRegUse_ForUpdate);
9403 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9404
9405     /* Zero the vector register first, then store the 64-bit value to the lower 64 bits. */
9406 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
9407 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
9408
9409 /* Free but don't flush the source register. */
9410 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9411 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9412
9413 return off;
9414}
9415
9416
9417#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
9418 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
9419
9420/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
9421DECL_INLINE_THROW(uint32_t)
9422iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
9423{
9424 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9425 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9426
9427 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9428 kIemNativeGstSimdRegLdStSz_Low128,
9429 kIemNativeGstRegUse_ForUpdate);
9430 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9431
9432 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
9433 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
9434 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
9435
9436 /* Free but don't flush the source register. */
9437 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9438 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9439
9440 return off;
9441}
9442
9443
9444#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
9445 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
9446
9447/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
9448DECL_INLINE_THROW(uint32_t)
9449iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
9450 uint8_t idxSrcVar, uint8_t iDwSrc)
9451{
9452 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9453 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9454
9455 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9456 kIemNativeGstSimdRegLdStSz_Low128,
9457 kIemNativeGstRegUse_ForUpdate);
9458 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9459
9460 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
9461 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
9462
9463 /* Free but don't flush the destination register. */
9464 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9465 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9466
9467 return off;
9468}
9469
9470
9471#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
9472 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
9473
9474/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
9475DECL_INLINE_THROW(uint32_t)
9476iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
9477{
9478 /*
9479 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
9480 * if iYRegDst gets allocated first for the full write it won't load the
9481 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
9482 * duplicated from the already allocated host register for iYRegDst containing
9483      * garbage. This will be caught by the guest register value checking in debug
9484 * builds.
9485 */
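    /* E.g. a VEX.128 register-to-register move with identical source and
     * destination (think "vmovaps xmm1, xmm1") only needs the upper half of
     * the YMM register cleared, which is what the else branch below does.
     * The instruction example is illustrative, not taken from this file. */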
9486 if (iYRegDst != iYRegSrc)
9487 {
9488 /* Allocate destination and source register. */
9489 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9490 kIemNativeGstSimdRegLdStSz_256,
9491 kIemNativeGstRegUse_ForFullWrite);
9492 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9493 kIemNativeGstSimdRegLdStSz_Low128,
9494 kIemNativeGstRegUse_ReadOnly);
9495
9496 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9497 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9498
9499 /* Free but don't flush the source and destination register. */
9500 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9501 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9502 }
9503 else
9504 {
9505         /* This effectively only clears the upper 128 bits of the register. */
9506 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9507 kIemNativeGstSimdRegLdStSz_High128,
9508 kIemNativeGstRegUse_ForFullWrite);
9509
9510 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
9511
9512 /* Free but don't flush the destination register. */
9513 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
9514 }
9515
9516 return off;
9517}
9518
9519
9520#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
9521 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
9522
9523/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
9524DECL_INLINE_THROW(uint32_t)
9525iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
9526{
9527 /*
9528 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
9529 * if iYRegDst gets allocated first for the full write it won't load the
9530 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
9531 * duplicated from the already allocated host register for iYRegDst containing
9532      * garbage. This will be caught by the guest register value checking in debug
9533      * builds. iYRegSrc == iYRegDst would effectively only clear the upper 256 bits
9534      * of a ZMM register, which we don't support yet, so this is just a nop.
9535 */
9536 if (iYRegDst != iYRegSrc)
9537 {
9538 /* Allocate destination and source register. */
9539 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
9540 kIemNativeGstSimdRegLdStSz_256,
9541 kIemNativeGstRegUse_ReadOnly);
9542 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
9543 kIemNativeGstSimdRegLdStSz_256,
9544 kIemNativeGstRegUse_ForFullWrite);
9545
9546 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9547
9548 /* Free but don't flush the source and destination register. */
9549 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9550 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9551 }
9552
9553 return off;
9554}
9555
9556
9557#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
9558 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
9559
9560/** Emits code for IEM_MC_FETCH_YREG_U128. */
9561DECL_INLINE_THROW(uint32_t)
9562iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
9563{
9564 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9565 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9566
9567 Assert(iDQWord <= 1);
9568 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9569 iDQWord == 1
9570 ? kIemNativeGstSimdRegLdStSz_High128
9571 : kIemNativeGstSimdRegLdStSz_Low128,
9572 kIemNativeGstRegUse_ReadOnly);
9573
9574 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9575 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9576
9577 if (iDQWord == 1)
9578 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9579 else
9580 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9581
9582 /* Free but don't flush the source register. */
9583 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9584 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9585
9586 return off;
9587}
9588
9589
9590#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
9591 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
9592
9593/** Emits code for IEM_MC_FETCH_YREG_U64. */
9594DECL_INLINE_THROW(uint32_t)
9595iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
9596{
9597 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9598 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9599
9600 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9601 iQWord >= 2
9602 ? kIemNativeGstSimdRegLdStSz_High128
9603 : kIemNativeGstSimdRegLdStSz_Low128,
9604 kIemNativeGstRegUse_ReadOnly);
9605
9606 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9607 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9608
9609 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9610
9611 /* Free but don't flush the source register. */
9612 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9613 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9614
9615 return off;
9616}
9617
9618
9619#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
9620 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
9621
9622/** Emits code for IEM_MC_FETCH_YREG_U32. */
9623DECL_INLINE_THROW(uint32_t)
9624iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
9625{
9626 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9627 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9628
9629 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9630 iDWord >= 4
9631 ? kIemNativeGstSimdRegLdStSz_High128
9632 : kIemNativeGstSimdRegLdStSz_Low128,
9633 kIemNativeGstRegUse_ReadOnly);
9634
9635 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9636 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9637
9638 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9639
9640 /* Free but don't flush the source register. */
9641 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9642 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9643
9644 return off;
9645}
9646
9647
9648#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
9649 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
9650
9651/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
9652DECL_INLINE_THROW(uint32_t)
9653iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
9654{
9655 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9656 kIemNativeGstSimdRegLdStSz_High128,
9657 kIemNativeGstRegUse_ForFullWrite);
9658
9659 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
9660
9661 /* Free but don't flush the register. */
9662 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
9663
9664 return off;
9665}
9666
9667
9668#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
9669 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
9670
9671/** Emits code for IEM_MC_STORE_YREG_U128. */
9672DECL_INLINE_THROW(uint32_t)
9673iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
9674{
9675 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9676 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9677
9678 Assert(iDQword <= 1);
9679 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9680 iDQword == 0
9681 ? kIemNativeGstSimdRegLdStSz_Low128
9682 : kIemNativeGstSimdRegLdStSz_High128,
9683 kIemNativeGstRegUse_ForFullWrite);
9684
9685 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9686
9687 if (iDQword == 0)
9688 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9689 else
9690 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
9691
9692 /* Free but don't flush the source register. */
9693 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9694 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9695
9696 return off;
9697}
9698
9699
9700#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
9701 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
9702
9703/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
9704DECL_INLINE_THROW(uint32_t)
9705iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9706{
9707 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9708 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9709
9710 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9711 kIemNativeGstSimdRegLdStSz_256,
9712 kIemNativeGstRegUse_ForFullWrite);
9713
9714 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9715
9716 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9717 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9718
9719 /* Free but don't flush the source register. */
9720 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9721 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9722
9723 return off;
9724}
9725
9726
9727#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
9728 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
9729
9730/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
9731DECL_INLINE_THROW(uint32_t)
9732iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9733{
9734 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9735 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
9736
9737 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9738 kIemNativeGstSimdRegLdStSz_256,
9739 kIemNativeGstRegUse_ForFullWrite);
9740
9741 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9742
9743 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9744 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9745
9746 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9747 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9748
9749 return off;
9750}
9751
9752
9753#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
9754 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
9755
9756/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
9757DECL_INLINE_THROW(uint32_t)
9758iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9759{
9760 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9761 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
9762
9763 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9764 kIemNativeGstSimdRegLdStSz_256,
9765 kIemNativeGstRegUse_ForFullWrite);
9766
9767 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9768
9769 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9770 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9771
9772 /* Free but don't flush the source register. */
9773 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9774 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9775
9776 return off;
9777}
9778
9779
9780#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
9781 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
9782
9783/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
9784DECL_INLINE_THROW(uint32_t)
9785iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9786{
9787 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9788 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9789
9790 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9791 kIemNativeGstSimdRegLdStSz_256,
9792 kIemNativeGstRegUse_ForFullWrite);
9793
9794 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9795
9796 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9797 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9798
9799 /* Free but don't flush the source register. */
9800 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9801 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9802
9803 return off;
9804}
9805
9806
9807#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
9808 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
9809
9810/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
9811DECL_INLINE_THROW(uint32_t)
9812iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9813{
9814 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9815 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9816
9817 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9818 kIemNativeGstSimdRegLdStSz_256,
9819 kIemNativeGstRegUse_ForFullWrite);
9820
9821 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9822
9823 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
9824 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
9825
9826 /* Free but don't flush the source register. */
9827 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9828 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9829
9830 return off;
9831}
9832
9833
9834#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
9835 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
9836
9837/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
9838DECL_INLINE_THROW(uint32_t)
9839iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9840{
9841 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9842 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
9843
9844 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9845 kIemNativeGstSimdRegLdStSz_256,
9846 kIemNativeGstRegUse_ForFullWrite);
9847
9848 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9849
9850 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
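    /* No need to zero the high 128 bits here: with f256Bit set the broadcast writes the whole 256-bit register. */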
9851
9852 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9853 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9854
9855 return off;
9856}
9857
9858
9859#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
9860 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
9861
9862/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
9863DECL_INLINE_THROW(uint32_t)
9864iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9865{
9866 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9867 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
9868
9869 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9870 kIemNativeGstSimdRegLdStSz_256,
9871 kIemNativeGstRegUse_ForFullWrite);
9872
9873 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9874
9875 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9876
9877 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9878 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9879
9880 return off;
9881}
9882
9883
9884#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
9885 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
9886
9887/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
9888DECL_INLINE_THROW(uint32_t)
9889iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9890{
9891 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9892 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9893
9894 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9895 kIemNativeGstSimdRegLdStSz_256,
9896 kIemNativeGstRegUse_ForFullWrite);
9897
9898 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9899
9900 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9901
9902 /* Free but don't flush the source register. */
9903 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9904 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9905
9906 return off;
9907}
9908
9909
9910#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
9911 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
9912
9913/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
9914DECL_INLINE_THROW(uint32_t)
9915iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9916{
9917 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9918 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9919
9920 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9921 kIemNativeGstSimdRegLdStSz_256,
9922 kIemNativeGstRegUse_ForFullWrite);
9923
9924 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9925
9926 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
9927
9928 /* Free but don't flush the source register. */
9929 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9930 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9931
9932 return off;
9933}
9934
9935
9936#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
9937 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
9938
9939/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
9940DECL_INLINE_THROW(uint32_t)
9941iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9942{
9943 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9944 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9945
9946 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9947 kIemNativeGstSimdRegLdStSz_256,
9948 kIemNativeGstRegUse_ForFullWrite);
9949
9950 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
9951
9952 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
9953
9954 /* Free but don't flush the source register. */
9955 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9956 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9957
9958 return off;
9959}
9960
9961
9962#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
9963 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
9964
9965/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
9966DECL_INLINE_THROW(uint32_t)
9967iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9968{
9969 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9970 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
9971
9972 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
9973 kIemNativeGstSimdRegLdStSz_256,
9974 kIemNativeGstRegUse_ForFullWrite);
9975
9976 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
9977
9978 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
9979 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
9980
9981 /* Free but don't flush the source register. */
9982 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9983 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
9984
9985 return off;
9986}
9987
9988
9989#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
9990 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
9991
9992/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
9993DECL_INLINE_THROW(uint32_t)
9994iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
9995{
9996 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9997 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
9998
9999 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10000 kIemNativeGstSimdRegLdStSz_256,
10001 kIemNativeGstRegUse_ForFullWrite);
10002
10003 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10004
10005 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10006 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10007
10008 /* Free but don't flush the source register. */
10009 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10010 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10011
10012 return off;
10013}
10014
10015
10016#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
10017 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
10018
10019/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
10020DECL_INLINE_THROW(uint32_t)
10021iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
10022{
10023 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10024 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10025
10026 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10027 kIemNativeGstSimdRegLdStSz_256,
10028 kIemNativeGstRegUse_ForFullWrite);
10029 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10030 kIemNativeGstSimdRegLdStSz_Low128,
10031 kIemNativeGstRegUse_ReadOnly);
10032 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10033
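    /* Result: dst[63:0] = u64Local, dst[127:64] = SrcHx[127:64], dst[255:128] = 0. */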
10034 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10035 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10036 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10037
10038 /* Free but don't flush the source and destination registers. */
10039 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10040 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10041 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10042
10043 return off;
10044}
10045
10046
10047#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
10048 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
10049
10050/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
10051DECL_INLINE_THROW(uint32_t)
10052iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
10053{
10054 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10055 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10056
10057 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10058 kIemNativeGstSimdRegLdStSz_256,
10059 kIemNativeGstRegUse_ForFullWrite);
10060 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10061 kIemNativeGstSimdRegLdStSz_Low128,
10062 kIemNativeGstRegUse_ReadOnly);
10063 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10064
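    /* Result: dst[63:0] = SrcHx[63:0], dst[127:64] = u64Local, dst[255:128] = 0. */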
10065 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10066 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
10067 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10068
10069 /* Free but don't flush the source and destination registers. */
10070 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10071 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10072 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10073
10074 return off;
10075}
10076
10077
10078#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
10079 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
10080
10081
10082/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
10083DECL_INLINE_THROW(uint32_t)
10084iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
10085{
10086 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10087 kIemNativeGstSimdRegLdStSz_Low128,
10088 kIemNativeGstRegUse_ForUpdate);
10089
10090 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
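    /* E.g. bImm8Mask == 0xf clears all four dwords, i.e. the entire low 128 bits, which a single
       full-width zero could cover; 0x3 and 0xc each amount to a single 64-bit clear. */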
10091 if (bImm8Mask & RT_BIT(0))
10092 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
10093 if (bImm8Mask & RT_BIT(1))
10094 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
10095 if (bImm8Mask & RT_BIT(2))
10096 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
10097 if (bImm8Mask & RT_BIT(3))
10098 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
10099
10100 /* Free but don't flush the destination register. */
10101 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10102
10103 return off;
10104}
10105
10106
10107#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
10108 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
10109
10110#define IEM_MC_FETCH_YREG_YMM(a_uYmmDst, a_iYRegSrc) \
10111 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_uYmmDst, a_iYRegSrc)
10112
10113/** Emits code for IEM_MC_FETCH_YREG_U256/IEM_MC_FETCH_YREG_YMM. */
10114DECL_INLINE_THROW(uint32_t)
10115iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
10116{
10117 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10118 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
10119
10120 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10121 kIemNativeGstSimdRegLdStSz_256,
10122 kIemNativeGstRegUse_ReadOnly);
10123 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10124
10125 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
10126
10127 /* Free but don't flush the source register. */
10128 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10129 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10130
10131 return off;
10132}
10133
10134
10135#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
10136 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
10137
10138#define IEM_MC_STORE_YREG_YMM_ZX_VLMAX(a_iYRegDst, a_uYmmSrc) \
10139 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_uYmmSrc)
10140
10141/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX/IEM_MC_STORE_YREG_YMM_ZX_VLMAX. */
10142DECL_INLINE_THROW(uint32_t)
10143iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
10144{
10145 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10146 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10147
10148 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10149 kIemNativeGstSimdRegLdStSz_256,
10150 kIemNativeGstRegUse_ForFullWrite);
10151 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10152
10153 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
10154
10155 /* Free but don't flush the source register. */
10156 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10157 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10158
10159 return off;
10160}
10161
10162
10163#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
10164 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
10165
10166
10167/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
10168DECL_INLINE_THROW(uint32_t)
10169iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
10170 uint8_t idxSrcVar, uint8_t iDwSrc)
10171{
10172 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10173 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10174
10175 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10176 iDwDst < 4
10177 ? kIemNativeGstSimdRegLdStSz_Low128
10178 : kIemNativeGstSimdRegLdStSz_High128,
10179 kIemNativeGstRegUse_ForUpdate);
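    /* Only the 128-bit half containing the destination dword is loaded and marked for update. */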
10180 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10181 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10182
10183 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
10184 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
10185
10186 /* Free but don't flush the source register. */
10187 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10188 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10189 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10190
10191 return off;
10192}
10193
10194
10195#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
10196 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
10197
10198
10199/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
10200DECL_INLINE_THROW(uint32_t)
10201iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
10202 uint8_t idxSrcVar, uint8_t iQwSrc)
10203{
10204 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10205 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10206
10207 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10208 iQwDst < 2
10209 ? kIemNativeGstSimdRegLdStSz_Low128
10210 : kIemNativeGstSimdRegLdStSz_High128,
10211 kIemNativeGstRegUse_ForUpdate);
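    /* Only the 128-bit half containing the destination qword is loaded and marked for update. */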
10212 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10213 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10214
10215 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
10216 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
10217
10218 /* Free but don't flush the source register. */
10219 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10220 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10221 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10222
10223 return off;
10224}
10225
10226
10227#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
10228 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
10229
10230
10231/** Emits code for IEM_MC_STORE_YREG_U64. */
10232DECL_INLINE_THROW(uint32_t)
10233iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
10234{
10235 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10236 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10237
10238 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10239 iQwDst < 2
10240 ? kIemNativeGstSimdRegLdStSz_Low128
10241 : kIemNativeGstSimdRegLdStSz_High128,
10242 kIemNativeGstRegUse_ForUpdate);
10243
10244 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10245
10246 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
10247
10248 /* Free but don't flush the source register. */
10249 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10250 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10251
10252 return off;
10253}
10254
10255
10256#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
10257 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
10258
10259/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
10260DECL_INLINE_THROW(uint32_t)
10261iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10262{
10263 RT_NOREF(pReNative, iYReg);
10264 /** @todo Needs to be implemented when support for AVX-512 is added. */
10265 return off;
10266}
10267
10268
10269
10270/*********************************************************************************************************************************
10271* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
10272*********************************************************************************************************************************/
10273
10274/**
10275 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
10276 */
10277DECL_INLINE_THROW(uint32_t)
10278iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs, uint8_t idxInstr)
10279{
10280 /* Grab the MXCSR register; it must not be call-volatile, or we would end up freeing it when setting up the call below. */
10281 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
10282 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
10283 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
10284
10285#if 0 /* This is not required right now as the called helper will set up the SSE/AVX state if it is an assembly one. */
10286 /*
10287 * Need to do the FPU preparation.
10288 */
10289 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
10290#endif
10291
10292 /*
10293 * Do all the call setup and cleanup.
10294 */
10295 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS,
10296 false /*fFlushPendingWrites*/);
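    /* The hidden argument slot(s) are for the MXCSR input value, which is loaded into the first argument register below. */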
10297
10298 /*
10299 * Load the MXCSR register into the first argument and mask out the current exception flags.
10300 */
10301 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
10302 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
10303
10304 /*
10305 * Make the call.
10306 */
10307 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
10308
10309 /*
10310 * The updated MXCSR is in the return register; update the exception status flags.
10311 *
10312 * The return register is marked allocated as a temporary because it is required for the
10313 * exception generation check below.
10314 */
10315 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG)));
10316 uint8_t const idxRegTmp = iemNativeRegMarkAllocated(pReNative, IEMNATIVE_CALL_RET_GREG, kIemNativeWhat_Tmp);
10317 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegTmp);
10318
10319#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
10320 /* Write back the MXCSR register value (there is no delayed writeback for such registers at the moment). */
10321 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
10322#endif
10323
10324 /*
10325 * Make sure we don't have any outstanding guest register writes as we may
10326 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
10327 */
10328 off = iemNativeRegFlushPendingWrites(pReNative, off);
10329
10330#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10331 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10332#else
10333 RT_NOREF(idxInstr);
10334#endif
10335
10336 /** @todo r=aeichner ANDN from BMI1 would save us a temporary and an additional instruction here, but I don't
10337 * want to assume the availability of this instruction at the moment. */
10338 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
10339
10340 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp2, idxRegTmp);
10341 /* tmp &= X86_MXCSR_XCPT_MASK */
10342 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
10343 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
10344 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
10345 /* tmp = ~tmp */
10346 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
10347 /* tmp &= mxcsr */
10348 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegTmp2);
10349 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
10350 kIemNativeLabelType_RaiseSseAvxFpRelated);
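    /* A rough worked example of the check above, assuming the usual MXCSR layout (exception flags in
       bits 0..5, exception masks in bits 7..12, X86_MXCSR_XCPT_MASK_SHIFT = 7):
           mxcsr = 0x1f81 (all exceptions masked, IE flag set):
               ~((0x1f81 & 0x1f80) >> 7) & 0x1f81 & 0x3f == 0    -> no TB exit.
           mxcsr = 0x1f01 (IM mask clear, IE flag set):
               ~((0x1f01 & 0x1f80) >> 7) & 0x1f01 & 0x3f == 0x01 -> exit to raise the SSE/AVX FP exception. */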
10351
10352 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
10353 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10354 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
10355
10356 return off;
10357}
10358
10359
10360#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
10361 off = iemNativeEmitCallSseAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
10362
10363/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
10364DECL_INLINE_THROW(uint32_t)
10365iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10366{
10367 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10368 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10369 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
10370}
10371
10372
10373#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
10374 off = iemNativeEmitCallSseAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
10375
10376/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
10377DECL_INLINE_THROW(uint32_t)
10378iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
10379 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10380{
10381 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10382 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10383 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10384 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
10385}
10386
10387
10388/*********************************************************************************************************************************
10389* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
10390*********************************************************************************************************************************/
10391
10392#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
10393 off = iemNativeEmitCallAvxAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
10394
10395/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
10396DECL_INLINE_THROW(uint32_t)
10397iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10398{
10399 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10400 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10401 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
10402}
10403
10404
10405#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
10406 off = iemNativeEmitCallAvxAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
10407
10408/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
10409DECL_INLINE_THROW(uint32_t)
10410iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
10411 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10412{
10413 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10414 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10415 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
10416 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
10417}
10418
10419
10420#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
10421
10422
10423/*********************************************************************************************************************************
10424* Include instruction emitters. *
10425*********************************************************************************************************************************/
10426#include "target-x86/IEMAllN8veEmit-x86.h"
10427