VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@105904

Last change on this file since 105904 was 105878, checked in by vboxsync, 3 months ago

VMM/IEM: Don't flush PC prior to 64-bit relative jumps, flush it when in the #GP(0) code path. [build fix] bugref:10720 bugref:10373

1/* $Id: IEMAllN8veRecompFuncs.h 105878 2024-08-27 23:31:45Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down configs here to avoid wasting time on unused configs.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81#if defined(IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS) && !defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
82# error "IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS requires IEMNATIVE_WITH_SIMD_REG_ALLOCATOR"
83#endif
84
85
86/*********************************************************************************************************************************
87* Code emitters for flushing pending guest register writes and sanity checks *
88*********************************************************************************************************************************/
89
90#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
91
92# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
93/**
94 * Updates IEMCPU::uPcUpdatingDebug.
95 */
96DECL_INLINE_THROW(uint32_t) iemNativeEmitPcDebugAdd(PIEMRECOMPILERSTATE pReNative, uint32_t off, int64_t offDisp, uint8_t cBits)
97{
98# ifdef RT_ARCH_AMD64
99 if (pReNative->Core.fDebugPcInitialized && cBits >= 32)
100 {
101 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
102 if ((int32_t)offDisp == offDisp || cBits != 64)
103 {
104 /* add [q]word [pVCpu->iem.s.uPcUpdatingDebug], imm32/imm8 */
105 if (cBits == 64)
106 pCodeBuf[off++] = X86_OP_REX_W;
107 pCodeBuf[off++] = (int8_t)offDisp == offDisp ? 0x83 : 0x81;
108 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
109 if ((int8_t)offDisp == offDisp)
110 pCodeBuf[off++] = (int8_t)offDisp;
111 else
112 {
113 *(int32_t *)&pCodeBuf[off] = (int32_t)offDisp;
114 off += sizeof(int32_t);
115 }
116 }
117 else
118 {
119 /* mov tmp0, imm64 */
120 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, offDisp);
121
122 /* add [pVCpu->iem.s.uPcUpdatingDebug], tmp0 */
123 if (cBits == 64)
124 pCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 >= 8 ? X86_OP_REX_R : 0);
125 else if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
126 pCodeBuf[off++] = X86_OP_REX_R;
127 pCodeBuf[off++] = 0x01;
128 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0 & 7,
129 RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
130 }
131 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
132 return off;
133 }
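/* Note (added for clarity): opcode 0x83 /0 is ADD r/m, imm8 with the immediate
   sign-extended, while 0x81 /0 is ADD r/m, imm32; that is why the shorter 0x83
   form is picked above whenever offDisp fits into a signed byte. */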
134# endif
135
136 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
137 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 12);
138
139 if (pReNative->Core.fDebugPcInitialized)
140 {
141 Log4(("uPcUpdatingDebug+=%ld cBits=%d off=%#x\n", offDisp, cBits, off));
142 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
143 }
144 else
145 {
146 Log4(("uPcUpdatingDebug=rip+%ld cBits=%d off=%#x\n", offDisp, cBits, off));
147 pReNative->Core.fDebugPcInitialized = true;
148 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
149 }
150
151 if (cBits == 64)
152 off = iemNativeEmitAddGprImmEx(pCodeBuf, off, idxTmpReg, offDisp, IEMNATIVE_REG_FIXED_TMP0);
153 else
154 {
155 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxTmpReg, (int32_t)offDisp, IEMNATIVE_REG_FIXED_TMP0);
156 if (cBits == 16)
157 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, UINT16_MAX);
158 }
159
160 off = iemNativeEmitStoreGprToVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug),
161 IEMNATIVE_REG_FIXED_TMP0);
162
163 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
164 iemNativeRegFreeTmp(pReNative, idxTmpReg);
165 return off;
166}
167
168
169# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
170DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
171{
172 /* Compare the shadow with the context value; they should match. */
173 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
174 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
175 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
176 return off;
177}
178# endif
179
180#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
181
182/**
183 * Flushes delayed write of a specific guest register.
184 *
185 * This must be called prior to calling CImpl functions and any helpers that use
186 * the guest state (like raising exceptions) and such.
187 *
188 * This optimization has not yet been implemented. The first target would be
189 * RIP updates, since these are the most common ones.
190 */
191DECL_INLINE_THROW(uint32_t)
192iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
193{
194#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
195 /* If, for whatever reason, it is possible to reference the PC register at some point, we need to do the writeback here first. */
196#endif
197
198#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
199#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
200 if ( enmClass == kIemNativeGstRegRef_EFlags
201 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
202 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
203#else
204 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
205#endif
206
207 if ( enmClass == kIemNativeGstRegRef_Gpr
208 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
209 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
210#endif
211
212#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
213 if ( enmClass == kIemNativeGstRegRef_XReg
214 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
215 {
216 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
217 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
218 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
219
220 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
221 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
222 }
223#endif
224 RT_NOREF(pReNative, enmClass, idxReg);
225 return off;
226}
227
228
229
230/*********************************************************************************************************************************
231* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
232*********************************************************************************************************************************/
233
234#undef IEM_MC_BEGIN /* unused */
235#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
236 { \
237 Assert(pReNative->Core.bmVars == 0); \
238 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
239 Assert(pReNative->Core.bmStack == 0); \
240 pReNative->fMc = (a_fMcFlags); \
241 pReNative->fCImpl = (a_fCImplFlags); \
242 pReNative->cArgsX = (a_cArgsIncludingHidden)
243
244/** We have to get to the end in recompilation mode, as otherwise we won't
245 * generate code for all the IEM_MC_IF_XXX branches. */
246#define IEM_MC_END() \
247 iemNativeVarFreeAll(pReNative); \
248 } return off
249
250
251
252/*********************************************************************************************************************************
253* Native Emitter Support. *
254*********************************************************************************************************************************/
255
256#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
257
258#define IEM_MC_NATIVE_ELSE() } else {
259
260#define IEM_MC_NATIVE_ENDIF() } ((void)0)
261
262
263#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
264 off = a_fnEmitter(pReNative, off)
265
266#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
267 off = a_fnEmitter(pReNative, off, (a0))
268
269#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
270 off = a_fnEmitter(pReNative, off, (a0), (a1))
271
272#define IEM_MC_NATIVE_EMIT_2_EX(a_fnEmitter, a0, a1) \
273 off = a_fnEmitter(pReNative, off, pCallEntry->idxInstr, (a0), (a1))
274
275#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
276 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
277
278#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
279 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
280
281#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
282 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
283
284#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
285 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
286
287#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
288 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
289
290#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
291 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
292
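/* A hypothetical MC-block fragment (illustration only; iemNativeEmit_SomeOp and
   the variable names are made up) showing how the macros above are meant to be
   combined.  Since IEM_MC_NATIVE_IF/ELSE/ENDIF already supply the braces, the
   body statements follow them directly:

       IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
           IEM_MC_NATIVE_EMIT_2(iemNativeEmit_SomeOp, idxVarDst, idxVarSrc);
       IEM_MC_NATIVE_ELSE()
           ... fall back to the generic IEM_MC_* implementation ...
       IEM_MC_NATIVE_ENDIF();

   Native code is thus only emitted when the host architecture is part of
   a_fSupportedHosts; otherwise the else branch gets recompiled. */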
293
294#ifndef RT_ARCH_AMD64
295# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
296#else
297/** @note This is a naive approach that ASSUMES that the register isn't
298 * allocated, so it only works safely for the first allocation(s) in
299 * an MC block. */
300# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
301 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
302
303DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off);
304
305DECL_INLINE_THROW(uint32_t)
306iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
307{
308 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
309 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
310 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
311
312# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
313 /* Must flush the register if it holds pending writes. */
314 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
315 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
316 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
317# endif
318
319 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off);
320 return off;
321}
322
323#endif /* RT_ARCH_AMD64 */
324
325
326
327/*********************************************************************************************************************************
328* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
329*********************************************************************************************************************************/
330
331#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
332 pReNative->fMc = 0; \
333 pReNative->fCImpl = (a_fFlags); \
334 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, \
335 a_cbInstr) /** @todo not used ... */
336
337
338#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
339 pReNative->fMc = 0; \
340 pReNative->fCImpl = (a_fFlags); \
341 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
342
343DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
344 uint8_t idxInstr, uint64_t a_fGstShwFlush,
345 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
346{
347 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
348}
349
350
351#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
352 pReNative->fMc = 0; \
353 pReNative->fCImpl = (a_fFlags); \
354 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
355 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
356
357DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
358 uint8_t idxInstr, uint64_t a_fGstShwFlush,
359 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
360{
361 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
362}
363
364
365#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
366 pReNative->fMc = 0; \
367 pReNative->fCImpl = (a_fFlags); \
368 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
369 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
370
371DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
372 uint8_t idxInstr, uint64_t a_fGstShwFlush,
373 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
374 uint64_t uArg2)
375{
376 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
377}
378
379
380
381/*********************************************************************************************************************************
382* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
383*********************************************************************************************************************************/
384
385/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
386 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
387DECL_INLINE_THROW(uint32_t)
388iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
389{
390 /*
391 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
392 * return with a special status code and make the execution loop deal with
393 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
394 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
395 * could continue w/o interruption, it probably will drop into the
396 * debugger, so it is not worth the effort of trying to service it here; we
397 * just lump it in with the handling of the others.
398 *
399 * To simplify the code and the register state management even more (wrt
400 * the immediate in the AND operation), we always update the flags and skip
401 * the conditional jump associated with the extra check.
402 */
403 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
404 <= UINT32_MAX);
405#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
406 AssertMsg( pReNative->idxCurCall == 0
407 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
408 IEMLIVENESSBIT_IDX_EFL_OTHER)),
409 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
410 IEMLIVENESSBIT_IDX_EFL_OTHER)));
411#endif
412
413 /*
414 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label
415 * any pending register writes must be flushed.
416 */
417 off = iemNativeRegFlushPendingWrites(pReNative, off);
418
419 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
420 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
421 true /*fSkipLivenessAssert*/);
422 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxEflReg,
423 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
424 kIemNativeLabelType_ReturnWithFlags);
425 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
426 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
427
428 /* Free but don't flush the EFLAGS register. */
429 iemNativeRegFreeTmp(pReNative, idxEflReg);
430
431 return off;
432}
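/* For illustration only: an interpreter-style equivalent of the check emitted
   above.  The helper below is a made-up sketch, not part of IEM; only the flag
   constants are the real ones used by the emitter. */
#if 0
DECLINLINE(bool) iemExampleFinishInstructionFlagsCheck(uint32_t *pfEFlags)
{
    /* Any of these forces a TB exit via the ReturnWithFlags path. */
    if (*pfEFlags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
        return true;
    /* Otherwise just clear RF and the interrupt shadow, mirroring the AND + store above. */
    *pfEFlags &= ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
    return false;
}
#endif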
433
434
435/** Helper for iemNativeEmitFinishInstructionWithStatus. */
436DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
437{
438 unsigned const offOpcodes = pCallEntry->offOpcode;
439 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
440 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
441 {
442 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
443 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
444 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
445 }
446 AssertFailedReturn(NIL_RTGCPHYS);
447}
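/* Note (added for clarity): pTb->aRanges[] describes the TB's opcode bytes in
   ascending offOpcodes order, so the loop above simply locates the range that
   contains pCallEntry->offOpcode and translates that offset into the
   corresponding guest-physical address. */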
448
449
450/** The VINF_SUCCESS dummy. */
451template<int const a_rcNormal, bool const a_fIsJump>
452DECL_FORCE_INLINE_THROW(uint32_t)
453iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
454 int32_t const offJump)
455{
456 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
457 if (a_rcNormal != VINF_SUCCESS)
458 {
459#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
460 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
461#else
462 RT_NOREF_PV(pCallEntry);
463#endif
464
465 /* As this code returns from the TB any pending register writes must be flushed. */
466 off = iemNativeRegFlushPendingWrites(pReNative, off);
467
468 /*
469 * If we're in a conditional, mark the current branch as exiting so we
470 * can disregard its state when we hit the IEM_MC_ENDIF.
471 */
472 iemNativeMarkCurCondBranchAsExiting(pReNative);
473
474 /*
475 * Use the lookup table for getting to the next TB quickly.
476 * Note! In this code path there can only be one entry at present.
477 */
478 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
479 PCIEMTB const pTbOrg = pReNative->pTbOrg;
480 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
481 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
482
483#if 0
484 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
485 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
486 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
487 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
488 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
489
490 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreak);
491
492#else
493 /* Load the index as argument #1 for the helper call at the given label. */
494 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
495
496 /*
497 * Figure out the physical address of the current instruction and see
498 * whether the next instruction we're about to execute is in the same
499 * page so we can optimistically skip TLB loading.
500 *
501 * - This is safe for all cases in FLAT mode.
502 * - In segmented modes it is complicated, given that a negative
503 * jump may underflow EIP and a forward jump may overflow or run into
504 * CS.LIM and trigger a #GP. The only thing we can get away with
505 * now at compile time is forward jumps w/o CS.LIM checks, since the
506 * lack of CS.LIM checks means we're good for the entire physical page
507 * we're executing on and another 15 bytes before we run into CS.LIM.
508 */
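/* Worked example (added for clarity, assuming 4 KiB guest pages): with
   GCPhysPcCurrent = 0x10ffd, cbOpcode = 2 and a jump displacement of +0x10,
   GCPhysPcNext = 0x1100f lands on page 0x11 while the current instruction
   sits on page 0x10, so the GUEST_PAGE_SHIFT comparison below fails and we
   fall back to the TLB-assisted lookup exits.  The second condition likewise
   rejects instructions that themselves straddle the page boundary, since
   their trailing bytes are no longer covered by GCPhysPcCurrent's page. */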
509 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
510# if 0 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. See also iemNativeHlpReturnBreakViaLookup. */
511 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS)
512# endif
513 )
514 {
515 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
516 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
517 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
518 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
519
520 {
521 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
522 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
523
524 /* Load the key lookup flags into the 2nd argument for the helper call.
525 - This is safe wrt CS limit checking since we're only here for FLAT modes.
526 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
527 interrupt shadow.
528 - The NMI inhibiting is more questionable, though... */
529 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
530 * Should we copy it into fExec to simplify this? OTOH, it's just a
531 * couple of extra instructions if EFLAGS are already in a register. */
532 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
533 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
534
535 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
536 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookup);
537 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithIrq);
538 }
539 }
540 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
541 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlb);
542 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq);
543#endif
544 }
545 return off;
546}
547
548
549#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
550 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
551 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
552
553#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
554 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
555 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
556 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
557
558/** Same as iemRegAddToRip64AndFinishingNoFlags. */
559DECL_INLINE_THROW(uint32_t)
560iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
561{
562#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
563# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
564 if (!pReNative->Core.offPc)
565 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
566# endif
567
568 /* Allocate a temporary PC register. */
569 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
570
571 /* Perform the addition and store the result. */
572 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
573 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
574
575 /* Free but don't flush the PC register. */
576 iemNativeRegFreeTmp(pReNative, idxPcReg);
577#endif
578
579#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
580 pReNative->Core.offPc += cbInstr;
581 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
582# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
583 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 64);
584 off = iemNativeEmitPcDebugCheck(pReNative, off);
585# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
586 off = iemNativePcAdjustCheck(pReNative, off);
587# endif
588 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
589#endif
590
591 return off;
592}
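/* Note (added for clarity): when IEMNATIVE_WITH_DELAYED_PC_UPDATING is active,
   the emitters above produce no instructions for a plain advance; the length
   is merely accumulated in pReNative->Core.offPc, and the guest RIP in CPUMCTX
   is only brought up to date when pending writes are flushed (e.g. by
   iemNativeRegFlushPendingWrites) or when a branch/exit needs the real value. */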
593
594
595#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
596 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
597 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
598
599#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
600 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
601 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
602 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
603
604/** Same as iemRegAddToEip32AndFinishingNoFlags. */
605DECL_INLINE_THROW(uint32_t)
606iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
607{
608#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
609# ifdef IEMNATIVE_REG_FIXED_PC_DBG
610 if (!pReNative->Core.offPc)
611 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
612# endif
613
614 /* Allocate a temporary PC register. */
615 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
616
617 /* Perform the addition and store the result. */
618 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
619 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
620
621 /* Free but don't flush the PC register. */
622 iemNativeRegFreeTmp(pReNative, idxPcReg);
623#endif
624
625#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
626 pReNative->Core.offPc += cbInstr;
627 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
628# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
629 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 32);
630 off = iemNativeEmitPcDebugCheck(pReNative, off);
631# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
632 off = iemNativePcAdjustCheck(pReNative, off);
633# endif
634 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
635#endif
636
637 return off;
638}
639
640
641#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
642 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
643 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
644
645#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
646 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
647 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
648 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
649
650/** Same as iemRegAddToIp16AndFinishingNoFlags. */
651DECL_INLINE_THROW(uint32_t)
652iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
653{
654#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
655# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
656 if (!pReNative->Core.offPc)
657 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
658# endif
659
660 /* Allocate a temporary PC register. */
661 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
662
663 /* Perform the addition and store the result. */
664 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
665 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
666 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
667
668 /* Free but don't flush the PC register. */
669 iemNativeRegFreeTmp(pReNative, idxPcReg);
670#endif
671
672#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
673 pReNative->Core.offPc += cbInstr;
674 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
675# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
676 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 16);
677 off = iemNativeEmitPcDebugCheck(pReNative, off);
678# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
679 off = iemNativePcAdjustCheck(pReNative, off);
680# endif
681 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
682#endif
683
684 return off;
685}
686
687
688/*********************************************************************************************************************************
689* Common code for changing PC/RIP/EIP/IP. *
690*********************************************************************************************************************************/
691
692/**
693 * Emits code to check if the content of @a idxAddrReg is a canonical address,
694 * raising a \#GP(0) if it isn't.
695 *
696 * @returns New code buffer offset, UINT32_MAX on failure.
697 * @param pReNative The native recompile state.
698 * @param off The code buffer offset.
699 * @param idxAddrReg The host register with the address to check.
700 * @param idxInstr The current instruction.
701 */
702DECL_FORCE_INLINE_THROW(uint32_t)
703iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
704{
705 /*
706 * Make sure we don't have any outstanding guest register writes as we may
707 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
708 */
709 off = iemNativeRegFlushPendingWrites(pReNative, off);
710
711#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
712 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
713#else
714 RT_NOREF(idxInstr);
715#endif
716
717#ifdef RT_ARCH_AMD64
718 /*
719 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
720 * return raisexcpt();
721 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
722 */
723 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
724
725 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
726 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
727 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
728 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
729 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
730
731 iemNativeRegFreeTmp(pReNative, iTmpReg);
732
733#elif defined(RT_ARCH_ARM64)
734 /*
735 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
736 * return raisexcpt();
737 * ----
738 * mov x1, 0x800000000000
739 * add x1, x0, x1
740 * cmp xzr, x1, lsr 48
741 * b.ne .Lraisexcpt
742 */
743 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
744
745 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
746 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
747 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
748 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
749
750 iemNativeRegFreeTmp(pReNative, iTmpReg);
751
752#else
753# error "Port me"
754#endif
755 return off;
756}
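/* For illustration only: a standalone sketch of the canonical-address test
   emitted above (the helper name is made up).  On x86-64 an address is
   canonical when bits 63:47 all equal bit 47, i.e. it lies in
   [0, 0x00007fffffffffff] or [0xffff800000000000, 0xffffffffffffffff]. */
#if 0
static bool iemExampleIsCanonical(uint64_t uAddr)
{
    /* Adding 0x8000 to the high dword maps both canonical ranges onto values
       with bits 31:16 clear, so a shift + compare suffices:
       0x00007fffffffffff -> true, 0xffff800000000000 -> true,
       0x0000800000000000 -> false. */
    return (((uint32_t)(uAddr >> 32) + UINT32_C(0x8000)) >> 16) == 0;
}
#endif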
757
758
759/**
760 * Emits code to check if the content of @a idxAddrReg is a canonical address,
761 * raising a \#GP(0) if it isn't.
762 *
763 * Caller makes sure everything is flushed, except maybe PC.
764 *
765 * @returns New code buffer offset, UINT32_MAX on failure.
766 * @param pReNative The native recompile state.
767 * @param off The code buffer offset.
768 * @param idxAddrReg The host register with the address to check.
769 * @param offDisp The relative displacement that has already been
770 * added to idxAddrReg and must be subtracted if
771 * raising a \#GP(0).
772 * @param idxInstr The current instruction.
773 */
774DECL_FORCE_INLINE_THROW(uint32_t)
775iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
776 uint8_t idxAddrReg, int64_t offDisp, uint8_t idxInstr)
777{
778#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
779 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
780#endif
781
782#ifdef RT_ARCH_AMD64
783 /*
784 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
785 * return raisexcpt();
786 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
787 */
788 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
789
790 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
791 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
792 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
793 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
794
795#elif defined(RT_ARCH_ARM64)
796 /*
797 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
798 * return raisexcpt();
799 * ----
800 * mov x1, 0x800000000000
801 * add x1, x0, x1
802 * cmp xzr, x1, lsr 48
803 * b.ne .Lraisexcpt
804 */
805 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
806
807 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
808 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
809 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
810#else
811# error "Port me"
812#endif
813
814 /* Jump to the #GP code (hoping static prediction considers forward branches as not-taken). */
815 uint32_t const offFixup1 = off;
816 off = iemNativeEmitJnzToFixed(pReNative, off, off /*8-bit jump suffices*/);
817
818 /* jump .Lnoexcept; Skip the #GP code. */
819 uint32_t const offFixup2 = off;
820 off = iemNativeEmitJmpToFixed(pReNative, off, off /*8-bit jump suffices*/);
821
822 /* .Lraisexcpt: */
823 iemNativeFixupFixedJump(pReNative, offFixup1, off);
824#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
825 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
826#else
827 RT_NOREF(idxInstr);
828#endif
829
830 /* Undo the PC adjustment and store the old PC value. */
831 off = iemNativeEmitSubGprImm(pReNative, off, idxAddrReg, offDisp, iTmpReg);
832 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxAddrReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
833
834 off = iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0, false /*fActuallyExitingTb*/);
835
836 /* .Lnoexcept: */
837 iemNativeFixupFixedJump(pReNative, offFixup2, off);
838
839 iemNativeRegFreeTmp(pReNative, iTmpReg);
840 return off;
841}
842
843
844/**
845 * Emits code to check if the content of @a idxAddrReg is a canonical address,
846 * raising a \#GP(0) if it isn't.
847 *
848 * Caller makes sure everything is flushed, except maybe PC.
849 *
850 * @returns New code buffer offset, UINT32_MAX on failure.
851 * @param pReNative The native recompile state.
852 * @param off The code buffer offset.
853 * @param idxAddrReg The host register with the address to check.
854 * @param idxOldPcReg Register holding the old PC that offPc is relative
855 * to if available, otherwise UINT8_MAX.
856 * @param idxInstr The current instruction.
857 */
858DECL_FORCE_INLINE_THROW(uint32_t)
859iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
860 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
861{
862#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
863 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
864#endif
865
866#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
867# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
868 if (!pReNative->Core.offPc)
869# endif
870 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
871#else
872 RT_NOREF(idxInstr);
873#endif
874
875#ifdef RT_ARCH_AMD64
876 /*
877 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
878 * return raisexcpt();
879 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
880 */
881 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
882
883 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
884 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
885 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
886 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
887
888#elif defined(RT_ARCH_ARM64)
889 /*
890 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
891 * return raisexcpt();
892 * ----
893 * mov x1, 0x800000000000
894 * add x1, x0, x1
895 * cmp xzr, x1, lsr 48
896 * b.ne .Lraisexcpt
897 */
898 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
899
900 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
901 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
902 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
903#else
904# error "Port me"
905#endif
906
907#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
908 if (pReNative->Core.offPc)
909 {
910 /** @todo On x86, it is said that conditional jumps forward are statically
911 * predicted as not taken, so this isn't a very good construct.
912 * Investigate whether it makes sense to invert it and add another
913 * jump. Also, find out wtf the static predictor does here on arm! */
914 uint32_t const offFixup = off;
915 off = iemNativeEmitJzToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
916
917 /* .Lraisexcpt: */
918# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
919 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
920# endif
921 /* We need to update cpum.GstCtx.rip. */
922 if (idxOldPcReg == UINT8_MAX)
923 {
924 idxOldPcReg = iTmpReg;
925 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
926 }
927 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
928 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
929
930 off = iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0, false /*fActuallyExitingTb*/);
931 iemNativeFixupFixedJump(pReNative, offFixup, off);
932 }
933 else
934#endif
935 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
936
937 iemNativeRegFreeTmp(pReNative, iTmpReg);
938
939 return off;
940}
941
942
943/**
944 * Emits code to check that the content of @a idxAddrReg is within the limit
945 * of CS, raising a \#GP(0) if it isn't.
946 *
947 * @returns New code buffer offset; throws VBox status code on error.
948 * @param pReNative The native recompile state.
949 * @param off The code buffer offset.
950 * @param idxAddrReg The host register (32-bit) with the address to
951 * check.
952 * @param idxInstr The current instruction.
953 */
954DECL_FORCE_INLINE_THROW(uint32_t)
955iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
956 uint8_t idxAddrReg, uint8_t idxInstr)
957{
958 /*
959 * Make sure we don't have any outstanding guest register writes as we may
960 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
961 */
962 off = iemNativeRegFlushPendingWrites(pReNative, off);
963
964#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
965 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
966#else
967 RT_NOREF(idxInstr);
968#endif
969
970 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
971 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
972 kIemNativeGstRegUse_ReadOnly);
973
974 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
975 off = iemNativeEmitJaTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
976
977 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
978 return off;
979}
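/* Note (added for clarity): the cmp + ja pair above treats CS.LIM as an
   inclusive limit, i.e. only target addresses strictly above the limit take
   the RaiseGp0 exit; an address equal to the limit is still accepted. */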
980
981
982
983
984/**
985 * Emits code to check that the content of @a idxAddrReg is within the limit
986 * of CS, raising a \#GP(0) if it isn't.
987 *
988 * Caller makes sure everything is flushed, except maybe PC.
989 *
990 * @returns New code buffer offset; throws VBox status code on error.
991 * @param pReNative The native recompile state.
992 * @param off The code buffer offset.
993 * @param idxAddrReg The host register (32-bit) with the address to
994 * check.
995 * @param idxOldPcReg Register holding the old PC that offPc is relative
996 * to if available, otherwise UINT8_MAX.
997 * @param idxInstr The current instruction.
998 */
999DECL_FORCE_INLINE_THROW(uint32_t)
1000iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1001 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
1002{
1003#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1004 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
1005#endif
1006
1007#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1008# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1009 if (!pReNative->Core.offPc)
1010# endif
1011 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1012#else
1013 RT_NOREF(idxInstr);
1014#endif
1015
1016 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
1017 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
1018 kIemNativeGstRegUse_ReadOnly);
1019
1020 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
1021#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1022 if (pReNative->Core.offPc)
1023 {
1024 uint32_t const offFixup = off;
1025 off = iemNativeEmitJbeToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
1026
1027 /* Raising a GP(0), but first we need to update cpum.GstCtx.rip. */
1028 if (idxOldPcReg == UINT8_MAX)
1029 {
1030 idxOldPcReg = idxAddrReg;
1031 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1032 }
1033 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
1034 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1035# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1036 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1037# endif
1038 off = iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0, false /*fActuallyExitingTb*/);
1039 iemNativeFixupFixedJump(pReNative, offFixup, off);
1040 }
1041 else
1042#endif
1043 off = iemNativeEmitJaTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
1044
1045 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
1046 return off;
1047}
1048
1049
1050/*********************************************************************************************************************************
1051* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
1052*********************************************************************************************************************************/
1053
1054#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1055 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1056 (a_enmEffOpSize), pCallEntry->idxInstr); \
1057 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1058
1059#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1060 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1061 (a_enmEffOpSize), pCallEntry->idxInstr); \
1062 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1063 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1064
1065#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
1066 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1067 IEMMODE_16BIT, pCallEntry->idxInstr); \
1068 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1069
1070#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1071 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1072 IEMMODE_16BIT, pCallEntry->idxInstr); \
1073 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1074 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1075
1076#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
1077 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1078 IEMMODE_64BIT, pCallEntry->idxInstr); \
1079 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1080
1081#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1082 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1083 IEMMODE_64BIT, pCallEntry->idxInstr); \
1084 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1085 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1086
1087
1088#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1089 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1090 (a_enmEffOpSize), pCallEntry->idxInstr); \
1091 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1092
1093#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1094 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1095 (a_enmEffOpSize), pCallEntry->idxInstr); \
1096 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1097 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1098
1099#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG(a_i16, a_cbInstr, a_rcNormal) \
1100 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1101 IEMMODE_16BIT, pCallEntry->idxInstr); \
1102 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1103
1104#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1105 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1106 IEMMODE_16BIT, pCallEntry->idxInstr); \
1107 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1108 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1109
1110#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG(a_i32, a_cbInstr, a_rcNormal) \
1111 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1112 IEMMODE_64BIT, pCallEntry->idxInstr); \
1113 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1114
1115#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1116 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1117 IEMMODE_64BIT, pCallEntry->idxInstr); \
1118 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1119 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1120
1121/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
1122 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
1123 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
1124template<bool const a_fWithinPage>
1125DECL_INLINE_THROW(uint32_t)
1126iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1127 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1128{
1129 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
1130#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1131 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1132 if (a_fWithinPage && enmEffOpSize == IEMMODE_64BIT)
1133 {
1134 /* No #GP checking required, just update offPc and get on with it. */
1135 pReNative->Core.offPc += (int64_t)offDisp + cbInstr;
1136# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1137 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1138# endif
1139 }
1140 else
1141#endif
1142 {
1143 /* Flush all but PC iff we're doing a 64-bit update here and this isn't within a page. */
1144 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT && !a_fWithinPage))
1145 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1146
1147 /* Allocate a temporary PC register. */
1148 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1149 kIemNativeGstRegUse_ForUpdate);
1150
1151 /* Perform the addition. */
1152 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr + pReNative->Core.offPc);
1153
1154 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
1155 {
1156 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't.
1157 We can skip this if the target is within the same page. */
1158 if (!a_fWithinPage)
1159 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(pReNative, off, idxPcReg,
1160 (int64_t)offDisp + cbInstr, idxInstr);
1161 }
1162 else
1163 {
1164 /* Just truncate the result to 16-bit IP. */
1165 Assert(enmEffOpSize == IEMMODE_16BIT);
1166 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1167 }
1168
1169#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1170# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1171 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1172 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1173# endif
1174 /* Since we've already got the new PC value in idxPcReg, we can just as
1175 well write it out and reset offPc to zero. Otherwise, we'd need to use
1176 a copy of the shadow PC, which would cost another move instruction here. */
1177# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS)
1178 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
1179 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
1180 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
1181 Log4(("iemNativeEmitRip64RelativeJumpAndFinishingNoFlags: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u cCondDepth=%d\n",
1182 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped, pReNative->cCondDepth));
1183 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
1184# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1185 iemNativeDbgInfoAddNativeOffset(pReNative, off);
1186 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
1187# endif
1188# endif
1189 pReNative->Core.offPc = 0;
1190#endif
1191
1192 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1193
1194 /* Free but don't flush the PC register. */
1195 iemNativeRegFreeTmp(pReNative, idxPcReg);
1196 }
1197 return off;
1198}
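/* Rough interpreter-style outline of what the function above generates
   (a sketch only; the real code additionally handles delayed PC updates,
   debug checking and statistics):

       uint64_t uNewRip = pVCpu->cpum.GstCtx.rip + cbInstr + (int64_t)offDisp;
       if (enmEffOpSize == IEMMODE_16BIT)
           uNewRip &= UINT16_MAX;               // truncate to 16-bit IP
       else if (!IEM_IS_CANONICAL(uNewRip))     // 64-bit: must stay canonical
           -> raise #GP(0) and leave the TB;
       pVCpu->cpum.GstCtx.rip = uNewRip;
*/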
1199
1200
1201#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1202 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1203 (a_enmEffOpSize), pCallEntry->idxInstr); \
1204 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1205
1206#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1207 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1208 (a_enmEffOpSize), pCallEntry->idxInstr); \
1209 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1210 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1211
1212#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
1213 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1214 IEMMODE_16BIT, pCallEntry->idxInstr); \
1215 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1216
1217#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1218 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1219 IEMMODE_16BIT, pCallEntry->idxInstr); \
1220 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1221 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1222
1223#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
1224 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1225 IEMMODE_32BIT, pCallEntry->idxInstr); \
1226 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1227
1228#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1229 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1230 IEMMODE_32BIT, pCallEntry->idxInstr); \
1231 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1232 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1233
1234
1235#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1236 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1237 (a_enmEffOpSize), pCallEntry->idxInstr); \
1238 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1239
1240#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1241 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1242 (a_enmEffOpSize), pCallEntry->idxInstr); \
1243 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1244 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1245
1246#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT(a_i16, a_cbInstr, a_rcNormal) \
1247 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1248 IEMMODE_16BIT, pCallEntry->idxInstr); \
1249 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1250
1251#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1252 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1253 IEMMODE_16BIT, pCallEntry->idxInstr); \
1254 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1255 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1256
1257#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT(a_i32, a_cbInstr, a_rcNormal) \
1258 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1259 IEMMODE_32BIT, pCallEntry->idxInstr); \
1260 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1261
1262#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1263 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1264 IEMMODE_32BIT, pCallEntry->idxInstr); \
1265 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1266 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1267
1268/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
1269 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
1270 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
1271template<bool const a_fFlat>
1272DECL_INLINE_THROW(uint32_t)
1273iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1274 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1275{
1276 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
1277#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1278 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1279#endif
1280
1281 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1282 if (!a_fFlat || enmEffOpSize == IEMMODE_16BIT)
1283 {
1284 off = iemNativeRegFlushPendingWrites(pReNative, off);
1285#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1286 Assert(pReNative->Core.offPc == 0);
1287#endif
1288 }
1289
1290 /* Allocate a temporary PC register. */
1291 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1292
1293 /* Perform the addition. */
 1294#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
 1295 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr + (int32_t)pReNative->Core.offPc);
 1296#else
 1297 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
 1298#endif
1299
1300 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
1301 if (enmEffOpSize == IEMMODE_16BIT)
1302 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1303
1304 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
1305 if (!a_fFlat)
1306 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1307
1308 /* Commit it. */
1309#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1310 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, enmEffOpSize == IEMMODE_32BIT ? 32 : 16);
1311 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1312#endif
1313
1314 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1315#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1316 pReNative->Core.offPc = 0;
1317#endif
1318
1319 /* Free but don't flush the PC register. */
1320 iemNativeRegFreeTmp(pReNative, idxPcReg);
1321
1322 return off;
1323}
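/* Illustration (not part of the emitted code, values are made up): for a 32-bit
   'jmp rel8 -2' at EIP=0x1000 with cbInstr=2 the sequence above computes
   0x1000 + 2 + (-2) = 0x1000 in idxPcReg, truncates to 16 bits only for a 16-bit
   operand size, checks the result against CS.LIM unless a_fFlat is true, and
   finally commits it to cpum.GstCtx.rip. */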
1324
1325
1326#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
1327 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1328 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1329
1330#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
1331 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1332 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1333 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1334
1335#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
1336 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1337 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1338
1339#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1340 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1341 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1342 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1343
1344#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
1345 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1346 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1347
1348#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1349 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1350 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1351 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1352
1353/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
1354DECL_INLINE_THROW(uint32_t)
1355iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1356 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
1357{
1358 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1359 off = iemNativeRegFlushPendingWrites(pReNative, off);
1360
1361#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1362 Assert(pReNative->Core.offPc == 0);
1363 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1364#endif
1365
1366 /* Allocate a temporary PC register. */
1367 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1368
1369 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
1370 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
1371 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1372 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1373#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1374 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, 16);
1375 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1376#endif
1377 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1378
1379 /* Free but don't flush the PC register. */
1380 iemNativeRegFreeTmp(pReNative, idxPcReg);
1381
1382 return off;
1383}
1384
1385
1386
1387/*********************************************************************************************************************************
 1388* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
1389*********************************************************************************************************************************/
1390
1391/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
1392#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
1393 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1394
1395/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
1396#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
1397 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1398
1399/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
1400#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
1401 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1402
1403/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
1404 * clears flags. */
1405#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
1406 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
1407 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1408
1409/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
1410 * clears flags. */
1411#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
1412 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
1413 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1414
1415/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
1416 * clears flags. */
1417#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
1418 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
1419 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1420
1421#undef IEM_MC_SET_RIP_U16_AND_FINISH
1422
1423
1424/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
1425#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
1426 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1427
1428/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
1429#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
1430 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1431
1432/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
1433 * clears flags. */
1434#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
1435 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
1436 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1437
1438/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
1439 * and clears flags. */
1440#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
1441 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
1442 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1443
1444#undef IEM_MC_SET_RIP_U32_AND_FINISH
1445
1446
1447/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
1448#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
1449 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1450
1451/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
1452 * and clears flags. */
1453#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
1454 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
1455 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1456
1457#undef IEM_MC_SET_RIP_U64_AND_FINISH
1458
1459
1460/** Same as iemRegRipJumpU16AndFinishNoFlags,
1461 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
1462DECL_INLINE_THROW(uint32_t)
1463iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
1464 uint8_t idxInstr, uint8_t cbVar)
1465{
1466 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1467 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1468
 1469 /* If we can't rule out a #GP(0) below, flush all dirty registers except for
 1470 PC, which will be handled specially by the two workers below if they raise a #GP(0). */
1471 bool const fMayRaiseGp0 = (f64Bit && cbVar > sizeof(uint32_t)) || (!f64Bit && !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1472 uint8_t const idxOldPcReg = fMayRaiseGp0
1473 ? iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc)
1474 : UINT8_MAX;
1475 if (fMayRaiseGp0)
1476 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1477
1478 /* Get a register with the new PC loaded from idxVarPc.
 1479 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1480 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
1481
1482 /* Check that the target is within CS.LIM / is canonical (may #GP(0) + exit TB). */
1483 if (fMayRaiseGp0)
1484 {
1485 if (f64Bit)
1486 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1487 else
1488 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1489 }
1490
1491 /* Store the result. */
1492 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1493
1494#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1495 pReNative->Core.offPc = 0;
1496 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1497# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1498 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1499 pReNative->Core.fDebugPcInitialized = true;
1500 Log4(("uPcUpdatingDebug=rip off=%#x\n", off));
1501# endif
1502#endif
1503
1504 if (idxOldPcReg != UINT8_MAX)
1505 iemNativeRegFreeTmp(pReNative, idxOldPcReg);
1506 iemNativeVarRegisterRelease(pReNative, idxVarPc);
 1507 /** @todo implicitly free the variable? */
1508
1509 return off;
1510}
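/* Note (summary of the logic above): fMayRaiseGp0 decides whether the old PC must
   be kept around - a 64-bit target wider than 32 bits can be non-canonical and a
   non-flat 16/32-bit target can exceed CS.LIM, and in both cases the #GP(0) path
   needs the original RIP in CPUMCTX. Flat 32-bit targets and 16/32-bit targets in
   64-bit mode (zero extended, thus always canonical) cannot fault here, so no
   old-PC register is allocated for them. */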
1511
1512
1513
1514/*********************************************************************************************************************************
 1515* Emitters for changing PC/RIP/EIP/IP with an indirect call (IEM_MC_IND_CALL_UXX_AND_FINISH) or relative call (IEM_MC_REL_CALL_SXX_AND_FINISH) (requires stack emitters). *
1516*********************************************************************************************************************************/
1517
 1518/** @todo These helpers naturally belong to the stack push API, but we already need them up here (we could of course move
 1519 * this code below the stack emitters, but then it would no longer be close to the rest of the PC/RIP handling...). */
1520DECL_FORCE_INLINE_THROW(uint32_t)
1521iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1522{
1523 /* Use16BitSp: */
1524#ifdef RT_ARCH_AMD64
1525 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1526 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1527#else
1528 /* sub regeff, regrsp, #cbMem */
1529 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
1530 /* and regeff, regeff, #0xffff */
1531 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1532 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
 1533 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0. */
1534 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
1535#endif
1536 return off;
1537}
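/* Worked example (illustration only): with RSP=0x00010004 and cbMem=2 the helper
   above leaves SP=0x0002 in the low 16 bits of idxRegRsp (i.e. RSP=0x00010002,
   upper bits preserved) and idxRegEffSp=0x00000002 as the zero-extended effective
   address within SS, which is what the TLB lookup / store code operates on. */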
1538
1539
1540DECL_FORCE_INLINE(uint32_t)
1541iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1542{
1543 /* Use32BitSp: */
1544 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
1545 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1546 return off;
1547}
1548
1549
1550DECL_INLINE_THROW(uint32_t)
1551iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
1552 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
1553{
1554 /*
1555 * Assert sanity.
1556 */
1557#ifdef VBOX_STRICT
1558 if (RT_BYTE2(cBitsVarAndFlat) != 0)
1559 {
1560 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
1561 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
1562 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
1563 Assert( pfnFunction
1564 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1565 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
1566 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1567 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
1568 : UINT64_C(0xc000b000a0009000) ));
1569 }
1570 else
1571 Assert( pfnFunction
1572 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
1573 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
1574 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
1575 : UINT64_C(0xc000b000a0009000) ));
1576#endif
1577
1578#ifdef VBOX_STRICT
1579 /*
1580 * Check that the fExec flags we've got make sense.
1581 */
1582 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1583#endif
1584
1585 /*
1586 * To keep things simple we have to commit any pending writes first as we
1587 * may end up making calls.
1588 */
1589 /** @todo we could postpone this till we make the call and reload the
1590 * registers after returning from the call. Not sure if that's sensible or
1591 * not, though. */
1592 off = iemNativeRegFlushPendingWrites(pReNative, off);
1593
1594 /*
1595 * First we calculate the new RSP and the effective stack pointer value.
1596 * For 64-bit mode and flat 32-bit these two are the same.
1597 * (Code structure is very similar to that of PUSH)
1598 */
1599 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
1600 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
1601 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
1602 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
1603 ? cbMem : sizeof(uint16_t);
1604 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
1605 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1606 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1607 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1608 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1609 if (cBitsFlat != 0)
1610 {
1611 Assert(idxRegEffSp == idxRegRsp);
1612 Assert(cBitsFlat == 32 || cBitsFlat == 64);
1613 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1614 if (cBitsFlat == 64)
1615 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1616 else
1617 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1618 }
1619 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1620 {
1621 Assert(idxRegEffSp != idxRegRsp);
1622 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1623 kIemNativeGstRegUse_ReadOnly);
1624#ifdef RT_ARCH_AMD64
1625 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1626#else
1627 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1628#endif
1629 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1630 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1631 offFixupJumpToUseOtherBitSp = off;
1632 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1633 {
1634 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1635 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1636 }
1637 else
1638 {
1639 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1640 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1641 }
1642 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1643 }
1644 /* SpUpdateEnd: */
1645 uint32_t const offLabelSpUpdateEnd = off;
1646
1647 /*
1648 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1649 * we're skipping lookup).
1650 */
1651 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1652 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
1653 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1654 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1655 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1656 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1657 : UINT32_MAX;
1658 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1659
1660
1661 if (!TlbState.fSkip)
1662 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1663 else
1664 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1665
1666 /*
1667 * Use16BitSp:
1668 */
1669 if (cBitsFlat == 0)
1670 {
1671#ifdef RT_ARCH_AMD64
1672 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1673#else
1674 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1675#endif
1676 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1677 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1678 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1679 else
1680 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1681 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1682 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1683 }
1684
1685 /*
1686 * TlbMiss:
1687 *
1688 * Call helper to do the pushing.
1689 */
1690 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1691
1692#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1693 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1694#else
1695 RT_NOREF(idxInstr);
1696#endif
1697
1698 /* Save variables in volatile registers. */
1699 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1700 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1701 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1702 | (RT_BIT_32(idxRegPc));
1703 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1704
1705 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1706 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1707 {
1708 /* Swap them using ARG0 as temp register: */
1709 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1710 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1711 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1712 }
1713 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1714 {
1715 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1716 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1717
1718 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1719 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1720 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1721 }
1722 else
1723 {
1724 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1725 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1726
1727 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1728 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1729 }
1730
1731 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1732 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1733
1734 /* Done setting up parameters, make the call. */
1735 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1736
1737 /* Restore variables and guest shadow registers to volatile registers. */
1738 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1739 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1740
1741#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1742 if (!TlbState.fSkip)
1743 {
1744 /* end of TlbMiss - Jump to the done label. */
1745 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1746 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1747
1748 /*
1749 * TlbLookup:
1750 */
1751 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
1752 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1753
1754 /*
1755 * Emit code to do the actual storing / fetching.
1756 */
1757 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1758# ifdef IEM_WITH_TLB_STATISTICS
1759 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1760 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1761# endif
1762 switch (cbMemAccess)
1763 {
1764 case 2:
1765 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1766 break;
1767 case 4:
1768 if (!fIsIntelSeg)
1769 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1770 else
1771 {
 1772 /* Intel real mode segment push: the 10890XE adds the 2nd half of EFLAGS to a
 1773 PUSH FS in real mode, so we have to try to emulate that here.
1774 We borrow the now unused idxReg1 from the TLB lookup code here. */
1775 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
1776 kIemNativeGstReg_EFlags);
1777 if (idxRegEfl != UINT8_MAX)
1778 {
 1779#ifdef RT_ARCH_AMD64
1780 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
1781 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1782 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1783#else
1784 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
1785 off, TlbState.idxReg1, idxRegEfl,
1786 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1787#endif
1788 iemNativeRegFreeTmp(pReNative, idxRegEfl);
1789 }
1790 else
1791 {
1792 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
1793 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
1794 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1795 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1796 }
1797 /* ASSUMES the upper half of idxRegPc is ZERO. */
1798 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegPc);
1799 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
1800 }
1801 break;
1802 case 8:
1803 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1804 break;
1805 default:
1806 AssertFailed();
1807 }
1808
1809 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1810 TlbState.freeRegsAndReleaseVars(pReNative);
1811
1812 /*
1813 * TlbDone:
1814 *
1815 * Commit the new RSP value.
1816 */
1817 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1818 }
1819#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1820
1821#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1822 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
1823#endif
1824 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1825 if (idxRegEffSp != idxRegRsp)
1826 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1827
1828 return off;
1829}
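/* Note on the cBitsVarAndFlat parameter: it packs its inputs as
   RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSegReg, 0) - byte 1 is the width of
   the value being pushed, byte 2 is non-zero (32 or 64) for flat modes, and
   byte 3 flags a segment register push (which gets the 16-bit/Intel special
   treatment above). E.g. the 64-bit call emitters below pass
   RT_MAKE_U32_FROM_U8(64, 64, 0, 0) together with iemNativeHlpStackFlatStoreU64. */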
1830
1831
1832/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1833#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1834 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1835
1836/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1837 * clears flags. */
1838#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1839 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1840 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1841
1842/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1843#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1844 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1845
1846/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1847 * clears flags. */
1848#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1849 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1850 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1851
1852#undef IEM_MC_IND_CALL_U16_AND_FINISH
1853
1854
1855/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1856#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1857 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1858
1859/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1860 * clears flags. */
1861#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1862 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1863 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1864
1865#undef IEM_MC_IND_CALL_U32_AND_FINISH
1866
1867
1868/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1869 * an extra parameter, for use in 64-bit code. */
1870#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1871 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1872
1873
1874/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1875 * an extra parameter, for use in 64-bit code and we need to check and clear
1876 * flags. */
1877#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1878 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1879 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1880
1881#undef IEM_MC_IND_CALL_U64_AND_FINISH
1882
 1883/** Common worker for the IEM_MC_IND_CALL_UXX_AND_FINISH variants above,
 1884 * i.e. indirect calls where the new PC comes from a variable (idxVarPc). */
1885DECL_INLINE_THROW(uint32_t)
1886iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1887 uint8_t idxInstr, uint8_t cbVar)
1888{
1889 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1890 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1891
1892 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1893 off = iemNativeRegFlushPendingWrites(pReNative, off);
1894
1895#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1896 Assert(pReNative->Core.offPc == 0);
1897 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1898#endif
1899
1900 /* Get a register with the new PC loaded from idxVarPc.
 1901 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1902 uint8_t const idxNewPcReg = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1903
1904 /* Check limit (may #GP(0) + exit TB). */
1905 if (!f64Bit)
1906/** @todo we can skip this test in FLAT 32-bit mode. */
1907 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1908 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1909 else if (cbVar > sizeof(uint32_t))
1910 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1911
1912#if 1
1913 /* Allocate a temporary PC register, we don't want it shadowed. */
1914 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1915 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1916#else
1917 /* Allocate a temporary PC register. */
1918 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1919 true /*fNoVolatileRegs*/);
1920#endif
1921
1922 /* Perform the addition and push the variable to the guest stack. */
1923 /** @todo Flat variants for PC32 variants. */
1924 switch (cbVar)
1925 {
1926 case sizeof(uint16_t):
1927 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1928 /* Truncate the result to 16-bit IP. */
1929 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1930 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1931 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1932 break;
1933 case sizeof(uint32_t):
1934 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1935 /** @todo In FLAT mode we can use the flat variant. */
1936 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1937 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1938 break;
1939 case sizeof(uint64_t):
1940 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1941 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1942 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1943 break;
1944 default:
1945 AssertFailed();
1946 }
1947
1948 /* RSP got changed, so do this again. */
1949 off = iemNativeRegFlushPendingWrites(pReNative, off);
1950
1951 /* Store the result. */
1952 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1953#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1954 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1955 pReNative->Core.fDebugPcInitialized = true;
1956 Log4(("uPcUpdatingDebug=rip/indirect-call off=%#x\n", off));
1957#endif
1958
1959#if 1
1960 /* Need to transfer the shadow information to the new RIP register. */
1961 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxNewPcReg, kIemNativeGstReg_Pc, off);
1962#else
1963 /* Sync the new PC. */
1964 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxNewPcReg);
1965#endif
1966 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1967 iemNativeRegFreeTmp(pReNative, idxPcReg);
 1968 /** @todo implicitly free the variable? */
1969
1970 return off;
1971}
1972
1973
1974/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1975 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1976#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1977 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1978
1979/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1980 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
1981 * flags. */
1982#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
1983 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
1984 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1985
1986/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1987 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1988#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
1989 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1990
1991/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1992 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1993 * flags. */
1994#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
1995 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
1996 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1997
 1998/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
 1999 * an extra parameter, for use in 64-bit code. */
2000#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
2001 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
2002
 2003/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
 2004 * an extra parameter, for use in 64-bit code and we need to check and clear
 2005 * flags. */
2006#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
2007 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
2008 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2009
2010#undef IEM_MC_REL_CALL_S16_AND_FINISH
2011
2012/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2013 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2014DECL_INLINE_THROW(uint32_t)
2015iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
2016 uint8_t idxInstr)
2017{
2018 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2019 off = iemNativeRegFlushPendingWrites(pReNative, off);
2020
2021#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2022 Assert(pReNative->Core.offPc == 0);
2023 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2024#endif
2025
2026 /* Allocate a temporary PC register. */
2027 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2028 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
2029 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
2030
2031 /* Calculate the new RIP. */
2032 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2033 /* Truncate the result to 16-bit IP. */
2034 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
2035 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2036 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2037
2038 /* Truncate the result to 16-bit IP. */
2039 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
2040
2041 /* Check limit (may #GP(0) + exit TB). */
2042 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2043
 2044 /* Push the return address (idxPcRegOld) onto the guest stack. */
2045 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
2046 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
2047
2048 /* RSP got changed, so flush again. */
2049 off = iemNativeRegFlushPendingWrites(pReNative, off);
2050
2051 /* Store the result. */
2052 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2053#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2054 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2055 pReNative->Core.fDebugPcInitialized = true;
2056 Log4(("uPcUpdatingDebug=rip/rel-call-16 off=%#x offDisp=%d\n", off, offDisp));
2057#endif
2058
2059 /* Need to transfer the shadow information to the new RIP register. */
2060 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2061 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2062 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2063
2064 return off;
2065}
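/* Worked example (illustration only): a 16-bit near 'call rel16' with offDisp=0x10
   and cbInstr=3 at IP=0xFFFE yields a return address of (0xFFFE + 3) & 0xffff =
   0x0001 (pushed via iemNativeEmitStackPushRip) and a new IP of (0x0001 + 0x10)
   & 0xffff = 0x0011, which is checked against CS.LIM before being committed. */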
2066
2067
2068/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2069 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
2070#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
2071 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
2072
2073/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2074 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
2075 * flags. */
2076#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
2077 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
2078 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2079
2080#undef IEM_MC_REL_CALL_S32_AND_FINISH
2081
2082/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2083 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2084DECL_INLINE_THROW(uint32_t)
2085iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
2086 uint8_t idxInstr)
2087{
2088 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2089 off = iemNativeRegFlushPendingWrites(pReNative, off);
2090
2091#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2092 Assert(pReNative->Core.offPc == 0);
2093 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2094#endif
2095
2096 /* Allocate a temporary PC register. */
2097 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2098 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2099 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
2100
2101 /* Update the EIP to get the return address. */
2102 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2103
2104 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2105 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2106 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2107 /** @todo we can skip this test in FLAT 32-bit mode. */
2108 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2109
 2110 /* Push the return address onto the guest stack. */
2111 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
2112 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
2113 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
2114
2115 /* RSP got changed, so do this again. */
2116 off = iemNativeRegFlushPendingWrites(pReNative, off);
2117
2118 /* Store the result. */
2119 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2120#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2121 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2122 pReNative->Core.fDebugPcInitialized = true;
2123 Log4(("uPcUpdatingDebug=eip/rel-call-32 off=%#x offDisp=%d\n", off, offDisp));
2124#endif
2125
2126 /* Need to transfer the shadow information to the new RIP register. */
2127 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2128 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2129 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2130
2131 return off;
2132}
2133
2134
2135/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2136 * an extra parameter, for use in 64-bit code. */
2137#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
2138 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
2139
2140/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2141 * an extra parameter, for use in 64-bit code and we need to check and clear
2142 * flags. */
2143#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
2144 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
2145 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2146
2147#undef IEM_MC_REL_CALL_S64_AND_FINISH
2148
2149/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2150 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2151DECL_INLINE_THROW(uint32_t)
2152iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
2153 uint8_t idxInstr)
2154{
2155 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2156 off = iemNativeRegFlushPendingWrites(pReNative, off);
2157
2158#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2159 Assert(pReNative->Core.offPc == 0);
2160 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2161#endif
2162
2163 /* Allocate a temporary PC register. */
2164 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2165 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2166 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
2167
2168 /* Update the RIP to get the return address. */
2169 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
2170
2171 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2172 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
2173 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
2174 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2175
 2176 /* Push the return address onto the guest stack. */
2177 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
2178 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
2179
2180 /* RSP got changed, so do this again. */
2181 off = iemNativeRegFlushPendingWrites(pReNative, off);
2182
2183 /* Store the result. */
2184 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2185#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2186 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2187 pReNative->Core.fDebugPcInitialized = true;
 2188 Log4(("uPcUpdatingDebug=rip/rel-call-64 off=%#x offDisp=%RI64\n", off, offDisp));
2189#endif
2190
2191 /* Need to transfer the shadow information to the new RIP register. */
2192 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2193 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2194 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2195
2196 return off;
2197}
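/* Note: unlike the 16-bit and 32-bit variants above, the 64-bit relative call
   needs no CS.LIM test - only the canonical address check on the target - and
   the return address is always pushed with the flat 64-bit stack helper. */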
2198
2199
2200/*********************************************************************************************************************************
 2201* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters). *
2202*********************************************************************************************************************************/
2203
2204DECL_FORCE_INLINE_THROW(uint32_t)
2205iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2206 uint16_t cbPopAdd, uint8_t idxRegTmp)
2207{
2208 /* Use16BitSp: */
2209#ifdef RT_ARCH_AMD64
2210 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2211 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
2212 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
2213 RT_NOREF(idxRegTmp);
2214
2215#elif defined(RT_ARCH_ARM64)
2216 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
2217 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
2218 /* add tmp, regrsp, #cbMem */
2219 uint16_t const cbCombined = cbMem + cbPopAdd;
2220 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
2221 if (cbCombined >= RT_BIT_32(12))
2222 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
2223 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2224 /* and tmp, tmp, #0xffff */
2225 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2226 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
 2227 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
2228 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
2229
2230#else
2231# error "Port me"
2232#endif
2233 return off;
2234}
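/* Worked example (illustration only): for 'retn 4' with RSP=0x0001FFFE, cbMem=2
   and cbPopAdd=4 the helper above produces idxRegEffSp=0x0000FFFE (the address
   the return IP is fetched from) and a new SP of (0xFFFE + 2 + 4) & 0xffff =
   0x0004, i.e. RSP=0x00010004 with the upper bits preserved. */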
2235
2236
2237DECL_FORCE_INLINE_THROW(uint32_t)
2238iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2239 uint16_t cbPopAdd)
2240{
2241 /* Use32BitSp: */
2242 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2243 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
2244 return off;
2245}
2246
2247
2248/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
2249#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr) \
2250 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, IEMMODE_16BIT, pCallEntry->idxInstr)
2251
2252/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
2253#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2254 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
2255
2256/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
2257#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2258 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), true /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
2259
2260/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
2261 * clears flags. */
2262#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16Pop, a_cbInstr) \
2263 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr); \
2264 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2265
2266/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
2267 * clears flags. */
2268#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2269 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
2270 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2271
2272/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
2273 * clears flags. */
2274#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2275 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
2276 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2277
2278/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
2279DECL_INLINE_THROW(uint32_t)
2280iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPop, bool f64Bit,
2281 IEMMODE enmEffOpSize, uint8_t idxInstr)
2282{
2283 RT_NOREF(cbInstr);
2284
2285#ifdef VBOX_STRICT
2286 /*
2287 * Check that the fExec flags we've got make sense.
2288 */
2289 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
2290#endif
2291
2292 /*
2293 * To keep things simple we have to commit any pending writes first as we
2294 * may end up making calls.
2295 */
2296 off = iemNativeRegFlushPendingWrites(pReNative, off);
2297
2298 /*
 2299 * Determine the effective stack pointer; for non-FLAT modes we also update RSP.
2300 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
2301 * directly as the effective stack pointer.
2302 * (Code structure is very similar to that of PUSH)
2303 *
2304 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
2305 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
2306 * aren't commonly used (or useful) and thus not in need of optimizing.
2307 *
 2308 * Note! For non-flat modes the guest RSP is allocated for calculation rather than for update,
 2309 * since the shadowed register would otherwise stay modified even when the return address
 2310 * raises \#GP(0) for being outside the CS limit, leaving a wrong stack pointer value in the
 2311 * guest (see the near return testcase in bs3-cpu-basic-2). If no exception is raised, the
 2312 * shadowing is transferred to the new register returned by iemNativeRegAllocTmpForGuestReg() at the end.
2313 */
2314 uint8_t const cbMem = enmEffOpSize == IEMMODE_64BIT
2315 ? sizeof(uint64_t)
2316 : enmEffOpSize == IEMMODE_32BIT
2317 ? sizeof(uint32_t)
2318 : sizeof(uint16_t);
2319 bool const fFlat = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) && enmEffOpSize != IEMMODE_16BIT; /* see note */
2320 uintptr_t const pfnFunction = fFlat
2321 ? enmEffOpSize == IEMMODE_64BIT
2322 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
2323 : (uintptr_t)iemNativeHlpStackFlatFetchU32
2324 : enmEffOpSize == IEMMODE_32BIT
2325 ? (uintptr_t)iemNativeHlpStackFetchU32
2326 : (uintptr_t)iemNativeHlpStackFetchU16;
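 /* Illustrative summary of the selection above (sketch, matching the code and the
    o16-return note):
        enmEffOpSize  FLAT mode  fFlat  cbMem  TLB-miss helper
        64-bit        yes        yes    8      iemNativeHlpStackFlatFetchU64
        32-bit        yes        yes    4      iemNativeHlpStackFlatFetchU32
        32-bit        no         no     4      iemNativeHlpStackFetchU32
        16-bit        any        no     2      iemNativeHlpStackFetchU16 */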
2327 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
2328 fFlat ? kIemNativeGstRegUse_ForUpdate
2329 : kIemNativeGstRegUse_Calculation,
2330 true /*fNoVolatileRegs*/);
2331 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
2332 /** @todo can do a better job picking the register here. For cbMem >= 4 this
2333 * will be the resulting register value. */
2334 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
2335
2336 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
2337 if (fFlat)
2338 Assert(idxRegEffSp == idxRegRsp);
2339 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
2340 {
2341 Assert(idxRegEffSp != idxRegRsp);
2342 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
2343 kIemNativeGstRegUse_ReadOnly);
2344#ifdef RT_ARCH_AMD64
2345 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2346#else
2347 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2348#endif
2349 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
2350 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
2351 offFixupJumpToUseOtherBitSp = off;
2352 if (enmEffOpSize == IEMMODE_32BIT)
2353 {
2354 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
2355 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
2356 }
2357 else
2358 {
2359 Assert(enmEffOpSize == IEMMODE_16BIT);
2360 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
2361 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
2362 idxRegMemResult);
2363 }
2364 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2365 }
2366 /* SpUpdateEnd: */
2367 uint32_t const offLabelSpUpdateEnd = off;
2368
2369 /*
2370 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
2371 * we're skipping lookup).
2372 */
2373 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
2374 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
2375 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
2376 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
2377 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
2378 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
2379 : UINT32_MAX;
2380
2381 if (!TlbState.fSkip)
2382 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
2383 else
2384 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
2385
2386 /*
2387 * UseOtherBitSp:
2388 */
2389 if (!fFlat)
2390 {
2391#ifdef RT_ARCH_AMD64
2392 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2393#else
2394 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2395#endif
2396 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
2397 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
2398 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
2399 idxRegMemResult);
2400 else
2401 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
2402 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
2403 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2404 }
2405
2406 /*
2407 * TlbMiss:
2408 *
2409 * Call helper to do the popping.
2410 */
2411 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
2412
2413#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2414 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2415#else
2416 RT_NOREF(idxInstr);
2417#endif
2418
2419 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
2420 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
2421 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
2422 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
2423
2424
2425 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
2426 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
2427 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
2428
2429 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
2430 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2431
2432 /* Done setting up parameters, make the call. */
2433 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
2434
2435 /* Move the return register content to idxRegMemResult. */
2436 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
2437 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
2438
2439 /* Restore variables and guest shadow registers to volatile registers. */
2440 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
2441 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
2442
2443#ifdef IEMNATIVE_WITH_TLB_LOOKUP
2444 if (!TlbState.fSkip)
2445 {
2446 /* end of TlbMiss - Jump to the done label. */
2447 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
2448 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
2449
2450 /*
2451 * TlbLookup:
2452 */
2453 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
2454 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
2455
2456 /*
2457 * Emit code to load the value (the address is in idxRegMemResult; the loaded value replaces it).
2458 */
2459 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2460# ifdef IEM_WITH_TLB_STATISTICS
2461 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
2462 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
2463# endif
2464 switch (cbMem)
2465 {
2466 case 2:
2467 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2468 break;
2469 case 4:
2470 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2471 break;
2472 case 8:
2473 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2474 break;
2475 default:
2476 AssertFailed();
2477 }
2478
2479 TlbState.freeRegsAndReleaseVars(pReNative);
2480
2481 /*
2482 * TlbDone:
2483 *
2484 * Set the new RSP value (FLAT accesses need to calculate it first) and
2485 * commit the popped register value.
2486 */
2487 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
2488 }
2489#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
2490
2491 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
2492 if (!f64Bit)
2493/** @todo we can skip this test in FLAT 32-bit mode. */
2494 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2495 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2496 else if (enmEffOpSize == IEMMODE_64BIT)
2497 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2498
2499 /* Complete RSP calculation for FLAT mode. */
2500 if (idxRegEffSp == idxRegRsp)
2501 {
2502 if (enmEffOpSize == IEMMODE_64BIT)
2503 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPop);
2504 else
2505 {
2506 Assert(enmEffOpSize == IEMMODE_32BIT);
2507 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPop);
2508 }
2509 }
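 /* Worked example: for a FLAT 64-bit 'ret 8' the return address (8 bytes) plus the
    8 bytes of popped arguments give RSP += sizeof(uint64_t) + 8 = 16. */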
2510
2511 /* Commit the result and clear any current guest shadows for RIP. */
2512 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
2513 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2514 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
2515#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2516 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2517 pReNative->Core.fDebugPcInitialized = true;
2518 Log4(("uPcUpdatingDebug=rip/ret off=%#x\n", off));
2519#endif
2520
2521 /* Need to transfer the shadowing information to the host register containing the updated value now. */
2522 if (!fFlat)
2523 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
2524
2525 iemNativeRegFreeTmp(pReNative, idxRegRsp);
2526 if (idxRegEffSp != idxRegRsp)
2527 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
2528 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
2529 return off;
2530}
2531
2532
2533/*********************************************************************************************************************************
2534* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
2535*********************************************************************************************************************************/
2536
2537#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
2538 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2539
2540/**
2541 * Emits code to check if a \#NM exception should be raised.
2542 *
2543 * @returns New code buffer offset, UINT32_MAX on failure.
2544 * @param pReNative The native recompile state.
2545 * @param off The code buffer offset.
2546 * @param idxInstr The current instruction.
2547 */
2548DECL_INLINE_THROW(uint32_t)
2549iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2550{
2551#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2552 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
2553
2554 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
2555 {
2556#endif
2557 /*
2558 * Make sure we don't have any outstanding guest register writes as we may
2559 * raise an #NM and all guest registers must be up to date in CPUMCTX.
2560 */
2561 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2562 off = iemNativeRegFlushPendingWrites(pReNative, off);
2563
2564#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2565 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2566#else
2567 RT_NOREF(idxInstr);
2568#endif
2569
2570 /* Allocate a temporary CR0 register. */
2571 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2572 kIemNativeGstRegUse_ReadOnly);
2573
2574 /*
2575 * if (cr0 & (X86_CR0_EM | X86_CR0_TS) != 0)
2576 * return raisexcpt();
2577 */
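 /* Worked example: X86_CR0_EM is bit 2 and X86_CR0_TS is bit 3, so the mask is 0x0c.
    With both clear (the common case) no exit is taken; a guest doing lazy FPU context
    switching has TS set and takes the RaiseNm exit instead. */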
2578 /* Test and jump. */
2579 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS,
2580 kIemNativeLabelType_RaiseNm);
2581
2582 /* Free but don't flush the CR0 register. */
2583 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2584
2585#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2586 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
2587 }
2588 else
2589 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
2590#endif
2591
2592 return off;
2593}
2594
2595
2596#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
2597 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2598
2599/**
2600 * Emits code to check if a \#NM exception should be raised.
2601 *
2602 * @returns New code buffer offset, UINT32_MAX on failure.
2603 * @param pReNative The native recompile state.
2604 * @param off The code buffer offset.
2605 * @param idxInstr The current instruction.
2606 */
2607DECL_INLINE_THROW(uint32_t)
2608iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2609{
2610#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2611 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
2612
2613 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
2614 {
2615#endif
2616 /*
2617 * Make sure we don't have any outstanding guest register writes as we may
2618 * raise an #NM and all guest registers must be up to date in CPUMCTX.
2619 */
2620 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2621 off = iemNativeRegFlushPendingWrites(pReNative, off);
2622
2623#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2624 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2625#else
2626 RT_NOREF(idxInstr);
2627#endif
2628
2629 /* Allocate a temporary CR0 register. */
2630 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2631 kIemNativeGstRegUse_Calculation);
2632
2633 /*
2634 * if (cr0 & (X86_CR0_MP | X86_CR0_TS) == (X86_CR0_MP | X86_CR0_TS))
2635 * return raisexcpt();
2636 */
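 /* Worked example: X86_CR0_MP is bit 1 and X86_CR0_TS is bit 3, so the mask is 0x0a.
    Only when both are set (masked value 0x0a) does WAIT/FWAIT raise #NM and take the
    exit; TS alone (masked value 0x08) does not. */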
2637 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2638 /* Test and jump. */
2639 off = iemNativeEmitTestIfGpr32EqualsImmAndTbExit(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS,
2640 kIemNativeLabelType_RaiseNm);
2641
2642 /* Free the CR0 register. */
2643 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2644
2645#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2646 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2647 }
2648 else
2649 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2650#endif
2651
2652 return off;
2653}
2654
2655
2656#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2657 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2658
2659/**
2660 * Emits code to check if a \#MF exception should be raised.
2661 *
2662 * @returns New code buffer offset, UINT32_MAX on failure.
2663 * @param pReNative The native recompile state.
2664 * @param off The code buffer offset.
2665 * @param idxInstr The current instruction.
2666 */
2667DECL_INLINE_THROW(uint32_t)
2668iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2669{
2670 /*
2671 * Make sure we don't have any outstanding guest register writes as we may
2672 * raise an #MF and all guest registers must be up to date in CPUMCTX.
2673 */
2674 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2675 off = iemNativeRegFlushPendingWrites(pReNative, off);
2676
2677#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2678 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2679#else
2680 RT_NOREF(idxInstr);
2681#endif
2682
2683 /* Allocate a temporary FSW register. */
2684 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
2685 kIemNativeGstRegUse_ReadOnly);
2686
2687 /*
2688 * if (FSW & X86_FSW_ES != 0)
2689 * return raisexcpt();
2690 */
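 /* Note: X86_FSW_ES is bit 7, the exception-summary bit that is set whenever an
    unmasked x87 exception is pending, so a single bit test suffices here. */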
2691 /* Test and jump. */
2692 off = iemNativeEmitTestBitInGprAndTbExitIfSet(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT, kIemNativeLabelType_RaiseMf);
2693
2694 /* Free but don't flush the FSW register. */
2695 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2696
2697 return off;
2698}
2699
2700
2701#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2702 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2703
2704/**
2705 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2706 *
2707 * @returns New code buffer offset, UINT32_MAX on failure.
2708 * @param pReNative The native recompile state.
2709 * @param off The code buffer offset.
2710 * @param idxInstr The current instruction.
2711 */
2712DECL_INLINE_THROW(uint32_t)
2713iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2714{
2715#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2716 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2717
2718 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2719 {
2720#endif
2721 /*
2722 * Make sure we don't have any outstanding guest register writes as we may
2723 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2724 */
2725 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2726 off = iemNativeRegFlushPendingWrites(pReNative, off);
2727
2728#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2729 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2730#else
2731 RT_NOREF(idxInstr);
2732#endif
2733
2734 /* Allocate a temporary CR0 and CR4 register. */
2735 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2736 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2737 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2738
2739 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2740#ifdef RT_ARCH_AMD64
2741 /*
2742 * We do a modified test here:
2743 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
2744 * else { goto RaiseSseRelated; }
2745 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
2746 * all targets except the 386, and since the 386 doesn't support SSE
2747 * this should be a safe assumption.
2748 */
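 /* Worked example (CR0.EM=bit 2, CR0.TS=bit 3, CR4.OSFXSR=bit 9): with OSFXSR=1,
    EM=0, TS=0 the sequence below computes tmp = 0x200, tmp |= cr0, tmp &= 0x20c
    -> 0x200, tmp ^= 0x200 -> 0, so the RaiseSseRelated exit is not taken. Setting
    EM or TS, or clearing OSFXSR, leaves a non-zero value and takes the exit. */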
2749 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
2750 //pCodeBuf[off++] = 0xcc;
2751 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2752 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2753 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2754 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2755 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2756 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseSseRelated, kIemNativeInstrCond_ne);
2757
2758#elif defined(RT_ARCH_ARM64)
2759 /*
2760 * We do a modified test here:
2761 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2762 * else { goto RaiseSseRelated; }
2763 */
2764 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
2765 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2766 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2767 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2768 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2769 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2770 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2771 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2772 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2773 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2774 kIemNativeLabelType_RaiseSseRelated);
2775
2776#else
2777# error "Port me!"
2778#endif
2779
2780 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2781 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2782 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2783 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2784
2785#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2786 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2787 }
2788 else
2789 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2790#endif
2791
2792 return off;
2793}
2794
2795
2796#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2797 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2798
2799/**
2800 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2801 *
2802 * @returns New code buffer offset, UINT32_MAX on failure.
2803 * @param pReNative The native recompile state.
2804 * @param off The code buffer offset.
2805 * @param idxInstr The current instruction.
2806 */
2807DECL_INLINE_THROW(uint32_t)
2808iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2809{
2810#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2811 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2812
2813 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2814 {
2815#endif
2816 /*
2817 * Make sure we don't have any outstanding guest register writes as we may
2818 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2819 */
2820 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2821 off = iemNativeRegFlushPendingWrites(pReNative, off);
2822
2823#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2824 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2825#else
2826 RT_NOREF(idxInstr);
2827#endif
2828
2829 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2830 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2831 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2832 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2833 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2834
2835 /*
2836 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2837 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2838 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2839 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2840 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2841 * { likely }
2842 * else { goto RaiseAvxRelated; }
2843 */
2844#ifdef RT_ARCH_AMD64
2845 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2846 | (((cr4 >> X86_CR4_OSXSAVE_BIT) & 1) << 1)
2847 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2848 ^ 0x1a) ) { likely }
2849 else { goto RaiseAvxRelated; } */
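 /* Worked example (XSAVE_C_SSE=bit 1, XSAVE_C_YMM=bit 2, hence the 0x1a constant):
    with XCR0=7 (X87|SSE|YMM), CR4.OSXSAVE=1 and CR0.TS=0 the bits assembled below are
    ((7 & 6) << 2) | (1 << 1) | 0 = 0x1a, and 0x1a ^ 0x1a = 0, so no exit is taken.
    Clearing SSE/YMM/OSXSAVE or setting TS yields a non-zero value -> RaiseAvxRelated. */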
2850 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
2851 //pCodeBuf[off++] = 0xcc;
2852 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2853 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2854 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2855 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2856 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2857 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2858 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2859 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2860 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2861 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2862 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseAvxRelated, kIemNativeInstrCond_ne);
2863
2864#elif defined(RT_ARCH_ARM64)
2865 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSXSAVE_BIT) & 1)) ^ 7) << 1)
2866 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
2867 else { goto RaiseAvxRelated; } */
2868 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
2869 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2870 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2871 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2872 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2873 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2874 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2875 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2876 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2877 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2878 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2879 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2880 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2881 kIemNativeLabelType_RaiseAvxRelated);
2882
2883#else
2884# error "Port me!"
2885#endif
2886
2887 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2888 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2889 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2890 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2891#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2892 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2893 }
2894 else
2895 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2896#endif
2897
2898 return off;
2899}
2900
2901
2902#define IEM_MC_RAISE_DIVIDE_ERROR() \
2903 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
2904
2905/**
2906 * Emits code to raise a \#DE.
2907 *
2908 * @returns New code buffer offset, UINT32_MAX on failure.
2909 * @param pReNative The native recompile state.
2910 * @param off The code buffer offset.
2911 * @param idxInstr The current instruction.
2912 */
2913DECL_INLINE_THROW(uint32_t)
2914iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2915{
2916 /*
2917 * Make sure we don't have any outstanding guest register writes as we may raise a #DE and all guest registers must be up to date in CPUMCTX.
2918 */
2919 off = iemNativeRegFlushPendingWrites(pReNative, off);
2920
2921#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2922 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2923#else
2924 RT_NOREF(idxInstr);
2925#endif
2926
2927 /* raise \#DE exception unconditionally. */
2928 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseDe);
2929}
2930
2931
2932#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2933 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2934
2935/**
2936 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2937 *
2938 * @returns New code buffer offset, UINT32_MAX on failure.
2939 * @param pReNative The native recompile state.
2940 * @param off The code buffer offset.
2941 * @param idxInstr The current instruction.
2942 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2943 * @param cbAlign The alignment in bytes to check against.
2944 */
2945DECL_INLINE_THROW(uint32_t)
2946iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
2947 uint8_t idxVarEffAddr, uint8_t cbAlign)
2948{
2949 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2950 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2951
2952 /*
2953 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2954 */
2955 off = iemNativeRegFlushPendingWrites(pReNative, off);
2956
2957#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2958 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2959#else
2960 RT_NOREF(idxInstr);
2961#endif
2962
2963 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2964
2965 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxVarReg, cbAlign - 1,
2966 kIemNativeLabelType_RaiseGp0);
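 /* Worked example: for cbAlign=16 the test mask is 0x0f, so an effective address of
    0x1008 has bit 3 set and takes the RaiseGp0 exit, while 0x1010 passes. */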
2967
2968 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2969 return off;
2970}
2971
2972
2973/*********************************************************************************************************************************
2974* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2975*********************************************************************************************************************************/
2976
2977/**
2978 * Pushes an IEM_MC_IF_XXX onto the condition stack.
2979 *
2980 * @returns Pointer to the condition stack entry on success; throws
2981 * VERR_IEM_COND_TOO_DEEPLY_NESTED if the nesting is too deep.
2982 */
2983DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
2984{
2985 uint32_t const idxStack = pReNative->cCondDepth;
2986 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
2987
2988 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
2989 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
2990
2991 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
2992 pEntry->fInElse = false;
2993 pEntry->fIfExitTb = false;
2994 pEntry->fElseExitTb = false;
2995 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
2996 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
2997
2998 return pEntry;
2999}
3000
3001
3002/**
3003 * Start of the if-block, snapshotting the register and variable state.
3004 */
3005DECL_INLINE_THROW(void)
3006iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
3007{
3008 Assert(offIfBlock != UINT32_MAX);
3009 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3010 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3011 Assert(!pEntry->fInElse);
3012
3013 /* Define the start of the IF block if requested or for disassembly purposes. */
3014 if (idxLabelIf != UINT32_MAX)
3015 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
3016#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3017 else
3018 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
3019#else
3020 RT_NOREF(offIfBlock);
3021#endif
3022
3023 /* Copy the initial state so we can restore it in the 'else' block. */
3024 pEntry->InitialState = pReNative->Core;
3025}
3026
3027
3028#define IEM_MC_ELSE() } while (0); \
3029 off = iemNativeEmitElse(pReNative, off); \
3030 do {
3031
3032/** Emits code related to IEM_MC_ELSE. */
3033DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3034{
3035 /* Check sanity and get the conditional stack entry. */
3036 Assert(off != UINT32_MAX);
3037 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3038 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3039 Assert(!pEntry->fInElse);
3040
3041 /* We can skip the dirty register flushing and the jump to the endif if
3042 the branch already jumped to a TB exit. */
3043 if (!pEntry->fIfExitTb)
3044 {
3045#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) && 0
3046 /* Writeback any dirty shadow registers. */
3047 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
3048 * in one of the branches and leave guest registers already dirty before the start of the if
3049 * block alone. */
3050 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3051#endif
3052
3053 /* Jump to the endif. */
3054 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
3055 }
3056# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3057 else
3058 Assert(pReNative->Core.offPc == 0);
3059# endif
3060
3061 /* Define the else label and enter the else part of the condition. */
3062 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3063 pEntry->fInElse = true;
3064
3065 /* Snapshot the core state so we can do a merge at the endif and restore
3066 the snapshot we took at the start of the if-block. */
3067 pEntry->IfFinalState = pReNative->Core;
3068 pReNative->Core = pEntry->InitialState;
3069
3070 return off;
3071}
3072
3073
3074#define IEM_MC_ENDIF() } while (0); \
3075 off = iemNativeEmitEndIf(pReNative, off)
3076
3077/** Emits code related to IEM_MC_ENDIF. */
3078DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3079{
3080 /* Check sanity and get the conditional stack entry. */
3081 Assert(off != UINT32_MAX);
3082 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3083 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3084
3085#if defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) && 0
3086 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3087#endif
3088
3089 /*
3090 * If either of the branches exited the TB, we can take the state from the
3091 * other branch and skip all the merging headache.
3092 */
3093 bool fDefinedLabels = false;
3094 if (pEntry->fElseExitTb || pEntry->fIfExitTb)
3095 {
3096#ifdef VBOX_STRICT
3097 Assert(pReNative->cCondDepth == 1); /* Assuming this only happens in simple conditional structures. */
3098 Assert(pEntry->fElseExitTb != pEntry->fIfExitTb); /* Assuming we don't have any code where both branches exit. */
3099 PCIEMNATIVECORESTATE const pExitCoreState = pEntry->fIfExitTb && pEntry->fInElse
3100 ? &pEntry->IfFinalState : &pReNative->Core;
3101# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3102 Assert(pExitCoreState->bmGstRegShadowDirty == 0);
3103# endif
3104# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3105 Assert(pExitCoreState->offPc == 0);
3106# endif
3107 RT_NOREF(pExitCoreState);
3108#endif
3109
3110 if (!pEntry->fIfExitTb)
3111 {
3112 Assert(pEntry->fInElse);
3113 pReNative->Core = pEntry->IfFinalState;
3114 }
3115 }
3116 else
3117 {
3118 /*
3119 * Now we have to find common ground with the core state at the end of the
3120 * if-block. Use the smallest common denominator and just drop anything
3121 * that isn't the same in both states.
3122 */
3123 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
3124 * which is why we're doing this at the end of the else-block.
3125 * But we'd need more info about future for that to be worth the effort. */
3126 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
3127#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3128 AssertMsgStmt(pReNative->Core.offPc == pOther->offPc,
3129 ("Core.offPc=%#RX64 pOther->offPc=%#RX64\n", pReNative->Core.offPc, pOther->offPc),
3130 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3131#endif
3132
3133 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
3134 {
3135#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3136 /*
3137 * If the branches differ in which shadow registers are dirty, we flush the
3138 * ones only dirty in the current branch here and emit a separate tail block
3139 * (below) for the ones only dirty in the other branch.
3140 */
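 /* Worked example: with this branch's dirty mask 0b0110 and the other branch's
    0b1010 the difference is 0b1100; the head (0b0100, only dirty here) is flushed
    inline right below, while the tail (0b1000, only dirty in the other branch) is
    flushed in the jumped-over block emitted further down. */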
3141 uint64_t const fGstRegDirtyOther = pOther->bmGstRegShadowDirty;
3142 uint64_t const fGstRegDirtyThis = pReNative->Core.bmGstRegShadowDirty;
3143 uint64_t const fGstRegDirtyDiff = fGstRegDirtyOther ^ fGstRegDirtyThis;
3144 uint64_t const fGstRegDirtyHead = fGstRegDirtyThis & fGstRegDirtyDiff;
3145 uint64_t fGstRegDirtyTail = fGstRegDirtyOther & fGstRegDirtyDiff;
3146 if (!fGstRegDirtyDiff)
3147 { /* likely */ }
3148 else
3149 {
3150 //uint64_t const fGstRegDirtyHead = pReNative->Core.bmGstRegShadowDirty & fGstRegDirtyDiff;
3151 if (fGstRegDirtyHead)
3152 {
3153 Log12(("iemNativeEmitEndIf: flushing dirty guest registers in current branch: %RX64\n", fGstRegDirtyHead));
3154 off = iemNativeRegFlushDirtyGuest(pReNative, off, fGstRegDirtyHead);
3155 }
3156 }
3157#endif
3158
3159 /*
3160 * Shadowed guest registers.
3161 *
3162 * We drop any shadows where the two states disagree about where
3163 * things are kept. We may end up flushing more dirty registers
3164 * here, if the two branches keep things in different registers.
3165 */
3166 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
3167 if (fGstRegs)
3168 {
3169 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
3170 do
3171 {
3172 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3173 fGstRegs &= ~RT_BIT_64(idxGstReg);
3174
3175 uint8_t const idxCurHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3176 uint8_t const idxOtherHstReg = pOther->aidxGstRegShadows[idxGstReg];
3177 if ( idxCurHstReg != idxOtherHstReg
3178 || !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg)))
3179 {
3180#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3181 Log12(("iemNativeEmitEndIf: dropping gst %s (%d) from hst %s (other %d/%#RX64)\n",
3182 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3183 idxOtherHstReg, pOther->bmGstRegShadows));
3184#else
3185 Log12(("iemNativeEmitEndIf: dropping %s gst %s (%d) from hst %s (other %d/%#RX64/%s)\n",
3186 pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "_dirty_" : "clean",
3187 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3188 idxOtherHstReg, pOther->bmGstRegShadows,
3189 pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "dirty" : "clean"));
3190 if (pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3191 fGstRegDirtyTail |= RT_BIT_64(idxGstReg);
3192 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3193 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3194#endif
3195 iemNativeRegClearGstRegShadowingOne(pReNative, idxCurHstReg, (IEMNATIVEGSTREG)idxGstReg, off);
3196 }
3197 } while (fGstRegs);
3198 }
3199 else
3200 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
3201
3202#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3203 /*
3204 * Generate jumpy code for flushing dirty registers from the other
3205 * branch that aren't dirty in the current one.
3206 */
3207 if (!fGstRegDirtyTail)
3208 { /* likely */ }
3209 else
3210 {
3211 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEndIfOtherBranchDirty);
3212 Log12(("iemNativeEmitEndIf: Dirty register only in the other branch: %#RX64 - BAD!\n", fGstRegDirtyTail));
3213
3214 /* First the current branch has to jump over the dirty flushing from the other branch. */
3215 uint32_t const offFixup1 = off;
3216 off = iemNativeEmitJmpToFixed(pReNative, off, off + 10);
3217
3218 /* Put the endif and maybe else label here so the other branch ends up here. */
3219 if (!pEntry->fInElse)
3220 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3221 else
3222 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3223 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3224 fDefinedLabels = true;
3225
3226 /* Flush the dirty guest registers from the other branch. */
3227 while (fGstRegDirtyTail)
3228 {
3229 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegDirtyTail) - 1;
3230 fGstRegDirtyTail &= ~RT_BIT_64(idxGstReg);
3231 Log12(("iemNativeEmitEndIf: tail flushing %s (%d) from other branch %d (cur %d/%#RX64)\n",
3232 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, pOther->aidxGstRegShadows[idxGstReg],
3233 pReNative->Core.aidxGstRegShadows[idxGstReg], pReNative->Core.bmGstRegShadows));
3234
3235 off = iemNativeRegFlushPendingWriteEx(pReNative, off, (PIEMNATIVECORESTATE)pOther, (IEMNATIVEGSTREG)idxGstReg);
3236
3237 /* Mismatching shadowing should've been dropped in the previous step already. */
3238 Assert( !(pReNative->Core.bmGstRegShadows & RT_BIT_64(idxGstReg))
3239 || pReNative->Core.aidxGstRegShadows[idxGstReg] == pOther->aidxGstRegShadows[idxGstReg]);
3240 }
3241
3242 /* Here is the actual endif label, fixup the above jump to land here. */
3243 iemNativeFixupFixedJump(pReNative, offFixup1, off);
3244 }
3245#endif
3246
3247 /*
3248 * Check variables next. For now we must require them to be identical
3249 * or stuff we can recreate. (No code is emitted here.)
3250 */
3251 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
3252#ifdef VBOX_STRICT
3253 uint32_t const offAssert = off;
3254#endif
3255 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
3256 if (fVars)
3257 {
3258 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
3259 do
3260 {
3261 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
3262 fVars &= ~RT_BIT_32(idxVar);
3263
3264 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
3265 {
3266 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
3267 continue;
3268 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3269 {
3270 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3271 if (idxHstReg != UINT8_MAX)
3272 {
3273 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3274 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3275 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
3276 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3277 }
3278 continue;
3279 }
3280 }
3281 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
3282 continue;
3283
3284 /* Irreconcilable, so drop it. */
3285 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3286 if (idxHstReg != UINT8_MAX)
3287 {
3288 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3289 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3290 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
3291 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3292 }
3293 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3294 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
3295 } while (fVars);
3296 }
3297 Assert(off == offAssert);
3298
3299 /*
3300 * Finally, check that the host register allocations match.
3301 */
3302 AssertMsgStmt((pReNative->Core.bmHstRegs & (pReNative->Core.bmHstRegs ^ pOther->bmHstRegs)) == 0,
3303 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
3304 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
3305 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3306 }
3307 }
3308
3309 /*
3310 * Define the endif label and maybe the else one if we're still in the 'if' part.
3311 */
3312 if (!fDefinedLabels)
3313 {
3314 if (!pEntry->fInElse)
3315 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3316 else
3317 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3318 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3319 }
3320
3321 /* Pop the conditional stack. */
3322 pReNative->cCondDepth -= 1;
3323
3324 return off;
3325}
3326
3327
3328#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
3329 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
3330 do {
3331
3332/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
3333DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
3334{
3335 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3336 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3337
3338 /* Get the eflags. */
3339 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3340 kIemNativeGstRegUse_ReadOnly);
3341
3342 /* Test and jump. */
3343 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3344
3345 /* Free but don't flush the EFlags register. */
3346 iemNativeRegFreeTmp(pReNative, idxEflReg);
3347
3348 /* Make a copy of the core state now as we start the if-block. */
3349 iemNativeCondStartIfBlock(pReNative, off);
3350
3351 return off;
3352}
3353
3354
3355#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
3356 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
3357 do {
3358
3359/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
3360DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
3361{
3362 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3363 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3364
3365 /* Get the eflags. */
3366 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3367 kIemNativeGstRegUse_ReadOnly);
3368
3369 /* Test and jump. */
3370 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3371
3372 /* Free but don't flush the EFlags register. */
3373 iemNativeRegFreeTmp(pReNative, idxEflReg);
3374
3375 /* Make a copy of the core state now as we start the if-block. */
3376 iemNativeCondStartIfBlock(pReNative, off);
3377
3378 return off;
3379}
3380
3381
3382#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
3383 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
3384 do {
3385
3386/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
3387DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
3388{
3389 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3390 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3391
3392 /* Get the eflags. */
3393 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3394 kIemNativeGstRegUse_ReadOnly);
3395
3396 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3397 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3398
3399 /* Test and jump. */
3400 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3401
3402 /* Free but don't flush the EFlags register. */
3403 iemNativeRegFreeTmp(pReNative, idxEflReg);
3404
3405 /* Make a copy of the core state now as we start the if-block. */
3406 iemNativeCondStartIfBlock(pReNative, off);
3407
3408 return off;
3409}
3410
3411
3412#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
3413 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
3414 do {
3415
3416/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
3417DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
3418{
3419 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3420 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3421
3422 /* Get the eflags. */
3423 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3424 kIemNativeGstRegUse_ReadOnly);
3425
3426 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3427 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3428
3429 /* Test and jump. */
3430 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3431
3432 /* Free but don't flush the EFlags register. */
3433 iemNativeRegFreeTmp(pReNative, idxEflReg);
3434
3435 /* Make a copy of the core state now as we start the if-block. */
3436 iemNativeCondStartIfBlock(pReNative, off);
3437
3438 return off;
3439}
3440
3441
3442#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
3443 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
3444 do {
3445
3446#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
3447 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
3448 do {
3449
3450/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
3451DECL_INLINE_THROW(uint32_t)
3452iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3453 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
3454{
3455 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
3456 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3457
3458 /* Get the eflags. */
3459 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3460 kIemNativeGstRegUse_ReadOnly);
3461
3462 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
3463 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
3464
3465 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
3466 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
3467 Assert(iBitNo1 != iBitNo2);
3468
3469#ifdef RT_ARCH_AMD64
3470 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
3471
3472 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3473 if (iBitNo1 > iBitNo2)
3474 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3475 else
3476 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3477 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3478
3479#elif defined(RT_ARCH_ARM64)
3480 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3481 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3482
3483 /* and tmpreg, eflreg, #1<<iBitNo1 */
3484 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3485
3486 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3487 if (iBitNo1 > iBitNo2)
3488 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3489 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3490 else
3491 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3492 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3493
3494 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3495
3496#else
3497# error "Port me"
3498#endif
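 /* Worked example: comparing X86_EFL_SF (bit 7) and X86_EFL_OF (bit 11): tmp is
    efl & 0x80 shifted left by 4 to line up with OF, then xored with efl, so bit 11
    of tmp ends up as SF ^ OF - set exactly when the two flags differ. */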
3499
3500 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3501 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3502 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3503
3504 /* Free but don't flush the EFlags and tmp registers. */
3505 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3506 iemNativeRegFreeTmp(pReNative, idxEflReg);
3507
3508 /* Make a copy of the core state now as we start the if-block. */
3509 iemNativeCondStartIfBlock(pReNative, off);
3510
3511 return off;
3512}
3513
3514
3515#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
3516 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
3517 do {
3518
3519#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
3520 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
3521 do {
3522
3523/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
3524 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
3525DECL_INLINE_THROW(uint32_t)
3526iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
3527 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
3528{
3529 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
3530 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3531
3532 /* We need an if-block label for the inverted variant, as it jumps directly to the if-block when the lone bit is set. */
3533 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
3534 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
3535
3536 /* Get the eflags. */
3537 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3538 kIemNativeGstRegUse_ReadOnly);
3539
3540 /* Translate the flag masks to bit numbers. */
3541 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3542 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3543
3544 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
3545 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
3546 Assert(iBitNo1 != iBitNo);
3547
3548 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
3549 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
3550 Assert(iBitNo2 != iBitNo);
3551 Assert(iBitNo2 != iBitNo1);
3552
3553#ifdef RT_ARCH_AMD64
3554 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
3555#elif defined(RT_ARCH_ARM64)
3556 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3557#endif
3558
3559 /* Check for the lone bit first. */
3560 if (!fInverted)
3561 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3562 else
3563 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
3564
3565 /* Then extract and compare the other two bits. */
3566#ifdef RT_ARCH_AMD64
3567 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3568 if (iBitNo1 > iBitNo2)
3569 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3570 else
3571 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3572 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3573
3574#elif defined(RT_ARCH_ARM64)
3575 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3576
3577 /* and tmpreg, eflreg, #1<<iBitNo1 */
3578 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3579
3580 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3581 if (iBitNo1 > iBitNo2)
3582 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3583 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3584 else
3585 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3586 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3587
3588 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3589
3590#else
3591# error "Port me"
3592#endif
3593
3594 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3595 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3596 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3597
3598 /* Free but don't flush the EFlags and tmp registers. */
3599 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3600 iemNativeRegFreeTmp(pReNative, idxEflReg);
3601
3602 /* Make a copy of the core state now as we start the if-block. */
3603 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
3604
3605 return off;
3606}
3607
3608
3609#define IEM_MC_IF_CX_IS_NZ() \
3610 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
3611 do {
3612
3613/** Emits code for IEM_MC_IF_CX_IS_NZ. */
3614DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3615{
3616 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3617
3618 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3619 kIemNativeGstRegUse_ReadOnly);
3620 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
3621 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3622
3623 iemNativeCondStartIfBlock(pReNative, off);
3624 return off;
3625}
3626
3627
3628#define IEM_MC_IF_ECX_IS_NZ() \
3629 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
3630 do {
3631
3632#define IEM_MC_IF_RCX_IS_NZ() \
3633 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
3634 do {
3635
3636/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
3637DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3638{
3639 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3640
3641 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3642 kIemNativeGstRegUse_ReadOnly);
3643 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
3644 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3645
3646 iemNativeCondStartIfBlock(pReNative, off);
3647 return off;
3648}
3649
3650
3651#define IEM_MC_IF_CX_IS_NOT_ONE() \
3652 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
3653 do {
3654
3655/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
3656DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3657{
3658 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3659
3660 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3661 kIemNativeGstRegUse_ReadOnly);
3662#ifdef RT_ARCH_AMD64
3663 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3664#else
3665 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3666 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3667 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3668#endif
3669 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3670
3671 iemNativeCondStartIfBlock(pReNative, off);
3672 return off;
3673}
3674
3675
3676#define IEM_MC_IF_ECX_IS_NOT_ONE() \
3677 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
3678 do {
3679
3680#define IEM_MC_IF_RCX_IS_NOT_ONE() \
3681 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
3682 do {
3683
3684/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
3685DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3686{
3687 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3688
3689 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3690 kIemNativeGstRegUse_ReadOnly);
3691 if (f64Bit)
3692 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3693 else
3694 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3695 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3696
3697 iemNativeCondStartIfBlock(pReNative, off);
3698 return off;
3699}
3700
3701
3702#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3703 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
3704 do {
3705
3706#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3707 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
3708 do {
3709
3710/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
3711 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3712DECL_INLINE_THROW(uint32_t)
3713iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
3714{
3715 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3716 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3717
3718 /* We have to load both RCX and EFLAGS before we can start branching,
3719 otherwise we'll end up in the else-block with an inconsistent
3720 register allocator state.
3721 Doing EFLAGS first as it's more likely to be loaded, right? */
3722 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3723 kIemNativeGstRegUse_ReadOnly);
3724 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3725 kIemNativeGstRegUse_ReadOnly);
3726
3727 /** @todo we could reduce this to a single branch instruction by spending a
3728 * temporary register and some setnz stuff. Not sure if loops are
3729 * worth it. */
3730 /* Check CX. */
3731#ifdef RT_ARCH_AMD64
3732 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3733#else
3734 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3735 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3736 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3737#endif
3738
3739 /* Check the EFlags bit. */
3740 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3741 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3742 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3743 !fCheckIfSet /*fJmpIfSet*/);
3744
3745 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3746 iemNativeRegFreeTmp(pReNative, idxEflReg);
3747
3748 iemNativeCondStartIfBlock(pReNative, off);
3749 return off;
3750}
3751
3752
3753#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3754 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
3755 do {
3756
3757#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3758 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
3759 do {
3760
3761#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3762 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
3763 do {
3764
3765#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3766 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
3767 do {
3768
3769/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3770 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3771 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3772 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3773DECL_INLINE_THROW(uint32_t)
3774iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3775 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
3776{
3777 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3778 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3779
3780 /* We have to load both RCX and EFLAGS before we can start branching,
3781 otherwise we'll end up in the else-block with an inconsistent
3782 register allocator state.
3783 Doing EFLAGS first as it's more likely to be loaded, right? */
3784 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3785 kIemNativeGstRegUse_ReadOnly);
3786 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3787 kIemNativeGstRegUse_ReadOnly);
3788
3789 /** @todo we could reduce this to a single branch instruction by spending a
3790 * temporary register and some setnz stuff. Not sure if loops are
3791 * worth it. */
3792 /* Check RCX/ECX. */
3793 if (f64Bit)
3794 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3795 else
3796 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3797
3798 /* Check the EFlags bit. */
3799 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3800 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3801 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3802 !fCheckIfSet /*fJmpIfSet*/);
3803
3804 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3805 iemNativeRegFreeTmp(pReNative, idxEflReg);
3806
3807 iemNativeCondStartIfBlock(pReNative, off);
3808 return off;
3809}
3810
3811
3812#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3813 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3814 do {
3815
3816/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3817DECL_INLINE_THROW(uint32_t)
3818iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3819{
3820 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3821
3822 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3823 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3824 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3825 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3826
3827 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3828
3829 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3830
3831 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3832
3833 iemNativeCondStartIfBlock(pReNative, off);
3834 return off;
3835}
3836
3837
3838#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3839 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3840 do {
3841
3842/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3843DECL_INLINE_THROW(uint32_t)
3844iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3845{
3846 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3847 Assert(iGReg < 16);
3848
3849 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3850 kIemNativeGstRegUse_ReadOnly);
3851
3852 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3853
3854 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3855
3856 iemNativeCondStartIfBlock(pReNative, off);
3857 return off;
3858}
3859
3860
3861
3862/*********************************************************************************************************************************
3863* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3864*********************************************************************************************************************************/
3865
3866#define IEM_MC_NOREF(a_Name) \
3867 RT_NOREF_PV(a_Name)
3868
3869#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3870 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3871
3872#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3873 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3874
3875#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3876 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3877
3878#define IEM_MC_LOCAL(a_Type, a_Name) \
3879 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3880
3881#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3882 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3883
3884#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3885 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
3886
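/* Illustrative (hypothetical) use in a generated MC block, just to show the shape of
 * these statements -- each one merely introduces a uint8_t variable index that the
 * emitters below take as their a_Name / aN parameters:
 *      IEM_MC_ARG(uint16_t,             u16Src, 1);
 *      IEM_MC_ARG_CONST(uint8_t,        cShiftArg, 1, 2);
 *      IEM_MC_LOCAL(uint32_t,           uEFlags);
 *      IEM_MC_ARG_LOCAL_REF(uint32_t *, pEFlags, uEFlags, 3);
 */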
3887
3888/**
3889 * Sets the host register for @a idxVarRc to @a idxReg.
3890 *
3891 * The register must not be allocated. Any guest register shadowing will be
3892 * implicitly dropped by this call.
3893 *
3894 * The variable must not have any register associated with it (causes
3895 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3896 * implied.
3897 *
3898 * @returns idxReg
3899 * @param pReNative The recompiler state.
3900 * @param idxVar The variable.
3901 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3902 * @param off For recording in debug info.
3903 *
3904 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
3905 */
3906DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
3907{
3908 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3909 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3910 Assert(!pVar->fRegAcquired);
3911 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3912 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
3913 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3914
3915 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3916 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3917
3918 iemNativeVarSetKindToStack(pReNative, idxVar);
3919 pVar->idxReg = idxReg;
3920
3921 return idxReg;
3922}
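/* Note: the typical caller is iemNativeEmitCallAImplCommon() further down, which uses
   this to bind an AIMPL return value variable to IEMNATIVE_CALL_RET_GREG right after
   emitting the call. */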
3923
3924
3925/**
3926 * A convenient helper that calls iemNativeVarRegisterSet() and marks the register as acquired.
3927 */
3928DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
3929 uint8_t idxReg, uint32_t *poff)
3930{
3931 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
3932 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
3933 return idxReg;
3934}
3935
3936
3937/**
3938 * This is called by IEM_MC_END() to clean up all variables.
3939 */
3940DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
3941{
3942 uint32_t const bmVars = pReNative->Core.bmVars;
3943 if (bmVars != 0)
3944 iemNativeVarFreeAllSlow(pReNative, bmVars);
3945 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
3946 Assert(pReNative->Core.bmStack == 0);
3947}
3948
3949
3950#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
3951
3952/**
3953 * This is called by IEM_MC_FREE_LOCAL.
3954 */
3955DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3956{
3957 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3958 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
3959 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3960}
3961
3962
3963#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
3964
3965/**
3966 * This is called by IEM_MC_FREE_ARG.
3967 */
3968DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3969{
3970 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3971 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
3972 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3973}
3974
3975
3976#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
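/* Hypothetical example of what the MC blocks feed this with (the destination must be
 * strictly narrower, 16 or 32 bits, than the source):
 *      IEM_MC_LOCAL(uint32_t, u32Value);
 *      IEM_MC_LOCAL(uint16_t, u16Value);
 *      ...
 *      IEM_MC_ASSIGN_TO_SMALLER(u16Value, u32Value);
 */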
3977
3978/**
3979 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
3980 */
3981DECL_INLINE_THROW(uint32_t)
3982iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
3983{
3984 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
3985 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
3986 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3987 Assert( pVarDst->cbVar == sizeof(uint16_t)
3988 || pVarDst->cbVar == sizeof(uint32_t));
3989
3990 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
3991 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
3992 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
3993 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
3994 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3995
3996 Assert(pVarDst->cbVar < pVarSrc->cbVar);
3997
3998 /*
3999 * Special case for immediates.
4000 */
4001 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
4002 {
4003 switch (pVarDst->cbVar)
4004 {
4005 case sizeof(uint16_t):
4006 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
4007 break;
4008 case sizeof(uint32_t):
4009 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
4010 break;
4011 default: AssertFailed(); break;
4012 }
4013 }
4014 else
4015 {
4016 /*
4017 * The generic solution for now.
4018 */
4019 /** @todo optimize this by having the python script make sure the source
4020 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
4021 * statement. Then we could just transfer the register assignments. */
4022 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
4023 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
4024 switch (pVarDst->cbVar)
4025 {
4026 case sizeof(uint16_t):
4027 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
4028 break;
4029 case sizeof(uint32_t):
4030 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
4031 break;
4032 default: AssertFailed(); break;
4033 }
4034 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
4035 iemNativeVarRegisterRelease(pReNative, idxVarDst);
4036 }
4037 return off;
4038}
4039
4040
4041
4042/*********************************************************************************************************************************
4043* Emitters for IEM_MC_CALL_CIMPL_XXX *
4044*********************************************************************************************************************************/
4045
4046/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
4047DECL_INLINE_THROW(uint32_t)
4048iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
4049 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
4050
4051{
4052 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
4053
4054#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4055 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
4056 when a call clobbers any of the relevant control registers. */
4057# if 1
4058 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
4059 {
4060 /* Likely as long as call+ret are done via cimpl. */
4061 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
4062 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
4063 }
4064 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
4065 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4066 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
4067 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4068 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
4069 else
4070 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4071 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4072 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4073
4074# else
4075 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
4076 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4077 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
4078 pReNative->fSimdRaiseXcptChecksEmitted = 0;
4079 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
4080 || pfnCImpl == (uintptr_t)iemCImpl_callf
4081 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
4082 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
4083 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4084 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4085 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4086# endif
4087
4088# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
4089 /* Mark the host floating point control register as not synced if MXCSR is modified. */
4090 if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_MxCsr))
4091 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
4092# endif
4093#endif
4094
4095 /*
4096 * Do all the call setup and cleanup.
4097 */
4098 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
4099
4100 /*
4101 * Load the two or three hidden arguments: pVCpu and cbInstr, plus the rcStrict shadow buffer on Windows/AMD64 strict-status builds.
4102 */
4103#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4104 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4105 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4106 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
4107#else
4108 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4109 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
4110#endif
4111
4112 /*
4113 * Make the call and check the return code.
4114 *
4115 * Shadow PC copies are always flushed here, other stuff depends on flags.
4116 * Segment and general purpose registers are explicitly flushed via the
4117 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
4118 * macros.
4119 */
4120 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
4121#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4122 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4123#endif
4124 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
4125 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
4126 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
4127 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
4128
4129#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
4130 pReNative->Core.fDebugPcInitialized = false;
4131 Log4(("fDebugPcInitialized=false cimpl off=%#x (v1)\n", off));
4132#endif
4133
4134 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
4135}
4136
4137
4138#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
4139 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
4140
4141/** Emits code for IEM_MC_CALL_CIMPL_1. */
4142DECL_INLINE_THROW(uint32_t)
4143iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4144 uintptr_t pfnCImpl, uint8_t idxArg0)
4145{
4146 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4147 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
4148}
4149
4150
4151#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
4152 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
4153
4154/** Emits code for IEM_MC_CALL_CIMPL_2. */
4155DECL_INLINE_THROW(uint32_t)
4156iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4157 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
4158{
4159 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4160 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4161 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
4162}
4163
4164
4165#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
4166 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4167 (uintptr_t)a_pfnCImpl, a0, a1, a2)
4168
4169/** Emits code for IEM_MC_CALL_CIMPL_3. */
4170DECL_INLINE_THROW(uint32_t)
4171iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4172 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4173{
4174 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4175 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4176 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4177 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
4178}
4179
4180
4181#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
4182 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4183 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
4184
4185/** Emits code for IEM_MC_CALL_CIMPL_4. */
4186DECL_INLINE_THROW(uint32_t)
4187iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4188 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4189{
4190 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4191 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4192 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4193 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4194 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
4195}
4196
4197
4198#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
4199 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4200 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
4201
4202/** Emits code for IEM_MC_CALL_CIMPL_5. */
4203DECL_INLINE_THROW(uint32_t)
4204iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4205 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
4206{
4207 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4208 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4209 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4210 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4211 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
4212 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
4213}
4214
4215
4216/** Recompiler debugging: Flush guest register shadow copies. */
4217#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
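/* For instance, IEM_MC_HINT_FLUSH_GUEST_SHADOW(RT_BIT_64(kIemNativeGstReg_EFlags)) would
   drop any host register currently shadowing the guest EFLAGS value (illustrative only;
   the real masks come from the MC blocks). */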
4218
4219
4220
4221/*********************************************************************************************************************************
4222* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
4223*********************************************************************************************************************************/
4224
4225/**
4226 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
4227 */
4228DECL_INLINE_THROW(uint32_t)
4229iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4230 uintptr_t pfnAImpl, uint8_t cArgs)
4231{
4232 if (idxVarRc != UINT8_MAX)
4233 {
4234 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
4235 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
4236 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
4237 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
4238 }
4239
4240 /*
4241 * Do all the call setup and cleanup.
4242 *
4243 * It is only required to flush pending guest register writes in call-volatile registers,
4244 * as assembly helpers can't throw and don't access anything living in CPUMCTX; they only
4245 * access their parameters. The flushing of call-volatile registers is always done by
4246 * iemNativeEmitCallCommon() regardless of the fFlushPendingWrites parameter.
4247 */
4248 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
4249
4250 /*
4251 * Make the call and update the return code variable if we've got one.
4252 */
4253 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
4254 if (idxVarRc != UINT8_MAX)
4255 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
4256
4257 return off;
4258}
4259
4260
4261
4262#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
4263 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
4264
4265#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
4266 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
4267
4268/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
4269DECL_INLINE_THROW(uint32_t)
4270iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
4271{
4272 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
4273}
4274
4275
4276#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
4277 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
4278
4279#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
4280 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
4281
4282/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
4283DECL_INLINE_THROW(uint32_t)
4284iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
4285{
4286 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4287 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
4288}
4289
4290
4291#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
4292 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
4293
4294#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
4295 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
4296
4297/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
4298DECL_INLINE_THROW(uint32_t)
4299iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4300 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
4301{
4302 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4303 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4304 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
4305}
4306
4307
4308#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
4309 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
4310
4311#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
4312 IEM_MC_LOCAL(a_rcType, a_rc); \
4313 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
4314
4315/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
4316DECL_INLINE_THROW(uint32_t)
4317iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4318 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4319{
4320 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4321 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4322 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4323 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
4324}
4325
4326
4327#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
4328 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4329
4330#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
4331 IEM_MC_LOCAL(a_rcType, a_rc); \
4332 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4333
4334/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
4335DECL_INLINE_THROW(uint32_t)
4336iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4337 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4338{
4339 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4340 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4341 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4342 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
4343 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
4344}
4345
4346
4347
4348/*********************************************************************************************************************************
4349* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
4350*********************************************************************************************************************************/
4351
4352#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
4353 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
4354
4355#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4356 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
4357
4358#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4359 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
4360
4361#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4362 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
4363
4364
4365/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
4366 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
4367DECL_INLINE_THROW(uint32_t)
4368iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
4369{
4370 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4371 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4372 Assert(iGRegEx < 20);
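 /* Note: iGRegEx appears to encode the sixteen GPRs as 0..15 and AH/CH/DH/BH as 16..19,
    hence the '< 20' assertion and the '& 15' masking when resolving the full register. */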
4373
4374 /* Same discussion as in iemNativeEmitFetchGregU16 */
4375 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4376 kIemNativeGstRegUse_ReadOnly);
4377
4378 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4379 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4380
4381 /* The value is zero-extended to the full 64-bit host register width. */
4382 if (iGRegEx < 16)
4383 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4384 else
4385 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4386
4387 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4388 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4389 return off;
4390}
4391
4392
4393#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4394 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
4395
4396#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4397 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
4398
4399#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4400 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
4401
4402/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
4403DECL_INLINE_THROW(uint32_t)
4404iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
4405{
4406 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4407 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4408 Assert(iGRegEx < 20);
4409
4410 /* Same discussion as in iemNativeEmitFetchGregU16 */
4411 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4412 kIemNativeGstRegUse_ReadOnly);
4413
4414 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4415 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4416
4417 if (iGRegEx < 16)
4418 {
4419 switch (cbSignExtended)
4420 {
4421 case sizeof(uint16_t):
4422 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4423 break;
4424 case sizeof(uint32_t):
4425 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4426 break;
4427 case sizeof(uint64_t):
4428 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4429 break;
4430 default: AssertFailed(); break;
4431 }
4432 }
4433 else
4434 {
4435 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4436 switch (cbSignExtended)
4437 {
4438 case sizeof(uint16_t):
4439 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4440 break;
4441 case sizeof(uint32_t):
4442 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4443 break;
4444 case sizeof(uint64_t):
4445 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4446 break;
4447 default: AssertFailed(); break;
4448 }
4449 }
4450
4451 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4452 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4453 return off;
4454}
4455
4456
4457
4458#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
4459 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
4460
4461#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
4462 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4463
4464#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
4465 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4466
4467/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
4468DECL_INLINE_THROW(uint32_t)
4469iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4470{
4471 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4472 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4473 Assert(iGReg < 16);
4474
4475 /*
4476 * We can either just load the low 16-bit of the GPR into a host register
4477 * for the variable, or we can do so via a shadow copy host register. The
4478 * latter will avoid having to reload it if it's being stored later, but
4479 * will waste a host register if it isn't touched again. Since we don't
4480 * know what's going to happen, we choose the latter for now.
4481 */
4482 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4483 kIemNativeGstRegUse_ReadOnly);
4484
4485 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4486 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4487 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4488 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4489
4490 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4491 return off;
4492}
4493
4494#define IEM_MC_FETCH_GREG_I16(a_i16Dst, a_iGReg) \
4495 off = iemNativeEmitFetchGregI16(pReNative, off, a_i16Dst, a_iGReg)
4496
4497/** Emits code for IEM_MC_FETCH_GREG_I16. */
4498DECL_INLINE_THROW(uint32_t)
4499iemNativeEmitFetchGregI16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4500{
4501 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4502 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(int16_t));
4503 Assert(iGReg < 16);
4504
4505 /*
4506 * We can either just load the low 16-bit of the GPR into a host register
4507 * for the variable, or we can do so via a shadow copy host register. The
4508 * latter will avoid having to reload it if it's being stored later, but
4509 * will waste a host register if it isn't touched again. Since we don't
4510 * know what's going to happen, we choose the latter for now.
4511 */
4512 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4513 kIemNativeGstRegUse_ReadOnly);
4514
4515 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4516 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4517#ifdef RT_ARCH_AMD64
4518 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4519#elif defined(RT_ARCH_ARM64) /* Note! There are no 16-bit registers on ARM, we emulate that through 32-bit registers which requires sign extension. */
4520 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4521#endif
4522 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4523
4524 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4525 return off;
4526}
4527
4528
4529#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
4530 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4531
4532#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
4533 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4534
4535/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
4536DECL_INLINE_THROW(uint32_t)
4537iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
4538{
4539 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4540 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4541 Assert(iGReg < 16);
4542
4543 /*
4544 * We can either just load the low 16-bit of the GPR into a host register
4545 * for the variable, or we can do so via a shadow copy host register. The
4546 * latter will avoid having to reload it if it's being stored later, but
4547 * will waste a host register if it isn't touched again. Since we don't
4548 * know what's going to happen, we choose the latter for now.
4549 */
4550 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4551 kIemNativeGstRegUse_ReadOnly);
4552
4553 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4554 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4555 if (cbSignExtended == sizeof(uint32_t))
4556 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4557 else
4558 {
4559 Assert(cbSignExtended == sizeof(uint64_t));
4560 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4561 }
4562 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4563
4564 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4565 return off;
4566}
4567
4568
4569#define IEM_MC_FETCH_GREG_I32(a_i32Dst, a_iGReg) \
4570 off = iemNativeEmitFetchGregU32(pReNative, off, a_i32Dst, a_iGReg, sizeof(uint32_t))
4571
4572#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
4573 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
4574
4575#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
4576 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
4577
4578/** Emits code for IEM_MC_FETCH_GREG_U32. */
4579DECL_INLINE_THROW(uint32_t)
4580iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4581{
4582 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4583 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4584 Assert(iGReg < 16);
4585
4586 /*
4587 * We can either just load the low 32-bit of the GPR into a host register
4588 * for the variable, or we can do so via a shadow copy host register. The
4589 * latter will avoid having to reload it if it's being stored later, but
4590 * will waste a host register if it isn't touched again. Since we don't
4591 * know what's going to happen, we choose the latter for now.
4592 */
4593 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4594 kIemNativeGstRegUse_ReadOnly);
4595
4596 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4597 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4598 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4599 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4600
4601 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4602 return off;
4603}
4604
4605
4606#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
4607 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
4608
4609/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
4610DECL_INLINE_THROW(uint32_t)
4611iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4612{
4613 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4614 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4615 Assert(iGReg < 16);
4616
4617 /*
4618 * We can either just load the low 32-bit of the GPR into a host register
4619 * for the variable, or we can do so via a shadow copy host register. The
4620 * latter will avoid having to reload it if it's being stored later, but
4621 * will waste a host register if it isn't touched again. Since we don't
4622 * know what's going to happen, we choose the latter for now.
4623 */
4624 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4625 kIemNativeGstRegUse_ReadOnly);
4626
4627 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4628 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4629 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4630 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4631
4632 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4633 return off;
4634}
4635
4636
4637#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
4638 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4639
4640#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
4641 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4642
4643/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
4644 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
4645DECL_INLINE_THROW(uint32_t)
4646iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4647{
4648 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4649 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4650 Assert(iGReg < 16);
4651
4652 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4653 kIemNativeGstRegUse_ReadOnly);
4654
4655 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4656 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4657 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
4658 /** @todo name the register a shadow one already? */
4659 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4660
4661 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4662 return off;
4663}
4664
4665
4666#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4667#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
4668 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
4669
4670/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
4671DECL_INLINE_THROW(uint32_t)
4672iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
4673{
4674 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4675 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4676 Assert(iGRegLo < 16 && iGRegHi < 16);
4677
4678 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4679 kIemNativeGstRegUse_ReadOnly);
4680 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4681 kIemNativeGstRegUse_ReadOnly);
4682
4683 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4684 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
4685 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
4686 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
4687
4688 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4689 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4690 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4691 return off;
4692}
4693#endif
4694
4695
4696/*********************************************************************************************************************************
4697* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
4698*********************************************************************************************************************************/
4699
4700#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
4701 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
4702
4703/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
4704DECL_INLINE_THROW(uint32_t)
4705iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
4706{
4707 Assert(iGRegEx < 20);
4708 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4709 kIemNativeGstRegUse_ForUpdate);
4710#ifdef RT_ARCH_AMD64
4711 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4712
4713 /* To the lowest byte of the register: mov r8, imm8 */
4714 if (iGRegEx < 16)
4715 {
4716 if (idxGstTmpReg >= 8)
4717 pbCodeBuf[off++] = X86_OP_REX_B;
4718 else if (idxGstTmpReg >= 4)
4719 pbCodeBuf[off++] = X86_OP_REX;
4720 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4721 pbCodeBuf[off++] = u8Value;
4722 }
4723 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if the host register allows it, otherwise we rotate. */
4724 else if (idxGstTmpReg < 4)
4725 {
4726 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
4727 pbCodeBuf[off++] = u8Value;
4728 }
4729 else
4730 {
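        /* Illustrative: when the high byte cannot be encoded directly (any host register
           needing a REX prefix has no ah/ch/dh/bh form), the emitted sequence is roughly
               ror reg64, 8 ; mov reg8, imm8 ; rol reg64, 8
           i.e. rotate the target byte down into the low byte, overwrite it, rotate back. */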
4731 /* ror reg64, 8 */
4732 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4733 pbCodeBuf[off++] = 0xc1;
4734 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4735 pbCodeBuf[off++] = 8;
4736
4737 /* mov reg8, imm8 */
4738 if (idxGstTmpReg >= 8)
4739 pbCodeBuf[off++] = X86_OP_REX_B;
4740 else if (idxGstTmpReg >= 4)
4741 pbCodeBuf[off++] = X86_OP_REX;
4742 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4743 pbCodeBuf[off++] = u8Value;
4744
4745 /* rol reg64, 8 */
4746 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4747 pbCodeBuf[off++] = 0xc1;
4748 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4749 pbCodeBuf[off++] = 8;
4750 }
4751
4752#elif defined(RT_ARCH_ARM64)
4753 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4754 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4755 if (iGRegEx < 16)
4756 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4757 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4758 else
4759 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4760 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4761 iemNativeRegFreeTmp(pReNative, idxImmReg);
4762
4763#else
4764# error "Port me!"
4765#endif
4766
4767 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4768
4769#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4770 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4771#endif
4772
4773 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4774 return off;
4775}
4776
4777
4778#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4779 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4780
4781/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4782DECL_INLINE_THROW(uint32_t)
4783iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4784{
4785 Assert(iGRegEx < 20);
4786 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4787
4788 /*
4789 * If it's a constant value (unlikely) we treat this as a
4790 * IEM_MC_STORE_GREG_U8_CONST statement.
4791 */
4792 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4793 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4794 { /* likely */ }
4795 else
4796 {
4797 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4798 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4799 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4800 }
4801
4802 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4803 kIemNativeGstRegUse_ForUpdate);
4804 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4805
4806#ifdef RT_ARCH_AMD64
4807 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4808 if (iGRegEx < 16)
4809 {
4810 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4811 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4812 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4813 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4814 pbCodeBuf[off++] = X86_OP_REX;
4815 pbCodeBuf[off++] = 0x8a;
4816 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4817 }
4818 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if the registers allow it, otherwise we rotate. */
4819 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4820 {
4821 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4822 pbCodeBuf[off++] = 0x8a;
4823 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4824 }
4825 else
4826 {
4827 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4828
4829 /* ror reg64, 8 */
4830 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4831 pbCodeBuf[off++] = 0xc1;
4832 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4833 pbCodeBuf[off++] = 8;
4834
4835 /* mov reg8, reg8(r/m) */
4836 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4837 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4838 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4839 pbCodeBuf[off++] = X86_OP_REX;
4840 pbCodeBuf[off++] = 0x8a;
4841 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4842
4843 /* rol reg64, 8 */
4844 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4845 pbCodeBuf[off++] = 0xc1;
4846 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4847 pbCodeBuf[off++] = 8;
4848 }
4849
4850#elif defined(RT_ARCH_ARM64)
4851 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4852 or
4853 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4854 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4855 if (iGRegEx < 16)
4856 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4857 else
4858 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4859
4860#else
4861# error "Port me!"
4862#endif
4863 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4864
4865 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4866
4867#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4868 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4869#endif
4870 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4871 return off;
4872}
4873
4874
4875
4876#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4877 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4878
4879/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4880DECL_INLINE_THROW(uint32_t)
4881iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4882{
4883 Assert(iGReg < 16);
4884 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4885 kIemNativeGstRegUse_ForUpdate);
4886#ifdef RT_ARCH_AMD64
4887 /* mov reg16, imm16 */
4888 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4889 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4890 if (idxGstTmpReg >= 8)
4891 pbCodeBuf[off++] = X86_OP_REX_B;
4892 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4893 pbCodeBuf[off++] = RT_BYTE1(uValue);
4894 pbCodeBuf[off++] = RT_BYTE2(uValue);
4895
4896#elif defined(RT_ARCH_ARM64)
4897 /* movk xdst, #uValue, lsl #0 */
4898 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4899 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
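 /* MOVK (lsl #0) only replaces bits 15:0 and leaves the rest of the register untouched,
    which is exactly the 16-bit store semantics we need. */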
4900
4901#else
4902# error "Port me!"
4903#endif
4904
4905 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4906
4907#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4908 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4909#endif
4910 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4911 return off;
4912}
4913
4914
4915#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4916 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4917
4918/** Emits code for IEM_MC_STORE_GREG_U16. */
4919DECL_INLINE_THROW(uint32_t)
4920iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4921{
4922 Assert(iGReg < 16);
4923 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4924
4925 /*
4926 * If it's a constant value (unlikely) we treat this as a
4927 * IEM_MC_STORE_GREG_U16_CONST statement.
4928 */
4929 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4930 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4931 { /* likely */ }
4932 else
4933 {
4934 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4935 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4936 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
4937 }
4938
4939 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4940 kIemNativeGstRegUse_ForUpdate);
4941
4942#ifdef RT_ARCH_AMD64
4943 /* mov reg16, reg16 or [mem16] */
4944 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4945 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4946 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
4947 {
4948 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
4949 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
4950 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
4951 pbCodeBuf[off++] = 0x8b;
4952 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
4953 }
4954 else
4955 {
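 /* The value has been spilled to its stack slot; load it RBP-relative. Only REX.R can be
    needed here since RBP is the base register. */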
4956 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
4957 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
4958 if (idxGstTmpReg >= 8)
4959 pbCodeBuf[off++] = X86_OP_REX_R;
4960 pbCodeBuf[off++] = 0x8b;
4961 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
4962 }
4963
4964#elif defined(RT_ARCH_ARM64)
4965 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
4966 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4967 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4968 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
4969 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4970
4971#else
4972# error "Port me!"
4973#endif
4974
4975 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4976
4977#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4978 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4979#endif
4980 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4981 return off;
4982}
4983
4984
4985#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
4986 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
4987
4988/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
4989DECL_INLINE_THROW(uint32_t)
4990iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
4991{
4992 Assert(iGReg < 16);
4993 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4994 kIemNativeGstRegUse_ForFullWrite);
4995 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
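 /* uValue is only 32 bits wide, so the immediate load leaves bits 63:32 clear - matching
    the x86-64 rule that a 32-bit GPR write zeroes the upper half. */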
4996#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4997 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4998#endif
4999 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5000 return off;
5001}
5002
5003
5004#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
5005 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
5006
5007#define IEM_MC_STORE_GREG_I32(a_iGReg, a_i32Value) \
5008 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_i32Value)
5009
5010/** Emits code for IEM_MC_STORE_GREG_U32/IEM_MC_STORE_GREG_I32. */
5011DECL_INLINE_THROW(uint32_t)
5012iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5013{
5014 Assert(iGReg < 16);
5015 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5016
5017 /*
5018 * If it's a constant value (unlikely) we treat this as a
5019 * IEM_MC_STORE_GREG_U32_CONST statement.
5020 */
5021 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5022 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5023 { /* likely */ }
5024 else
5025 {
5026 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5027 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5028 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
5029 }
5030
5031 /*
5032 * For the rest we allocate a guest register for the variable and write
5033 * it to the CPUMCTX structure.
5034 */
5035 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
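 /* The variable's host register is acquired as the shadow of the guest GPR here, so with
    delayed register writeback enabled no explicit CPUMCTX store is emitted below; the
    register flush code takes care of it. */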
5036#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5037 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5038#else
5039 RT_NOREF(idxVarReg);
5040#endif
5041#ifdef VBOX_STRICT
5042 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
5043#endif
5044 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5045 return off;
5046}
5047
5048
5049#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
5050 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
5051
5052/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
5053DECL_INLINE_THROW(uint32_t)
5054iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
5055{
5056 Assert(iGReg < 16);
5057 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5058 kIemNativeGstRegUse_ForFullWrite);
5059 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5060#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5061 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5062#endif
5063 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5064 return off;
5065}
5066
5067
5068#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
5069 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
5070
5071#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
5072 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
5073
5074/** Emits code for IEM_MC_STORE_GREG_U64. */
5075DECL_INLINE_THROW(uint32_t)
5076iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5077{
5078 Assert(iGReg < 16);
5079 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5080
5081 /*
5082 * If it's a constant value (unlikely) we treat this as a
5083 * IEM_MC_STORE_GREG_U64_CONST statement.
5084 */
5085 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5086 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5087 { /* likely */ }
5088 else
5089 {
5090 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5091 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5092 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
5093 }
5094
5095 /*
5096 * For the rest we allocate a guest register for the variable and write
5097 * it to the CPUMCTX structure.
5098 */
5099 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5100#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5101 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5102#else
5103 RT_NOREF(idxVarReg);
5104#endif
5105 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5106 return off;
5107}
5108
5109
5110#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
5111 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
5112
5113/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
5114DECL_INLINE_THROW(uint32_t)
5115iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
5116{
5117 Assert(iGReg < 16);
5118 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5119 kIemNativeGstRegUse_ForUpdate);
5120 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
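 /* A 32-bit register-to-register move zero-extends into bits 63:32 on both AMD64 and ARM64,
    which is exactly the clearing we are after. */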
5121#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5122 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5123#endif
5124 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5125 return off;
5126}
5127
5128
5129#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5130#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
5131 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
5132
5133/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
5134DECL_INLINE_THROW(uint32_t)
5135iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
5136{
5137 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5138 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
5139 Assert(iGRegLo < 16 && iGRegHi < 16);
5140
5141 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
5142 kIemNativeGstRegUse_ForFullWrite);
5143 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
5144 kIemNativeGstRegUse_ForFullWrite);
5145
5146 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5147 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
5148 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
5149 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
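 /* Qword 0 of the 128-bit variable goes to the low GPR, qword 1 to the high GPR. */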
5150
5151 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
5152 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
5153 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
5154 return off;
5155}
5156#endif
5157
5158
5159/*********************************************************************************************************************************
5160* General purpose register manipulation (add, sub). *
5161*********************************************************************************************************************************/
5162
5163#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5164 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5165
5166/** Emits code for IEM_MC_ADD_GREG_U16. */
5167DECL_INLINE_THROW(uint32_t)
5168iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
5169{
5170 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5171 kIemNativeGstRegUse_ForUpdate);
5172
5173#ifdef RT_ARCH_AMD64
5174 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5175 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5176 if (idxGstTmpReg >= 8)
5177 pbCodeBuf[off++] = X86_OP_REX_B;
5178 if (uAddend == 1)
5179 {
5180 pbCodeBuf[off++] = 0xff; /* inc */
5181 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5182 }
5183 else
5184 {
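 /* add r/m16, imm16 - uAddend is at most 8 bits, hence the zero high immediate byte. */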
5185 pbCodeBuf[off++] = 0x81;
5186 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5187 pbCodeBuf[off++] = uAddend;
5188 pbCodeBuf[off++] = 0;
5189 }
5190
5191#else
5192 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5193 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5194
5195 /* add tmp, gstgrp, uAddend */
5196 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
5197
5198 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
5199 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5200
5201 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5202#endif
5203
5204 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5205
5206#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5207 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5208#endif
5209
5210 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5211 return off;
5212}
5213
5214
5215#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
5216 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5217
5218#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
5219 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5220
5221/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
5222DECL_INLINE_THROW(uint32_t)
5223iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
5224{
5225 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5226 kIemNativeGstRegUse_ForUpdate);
5227
5228#ifdef RT_ARCH_AMD64
5229 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5230 if (f64Bit)
5231 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5232 else if (idxGstTmpReg >= 8)
5233 pbCodeBuf[off++] = X86_OP_REX_B;
5234 if (uAddend == 1)
5235 {
5236 pbCodeBuf[off++] = 0xff; /* inc */
5237 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5238 }
5239 else if (uAddend < 128)
5240 {
5241 pbCodeBuf[off++] = 0x83; /* add */
5242 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5243 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5244 }
5245 else
5246 {
5247 pbCodeBuf[off++] = 0x81; /* add */
5248 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5249 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5250 pbCodeBuf[off++] = 0;
5251 pbCodeBuf[off++] = 0;
5252 pbCodeBuf[off++] = 0;
5253 }
5254
5255#else
5256 /* add gstgrp, gstgrp, uAddend */
5257 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5258 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
5259
5260#endif
5261
5262 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5263
5264#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5265 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5266#endif
5267
5268 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5269 return off;
5270}
5271
5272
5273
5274#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5275 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5276
5277/** Emits code for IEM_MC_SUB_GREG_U16. */
5278DECL_INLINE_THROW(uint32_t)
5279iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
5280{
5281 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5282 kIemNativeGstRegUse_ForUpdate);
5283
5284#ifdef RT_ARCH_AMD64
5285 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5286 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5287 if (idxGstTmpReg >= 8)
5288 pbCodeBuf[off++] = X86_OP_REX_B;
5289 if (uSubtrahend == 1)
5290 {
5291 pbCodeBuf[off++] = 0xff; /* dec */
5292 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5293 }
5294 else
5295 {
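 /* sub r/m16, imm16 - uSubtrahend is at most 8 bits, hence the zero high immediate byte. */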
5296 pbCodeBuf[off++] = 0x81;
5297 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5298 pbCodeBuf[off++] = uSubtrahend;
5299 pbCodeBuf[off++] = 0;
5300 }
5301
5302#else
5303 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5304 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5305
5306 /* sub tmp, gstgrp, uSubtrahend */
5307 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
5308
5309 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
5310 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5311
5312 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5313#endif
5314
5315 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5316
5317#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5318 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5319#endif
5320
5321 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5322 return off;
5323}
5324
5325
5326#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
5327 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5328
5329#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
5330 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5331
5332/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
5333DECL_INLINE_THROW(uint32_t)
5334iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
5335{
5336 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5337 kIemNativeGstRegUse_ForUpdate);
5338
5339#ifdef RT_ARCH_AMD64
5340 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5341 if (f64Bit)
5342 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5343 else if (idxGstTmpReg >= 8)
5344 pbCodeBuf[off++] = X86_OP_REX_B;
5345 if (uSubtrahend == 1)
5346 {
5347 pbCodeBuf[off++] = 0xff; /* dec */
5348 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5349 }
5350 else if (uSubtrahend < 128)
5351 {
5352 pbCodeBuf[off++] = 0x83; /* sub */
5353 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5354 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5355 }
5356 else
5357 {
5358 pbCodeBuf[off++] = 0x81; /* sub */
5359 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5360 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5361 pbCodeBuf[off++] = 0;
5362 pbCodeBuf[off++] = 0;
5363 pbCodeBuf[off++] = 0;
5364 }
5365
5366#else
5367 /* sub gstgrp, gstgrp, uSubtrahend */
5368 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5369 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
5370
5371#endif
5372
5373 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5374
5375#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5376 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5377#endif
5378
5379 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5380 return off;
5381}
5382
5383
5384#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
5385 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5386
5387#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
5388 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5389
5390#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
5391 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5392
5393#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
5394 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5395
5396/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
5397DECL_INLINE_THROW(uint32_t)
5398iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5399{
5400#ifdef VBOX_STRICT
5401 switch (cbMask)
5402 {
5403 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5404 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5405 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5406 case sizeof(uint64_t): break;
5407 default: AssertFailedBreak();
5408 }
5409#endif
5410
5411 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5412 kIemNativeGstRegUse_ForUpdate);
5413
5414 switch (cbMask)
5415 {
5416 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5417 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
5418 break;
5419 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
5420 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
5421 break;
5422 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5423 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5424 break;
5425 case sizeof(uint64_t):
5426 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
5427 break;
5428 default: AssertFailedBreak();
5429 }
5430
5431 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5432
5433#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5434 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5435#endif
5436
5437 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5438 return off;
5439}
5440
5441
5442#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
5443 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5444
5445#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
5446 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5447
5448#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
5449 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5450
5451#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
5452 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5453
5454/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
5455DECL_INLINE_THROW(uint32_t)
5456iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5457{
5458#ifdef VBOX_STRICT
5459 switch (cbMask)
5460 {
5461 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5462 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5463 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5464 case sizeof(uint64_t): break;
5465 default: AssertFailedBreak();
5466 }
5467#endif
5468
5469 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5470 kIemNativeGstRegUse_ForUpdate);
5471
5472 switch (cbMask)
5473 {
5474 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5475 case sizeof(uint16_t):
5476 case sizeof(uint64_t):
5477 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
5478 break;
5479 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5480 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5481 break;
5482 default: AssertFailedBreak();
5483 }
5484
5485 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5486
5487#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5488 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5489#endif
5490
5491 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5492 return off;
5493}
5494
5495
5496/*********************************************************************************************************************************
5497* Local/Argument variable manipulation (add, sub, and, or). *
5498*********************************************************************************************************************************/
5499
5500#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
5501 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5502
5503#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
5504 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5505
5506#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
5507 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5508
5509#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
5510 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5511
5512
5513#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
5514 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
5515
5516#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
5517 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
5518
5519#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
5520 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
5521
5522/** Emits code for AND'ing a local and a constant value. */
5523DECL_INLINE_THROW(uint32_t)
5524iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5525{
5526#ifdef VBOX_STRICT
5527 switch (cbMask)
5528 {
5529 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5530 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5531 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5532 case sizeof(uint64_t): break;
5533 default: AssertFailedBreak();
5534 }
5535#endif
5536
5537 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5538 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5539
5540 if (cbMask <= sizeof(uint32_t))
5541 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
5542 else
5543 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
5544
5545 iemNativeVarRegisterRelease(pReNative, idxVar);
5546 return off;
5547}
5548
5549
5550#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
5551 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5552
5553#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
5554 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5555
5556#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
5557 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5558
5559#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
5560 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5561
5562/** Emits code for OR'ing a local and a constant value. */
5563DECL_INLINE_THROW(uint32_t)
5564iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5565{
5566#ifdef VBOX_STRICT
5567 switch (cbMask)
5568 {
5569 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5570 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5571 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5572 case sizeof(uint64_t): break;
5573 default: AssertFailedBreak();
5574 }
5575#endif
5576
5577 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5578 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5579
5580 if (cbMask <= sizeof(uint32_t))
5581 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
5582 else
5583 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
5584
5585 iemNativeVarRegisterRelease(pReNative, idxVar);
5586 return off;
5587}
5588
5589
5590#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
5591 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
5592
5593#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
5594 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
5595
5596#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
5597 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
5598
5599/** Emits code for reversing the byte order in a local value. */
5600DECL_INLINE_THROW(uint32_t)
5601iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
5602{
5603 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5604 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5605
5606 switch (cbLocal)
5607 {
5608 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
5609 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
5610 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
5611 default: AssertFailedBreak();
5612 }
5613
5614 iemNativeVarRegisterRelease(pReNative, idxVar);
5615 return off;
5616}
5617
5618
5619#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
5620 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5621
5622#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
5623 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5624
5625#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
5626 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5627
5628/** Emits code for shifting left a local value. */
5629DECL_INLINE_THROW(uint32_t)
5630iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5631{
5632#ifdef VBOX_STRICT
5633 switch (cbLocal)
5634 {
5635 case sizeof(uint8_t): Assert(cShift < 8); break;
5636 case sizeof(uint16_t): Assert(cShift < 16); break;
5637 case sizeof(uint32_t): Assert(cShift < 32); break;
5638 case sizeof(uint64_t): Assert(cShift < 64); break;
5639 default: AssertFailedBreak();
5640 }
5641#endif
5642
5643 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5644 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5645
5646 if (cbLocal <= sizeof(uint32_t))
5647 {
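 /* Do the shift on the 32-bit register; for 8/16-bit locals, mask off anything shifted
    beyond the local's width afterwards. */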
5648 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
5649 if (cbLocal < sizeof(uint32_t))
5650 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
5651 cbLocal == sizeof(uint16_t)
5652 ? UINT32_C(0xffff)
5653 : UINT32_C(0xff));
5654 }
5655 else
5656 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
5657
5658 iemNativeVarRegisterRelease(pReNative, idxVar);
5659 return off;
5660}
5661
5662
5663#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
5664 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5665
5666#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
5667 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5668
5669#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
5670 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5671
5672/** Emits code for arithmetically shifting a local value right. */
5673DECL_INLINE_THROW(uint32_t)
5674iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5675{
5676#ifdef VBOX_STRICT
5677 switch (cbLocal)
5678 {
5679 case sizeof(int8_t): Assert(cShift < 8); break;
5680 case sizeof(int16_t): Assert(cShift < 16); break;
5681 case sizeof(int32_t): Assert(cShift < 32); break;
5682 case sizeof(int64_t): Assert(cShift < 64); break;
5683 default: AssertFailedBreak();
5684 }
5685#endif
5686
5687 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5688 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5689
5690 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
5691 if (cbLocal == sizeof(uint8_t))
5692 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
5693 else if (cbLocal == sizeof(uint16_t))
5694 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
5695
5696 if (cbLocal <= sizeof(uint32_t))
5697 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
5698 else
5699 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
5700
5701 iemNativeVarRegisterRelease(pReNative, idxVar);
5702 return off;
5703}
5704
5705
5706#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
5707 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
5708
5709#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
5710 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
5711
5712#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
5713 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
5714
5715/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
5716DECL_INLINE_THROW(uint32_t)
5717iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
5718{
5719 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
5720 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
5721 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5722 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5723
5724 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5725 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
5726
5727 /* Need to sign extend the value. */
5728 if (cbLocal <= sizeof(uint32_t))
5729 {
5730/** @todo ARM64: In case of boredom, the extended add instruction can do the
5731 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
5732 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5733
5734 switch (cbLocal)
5735 {
5736 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
5737 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
5738 default: AssertFailed();
5739 }
5740
5741 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
5742 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5743 }
5744 else
5745 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
5746
5747 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
5748 iemNativeVarRegisterRelease(pReNative, idxVar);
5749 return off;
5750}
5751
5752
5753
5754/*********************************************************************************************************************************
5755* EFLAGS *
5756*********************************************************************************************************************************/
5757
5758#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
5759# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
5760#else
5761# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
5762 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
5763
5764DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
5765{
5766 if (fEflOutput)
5767 {
5768 PVMCPUCC const pVCpu = pReNative->pVCpu;
5769# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5770 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5771 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5772 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5773# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5774 if (fEflOutput & (a_fEfl)) \
5775 { \
5776 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5777 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5778 else \
5779 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5780 } else do { } while (0)
5781# else
5782 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
5783 IEMLIVENESSBIT const LivenessClobbered =
5784 {
5785 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5786 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5787 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5788 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5789 };
5790 IEMLIVENESSBIT const LivenessDelayable =
5791 {
5792 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5793 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
5794 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5795 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
5796 };
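 /* Roughly: a flag that is written without ever being read or otherwise needed is skippable;
    one needed only for potential exceptions/calls is delayable; anything else must be computed. */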
5797# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5798 if (fEflOutput & (a_fEfl)) \
5799 { \
5800 if (LivenessClobbered.a_fLivenessMember) \
5801 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5802 else if (LivenessDelayable.a_fLivenessMember) \
5803 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5804 else \
5805 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5806 } else do { } while (0)
5807# endif
5808 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5809 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5810 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5811 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5812 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5813 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5814 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5815# undef CHECK_FLAG_AND_UPDATE_STATS
5816 }
5817 RT_NOREF(fEflInput);
5818}
5819#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5820
5821#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5822#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5823 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
5824
5825/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5826DECL_INLINE_THROW(uint32_t)
5827iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
5828 uint32_t fEflInput, uint32_t fEflOutput)
5829{
5830 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5831 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5832 RT_NOREF(fEflInput, fEflOutput);
5833
5834#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5835# ifdef VBOX_STRICT
5836 if ( pReNative->idxCurCall != 0
5837 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
5838 {
5839 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5840 uint32_t const fBoth = fEflInput | fEflOutput;
5841# define ASSERT_ONE_EFL(a_fEflConst, a_idxField) \
5842 AssertMsg( !(fBoth & (a_fEflConst)) \
5843 || (!(fEflInput & (a_fEflConst)) \
5844 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5845 : !(fEflOutput & (a_fEflConst)) \
5846 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5847 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5848 ("%s - %u\n", #a_fEflConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5849 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5850 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5851 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5852 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5853 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5854 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5855 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5856# undef ASSERT_ONE_EFL
5857 }
5858# endif
5859#endif
5860
5861 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5862
5863 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
5864 * the existing shadow copy. */
5865 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
5866 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5867 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5868 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5869 return off;
5870}
5871
5872
5873
5874/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5875 * start using it with custom native code emission (inlining assembly
5876 * instruction helpers). */
5877#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5878#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5879 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5880 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
5881
5882#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5883#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5884 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5885 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
5886
5887/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
5888DECL_INLINE_THROW(uint32_t)
5889iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
5890 bool fUpdateSkipping)
5891{
5892 RT_NOREF(fEflOutput);
5893 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
5894 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5895
5896#ifdef VBOX_STRICT
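 /* Strict sanity checks on the committed value: the always-one bit (bit 1) must be set and
    the reserved always-zero bits must be clear, otherwise the generated code hits a
    breakpoint (0x2001 / 0x2002). */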
5897 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
5898 uint32_t offFixup = off;
5899 off = iemNativeEmitJnzToFixed(pReNative, off, off);
5900 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
5901 iemNativeFixupFixedJump(pReNative, offFixup, off);
5902
5903 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
5904 offFixup = off;
5905 off = iemNativeEmitJzToFixed(pReNative, off, off);
5906 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
5907 iemNativeFixupFixedJump(pReNative, offFixup, off);
5908
5909 /** @todo validate that only bits in the fEflOutput mask changed. */
5910#endif
5911
5912#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5913 if (fUpdateSkipping)
5914 {
5915 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5916 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5917 else
5918 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5919 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5920 }
5921#else
5922 RT_NOREF_PV(fUpdateSkipping);
5923#endif
5924
5925 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5926 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
5927 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5928 return off;
5929}
5930
5931
5932typedef enum IEMNATIVEMITEFLOP
5933{
5934 kIemNativeEmitEflOp_Invalid = 0,
5935 kIemNativeEmitEflOp_Set,
5936 kIemNativeEmitEflOp_Clear,
5937 kIemNativeEmitEflOp_Flip
5938} IEMNATIVEMITEFLOP;
5939
5940#define IEM_MC_SET_EFL_BIT(a_fBit) \
5941 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Set);
5942
5943#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
5944 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Clear);
5945
5946#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
5947 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Flip);
5948
5949/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
5950DECL_INLINE_THROW(uint32_t)
5951iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflBit, IEMNATIVEMITEFLOP enmOp)
5952{
5953 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5954 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/);
5955
5956 switch (enmOp)
5957 {
5958 case kIemNativeEmitEflOp_Set:
5959 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5960 break;
5961 case kIemNativeEmitEflOp_Clear:
5962 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~fEflBit);
5963 break;
5964 case kIemNativeEmitEflOp_Flip:
5965 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
5966 break;
5967 default:
5968 AssertFailed();
5969 break;
5970 }
5971
5972 /** @todo No delayed writeback for EFLAGS right now. */
5973 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
5974
5975 /* Free but don't flush the EFLAGS register. */
5976 iemNativeRegFreeTmp(pReNative, idxEflReg);
5977
5978 return off;
5979}
5980
5981
5982/*********************************************************************************************************************************
5983* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
5984*********************************************************************************************************************************/
5985
5986#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
5987 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
5988
5989#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
5990 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
5991
5992#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
5993 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
5994
5995
5996/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
5997 * IEM_MC_FETCH_SREG_ZX_U64. */
5998DECL_INLINE_THROW(uint32_t)
5999iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
6000{
6001 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6002 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
6003 Assert(iSReg < X86_SREG_COUNT);
6004
6005 /*
6006 * For now, we will not create a shadow copy of a selector. The rationale
6007 * is that since we do not recompile the popping and loading of segment
6008 * registers and that the IEM_MC_FETCH_SREG_U* MCs are only used for
6009 * pushing and moving to registers, there is only a small chance that the
6010 * shadow copy will be accessed again before the register is reloaded. One
6011 * scenario would be nested calls in 16-bit code, but I doubt it's worth
6012 * the extra register pressure atm.
6013 *
6014 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
6015 * and iemNativeVarRegisterAcquire for a load scenario. We only got the
6016 * store scenario covered at present (r160730).
6017 */
6018 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6019 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
6020 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
6021 iemNativeVarRegisterRelease(pReNative, idxDstVar);
6022 return off;
6023}
6024
6025
6026
6027/*********************************************************************************************************************************
6028* Register references. *
6029*********************************************************************************************************************************/
6030
6031#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
6032 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
6033
6034#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
6035 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
6036
6037/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
6038DECL_INLINE_THROW(uint32_t)
6039iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
6040{
6041 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
6042 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6043 Assert(iGRegEx < 20);
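 /* iGRegEx 0..15 are the regular GPRs, 16..19 refer to AH, CH, DH and BH. */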
6044
6045 if (iGRegEx < 16)
6046 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6047 else
6048 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
6049
6050 /* If we've delayed writing back the register value, flush it now. */
6051 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6052
6053 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6054 if (!fConst)
6055 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
6056
6057 return off;
6058}
6059
6060#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
6061 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
6062
6063#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
6064 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
6065
6066#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
6067 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
6068
6069#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
6070 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
6071
6072#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
6073 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
6074
6075#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
6076 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
6077
6078#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
6079 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
6080
6081#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
6082 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
6083
6084#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
6085 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
6086
6087#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
6088 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
6089
6090/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
6091DECL_INLINE_THROW(uint32_t)
6092iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
6093{
6094 Assert(iGReg < 16);
6095 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
6096 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6097
6098 /* If we've delayed writing back the register value, flush it now. */
6099 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
6100
6101 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6102 if (!fConst)
6103 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
6104
6105 return off;
6106}
6107
6108
6109#undef IEM_MC_REF_EFLAGS /* should not be used. */
6110#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
6111 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
6112 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
6113
6114/** Handles IEM_MC_REF_EFLAGS. */
6115DECL_INLINE_THROW(uint32_t)
6116iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
6117{
6118 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
6119 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6120
6121#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6122 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
6123
6124 /* Updating the skipping according to the outputs is a little early, but
6125 we don't have any other hooks for references atm. */
6126 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6127 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6128 else if (fEflOutput & X86_EFL_STATUS_BITS)
6129 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
6130 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6131#else
6132 RT_NOREF(fEflInput, fEflOutput);
6133#endif
6134
6135 /* If we've delayed writing back the register value, flush it now. */
6136 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
6137
6138 /* If there is a shadow copy of guest EFLAGS, flush it now. */
6139 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
6140
6141 return off;
6142}
6143
6144
6145/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
6146 * different code from the threaded recompiler, maybe it would be helpful. For now
6147 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
6148#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
6149
6150
6151#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
6152 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
6153
6154#define IEM_MC_REF_XREG_XMM(a_puXmmDst, a_iXReg) \
6155 off = iemNativeEmitRefXregXxx(pReNative, off, a_puXmmDst, a_iXReg, false /*fConst*/)
6156
6157#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
6158 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
6159
6160#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
6161 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
6162
6163#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6164/* Just being paranoid here. */
6165# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
6166AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
6167AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
6168AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
6169AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
6170# endif
6171AssertCompileMemberOffset(X86XMMREG, au64, 0);
6172AssertCompileMemberOffset(X86XMMREG, au32, 0);
6173AssertCompileMemberOffset(X86XMMREG, ar64, 0);
6174AssertCompileMemberOffset(X86XMMREG, ar32, 0);
6175
6176# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
6177 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
6178# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
6179 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
6180# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
6181 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
6182# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
6183 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
6184#endif
6185
6186/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
6187DECL_INLINE_THROW(uint32_t)
6188iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
6189{
6190 Assert(iXReg < 16);
6191 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
6192 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6193
6194 /* If we've delayed writing back the register value, flush it now. */
6195 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
6196
6197#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6198 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6199 if (!fConst)
6200 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
6201#else
6202 RT_NOREF(fConst);
6203#endif
6204
6205 return off;
6206}
6207
6208
6209
6210/*********************************************************************************************************************************
6211* Effective Address Calculation *
6212*********************************************************************************************************************************/
6213#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
6214 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
6215
6216/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
6217 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
6218DECL_INLINE_THROW(uint32_t)
6219iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6220 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
6221{
6222 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6223
6224 /*
6225 * Handle the disp16 form with no registers first.
6226 *
6227 * Convert to an immediate value, as that'll delay the register allocation
6228 * and assignment till the memory access / call / whatever and we can use
6229 * a more appropriate register (or none at all).
6230 */
6231 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
6232 {
6233 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
6234 return off;
6235 }
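    /*
     * Illustrative example (assumed instruction, not generated code): for
     * 'add ax, [1234h]' the ModRM byte has mod=0 and rm=6, so u16Disp is
     * 0x1234 and the result variable simply becomes the constant 0x1234 here;
     * no host register is touched until the actual memory access needs it.
     */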
6236
6237    /* Determine the displacement. */
6238 uint16_t u16EffAddr;
6239 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6240 {
6241 case 0: u16EffAddr = 0; break;
6242 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
6243 case 2: u16EffAddr = u16Disp; break;
6244 default: AssertFailedStmt(u16EffAddr = 0);
6245 }
6246
6247 /* Determine the registers involved. */
6248 uint8_t idxGstRegBase;
6249 uint8_t idxGstRegIndex;
6250 switch (bRm & X86_MODRM_RM_MASK)
6251 {
6252 case 0:
6253 idxGstRegBase = X86_GREG_xBX;
6254 idxGstRegIndex = X86_GREG_xSI;
6255 break;
6256 case 1:
6257 idxGstRegBase = X86_GREG_xBX;
6258 idxGstRegIndex = X86_GREG_xDI;
6259 break;
6260 case 2:
6261 idxGstRegBase = X86_GREG_xBP;
6262 idxGstRegIndex = X86_GREG_xSI;
6263 break;
6264 case 3:
6265 idxGstRegBase = X86_GREG_xBP;
6266 idxGstRegIndex = X86_GREG_xDI;
6267 break;
6268 case 4:
6269 idxGstRegBase = X86_GREG_xSI;
6270 idxGstRegIndex = UINT8_MAX;
6271 break;
6272 case 5:
6273 idxGstRegBase = X86_GREG_xDI;
6274 idxGstRegIndex = UINT8_MAX;
6275 break;
6276 case 6:
6277 idxGstRegBase = X86_GREG_xBP;
6278 idxGstRegIndex = UINT8_MAX;
6279 break;
6280#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
6281 default:
6282#endif
6283 case 7:
6284 idxGstRegBase = X86_GREG_xBX;
6285 idxGstRegIndex = UINT8_MAX;
6286 break;
6287 }
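    /*
     * Worked example (illustrative): for 'mov al, [bp+si+10h]' the ModRM byte
     * has mod=1 and rm=2, so the two switches above yield u16EffAddr=0x10,
     * idxGstRegBase=xBP and idxGstRegIndex=xSI, and the code emitted below
     * calculates (uint16_t)(0x10 + BP + SI).
     */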
6288
6289 /*
6290 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
6291 */
6292 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6293 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6294 kIemNativeGstRegUse_ReadOnly);
6295 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
6296 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6297 kIemNativeGstRegUse_ReadOnly)
6298 : UINT8_MAX;
6299#ifdef RT_ARCH_AMD64
6300 if (idxRegIndex == UINT8_MAX)
6301 {
6302 if (u16EffAddr == 0)
6303 {
6304            /* movzx ret, base */
6305 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
6306 }
6307 else
6308 {
6309 /* lea ret32, [base64 + disp32] */
6310 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6311 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6312 if (idxRegRet >= 8 || idxRegBase >= 8)
6313 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6314 pbCodeBuf[off++] = 0x8d;
6315 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6316 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
6317 else
6318 {
6319 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
6320 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6321 }
6322 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6323 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6324 pbCodeBuf[off++] = 0;
6325 pbCodeBuf[off++] = 0;
6326 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6327
6328 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6329 }
6330 }
6331 else
6332 {
6333 /* lea ret32, [index64 + base64 (+ disp32)] */
6334 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6335 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6336 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6337 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6338 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6339 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6340 pbCodeBuf[off++] = 0x8d;
6341 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
6342 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6343 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
6344 if (bMod == X86_MOD_MEM4)
6345 {
6346 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6347 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6348 pbCodeBuf[off++] = 0;
6349 pbCodeBuf[off++] = 0;
6350 }
6351 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6352 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6353 }
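    /*
     * Byte level sketch of the simplest AMD64 branch above (the register
     * choice is an assumption, purely illustrative): with idxRegRet=eax,
     * idxRegBase=rbx and u16EffAddr=0x10 the 'lea ret32, [base64 + disp32]'
     * path emits
     *      8D 83 10 00 00 00       ; lea eax, [rbx + 0x10]
     * and iemNativeEmitClear16UpGpr then zero extends the low 16 bits, e.g. as
     *      0F B7 C0                ; movzx eax, ax
     */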
6354
6355#elif defined(RT_ARCH_ARM64)
6356 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6357 if (u16EffAddr == 0)
6358 {
6359 if (idxRegIndex == UINT8_MAX)
6360 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
6361 else
6362 {
6363 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
6364 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6365 }
6366 }
6367 else
6368 {
6369 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
6370 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
6371 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
6372 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6373 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
6374 else
6375 {
6376 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
6377 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6378 }
6379 if (idxRegIndex != UINT8_MAX)
6380 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
6381 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6382 }
6383
6384#else
6385# error "port me"
6386#endif
6387
6388 if (idxRegIndex != UINT8_MAX)
6389 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6390 iemNativeRegFreeTmp(pReNative, idxRegBase);
6391 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6392 return off;
6393}
6394
6395
6396#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
6397 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
6398
6399/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
6400 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
6401DECL_INLINE_THROW(uint32_t)
6402iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6403 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
6404{
6405 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6406
6407 /*
6408 * Handle the disp32 form with no registers first.
6409 *
6410 * Convert to an immediate value, as that'll delay the register allocation
6411 * and assignment till the memory access / call / whatever and we can use
6412 * a more appropriate register (or none at all).
6413 */
6414 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6415 {
6416 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
6417 return off;
6418 }
6419
6420    /* Calculate the fixed displacement (more on this below for the SIB.B=4 and SIB.B=5 cases). */
6421 uint32_t u32EffAddr = 0;
6422 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6423 {
6424 case 0: break;
6425 case 1: u32EffAddr = (int8_t)u32Disp; break;
6426 case 2: u32EffAddr = u32Disp; break;
6427 default: AssertFailed();
6428 }
6429
6430 /* Get the register (or SIB) value. */
6431 uint8_t idxGstRegBase = UINT8_MAX;
6432 uint8_t idxGstRegIndex = UINT8_MAX;
6433 uint8_t cShiftIndex = 0;
6434 switch (bRm & X86_MODRM_RM_MASK)
6435 {
6436 case 0: idxGstRegBase = X86_GREG_xAX; break;
6437 case 1: idxGstRegBase = X86_GREG_xCX; break;
6438 case 2: idxGstRegBase = X86_GREG_xDX; break;
6439 case 3: idxGstRegBase = X86_GREG_xBX; break;
6440 case 4: /* SIB */
6441 {
6442            /* index w/ scaling. */
6443 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6444 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6445 {
6446 case 0: idxGstRegIndex = X86_GREG_xAX; break;
6447 case 1: idxGstRegIndex = X86_GREG_xCX; break;
6448 case 2: idxGstRegIndex = X86_GREG_xDX; break;
6449 case 3: idxGstRegIndex = X86_GREG_xBX; break;
6450 case 4: cShiftIndex = 0; /*no index*/ break;
6451 case 5: idxGstRegIndex = X86_GREG_xBP; break;
6452 case 6: idxGstRegIndex = X86_GREG_xSI; break;
6453 case 7: idxGstRegIndex = X86_GREG_xDI; break;
6454 }
6455
6456 /* base */
6457 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
6458 {
6459 case 0: idxGstRegBase = X86_GREG_xAX; break;
6460 case 1: idxGstRegBase = X86_GREG_xCX; break;
6461 case 2: idxGstRegBase = X86_GREG_xDX; break;
6462 case 3: idxGstRegBase = X86_GREG_xBX; break;
6463 case 4:
6464 idxGstRegBase = X86_GREG_xSP;
6465 u32EffAddr += uSibAndRspOffset >> 8;
6466 break;
6467 case 5:
6468 if ((bRm & X86_MODRM_MOD_MASK) != 0)
6469 idxGstRegBase = X86_GREG_xBP;
6470 else
6471 {
6472 Assert(u32EffAddr == 0);
6473 u32EffAddr = u32Disp;
6474 }
6475 break;
6476 case 6: idxGstRegBase = X86_GREG_xSI; break;
6477 case 7: idxGstRegBase = X86_GREG_xDI; break;
6478 }
6479 break;
6480 }
6481 case 5: idxGstRegBase = X86_GREG_xBP; break;
6482 case 6: idxGstRegBase = X86_GREG_xSI; break;
6483 case 7: idxGstRegBase = X86_GREG_xDI; break;
6484 }
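    /*
     * Worked example (illustrative): for 'mov eax, [ebx+esi*4+10h]' the ModRM
     * byte has mod=1 and rm=4 (SIB), and the SIB byte encodes scale=2,
     * index=esi, base=ebx, so the decoding above ends with u32EffAddr=0x10,
     * cShiftIndex=2, idxGstRegIndex=xSI and idxGstRegBase=xBX.
     */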
6485
6486 /*
6487 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6488 * the start of the function.
6489 */
6490 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6491 {
6492 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
6493 return off;
6494 }
6495
6496 /*
6497 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6498 */
6499 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6500 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6501 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6502 kIemNativeGstRegUse_ReadOnly);
6503 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6504 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6505 kIemNativeGstRegUse_ReadOnly);
6506
6507 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6508 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6509 {
6510 idxRegBase = idxRegIndex;
6511 idxRegIndex = UINT8_MAX;
6512 }
6513
6514#ifdef RT_ARCH_AMD64
6515 if (idxRegIndex == UINT8_MAX)
6516 {
6517 if (u32EffAddr == 0)
6518 {
6519 /* mov ret, base */
6520 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6521 }
6522 else
6523 {
6524 /* lea ret32, [base64 + disp32] */
6525 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6526 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6527 if (idxRegRet >= 8 || idxRegBase >= 8)
6528 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6529 pbCodeBuf[off++] = 0x8d;
6530 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6531 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6532 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6533 else
6534 {
6535 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6536 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6537 }
6538 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6539 if (bMod == X86_MOD_MEM4)
6540 {
6541 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6542 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6543 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6544 }
6545 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6546 }
6547 }
6548 else
6549 {
6550 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6551 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6552 if (idxRegBase == UINT8_MAX)
6553 {
6554 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
6555 if (idxRegRet >= 8 || idxRegIndex >= 8)
6556 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6557 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6558 pbCodeBuf[off++] = 0x8d;
6559 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6560 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6561 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6562 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6563 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6564 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6565 }
6566 else
6567 {
6568 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6569 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6570 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6571 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6572 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6573 pbCodeBuf[off++] = 0x8d;
6574 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6575 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6576 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6577 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6578 if (bMod != X86_MOD_MEM0)
6579 {
6580 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6581 if (bMod == X86_MOD_MEM4)
6582 {
6583 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6584 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6585 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6586 }
6587 }
6588 }
6589 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6590 }
6591
6592#elif defined(RT_ARCH_ARM64)
6593 if (u32EffAddr == 0)
6594 {
6595 if (idxRegIndex == UINT8_MAX)
6596 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6597 else if (idxRegBase == UINT8_MAX)
6598 {
6599 if (cShiftIndex == 0)
6600 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
6601 else
6602 {
6603 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6604 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
6605 }
6606 }
6607 else
6608 {
6609 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6610 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6611 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6612 }
6613 }
6614 else
6615 {
6616 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
6617 {
6618 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6619 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
6620 }
6621 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
6622 {
6623 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6624 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6625 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
6626 }
6627 else
6628 {
6629 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
6630 if (idxRegBase != UINT8_MAX)
6631 {
6632 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6633 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6634 }
6635 }
6636 if (idxRegIndex != UINT8_MAX)
6637 {
6638 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6639 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6640 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6641 }
6642 }
6643
6644#else
6645# error "port me"
6646#endif
6647
6648 if (idxRegIndex != UINT8_MAX)
6649 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6650 if (idxRegBase != UINT8_MAX)
6651 iemNativeRegFreeTmp(pReNative, idxRegBase);
6652 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6653 return off;
6654}
6655
6656
6657#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6658 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6659 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6660
6661#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6662 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6663 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6664
6665#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6666 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6667 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
6668
6669/**
6670 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
6671 *
6672 * @returns New off.
6673 * @param pReNative         The native recompiler state.
6674 * @param off               The current code buffer offset.
6675 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
6676 * bit 4 to REX.X. The two bits are part of the
6677 * REG sub-field, which isn't needed in this
6678 * function.
6679 * @param uSibAndRspOffset Two parts:
6680 * - The first 8 bits make up the SIB byte.
6681 * - The next 8 bits are the fixed RSP/ESP offset
6682 * in case of a pop [xSP].
6683 * @param u32Disp The displacement byte/word/dword, if any.
6684 * @param cbInstr The size of the fully decoded instruction. Used
6685 * for RIP relative addressing.
6686 * @param idxVarRet The result variable number.
6687 * @param f64Bit Whether to use a 64-bit or 32-bit address size
6688 * when calculating the address.
6689 *
6690 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
6691 */
6692DECL_INLINE_THROW(uint32_t)
6693iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
6694 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
6695{
6696 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6697
6698 /*
6699 * Special case the rip + disp32 form first.
6700 */
6701 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6702 {
6703 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6704 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
6705 kIemNativeGstRegUse_ReadOnly);
6706 if (f64Bit)
6707 {
6708#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6709 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr + (int64_t)pReNative->Core.offPc;
6710#else
6711 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
6712#endif
6713#ifdef RT_ARCH_AMD64
6714 if ((int32_t)offFinalDisp == offFinalDisp)
6715 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
6716 else
6717 {
6718 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
6719 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
6720 }
6721#else
6722 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6723#endif
6724 }
6725 else
6726 {
6727# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6728 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr + (int32_t)pReNative->Core.offPc;
6729# else
6730 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr;
6731# endif
6732 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6733 }
6734 iemNativeRegFreeTmp(pReNative, idxRegPc);
6735 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6736 return off;
6737 }
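    /*
     * Illustrative example (assumed instruction, not generated code): for the
     * 7 byte 'mov rax, [rip+100h]' the effective address is the RIP of the
     * next instruction plus the displacement, i.e. guest RIP + 7 + 0x100.
     * With delayed PC updating enabled, pReNative->Core.offPc is folded into
     * offFinalDisp above instead of flushing the PC first.
     */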
6738
6739    /* Calculate the fixed displacement (more on this below for the SIB.B=4 and SIB.B=5 cases). */
6740 int64_t i64EffAddr = 0;
6741 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6742 {
6743 case 0: break;
6744 case 1: i64EffAddr = (int8_t)u32Disp; break;
6745 case 2: i64EffAddr = (int32_t)u32Disp; break;
6746 default: AssertFailed();
6747 }
6748
6749 /* Get the register (or SIB) value. */
6750 uint8_t idxGstRegBase = UINT8_MAX;
6751 uint8_t idxGstRegIndex = UINT8_MAX;
6752 uint8_t cShiftIndex = 0;
6753 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6754 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6755 else /* SIB: */
6756 {
6757        /* index w/ scaling. */
6758 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6759 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6760 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6761 if (idxGstRegIndex == 4)
6762 {
6763 /* no index */
6764 cShiftIndex = 0;
6765 idxGstRegIndex = UINT8_MAX;
6766 }
6767
6768 /* base */
6769 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6770 if (idxGstRegBase == 4)
6771 {
6772 /* pop [rsp] hack */
6773 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
6774 }
6775 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6776 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6777 {
6778 /* mod=0 and base=5 -> disp32, no base reg. */
6779 Assert(i64EffAddr == 0);
6780 i64EffAddr = (int32_t)u32Disp;
6781 idxGstRegBase = UINT8_MAX;
6782 }
6783 }
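    /*
     * Illustrative example of the uSibAndRspOffset packing (assumed values):
     * for 'pop qword [rsp]' the SIB byte 0x24 (index=none, base=rsp) sits in
     * bits 0..7 and the fixed stack adjustment of the pop (presumably 8 for a
     * qword) in bits 8..15, so the 'pop [rsp] hack' above adds 8 to i64EffAddr
     * to account for RSP already having been incremented by the pop.
     */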
6784
6785 /*
6786 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6787 * the start of the function.
6788 */
6789 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6790 {
6791 if (f64Bit)
6792 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6793 else
6794 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6795 return off;
6796 }
6797
6798 /*
6799 * Now emit code that calculates:
6800 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6801 * or if !f64Bit:
6802 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6803 */
6804 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6805 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6806 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6807 kIemNativeGstRegUse_ReadOnly);
6808 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6809 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6810 kIemNativeGstRegUse_ReadOnly);
6811
6812 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6813 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6814 {
6815 idxRegBase = idxRegIndex;
6816 idxRegIndex = UINT8_MAX;
6817 }
6818
6819#ifdef RT_ARCH_AMD64
6820 uint8_t bFinalAdj;
6821 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6822 bFinalAdj = 0; /* likely */
6823 else
6824 {
6825 /* pop [rsp] with a problematic disp32 value. Split out the
6826 RSP offset and add it separately afterwards (bFinalAdj). */
6827 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
6828 Assert(idxGstRegBase == X86_GREG_xSP);
6829 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6830 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6831 Assert(bFinalAdj != 0);
6832 i64EffAddr -= bFinalAdj;
6833 Assert((int32_t)i64EffAddr == i64EffAddr);
6834 }
6835 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6836//pReNative->pInstrBuf[off++] = 0xcc;
6837
6838 if (idxRegIndex == UINT8_MAX)
6839 {
6840 if (u32EffAddr == 0)
6841 {
6842 /* mov ret, base */
6843 if (f64Bit)
6844 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6845 else
6846 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6847 }
6848 else
6849 {
6850 /* lea ret, [base + disp32] */
6851 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6852 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6853 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
6854 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6855 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6856 | (f64Bit ? X86_OP_REX_W : 0);
6857 pbCodeBuf[off++] = 0x8d;
6858 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6859 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6860 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6861 else
6862 {
6863 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6864 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6865 }
6866 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6867 if (bMod == X86_MOD_MEM4)
6868 {
6869 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6870 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6871 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6872 }
6873 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6874 }
6875 }
6876 else
6877 {
6878 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6879 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6880 if (idxRegBase == UINT8_MAX)
6881 {
6882 /* lea ret, [(index64 << cShiftIndex) + disp32] */
6883 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
6884 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6885 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6886 | (f64Bit ? X86_OP_REX_W : 0);
6887 pbCodeBuf[off++] = 0x8d;
6888 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6889 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6890 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6891 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6892 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6893 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6894 }
6895 else
6896 {
6897 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6898 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6899 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6900 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6901 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6902 | (f64Bit ? X86_OP_REX_W : 0);
6903 pbCodeBuf[off++] = 0x8d;
6904 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6905 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6906 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6907 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6908 if (bMod != X86_MOD_MEM0)
6909 {
6910 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6911 if (bMod == X86_MOD_MEM4)
6912 {
6913 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6914 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6915 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6916 }
6917 }
6918 }
6919 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6920 }
6921
6922 if (!bFinalAdj)
6923 { /* likely */ }
6924 else
6925 {
6926 Assert(f64Bit);
6927 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
6928 }
6929
6930#elif defined(RT_ARCH_ARM64)
6931 if (i64EffAddr == 0)
6932 {
6933 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6934 if (idxRegIndex == UINT8_MAX)
6935 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
6936 else if (idxRegBase != UINT8_MAX)
6937 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6938 f64Bit, false /*fSetFlags*/, cShiftIndex);
6939 else
6940 {
6941 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
6942 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
6943 }
6944 }
6945 else
6946 {
6947 if (f64Bit)
6948 { /* likely */ }
6949 else
6950 i64EffAddr = (int32_t)i64EffAddr;
6951
6952 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
6953 {
6954 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6955 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
6956 }
6957 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
6958 {
6959 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6960 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
6961 }
6962 else
6963 {
6964 if (f64Bit)
6965 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
6966 else
6967 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
6968 if (idxRegBase != UINT8_MAX)
6969 {
6970 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6971 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
6972 }
6973 }
6974 if (idxRegIndex != UINT8_MAX)
6975 {
6976 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6977 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6978 f64Bit, false /*fSetFlags*/, cShiftIndex);
6979 }
6980 }
6981
6982#else
6983# error "port me"
6984#endif
6985
6986 if (idxRegIndex != UINT8_MAX)
6987 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6988 if (idxRegBase != UINT8_MAX)
6989 iemNativeRegFreeTmp(pReNative, idxRegBase);
6990 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6991 return off;
6992}
6993
6994
6995/*********************************************************************************************************************************
6996* Memory fetches and stores common *
6997*********************************************************************************************************************************/
6998
6999typedef enum IEMNATIVEMITMEMOP
7000{
7001 kIemNativeEmitMemOp_Store = 0,
7002 kIemNativeEmitMemOp_Fetch,
7003 kIemNativeEmitMemOp_Fetch_Zx_U16,
7004 kIemNativeEmitMemOp_Fetch_Zx_U32,
7005 kIemNativeEmitMemOp_Fetch_Zx_U64,
7006 kIemNativeEmitMemOp_Fetch_Sx_U16,
7007 kIemNativeEmitMemOp_Fetch_Sx_U32,
7008 kIemNativeEmitMemOp_Fetch_Sx_U64
7009} IEMNATIVEMITMEMOP;
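/*
 * For example (taken from the wrapper macros further down), IEM_MC_FETCH_MEM_U8_SX_U32
 * maps to kIemNativeEmitMemOp_Fetch_Sx_U32 with cbMem=1 (an 8-bit load sign-extended to
 * 32 bits), while the IEM_MC_STORE_MEM_* macros map to kIemNativeEmitMemOp_Store.
 */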
7010
7011/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
7012 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
7013 * (with iSegReg = UINT8_MAX). */
7014DECL_INLINE_THROW(uint32_t)
7015iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
7016 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAlignMaskAndCtl, IEMNATIVEMITMEMOP enmOp,
7017 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
7018{
7019 /*
7020 * Assert sanity.
7021 */
7022 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7023 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7024 Assert( enmOp != kIemNativeEmitMemOp_Store
7025 || pVarValue->enmKind == kIemNativeVarKind_Immediate
7026 || pVarValue->enmKind == kIemNativeVarKind_Stack);
7027 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7028 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
7029 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
7030 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
7031 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7032 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
7033#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7034 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
7035 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
7036#else
7037 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
7038#endif
7039 Assert(!(fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)));
7040 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7041#ifdef VBOX_STRICT
7042 if (iSegReg == UINT8_MAX)
7043 {
7044 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7045 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7046 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7047 switch (cbMem)
7048 {
7049 case 1:
7050 Assert( pfnFunction
7051 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
7052 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7053 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7054 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7055 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7056 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
7057 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
7058 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
7059 : UINT64_C(0xc000b000a0009000) ));
7060 Assert(!fAlignMaskAndCtl);
7061 break;
7062 case 2:
7063 Assert( pfnFunction
7064 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
7065 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7066 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7067 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7068 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
7069 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
7070 : UINT64_C(0xc000b000a0009000) ));
7071 Assert(fAlignMaskAndCtl <= 1);
7072 break;
7073 case 4:
7074 Assert( pfnFunction
7075 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
7076 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7077 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7078 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
7079 : UINT64_C(0xc000b000a0009000) ));
7080 Assert(fAlignMaskAndCtl <= 3);
7081 break;
7082 case 8:
7083 Assert( pfnFunction
7084 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
7085 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
7086 : UINT64_C(0xc000b000a0009000) ));
7087 Assert(fAlignMaskAndCtl <= 7);
7088 break;
7089#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7090 case sizeof(RTUINT128U):
7091 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7092 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
7093 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7094 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
7095 || ( enmOp == kIemNativeEmitMemOp_Store
7096 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7097 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
7098 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7099 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7100 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
7101 : fAlignMaskAndCtl <= 15);
7102 break;
7103 case sizeof(RTUINT256U):
7104 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7105 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
7106 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
7107 || ( enmOp == kIemNativeEmitMemOp_Store
7108 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
7109 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
7110 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx
7111 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx
7112 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
7113 : fAlignMaskAndCtl <= 31);
7114 break;
7115#endif
7116 }
7117 }
7118 else
7119 {
7120 Assert(iSegReg < 6);
7121 switch (cbMem)
7122 {
7123 case 1:
7124 Assert( pfnFunction
7125 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
7126 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
7127 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7128 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7129 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7130 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
7131 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
7132 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
7133 : UINT64_C(0xc000b000a0009000) ));
7134 Assert(!fAlignMaskAndCtl);
7135 break;
7136 case 2:
7137 Assert( pfnFunction
7138 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
7139 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
7140 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7141 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7142 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
7143 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
7144 : UINT64_C(0xc000b000a0009000) ));
7145 Assert(fAlignMaskAndCtl <= 1);
7146 break;
7147 case 4:
7148 Assert( pfnFunction
7149 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
7150 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
7151 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
7152 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
7153 : UINT64_C(0xc000b000a0009000) ));
7154 Assert(fAlignMaskAndCtl <= 3);
7155 break;
7156 case 8:
7157 Assert( pfnFunction
7158 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
7159 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
7160 : UINT64_C(0xc000b000a0009000) ));
7161 Assert(fAlignMaskAndCtl <= 7);
7162 break;
7163#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7164 case sizeof(RTUINT128U):
7165 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7166 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
7167 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7168 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
7169 || ( enmOp == kIemNativeEmitMemOp_Store
7170 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7171 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
7172 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7173 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7174 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
7175 : fAlignMaskAndCtl <= 15);
7176 break;
7177 case sizeof(RTUINT256U):
7178 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7179 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
7180 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
7181 || ( enmOp == kIemNativeEmitMemOp_Store
7182 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
7183 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
7184 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx
7185 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx
7186 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
7187 : fAlignMaskAndCtl <= 31);
7188 break;
7189#endif
7190 }
7191 }
7192#endif
7193
7194#ifdef VBOX_STRICT
7195 /*
7196 * Check that the fExec flags we've got make sense.
7197 */
7198 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7199#endif
7200
7201 /*
7202 * To keep things simple we have to commit any pending writes first as we
7203 * may end up making calls.
7204 */
7205 /** @todo we could postpone this till we make the call and reload the
7206 * registers after returning from the call. Not sure if that's sensible or
7207 * not, though. */
7208#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7209 off = iemNativeRegFlushPendingWrites(pReNative, off);
7210#else
7211 /* The program counter is treated differently for now. */
7212 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
7213#endif
7214
7215#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7216 /*
7217 * Move/spill/flush stuff out of call-volatile registers.
7218 * This is the easy way out. We could contain this to the tlb-miss branch
7219 * by saving and restoring active stuff here.
7220 */
7221 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7222#endif
7223
7224 /*
7225 * Define labels and allocate the result register (trying for the return
7226 * register if we can).
7227 */
7228 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7229#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7230 uint8_t idxRegValueFetch = UINT8_MAX;
7231
7232 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7233 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7234 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
7235 else
7236 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7237 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7238 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7239 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7240#else
7241 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7242 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7243 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7244 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7245#endif
7246 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
7247
7248#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7249 uint8_t idxRegValueStore = UINT8_MAX;
7250
7251 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7252 idxRegValueStore = !TlbState.fSkip
7253 && enmOp == kIemNativeEmitMemOp_Store
7254 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7255 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7256 : UINT8_MAX;
7257 else
7258 idxRegValueStore = !TlbState.fSkip
7259 && enmOp == kIemNativeEmitMemOp_Store
7260 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7261 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7262 : UINT8_MAX;
7263
7264#else
7265 uint8_t const idxRegValueStore = !TlbState.fSkip
7266 && enmOp == kIemNativeEmitMemOp_Store
7267 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7268 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7269 : UINT8_MAX;
7270#endif
7271 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7272 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7273 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7274 : UINT32_MAX;
7275
7276 /*
7277 * Jump to the TLB lookup code.
7278 */
7279 if (!TlbState.fSkip)
7280 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
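    /*
     * Rough sketch of the code layout emitted here (labels are bound later;
     * this is a simplification, not the exact instruction sequence):
     *          jmp     TlbLookup           ; the jump just emitted above
     *      TlbMiss:                        ; slow path, emitted inline below
     *          call    pfnFunction         ; iemNativeHlpMem[Flat]{Fetch,Store}DataUxx
     *          jmp     TlbDone
     *      TlbLookup:                      ; emitted further down
     *          ...                         ; TLB probe, jumps back to TlbMiss on a miss
     *          mov/etc ...                 ; direct guest memory access on a hit
     *      TlbDone:
     */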
7281
7282 /*
7283 * TlbMiss:
7284 *
7285 * Call helper to do the fetching.
7286 * We flush all guest register shadow copies here.
7287 */
7288 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
7289
7290#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7291 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7292#else
7293 RT_NOREF(idxInstr);
7294#endif
7295
7296#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7297 if (pReNative->Core.offPc)
7298 {
7299 /*
7300 * Update the program counter but restore it at the end of the TlbMiss branch.
7301         * This should allow delaying more program counter updates for the TlbLookup and hit paths,
7302         * which are hopefully much more frequent, reducing the number of memory accesses.
7303 */
7304 /* Allocate a temporary PC register. */
7305/** @todo r=bird: This would technically need to be done up front as it's a register allocation. */
7306 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7307 kIemNativeGstRegUse_ForUpdate);
7308
7309 /* Perform the addition and store the result. */
7310 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7311 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7312# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
7313 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
7314# endif
7315
7316 /* Free and flush the PC register. */
7317 iemNativeRegFreeTmp(pReNative, idxPcReg);
7318 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7319 }
7320#endif
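    /*
     * E.g. (illustrative) with pReNative->Core.offPc == 5 the block above
     * temporarily stores guest RIP + 5 before the helper call, and the
     * matching block after the call subtracts 5 again, so only the rarely
     * taken TlbMiss path pays for the delayed PC write-back.
     */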
7321
7322#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7323 /* Save variables in volatile registers. */
7324 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7325 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
7326 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
7327 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7328#endif
7329
7330 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
7331 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
7332#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7333 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7334 {
7335 /*
7336 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
7337 *
7338     * @note There is a host register assigned to the variable for the TlbLookup case above
7339     * which must not be freed here, or the value loaded into that register will not be synced
7340     * further down the road because the variable would no longer know it had a register assigned.
7341 *
7342 * @note For loads it is not required to sync what is in the assigned register with the stack slot
7343 * as it will be overwritten anyway.
7344 */
7345 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7346 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
7347 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
7348 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7349 }
7350 else
7351#endif
7352 if (enmOp == kIemNativeEmitMemOp_Store)
7353 {
7354 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7355 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
7356#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7357 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7358#else
7359 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
7360 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7361#endif
7362 }
7363
7364 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
7365 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
7366#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7367 fVolGregMask);
7368#else
7369 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
7370#endif
7371
7372 if (iSegReg != UINT8_MAX)
7373 {
7374 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
7375 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
7376 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
7377 }
7378
7379 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7380 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7381
7382 /* Done setting up parameters, make the call. */
7383 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7384
7385 /*
7386 * Put the result in the right register if this is a fetch.
7387 */
7388 if (enmOp != kIemNativeEmitMemOp_Store)
7389 {
7390#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7391 if ( cbMem == sizeof(RTUINT128U)
7392 || cbMem == sizeof(RTUINT256U))
7393 {
7394 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
7395
7396 /* Sync the value on the stack with the host register assigned to the variable. */
7397 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
7398 }
7399 else
7400#endif
7401 {
7402 Assert(idxRegValueFetch == pVarValue->idxReg);
7403 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
7404 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
7405 }
7406 }
7407
7408#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7409 /* Restore variables and guest shadow registers to volatile registers. */
7410 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7411 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7412#endif
7413
7414#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7415 if (pReNative->Core.offPc)
7416 {
7417 /*
7418 * Time to restore the program counter to its original value.
7419 */
7420 /* Allocate a temporary PC register. */
7421 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7422 kIemNativeGstRegUse_ForUpdate);
7423
7424 /* Restore the original value. */
7425 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7426 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7427
7428 /* Free and flush the PC register. */
7429 iemNativeRegFreeTmp(pReNative, idxPcReg);
7430 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7431 }
7432#endif
7433
7434#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7435 if (!TlbState.fSkip)
7436 {
7437 /* end of TlbMiss - Jump to the done label. */
7438 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7439 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7440
7441 /*
7442 * TlbLookup:
7443 */
7444 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl,
7445 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
7446 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
7447
7448 /*
7449 * Emit code to do the actual storing / fetching.
7450 */
7451 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7452# ifdef IEM_WITH_TLB_STATISTICS
7453        off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7454                                                  enmOp == kIemNativeEmitMemOp_Store
7455                                                  ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
7456                                                  : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
7457# endif
7458 switch (enmOp)
7459 {
7460 case kIemNativeEmitMemOp_Store:
7461 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
7462 {
7463 switch (cbMem)
7464 {
7465 case 1:
7466 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7467 break;
7468 case 2:
7469 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7470 break;
7471 case 4:
7472 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7473 break;
7474 case 8:
7475 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7476 break;
7477#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7478 case sizeof(RTUINT128U):
7479 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7480 break;
7481 case sizeof(RTUINT256U):
7482 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7483 break;
7484#endif
7485 default:
7486 AssertFailed();
7487 }
7488 }
7489 else
7490 {
7491 switch (cbMem)
7492 {
7493 case 1:
7494 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
7495 idxRegMemResult, TlbState.idxReg1);
7496 break;
7497 case 2:
7498 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7499 idxRegMemResult, TlbState.idxReg1);
7500 break;
7501 case 4:
7502 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7503 idxRegMemResult, TlbState.idxReg1);
7504 break;
7505 case 8:
7506 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
7507 idxRegMemResult, TlbState.idxReg1);
7508 break;
7509 default:
7510 AssertFailed();
7511 }
7512 }
7513 break;
7514
7515 case kIemNativeEmitMemOp_Fetch:
7516 case kIemNativeEmitMemOp_Fetch_Zx_U16:
7517 case kIemNativeEmitMemOp_Fetch_Zx_U32:
7518 case kIemNativeEmitMemOp_Fetch_Zx_U64:
7519 switch (cbMem)
7520 {
7521 case 1:
7522 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7523 break;
7524 case 2:
7525 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7526 break;
7527 case 4:
7528 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7529 break;
7530 case 8:
7531 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7532 break;
7533#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7534 case sizeof(RTUINT128U):
7535 /*
7536 * No need to sync back the register with the stack, this is done by the generic variable handling
7537 * code if there is a register assigned to a variable and the stack must be accessed.
7538 */
7539 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7540 break;
7541 case sizeof(RTUINT256U):
7542 /*
7543 * No need to sync back the register with the stack, this is done by the generic variable handling
7544 * code if there is a register assigned to a variable and the stack must be accessed.
7545 */
7546 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7547 break;
7548#endif
7549 default:
7550 AssertFailed();
7551 }
7552 break;
7553
7554 case kIemNativeEmitMemOp_Fetch_Sx_U16:
7555 Assert(cbMem == 1);
7556 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7557 break;
7558
7559 case kIemNativeEmitMemOp_Fetch_Sx_U32:
7560 Assert(cbMem == 1 || cbMem == 2);
7561 if (cbMem == 1)
7562 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7563 else
7564 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7565 break;
7566
7567 case kIemNativeEmitMemOp_Fetch_Sx_U64:
7568 switch (cbMem)
7569 {
7570 case 1:
7571 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7572 break;
7573 case 2:
7574 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7575 break;
7576 case 4:
7577 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7578 break;
7579 default:
7580 AssertFailed();
7581 }
7582 break;
7583
7584 default:
7585 AssertFailed();
7586 }
7587
7588 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7589
7590 /*
7591 * TlbDone:
7592 */
7593 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7594
7595 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7596
7597# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7598 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7599 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7600# endif
7601 }
7602#else
7603 RT_NOREF(fAlignMaskAndCtl, idxLabelTlbMiss);
7604#endif
7605
7606 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
7607 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7608 return off;
7609}
7610
7611
7612
7613/*********************************************************************************************************************************
7614* Memory fetches (IEM_MEM_FETCH_XXX). *
7615*********************************************************************************************************************************/
7616
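/*
 * Each IEM_MC_FETCH_MEM_xxx / IEM_MC_FETCH_MEM_FLAT_xxx variant below is a thin wrapper around
 * iemNativeEmitMemFetchStoreDataCommon, passing the element size, the alignment mask (and control flags),
 * the fetch operation kind (plain or zero-/sign-extending) and the TLB-miss helper for that width.  The
 * FLAT variants pass UINT8_MAX as the segment register index to request flat addressing.
 *
 * For example, IEM_MC_FETCH_MEM_U16 below passes cbMem = sizeof(uint16_t), an alignment mask of 1,
 * kIemNativeEmitMemOp_Fetch and iemNativeHlpMemFetchDataU16 as the fallback helper.
 */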
7617/* 8-bit segmented: */
7618#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
7619 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
7620 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7621 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7622
7623#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7624 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7625 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7626 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7627
7628#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7629 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7630 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7631 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7632
7633#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7634 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7635 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7636 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7637
7638#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7639 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7640 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7641 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7642
7643#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7644 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7645 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7646 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7647
7648#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7649 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7650 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7651 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7652
7653/* 16-bit segmented: */
7654#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7655 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7656 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7657 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7658
7659#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7660 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7661 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7662 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7663
7664#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7665 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7666 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7667 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7668
7669#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7670 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7671 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7672 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7673
7674#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7675 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7676 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7677 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7678
7679#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7680 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7681 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7682 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7683
7684
7685/* 32-bit segmented: */
7686#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7687 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7688 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7689 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7690
7691#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7692 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7693 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7694 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7695
7696#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7697 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7698 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7699 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7700
7701#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7702 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7703 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7704 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7705
7706#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
7707 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
7708 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7709 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7710
7711#define IEM_MC_FETCH_MEM_I16_DISP(a_i16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7712 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
7713 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7714 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7715
7716#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
7717 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
7718 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7719 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7720
7721#define IEM_MC_FETCH_MEM_I32_DISP(a_i32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7722 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
7723 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7724 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7725
7726#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
7727 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
7728 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7729 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7730
7731AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
7732#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
7733 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
7734 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7735 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7736
7737
7738/* 64-bit segmented: */
7739#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7740 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7741 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7742 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7743
7744AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
7745#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
7746 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
7747 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7748 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7749
7750
7751/* 8-bit flat: */
7752#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
7753 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
7754 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7755 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7756
7757#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
7758 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7759 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7760 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7761
7762#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
7763 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7764 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7765 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7766
7767#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
7768 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7769 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7770 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7771
7772#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
7773 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7774 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7775 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7776
7777#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
7778 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7779 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7780 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7781
7782#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7783 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7784 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7785 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7786
7787
7788/* 16-bit flat: */
7789#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7790 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7791 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7792 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7793
7794#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7795 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7796 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7797 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7798
7799#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
7800 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7801 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7802 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7803
7804#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
7805 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7806 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7807 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7808
7809#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
7810 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7811 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7812 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7813
7814#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
7815 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7816 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7817 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7818
7819/* 32-bit flat: */
7820#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7821 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7822 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7823 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7824
7825#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
7826 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7827 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7828 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7829
7830#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
7831 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7832 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7833 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7834
7835#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
7836 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7837 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7838 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7839
7840#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
7841 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7842 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7843 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7844
7845#define IEM_MC_FETCH_MEM_FLAT_I16_DISP(a_i16Dst, a_GCPtrMem, a_offDisp) \
7846 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7847 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7848 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7849
7850#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
7851 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7852 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7853 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7854
7855#define IEM_MC_FETCH_MEM_FLAT_I32_DISP(a_i32Dst, a_GCPtrMem, a_offDisp) \
7856 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7857 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7858 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7859
7860#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
7861 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
7862 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7863 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7864
7865#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
7866 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
7867 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7868 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7869
7870
7871/* 64-bit flat: */
7872#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7873 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7874 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7875 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7876
7877#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
7878 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
7879 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7880 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7881
7882#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
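/*
 * Note: for the SIMD variants the fAlignMaskAndCtl argument combines the natural alignment mask
 * (size - 1) in the low bits with control flags; the _ALIGN_SSE/_ALIGN_AVX macros add
 * IEM_MEMMAP_F_ALIGN_GP (and IEM_MEMMAP_F_ALIGN_SSE) to request the stricter SSE/AVX style
 * alignment checking (presumably #GP(0) on misalignment rather than #AC), while the NO_AC
 * variants keep just the mask.
 */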
7883/* 128-bit segmented: */
7884#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
7885 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7886 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7887 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
7888
7889#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
7890 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7891 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7892 kIemNativeEmitMemOp_Fetch, \
7893 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7894
7895AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7896#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
7897 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, sizeof(X86XMMREG), \
7898 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7899 kIemNativeEmitMemOp_Fetch, \
7900 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7901
7902#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7903 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7904 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7905 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7906
7907#define IEM_MC_FETCH_MEM_XMM_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7908 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7909 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7910 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7911
7912
7913/* 128-bit flat: */
7914#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
7915 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7916 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7917 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
7918
7919#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
7920 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7921 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7922 kIemNativeEmitMemOp_Fetch, \
7923 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7924
7925#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
7926 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, sizeof(X86XMMREG), \
7927 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7928 kIemNativeEmitMemOp_Fetch, \
7929 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7930
7931#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
7932 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7933 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7934 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7935
7936#define IEM_MC_FETCH_MEM_FLAT_XMM_NO_AC(a_uXmmDst, a_GCPtrMem) \
7937 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
7938 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7939 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7940
7941/* 256-bit segmented: */
7942#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
7943 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7944 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7945 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7946
7947#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7948 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7949 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7950 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7951
7952#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
7953 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
7954 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7955 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7956
7957#define IEM_MC_FETCH_MEM_YMM_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7958 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7959 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7960 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7961
7962
7963/* 256-bit flat: */
7964#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
7965 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7966 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7967 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7968
7969#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
7970 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
7971 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7972 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7973
7974#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
7975 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
7976 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7977 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7978
7979#define IEM_MC_FETCH_MEM_FLAT_YMM_NO_AC(a_uYmmDst, a_GCPtrMem) \
7980 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uYmmDst, UINT8_MAX, a_GCPtrMem, \
7981 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7982 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
7983
7984#endif
7985
7986
7987/*********************************************************************************************************************************
7988* Memory stores (IEM_MEM_STORE_XXX). *
7989*********************************************************************************************************************************/
7990
7991#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
7992 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
7993 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
7994 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
7995
7996#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
7997 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
7998 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
7999 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8000
8001#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
8002 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
8003 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
8004 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8005
8006#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
8007 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
8008 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
8009 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8010
8011
8012#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
8013 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
8014 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
8015 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8016
8017#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
8018 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
8019 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
8020 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8021
8022#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
8023 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
8024 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
8025 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8026
8027#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
8028 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
8029 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
8030 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8031
8032
8033#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
8034 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8035 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8036
8037#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
8038 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8039 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8040
8041#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
8042 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8043 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8044
8045#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
8046 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8047 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8048
8049
8050#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
8051 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8052 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8053
8054#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
8055 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8056 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8057
8058#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
8059 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8060 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8061
8062#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
8063 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8064 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8065
8066/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
8067 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
8068DECL_INLINE_THROW(uint32_t)
8069iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
8070 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
8071{
8072 /*
8073 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
8074 * to do the grunt work.
8075 */
8076 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
8077 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
8078 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
8079 pfnFunction, idxInstr);
8080 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
8081 return off;
8082}
8083
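/*
 * Illustrative flow (not additional emitted code): IEM_MC_STORE_MEM_U16_CONST(X86_SREG_ES, GCPtrMem, 0x1234)
 * ends up here with cbMem = 2, allocates a 2-byte immediate variable holding 0x1234, runs it through the
 * common emitter as an ordinary store (letting the TLB-hit path use the store-immediate emitters), and then
 * frees the temporary variable again.  The argument values are only examples.
 */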
8084
8085#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8086# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
8087 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8088 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8089 kIemNativeEmitMemOp_Store, \
8090 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
8091
8092# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
8093 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
8094 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
8095 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
8096
8097# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
8098 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
8099 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
8100 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
8101
8102# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
8103 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
8104 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
8105 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8106
8107
8108# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
8109 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8110 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8111 kIemNativeEmitMemOp_Store, \
8112 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
8113
8114# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
8115 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
8116 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
8117 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
8118
8119# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
8120 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
8121 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
8122 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
8123
8124# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
8125 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
8126 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
8127 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8128#endif
8129
8130
8131
8132/*********************************************************************************************************************************
8133* Stack Accesses. *
8134*********************************************************************************************************************************/
8135/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
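/*
 * The cBitsVarAndFlat parameter packs three values into a single uint32_t:
 *   - byte 0: width of the value being pushed/popped, in bits (16/32/64),
 *   - byte 1: width of a flat stack pointer in bits (0 = not flat, else 32 or 64),
 *   - byte 2: non-zero when the value is a segment register (PUSH Sreg),
 *   - byte 3: currently always zero.
 * For example, RT_MAKE_U32_FROM_U8(16, 64, 0, 0) is a 16-bit push on a flat 64-bit stack, while
 * RT_MAKE_U32_FROM_U8(32, 0, 1, 0) is a 32-bit segment register push with a non-flat stack.
 */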
8136#define IEM_MC_PUSH_U16(a_u16Value) \
8137 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
8138 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
8139#define IEM_MC_PUSH_U32(a_u32Value) \
8140 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
8141 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
8142#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
8143 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
8144 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
8145#define IEM_MC_PUSH_U64(a_u64Value) \
8146 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
8147 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
8148
8149#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
8150 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
8151 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8152#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
8153 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
8154 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
8155#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
8156 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
8157 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
8158
8159#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
8160 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
8161 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8162#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
8163 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
8164 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
8165
8166
8167/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
8168DECL_INLINE_THROW(uint32_t)
8169iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
8170 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
8171{
8172 /*
8173 * Assert sanity.
8174 */
8175 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
8176 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
8177#ifdef VBOX_STRICT
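    /* Sanity check that the helper matches the variant; the 0xc000b000a0009000 fallback below is
       presumably just a poison value that never matches a real helper address, so the assertion
       fails for any unhandled combination. */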
8178 if (RT_BYTE2(cBitsVarAndFlat) != 0)
8179 {
8180 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8181 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8182 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8183 Assert( pfnFunction
8184 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8185 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
8186 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
8187 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8188 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
8189 : UINT64_C(0xc000b000a0009000) ));
8190 }
8191 else
8192 Assert( pfnFunction
8193 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
8194 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
8195 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
8196 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
8197 : UINT64_C(0xc000b000a0009000) ));
8198#endif
8199
8200#ifdef VBOX_STRICT
8201 /*
8202 * Check that the fExec flags we've got make sense.
8203 */
8204 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8205#endif
8206
8207 /*
8208 * To keep things simple we have to commit any pending writes first as we
8209 * may end up making calls.
8210 */
8211 /** @todo we could postpone this till we make the call and reload the
8212 * registers after returning from the call. Not sure if that's sensible or
8213 * not, though. */
8214 off = iemNativeRegFlushPendingWrites(pReNative, off);
8215
8216 /*
8217 * First we calculate the new RSP and the effective stack pointer value.
8218 * For 64-bit mode and flat 32-bit these two are the same.
8219 * (Code structure is very similar to that of PUSH)
8220 */
8221 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
8222 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
8223 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
8224 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
8225 ? cbMem : sizeof(uint16_t);
8226 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
8227 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8228 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8229 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8230 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8231 if (cBitsFlat != 0)
8232 {
8233 Assert(idxRegEffSp == idxRegRsp);
8234 Assert(cBitsFlat == 32 || cBitsFlat == 64);
8235 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8236 if (cBitsFlat == 64)
8237 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
8238 else
8239 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
8240 }
8241 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8242 {
8243 Assert(idxRegEffSp != idxRegRsp);
8244 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8245 kIemNativeGstRegUse_ReadOnly);
8246#ifdef RT_ARCH_AMD64
8247 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8248#else
8249 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8250#endif
8251 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8252 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8253 offFixupJumpToUseOtherBitSp = off;
8254 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8255 {
8256 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8257 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8258 }
8259 else
8260 {
8261 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8262 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8263 }
8264 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8265 }
8266 /* SpUpdateEnd: */
8267 uint32_t const offLabelSpUpdateEnd = off;
8268
8269 /*
8270 * Okay, now prepare for the TLB lookup and jump to the TlbLookup code (or
8271 * straight to TlbMiss if we're skipping the lookup).
8272 */
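    /*
     * Rough layout of the code emitted from here on (when the lookup isn't skipped):
     *      jmp     TlbLookup
     *   UseOtherBitSp:    ; SP update for the stack width not handled above, then jmp back to
     *                     ; SpUpdateEnd above, which falls through to the jmp TlbLookup.
     *   TlbMiss:          ; save volatiles, load args, call pfnFunction, restore, jmp TlbDone.
     *   TlbLookup:        ; inline TLB lookup, then the actual store to the resulting address.
     *   TlbDone:          ; free temps and commit the new RSP.
     */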
8273 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8274 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
8275 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8276 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8277 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8278 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8279 : UINT32_MAX;
8280 uint8_t const idxRegValue = !TlbState.fSkip
8281 && pVarValue->enmKind != kIemNativeVarKind_Immediate
8282 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
8283 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
8284 : UINT8_MAX;
8285 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
8286
8287
8288 if (!TlbState.fSkip)
8289 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8290 else
8291 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8292
8293 /*
8294 * Use16BitSp / Use32BitSp: update SP using the width the main path above didn't handle.
8295 */
8296 if (cBitsFlat == 0)
8297 {
8298#ifdef RT_ARCH_AMD64
8299 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8300#else
8301 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8302#endif
8303 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8304 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8305 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8306 else
8307 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8308 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8309 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8310 }
8311
8312 /*
8313 * TlbMiss:
8314 *
8315 * Call helper to do the pushing.
8316 */
8317 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8318
8319#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8320 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8321#else
8322 RT_NOREF(idxInstr);
8323#endif
8324
8325 /* Save variables in volatile registers. */
8326 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8327 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8328 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
8329 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
8330 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8331
8332 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
8333 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
8334 {
8335 /* Swap them using ARG0 as temp register: */
8336 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
8337 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
8338 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
8339 }
8340 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
8341 {
8342 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
8343 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
8344 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8345
8346 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
8347 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8348 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8349 }
8350 else
8351 {
8352 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
8353 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8354
8355 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
8356 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
8357 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
8358 }
8359
8360 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8361 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8362
8363 /* Done setting up parameters, make the call. */
8364 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8365
8366 /* Restore variables and guest shadow registers to volatile registers. */
8367 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8368 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8369
8370#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8371 if (!TlbState.fSkip)
8372 {
8373 /* end of TlbMiss - Jump to the done label. */
8374 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8375 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8376
8377 /*
8378 * TlbLookup:
8379 */
8380 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
8381 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8382
8383 /*
8384 * Emit code to do the actual storing / fetching.
8385 */
8386 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
8387# ifdef IEM_WITH_TLB_STATISTICS
8388 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8389 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8390# endif
8391 if (idxRegValue != UINT8_MAX)
8392 {
8393 switch (cbMemAccess)
8394 {
8395 case 2:
8396 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8397 break;
8398 case 4:
8399 if (!fIsIntelSeg)
8400 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8401 else
8402 {
8403 /* Intel real mode segment push: the 10890XE adds the 2nd half of EFLAGS to a
8404 PUSH FS in real mode, so we have to try to emulate that here.
8405 We borrow the now unused idxReg1 from the TLB lookup code for this. */
8406 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
8407 kIemNativeGstReg_EFlags);
8408 if (idxRegEfl != UINT8_MAX)
8409 {
8410#ifdef RT_ARCH_AMD64
8411 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
8412 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8413 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8414#else
8415 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
8416 off, TlbState.idxReg1, idxRegEfl,
8417 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8418#endif
8419 iemNativeRegFreeTmp(pReNative, idxRegEfl);
8420 }
8421 else
8422 {
8423 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
8424 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
8425 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8426 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8427 }
8428 /* ASSUMES the upper half of idxRegValue is ZERO. */
8429 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
8430 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
8431 }
8432 break;
8433 case 8:
8434 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8435 break;
8436 default:
8437 AssertFailed();
8438 }
8439 }
8440 else
8441 {
8442 switch (cbMemAccess)
8443 {
8444 case 2:
8445 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
8446 idxRegMemResult, TlbState.idxReg1);
8447 break;
8448 case 4:
8449 Assert(!fIsSegReg);
8450 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
8451 idxRegMemResult, TlbState.idxReg1);
8452 break;
8453 case 8:
8454 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
8455 break;
8456 default:
8457 AssertFailed();
8458 }
8459 }
8460
8461 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8462 TlbState.freeRegsAndReleaseVars(pReNative);
8463
8464 /*
8465 * TlbDone:
8466 *
8467 * Commit the new RSP value.
8468 */
8469 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8470 }
8471#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8472
8473#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8474 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
8475#endif
8476 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8477 if (idxRegEffSp != idxRegRsp)
8478 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8479
8480 /* The value variable is implicitly flushed. */
8481 if (idxRegValue != UINT8_MAX)
8482 iemNativeVarRegisterRelease(pReNative, idxVarValue);
8483 iemNativeVarFreeLocal(pReNative, idxVarValue);
8484
8485 return off;
8486}
8487
8488
8489
8490/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
8491#define IEM_MC_POP_GREG_U16(a_iGReg) \
8492 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
8493 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
8494#define IEM_MC_POP_GREG_U32(a_iGReg) \
8495 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
8496 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
8497#define IEM_MC_POP_GREG_U64(a_iGReg) \
8498 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
8499 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
8500
8501#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
8502 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
8503 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8504#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
8505 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
8506 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
8507
8508#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
8509 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
8510 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8511#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
8512 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
8513 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
8514
8515
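/** Emits the 16-bit SP variant of the POP stack pointer update: copies the current SP (RSP bits 15:0)
 * to @a idxRegEffSp as the address to read from and adds @a cbMem to SP with 16-bit wrap-around,
 * leaving RSP bits 63:16 untouched (e.g. SP=0xfffe with cbMem=4 gives a new SP of 0x0002). */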
8516DECL_FORCE_INLINE_THROW(uint32_t)
8517iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
8518 uint8_t idxRegTmp)
8519{
8520 /* Use16BitSp: */
8521#ifdef RT_ARCH_AMD64
8522 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8523 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
8524 RT_NOREF(idxRegTmp);
8525#else
8526 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
8527 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
8528 /* add tmp, regrsp, #cbMem */
8529 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
8530 /* and tmp, tmp, #0xffff */
8531 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
8532 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
8533 /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
8534 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
8535#endif
8536 return off;
8537}
8538
8539
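/** Emits the 32-bit SP variant of the POP stack pointer update: copies ESP to @a idxRegEffSp and
 * adds @a cbMem to ESP as a 32-bit operation (clearing RSP bits 63:32). */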
8540DECL_FORCE_INLINE(uint32_t)
8541iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
8542{
8543 /* Use32BitSp: */
8544 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8545 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
8546 return off;
8547}
8548
8549
8550/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
8551DECL_INLINE_THROW(uint32_t)
8552iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
8553 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
8554{
8555 /*
8556 * Assert sanity.
8557 */
8558 Assert(idxGReg < 16);
8559#ifdef VBOX_STRICT
8560 if (RT_BYTE2(cBitsVarAndFlat) != 0)
8561 {
8562 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8563 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8564 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8565 Assert( pfnFunction
8566 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8567 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
8568 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8569 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
8570 : UINT64_C(0xc000b000a0009000) ));
8571 }
8572 else
8573 Assert( pfnFunction
8574 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
8575 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
8576 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
8577 : UINT64_C(0xc000b000a0009000) ));
8578#endif
8579
8580#ifdef VBOX_STRICT
8581 /*
8582 * Check that the fExec flags we've got make sense.
8583 */
8584 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8585#endif
8586
8587 /*
8588 * To keep things simple we have to commit any pending writes first as we
8589 * may end up making calls.
8590 */
8591 off = iemNativeRegFlushPendingWrites(pReNative, off);
8592
8593 /*
8594 * Determine the effective stack pointer; for non-FLAT modes we also update RSP.
8595 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
8596 * directly as the effective stack pointer.
8597 * (Code structure is very similar to that of PUSH)
8598 */
8599 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
8600 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
8601 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8602 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8603 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8604 /** @todo can do a better job picking the register here. For cbMem >= 4 this
8605 * will be the resulting register value. */
8606 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
8607
8608 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8609 if (cBitsFlat != 0)
8610 {
8611 Assert(idxRegEffSp == idxRegRsp);
8612 Assert(cBitsFlat == 32 || cBitsFlat == 64);
8613 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8614 }
8615 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8616 {
8617 Assert(idxRegEffSp != idxRegRsp);
8618 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8619 kIemNativeGstRegUse_ReadOnly);
8620#ifdef RT_ARCH_AMD64
8621 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8622#else
8623 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8624#endif
8625 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8626 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8627 offFixupJumpToUseOtherBitSp = off;
8628 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8629 {
8630/** @todo can skip idxRegRsp updating when popping ESP. */
8631 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8632 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8633 }
8634 else
8635 {
8636 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8637 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8638 }
8639 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8640 }
8641 /* SpUpdateEnd: */
8642 uint32_t const offLabelSpUpdateEnd = off;
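    /* The Use16BitSp/Use32BitSp code emitted after the TLB-lookup jump below branches back to this offset. */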
8643
8644 /*
8645 * Okay, now prepare for the TLB lookup and jump to the TlbLookup code (or to
8646 * TlbMiss if we're skipping the lookup).
8647 */
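    /* In FLAT modes there is no segment to apply (UINT8_MAX), otherwise the access goes through SS. */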
8648 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8649 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
8650 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8651 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8652 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8653 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8654 : UINT32_MAX;
8655
8656 if (!TlbState.fSkip)
8657 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8658 else
8659 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8660
8661 /*
8662 * Use16BitSp:
8663 */
8664 if (cBitsFlat == 0)
8665 {
8666#ifdef RT_ARCH_AMD64
8667 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8668#else
8669 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8670#endif
8671 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8672 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8673 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8674 else
8675 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8676 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8677 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8678 }
8679
8680 /*
8681 * TlbMiss:
8682 *
8683 * Call helper to do the popping.
8684 */
8685 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8686
8687#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8688 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8689#else
8690 RT_NOREF(idxInstr);
8691#endif
8692
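    /* Don't bother saving/restoring the result register and the separate effective-SP register around the helper call. */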
8693 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8694 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8695 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
8696 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8697
8698
8699 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
8700 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8701 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8702
8703 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8704 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8705
8706 /* Done setting up parameters, make the call. */
8707 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8708
8709 /* Move the return register content to idxRegMemResult. */
8710 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8711 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8712
8713 /* Restore variables and guest shadow registers to volatile registers. */
8714 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8715 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8716
8717#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8718 if (!TlbState.fSkip)
8719 {
8720 /* end of TlbMiss - Jump to the done label. */
8721 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8722 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8723
8724 /*
8725 * TlbLookup:
8726 */
8727 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
8728 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8729
8730 /*
8731 * Emit code to load the value (the address is in idxRegMemResult, the loaded value replaces it).
8732 */
8733 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8734# ifdef IEM_WITH_TLB_STATISTICS
8735 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8736 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8737# endif
8738 switch (cbMem)
8739 {
8740 case 2:
8741 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8742 break;
8743 case 4:
8744 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8745 break;
8746 case 8:
8747 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8748 break;
8749 default:
8750 AssertFailed();
8751 }
8752
8753 TlbState.freeRegsAndReleaseVars(pReNative);
8754
8755 /*
8756 * TlbDone:
8757 *
8758 * Set the new RSP value (FLAT accesses need to calculate it first) and
8759 * commit the popped register value.
8760 */
8761 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8762 }
8763#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8764
8765 if (idxGReg != X86_GREG_xSP)
8766 {
8767 /* Set the register. */
8768 if (cbMem >= sizeof(uint32_t))
8769 {
8770#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8771 AssertMsg( pReNative->idxCurCall == 0
8772 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
8773 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName,
8774 iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
8775#endif
8776 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
8777#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8778 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
8779#endif
8780#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8781 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
8782 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8783#endif
8784 }
8785 else
8786 {
8787 Assert(cbMem == sizeof(uint16_t));
8788 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
8789 kIemNativeGstRegUse_ForUpdate);
8790 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
8791#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8792 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8793#endif
8794 iemNativeRegFreeTmp(pReNative, idxRegDst);
8795 }
8796
8797 /* Complete RSP calculation for FLAT mode. */
8798 if (idxRegEffSp == idxRegRsp)
8799 {
8800 if (cBitsFlat == 64)
8801 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8802 else
8803 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8804 }
8805 }
8806 else
8807 {
8808 /* We're popping RSP, ESP or SP. Only this one needs a bit of extra work, of course. */
8809 if (cbMem == sizeof(uint64_t))
8810 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
8811 else if (cbMem == sizeof(uint32_t))
8812 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
8813 else
8814 {
8815 if (idxRegEffSp == idxRegRsp)
8816 {
8817 if (cBitsFlat == 64)
8818 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8819 else
8820 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8821 }
8822 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
8823 }
8824 }
8825
8826#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8827 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
8828#endif
8829
8830 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8831 if (idxRegEffSp != idxRegRsp)
8832 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8833 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8834
8835 return off;
8836}
8837
8838
8839
8840/*********************************************************************************************************************************
8841* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
8842*********************************************************************************************************************************/
8843
8844#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8845 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8846 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8847 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
8848
8849#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8850 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8851 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8852 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
8853
8854#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8855 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8856 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8857 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
8858
8859#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8860 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8861 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8862 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
8863
8864
8865#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8866 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8867 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8868 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
8869
8870#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8871 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8872 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8873 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
8874
8875#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8876 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8877 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8878 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8879
8880#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8881 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8882 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8883 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
8884
8885#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8886 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
8887 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8888 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8889
8890
8891#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8892 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8893 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8894 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
8895
8896#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8897 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8898 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8899 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
8900
8901#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8902 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8903 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8904 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8905
8906#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8907 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8908 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8909 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
8910
8911#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8912 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
8913 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8914 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8915
8916
8917#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8918 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8919 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8920 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
8921
8922#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8923 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8924 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8925 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
8926#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8927 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8928 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8929 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8930
8931#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8932 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8933 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8934 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
8935
8936#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8937 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
8938 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8939 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8940
8941
8942#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8943 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8944 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8945 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
8946
8947#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8948 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8949 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
8950 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
8951
8952
8953#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8954 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8955 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8956 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
8957
8958#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8959 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8960 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8961 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
8962
8963#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8964 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8965 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8966 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
8967
8968#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8969 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8970 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8971 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
8972
8973
8974
8975#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8976 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8977 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8978 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
8979
8980#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8981 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8982 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8983 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
8984
8985#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8986 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8987 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8988 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
8989
8990#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
8991 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8992 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8993 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
8994
8995
8996#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
8997 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8998 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8999 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
9000
9001#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9002 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9003 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9004 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
9005
9006#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9007 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9008 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9009 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9010
9011#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9012 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9013 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9014 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
9015
9016#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
9017 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
9018 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9019 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9020
9021
9022#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9023 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9024 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9025 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
9026
9027#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9028 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9029 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9030 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
9031
9032#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9033 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9034 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9035 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9036
9037#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9038 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9039 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9040 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
9041
9042#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
9043 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
9044 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9045 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9046
9047
9048#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9049 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9050 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9051 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
9052
9053#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9054 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9055 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9056 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
9057
9058#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9059 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9060 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9061 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9062
9063#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9064 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9065 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9066 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
9067
9068#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
9069 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
9070 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9071 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9072
9073
9074#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
9075 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
9076 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9077 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
9078
9079#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
9080 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
9081 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
9082 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
9083
9084
9085#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9086 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9087 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9088 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
9089
9090#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9091 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9092 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9093 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
9094
9095#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9096 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9097 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9098 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
9099
9100#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9101 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9102 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9103 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
9104
9105
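/** Emits code for IEM_MC_MEM_MAP_XXX and IEM_MC_MEM_FLAT_MAP_XXX (common worker). */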
9106DECL_INLINE_THROW(uint32_t)
9107iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
9108 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint32_t fAlignMaskAndCtl,
9109 uintptr_t pfnFunction, uint8_t idxInstr)
9110{
9111 /*
9112 * Assert sanity.
9113 */
9114 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
9115 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
9116 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
9117 && pVarMem->cbVar == sizeof(void *),
9118 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9119
9120 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9121 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9122 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
9123 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
9124 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9125
9126 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
9127 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
9128 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
9129 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
9130 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9131
9132 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
9133
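    /* The mapping helpers take up to four arguments: pVCpu, pbUnmapInfo, GCPtrMem and,
       for the non-flat variants, iSegReg (see the argument setup below). */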
9134 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
9135
9136#ifdef VBOX_STRICT
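/* Helper macros for the asserts below: pick the expected Atomic/Rw/Wo/Ro helper variant from fAccess. */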
9137# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
9138 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
9139 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
9140 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
9141 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
9142# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
9143 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
9144 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
9145 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
9146
9147 if (iSegReg == UINT8_MAX)
9148 {
9149 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
9150 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
9151 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
9152 switch (cbMem)
9153 {
9154 case 1:
9155 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8));
9156 Assert(!fAlignMaskAndCtl);
9157 break;
9158 case 2:
9159 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16));
9160 Assert(fAlignMaskAndCtl < 2);
9161 break;
9162 case 4:
9163 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32));
9164 Assert(fAlignMaskAndCtl < 4);
9165 break;
9166 case 8:
9167 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64));
9168 Assert(fAlignMaskAndCtl < 8);
9169 break;
9170 case 10:
9171 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
9172 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
9173 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9174 Assert(fAlignMaskAndCtl < 8);
9175 break;
9176 case 16:
9177 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128));
9178 Assert(fAlignMaskAndCtl < 16);
9179 break;
9180# if 0
9181 case 32:
9182 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256));
9183 Assert(fAlignMaskAndCtl < 32);
9184 break;
9185 case 64:
9186 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512));
9187 Assert(fAlignMaskAndCtl < 64);
9188 break;
9189# endif
9190 default: AssertFailed(); break;
9191 }
9192 }
9193 else
9194 {
9195 Assert(iSegReg < 6);
9196 switch (cbMem)
9197 {
9198 case 1:
9199 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8));
9200 Assert(!fAlignMaskAndCtl);
9201 break;
9202 case 2:
9203 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16));
9204 Assert(fAlignMaskAndCtl < 2);
9205 break;
9206 case 4:
9207 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32));
9208 Assert(fAlignMaskAndCtl < 4);
9209 break;
9210 case 8:
9211 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64));
9212 Assert(fAlignMaskAndCtl < 8);
9213 break;
9214 case 10:
9215 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
9216 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
9217 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9218 Assert(fAlignMaskAndCtl < 8);
9219 break;
9220 case 16:
9221 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128));
9222 Assert(fAlignMaskAndCtl < 16);
9223 break;
9224# if 0
9225 case 32:
9226 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256));
9227 Assert(fAlignMaskAndCtl < 32);
9228 break;
9229 case 64:
9230 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512));
9231 Assert(fAlignMaskAndCtl < 64);
9232 break;
9233# endif
9234 default: AssertFailed(); break;
9235 }
9236 }
9237# undef IEM_MAP_HLP_FN
9238# undef IEM_MAP_HLP_FN_NO_AT
9239#endif
9240
9241#ifdef VBOX_STRICT
9242 /*
9243 * Check that the fExec flags we've got make sense.
9244 */
9245 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
9246#endif
9247
9248 /*
9249 * To keep things simple we have to commit any pending writes first as we
9250 * may end up making calls.
9251 */
9252 off = iemNativeRegFlushPendingWrites(pReNative, off);
9253
9254#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9255 /*
9256 * Move/spill/flush stuff out of call-volatile registers.
9257 * This is the easy way out. We could contain this to the tlb-miss branch
9258 * by saving and restoring active stuff here.
9259 */
9260 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
9261 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
9262#endif
9263
9264 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
9265 while the tlb-miss codepath will temporarily put it on the stack.
9266 Set the kind to stack here so we don't need to do it twice below. */
9267 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
9268 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
9269 /** @todo use a tmp register from TlbState, since they'll be free after tlb
9270 * lookup is done. */
9271
9272 /*
9273 * Define labels and allocate the result register (trying for the return
9274 * register if we can).
9275 */
9276 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
9277 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
9278 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
9279 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
9280 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
9281 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
9282 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
9283 : UINT32_MAX;
9284
9285 /*
9286 * Jump to the TLB lookup code.
9287 */
9288 if (!TlbState.fSkip)
9289 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
9290
9291 /*
9292 * TlbMiss:
9293 *
9294 * Call helper to do the mapping.
9295 * We flush all guest register shadow copies here.
9296 */
9297 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
9298
9299#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
9300 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9301#else
9302 RT_NOREF(idxInstr);
9303#endif
9304
9305#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9306 /* Save variables in volatile registers. */
9307 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
9308 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
9309#endif
9310
9311 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
9312 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
9313#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9314 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
9315#else
9316 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9317#endif
9318
9319 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
9320 if (iSegReg != UINT8_MAX)
9321 {
9322 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
9323 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
9324 }
9325
9326 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stack slot address, the result is loaded into a register after the call. */
9327 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
9328 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
9329
9330 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9331 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9332
9333 /* Done setting up parameters, make the call. */
9334 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
9335
9336 /*
9337 * Put the output in the right registers.
9338 */
9339 Assert(idxRegMemResult == pVarMem->idxReg);
9340 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
9341 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
9342
9343#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9344 /* Restore variables and guest shadow registers to volatile registers. */
9345 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
9346 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
9347#endif
9348
9349 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
9350 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
9351
9352#ifdef IEMNATIVE_WITH_TLB_LOOKUP
9353 if (!TlbState.fSkip)
9354 {
9355 /* end of TlbMiss - Jump to the done label. */
9356 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
9357 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
9358
9359 /*
9360 * TlbLookup:
9361 */
9362 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl, fAccess,
9363 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
9364# ifdef IEM_WITH_TLB_STATISTICS
9365 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
9366 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
9367# endif
9368
9369 /* [idxVarUnmapInfo] = 0; */
9370 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
9371
9372 /*
9373 * TlbDone:
9374 */
9375 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
9376
9377 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
9378
9379# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9380 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
9381 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9382# endif
9383 }
9384#else
9385 RT_NOREF(fAccess, fAlignMaskAndCtl, idxLabelTlbMiss);
9386#endif
9387
9388 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9389 iemNativeVarRegisterRelease(pReNative, idxVarMem);
9390
9391 return off;
9392}
9393
9394
9395#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
9396 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
9397 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
9398
9399#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
9400 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
9401 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
9402
9403#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
9404 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
9405 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
9406
9407#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
9408 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
9409 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
9410
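/** Emits code for IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC/RW/WO/RO. */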
9411DECL_INLINE_THROW(uint32_t)
9412iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
9413 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
9414{
9415 /*
9416 * Assert sanity.
9417 */
9418 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9419#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
9420 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9421#endif
9422 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
9423 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
9424 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
9425#ifdef VBOX_STRICT
9426 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
9427 {
9428 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
9429 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
9430 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
9431 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
9432 case IEM_ACCESS_TYPE_WRITE:
9433 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
9434 case IEM_ACCESS_TYPE_READ:
9435 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
9436 default: AssertFailed();
9437 }
9438#else
9439 RT_NOREF(fAccess);
9440#endif
9441
9442 /*
9443 * To keep things simple we have to commit any pending writes first as we
9444 * may end up making calls (there shouldn't be any at this point, so this
9445 * is just for consistency).
9446 */
9447 /** @todo we could postpone this till we make the call and reload the
9448 * registers after returning from the call. Not sure if that's sensible or
9449 * not, though. */
9450 off = iemNativeRegFlushPendingWrites(pReNative, off);
9451
9452 /*
9453 * Move/spill/flush stuff out of call-volatile registers.
9454 *
9455 * We exclude any register holding the bUnmapInfo variable, as we'll be
9456 * checking it after returning from the call and will free it afterwards.
9457 */
9458 /** @todo save+restore active registers and maybe guest shadows in miss
9459 * scenario. */
9460 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
9461 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
9462
9463 /*
9464 * If the bUnmapInfo value (idxVarUnmapInfo) is zero, we can skip all this.
9465 * Otherwise we'll have to call the unmap helper function.
9466 *
9467 * The likelihood of it being zero is higher than that of a TLB hit when doing
9468 * the mapping, as a TLB miss for a well aligned and unproblematic memory
9469 * access should also end up with a mapping that won't need special unmapping.
9470 */
9471 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
9472 * should speed up things for the pure interpreter as well when TLBs
9473 * are enabled. */
9474#ifdef RT_ARCH_AMD64
9475 if (pVarUnmapInfo->idxReg == UINT8_MAX)
9476 {
9477 /* test byte [rbp - xxx], 0ffh */
9478 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9479 pbCodeBuf[off++] = 0xf6;
9480 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
9481 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9482 pbCodeBuf[off++] = 0xff;
9483 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9484 }
9485 else
9486#endif
9487 {
9488 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
9489 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
9490 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
9491 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9492 }
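    /* If bUnmapInfo is zero there is nothing to commit/unmap and the helper call below is skipped (jump fixed up at the end). */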
9493 uint32_t const offJmpFixup = off;
9494 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
9495
9496 /*
9497 * Call the unmap helper function.
9498 */
9499#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
9500 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9501#else
9502 RT_NOREF(idxInstr);
9503#endif
9504
9505 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
9506 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
9507 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9508
9509 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9510 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9511
9512 /* Done setting up parameters, make the call. */
9513 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
9514
9515 /* The bUnmapInfo variable is implicitly freed by these MCs. */
9516 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
9517
9518 /*
9519 * Done, just fixup the jump for the non-call case.
9520 */
9521 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
9522
9523 return off;
9524}
9525
9526
9527
9528/*********************************************************************************************************************************
9529* State and Exceptions *
9530*********************************************************************************************************************************/
9531
9532#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9533#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9534
9535#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9536#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9537#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9538
9539#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9540#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9541#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9542
9543
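/** Common worker for IEM_MC_ACTUALIZE_FPU/SSE/AVX_STATE_FOR_CHANGE/READ and IEM_MC_PREPARE_SSE/AVX_USAGE.
 * A no-op unless native SIMD FP emitters are enabled, in which case it syncs the guest MXCSR into the
 * host floating point control register the first time a state change is prepared. */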
9544DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
9545{
9546#ifndef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
9547 RT_NOREF(pReNative, fForChange);
9548#else
9549 if ( !(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED)
9550 && fForChange)
9551 {
9552# ifdef RT_ARCH_AMD64
9553
9554 /* Need to save the host MXCSR the first time, and clear the exception flags. */
9555 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9556 {
9557 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9558
9559 /* stmxcsr */
9560 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9561 pbCodeBuf[off++] = X86_OP_REX_B;
9562 pbCodeBuf[off++] = 0x0f;
9563 pbCodeBuf[off++] = 0xae;
9564 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 3, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9565 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9566 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9567 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9568 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9569 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9570
9571 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9572 }
9573
9574 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
9575 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
9576
9577 /*
9578 * Mask all exceptions and clear the exception flags in a copy of the guest MXCSR,
9579 * then load it into the host MXCSR, taking a detour through memory here because
9580 * ldmxcsr/stmxcsr don't support a register source/target (sigh).
9581 */
9582 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr);
9583 off = iemNativeEmitOrGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, X86_MXCSR_XCPT_MASK);
9584 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, ~X86_MXCSR_XCPT_FLAGS);
9585 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9586
9587 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9588
9589 /* ldmxcsr */
9590 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9591 pbCodeBuf[off++] = X86_OP_REX_B;
9592 pbCodeBuf[off++] = 0x0f;
9593 pbCodeBuf[off++] = 0xae;
9594 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 2, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9595 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9596 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9597 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9598 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9599 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9600
9601 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9602 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9603
9604# elif defined(RT_ARCH_ARM64)
9605 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
9606
9607 /* Need to save the host floating point control register the first time, clear FPSR. */
9608 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9609 {
9610 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9611 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(ARMV8_A64_REG_XZR, ARMV8_AARCH64_SYSREG_FPSR);
9612 pu32CodeBuf[off++] = Armv8A64MkInstrMrs(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9613 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9614 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9615 }
9616
9617 /*
9618 * Translate MXCSR to FPCR.
9619 *
9620 * Unfortunately we can't emulate the exact behavior of MXCSR as we can't take
9621 * FEAT_AFP on arm64 for granted (my M2 MacBook doesn't have it). So we can't map
9622 * MXCSR.DAZ to FPCR.FIZ and MXCSR.FZ to FPCR.FZ with FPCR.AH being set.
9623 * We can only use FPCR.FZ which will flush inputs _and_ output de-normals to zero.
9624 */
9625 /** @todo Check the host supported flags (needs additional work to get the host features from CPUM)
9626 * and implement alternate handling if FEAT_AFP is present. */
9627 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
9628
9629 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
9630
9631 /* First make sure that there is nothing set for the upper 16-bits (X86_MXCSR_MM, which we don't emulate right now). */
9632 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegTmp, idxRegMxCsr);
9633
9634 /* If either MXCSR.FZ or MXCSR.DAZ is set FPCR.FZ will be set. */
9635 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(IEMNATIVE_REG_FIXED_TMP0, idxRegTmp, X86_MXCSR_DAZ_BIT, 1);
9636 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegTmp, X86_MXCSR_FZ_BIT);
9637 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(idxRegTmp, idxRegTmp, IEMNATIVE_REG_FIXED_TMP0);
9638 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegTmp, idxRegTmp, ARMV8_FPCR_FZ_BIT);
9639
9640 /*
9641 * Init the rounding mode; the layout differs between MXCSR.RC[14:13] and FPCR.RMode[23:22]:
9642 *
9643 * Value MXCSR FPCR
9644 * 0 RN RN
9645 * 1 R- R+
9646 * 2 R+ R-
9647 * 3 RZ RZ
9648 *
9649 * Conversion can be achieved by switching bit positions
9650 * Conversion can be achieved by swapping the two bit positions.
9651 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT);
9652 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 14, 1);
9653 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT + 1);
9654 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 13, 1);
9655
9656 /* Write the value to FPCR. */
9657 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9658
9659 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9660 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9661 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9662# else
9663# error "Port me"
9664# endif
9665 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
9666 }
9667#endif
9668 return off;
9669}
9670
9671
9672
9673/*********************************************************************************************************************************
9674* Emitters for FPU related operations. *
9675*********************************************************************************************************************************/
9676
9677#define IEM_MC_FETCH_FCW(a_u16Fcw) \
9678 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
9679
9680/** Emits code for IEM_MC_FETCH_FCW. */
9681DECL_INLINE_THROW(uint32_t)
9682iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9683{
9684 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9685 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9686
9687 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9688
9689 /* Allocate a temporary FCW register. */
9690 /** @todo eliminate extra register */
9691 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
9692 kIemNativeGstRegUse_ReadOnly);
9693
9694 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
9695
9696 /* Free but don't flush the FCW register. */
9697 iemNativeRegFreeTmp(pReNative, idxFcwReg);
9698 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9699
9700 return off;
9701}
9702
9703
9704#define IEM_MC_FETCH_FSW(a_u16Fsw) \
9705 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
9706
9707/** Emits code for IEM_MC_FETCH_FSW. */
9708DECL_INLINE_THROW(uint32_t)
9709iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9710{
9711 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9712 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9713
9714 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
9715 /* Allocate a temporary FSW register. */
9716 /** @todo eliminate extra register */
9717 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
9718 kIemNativeGstRegUse_ReadOnly);
9719
9720 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
9721
9722 /* Free but don't flush the FSW register. */
9723 iemNativeRegFreeTmp(pReNative, idxFswReg);
9724 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9725
9726 return off;
9727}
9728
9729
9730
9731#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9732
9733
9734/*********************************************************************************************************************************
9735* Emitters for SSE/AVX specific operations. *
9736*********************************************************************************************************************************/
9737
9738#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
9739 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
9740
9741/** Emits code for IEM_MC_COPY_XREG_U128. */
9742DECL_INLINE_THROW(uint32_t)
9743iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
9744{
9745 /* This is a nop if the source and destination registers are the same. */
9746 if (iXRegDst != iXRegSrc)
9747 {
9748 /* Allocate destination and source register. */
9749 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
9750 kIemNativeGstSimdRegLdStSz_Low128,
9751 kIemNativeGstRegUse_ForFullWrite);
9752 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
9753 kIemNativeGstSimdRegLdStSz_Low128,
9754 kIemNativeGstRegUse_ReadOnly);
9755
9756 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9757
9758 /* Free but don't flush the source and destination register. */
9759 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9760 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9761 }
9762
9763 return off;
9764}
9765
9766
9767#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
9768 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
9769
9770/** Emits code for IEM_MC_FETCH_XREG_U128. */
9771DECL_INLINE_THROW(uint32_t)
9772iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
9773{
9774 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9775 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9776
9777 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9778 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9779
9780 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9781
9782 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9783
9784 /* Free but don't flush the source register. */
9785 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9786 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9787
9788 return off;
9789}
9790
9791
9792#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
9793 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
9794
9795#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
9796 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
9797
9798/** Emits code for IEM_MC_FETCH_XREG_U64/IEM_MC_FETCH_XREG_R64. */
9799DECL_INLINE_THROW(uint32_t)
9800iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
9801{
9802 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9803 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9804
9805 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9806 kIemNativeGstSimdRegLdStSz_Low128,
9807 kIemNativeGstRegUse_ReadOnly);
9808
9809 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9810 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9811
9812 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9813
9814 /* Free but don't flush the source register. */
9815 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9816 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9817
9818 return off;
9819}
9820
9821
9822#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
9823 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
9824
9825#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
9826 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
9827
9828/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
9829DECL_INLINE_THROW(uint32_t)
9830iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
9831{
9832 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9833 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9834
9835 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9836 kIemNativeGstSimdRegLdStSz_Low128,
9837 kIemNativeGstRegUse_ReadOnly);
9838
9839 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9840 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9841
9842 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9843
9844 /* Free but don't flush the source register. */
9845 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9846 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9847
9848 return off;
9849}
9850
9851
9852#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
9853 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
9854
9855/** Emits code for IEM_MC_FETCH_XREG_U16. */
9856DECL_INLINE_THROW(uint32_t)
9857iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
9858{
9859 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9860 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9861
9862 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9863 kIemNativeGstSimdRegLdStSz_Low128,
9864 kIemNativeGstRegUse_ReadOnly);
9865
9866 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9867 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9868
9869 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
9870
9871 /* Free but don't flush the source register. */
9872 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9873 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9874
9875 return off;
9876}
9877
9878
9879#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
9880 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
9881
9882/** Emits code for IEM_MC_FETCH_XREG_U8. */
9883DECL_INLINE_THROW(uint32_t)
9884iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
9885{
9886 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9887 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
9888
9889 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9890 kIemNativeGstSimdRegLdStSz_Low128,
9891 kIemNativeGstRegUse_ReadOnly);
9892
9893 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9894 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9895
9896 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
9897
9898 /* Free but don't flush the source register. */
9899 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9900 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9901
9902 return off;
9903}
9904
9905
9906#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
9907 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
9908
9909AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
9910#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
9911 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
9912
9913
9914/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
9915DECL_INLINE_THROW(uint32_t)
9916iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9917{
9918 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9919 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9920
9921 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9922 kIemNativeGstSimdRegLdStSz_Low128,
9923 kIemNativeGstRegUse_ForFullWrite);
9924 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9925
9926 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9927
9928 /* Free but don't flush the source register. */
9929 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9930 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9931
9932 return off;
9933}
9934
9935
9936#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
9937 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
9938
9939#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
9940 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
9941
9942#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
9943 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
9944
9945#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
9946 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
9947
9948#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
9949 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
9950
9951#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
9952 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
9953
9954/** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8
9954 * as well as IEM_MC_STORE_XREG_R32/IEM_MC_STORE_XREG_R64. */
9955DECL_INLINE_THROW(uint32_t)
9956iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
9957 uint8_t cbLocal, uint8_t iElem)
9958{
9959 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9960 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
9961
9962#ifdef VBOX_STRICT
9963 switch (cbLocal)
9964 {
9965 case sizeof(uint64_t): Assert(iElem < 2); break;
9966 case sizeof(uint32_t): Assert(iElem < 4); break;
9967 case sizeof(uint16_t): Assert(iElem < 8); break;
9968 case sizeof(uint8_t): Assert(iElem < 16); break;
9969 default: AssertFailed();
9970 }
9971#endif
9972
9973 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9974 kIemNativeGstSimdRegLdStSz_Low128,
9975 kIemNativeGstRegUse_ForUpdate);
9976 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
9977
9978 switch (cbLocal)
9979 {
9980 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9981 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9982 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9983 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
9984 default: AssertFailed();
9985 }
9986
9987 /* Free but don't flush the source register. */
9988 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9989 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9990
9991 return off;
9992}
9993
9994
9995#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
9996 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
9997
9998/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
9999DECL_INLINE_THROW(uint32_t)
10000iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10001{
10002 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10003 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10004
10005 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10006 kIemNativeGstSimdRegLdStSz_Low128,
10007 kIemNativeGstRegUse_ForUpdate);
10008 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
10009
10010    /* Zero the vector register first, then store the 64-bit value into the lower 64 bits. */
10011 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10012 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
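    /* Net effect (derived from the two emits above): XMM[iXReg] = the 64-bit value zero extended
       to 128 bits; only the low 128 bits are allocated for update, so the upper YMM half is left alone. */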
10013
10014 /* Free but don't flush the source register. */
10015 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10016 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10017
10018 return off;
10019}
10020
10021
10022#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
10023 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
10024
10025/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
10026DECL_INLINE_THROW(uint32_t)
10027iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10028{
10029 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10030 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10031
10032 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10033 kIemNativeGstSimdRegLdStSz_Low128,
10034 kIemNativeGstRegUse_ForUpdate);
10035 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
10036
10037 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
10038 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10039 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10040
10041 /* Free but don't flush the source register. */
10042 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10043 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10044
10045 return off;
10046}
10047
10048
10049#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
10050 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
10051
10052/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
10053DECL_INLINE_THROW(uint32_t)
10054iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
10055 uint8_t idxSrcVar, uint8_t iDwSrc)
10056{
10057 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10058 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10059
10060 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10061 kIemNativeGstSimdRegLdStSz_Low128,
10062 kIemNativeGstRegUse_ForUpdate);
10063 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10064
10065 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
10066 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
10067
10068 /* Free but don't flush the destination register. */
10069 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10070 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10071
10072 return off;
10073}
10074
10075
10076#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10077 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10078
10079/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
10080DECL_INLINE_THROW(uint32_t)
10081iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10082{
10083 /*
10084 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10085 * if iYRegDst gets allocated first for the full write it won't load the
10086 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
10087 * duplicated from the already allocated host register for iYRegDst containing
10088     * garbage. This will be caught by the guest register value checking in debug
10089 * builds.
10090 */
10091 if (iYRegDst != iYRegSrc)
10092 {
10093 /* Allocate destination and source register. */
10094 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10095 kIemNativeGstSimdRegLdStSz_256,
10096 kIemNativeGstRegUse_ForFullWrite);
10097 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10098 kIemNativeGstSimdRegLdStSz_Low128,
10099 kIemNativeGstRegUse_ReadOnly);
10100
10101 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10102 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
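        /* Net effect (derived from the two emits above): dst[127:0] = src[127:0], dst[255:128] = 0. */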
10103
10104 /* Free but don't flush the source and destination register. */
10105 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10106 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10107 }
10108 else
10109 {
10110 /* This effectively only clears the upper 128-bits of the register. */
10111 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10112 kIemNativeGstSimdRegLdStSz_High128,
10113 kIemNativeGstRegUse_ForFullWrite);
10114
10115 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10116
10117 /* Free but don't flush the destination register. */
10118 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10119 }
10120
10121 return off;
10122}
10123
10124
10125#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10126 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10127
10128/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
10129DECL_INLINE_THROW(uint32_t)
10130iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10131{
10132 /*
10133 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10134 * if iYRegDst gets allocated first for the full write it won't load the
10135 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
10136 * duplicated from the already allocated host register for iYRegDst containing
10137     * garbage. This will be caught by the guest register value checking in debug
10138     * builds. The iYRegSrc == iYRegDst case would effectively only clear the upper 256 bits
10139     * of a ZMM register, which we don't support yet, so this is just a nop.
10140 */
10141 if (iYRegDst != iYRegSrc)
10142 {
10143 /* Allocate destination and source register. */
10144 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10145 kIemNativeGstSimdRegLdStSz_256,
10146 kIemNativeGstRegUse_ReadOnly);
10147 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10148 kIemNativeGstSimdRegLdStSz_256,
10149 kIemNativeGstRegUse_ForFullWrite);
10150
10151 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10152
10153 /* Free but don't flush the source and destination register. */
10154 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10155 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10156 }
10157
10158 return off;
10159}
10160
10161
10162#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
10163 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
10164
10165/** Emits code for IEM_MC_FETCH_YREG_U128. */
10166DECL_INLINE_THROW(uint32_t)
10167iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
10168{
10169 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10170 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
10171
10172 Assert(iDQWord <= 1);
10173 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10174 iDQWord == 1
10175 ? kIemNativeGstSimdRegLdStSz_High128
10176 : kIemNativeGstSimdRegLdStSz_Low128,
10177 kIemNativeGstRegUse_ReadOnly);
10178
10179 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10180 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10181
10182 if (iDQWord == 1)
10183 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10184 else
10185 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10186
10187 /* Free but don't flush the source register. */
10188 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10189 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10190
10191 return off;
10192}
10193
10194
10195#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
10196 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
10197
10198/** Emits code for IEM_MC_FETCH_YREG_U64. */
10199DECL_INLINE_THROW(uint32_t)
10200iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
10201{
10202 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10203 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10204
10205 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10206 iQWord >= 2
10207 ? kIemNativeGstSimdRegLdStSz_High128
10208 : kIemNativeGstSimdRegLdStSz_Low128,
10209 kIemNativeGstRegUse_ReadOnly);
10210
10211 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10212 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10213
10214 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
10215
10216 /* Free but don't flush the source register. */
10217 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10218 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10219
10220 return off;
10221}
10222
10223
10224#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
10225 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
10226
10227/** Emits code for IEM_MC_FETCH_YREG_U32. */
10228DECL_INLINE_THROW(uint32_t)
10229iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
10230{
10231 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10232 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10233
10234 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10235 iDWord >= 4
10236 ? kIemNativeGstSimdRegLdStSz_High128
10237 : kIemNativeGstSimdRegLdStSz_Low128,
10238 kIemNativeGstRegUse_ReadOnly);
10239
10240 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10241 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10242
10243 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
10244
10245 /* Free but don't flush the source register. */
10246 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10247 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10248
10249 return off;
10250}
10251
10252
10253#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
10254 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
10255
10256/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
10257DECL_INLINE_THROW(uint32_t)
10258iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10259{
10260 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10261 kIemNativeGstSimdRegLdStSz_High128,
10262 kIemNativeGstRegUse_ForFullWrite);
10263
10264 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10265
10266 /* Free but don't flush the register. */
10267 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10268
10269 return off;
10270}
10271
10272
10273#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
10274 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
10275
10276/** Emits code for IEM_MC_STORE_YREG_U128. */
10277DECL_INLINE_THROW(uint32_t)
10278iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
10279{
10280 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10281 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10282
10283 Assert(iDQword <= 1);
10284 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10285 iDQword == 0
10286 ? kIemNativeGstSimdRegLdStSz_Low128
10287 : kIemNativeGstSimdRegLdStSz_High128,
10288 kIemNativeGstRegUse_ForFullWrite);
10289
10290 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10291
10292 if (iDQword == 0)
10293 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10294 else
10295 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
10296
10297 /* Free but don't flush the source register. */
10298 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10299 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10300
10301 return off;
10302}
10303
10304
10305#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10306 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10307
10308/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
10309DECL_INLINE_THROW(uint32_t)
10310iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10311{
10312 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10313 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10314
10315 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10316 kIemNativeGstSimdRegLdStSz_256,
10317 kIemNativeGstRegUse_ForFullWrite);
10318
10319 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10320
10321 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10322 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10323
10324 /* Free but don't flush the source register. */
10325 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10326 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10327
10328 return off;
10329}
10330
10331
10332#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
10333 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
10334
10335/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
10336DECL_INLINE_THROW(uint32_t)
10337iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10338{
10339 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10340 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10341
10342 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10343 kIemNativeGstSimdRegLdStSz_256,
10344 kIemNativeGstRegUse_ForFullWrite);
10345
10346 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10347
10348 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10349 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10350
10351 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10352 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10353
10354 return off;
10355}
10356
10357
10358#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
10359 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
10360
10361/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
10362DECL_INLINE_THROW(uint32_t)
10363iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10364{
10365 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10366 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10367
10368 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10369 kIemNativeGstSimdRegLdStSz_256,
10370 kIemNativeGstRegUse_ForFullWrite);
10371
10372 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10373
10374 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10375 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10376
10377 /* Free but don't flush the source register. */
10378 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10379 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10380
10381 return off;
10382}
10383
10384
10385#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
10386 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
10387
10388/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
10389DECL_INLINE_THROW(uint32_t)
10390iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10391{
10392 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10393 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10394
10395 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10396 kIemNativeGstSimdRegLdStSz_256,
10397 kIemNativeGstRegUse_ForFullWrite);
10398
10399 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10400
10401 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10402 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10403
10404 /* Free but don't flush the source register. */
10405 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10406 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10407
10408 return off;
10409}
10410
10411
10412#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
10413 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
10414
10415/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
10416DECL_INLINE_THROW(uint32_t)
10417iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10418{
10419 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10420 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10421
10422 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10423 kIemNativeGstSimdRegLdStSz_256,
10424 kIemNativeGstRegUse_ForFullWrite);
10425
10426 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10427
10428 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10429 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10430
10431 /* Free but don't flush the source register. */
10432 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10433 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10434
10435 return off;
10436}
10437
10438
10439#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
10440 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
10441
10442/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
10443DECL_INLINE_THROW(uint32_t)
10444iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10445{
10446 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10447 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10448
10449 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10450 kIemNativeGstSimdRegLdStSz_256,
10451 kIemNativeGstRegUse_ForFullWrite);
10452
10453 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10454
10455 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10456
10457 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10458 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10459
10460 return off;
10461}
10462
10463
10464#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
10465 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
10466
10467/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
10468DECL_INLINE_THROW(uint32_t)
10469iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10470{
10471 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10472 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10473
10474 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10475 kIemNativeGstSimdRegLdStSz_256,
10476 kIemNativeGstRegUse_ForFullWrite);
10477
10478 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10479
10480 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10481
10482 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10483 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10484
10485 return off;
10486}
10487
10488
10489#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10490 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10491
10492/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
10493DECL_INLINE_THROW(uint32_t)
10494iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10495{
10496 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10497 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10498
10499 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10500 kIemNativeGstSimdRegLdStSz_256,
10501 kIemNativeGstRegUse_ForFullWrite);
10502
10503 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10504
10505 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10506
10507 /* Free but don't flush the source register. */
10508 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10509 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10510
10511 return off;
10512}
10513
10514
10515#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10516 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10517
10518/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
10519DECL_INLINE_THROW(uint32_t)
10520iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10521{
10522 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10523 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10524
10525 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10526 kIemNativeGstSimdRegLdStSz_256,
10527 kIemNativeGstRegUse_ForFullWrite);
10528
10529 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10530
10531 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10532
10533 /* Free but don't flush the source register. */
10534 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10535 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10536
10537 return off;
10538}
10539
10540
10541#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10542 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10543
10544/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
10545DECL_INLINE_THROW(uint32_t)
10546iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10547{
10548 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10549 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10550
10551 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10552 kIemNativeGstSimdRegLdStSz_256,
10553 kIemNativeGstRegUse_ForFullWrite);
10554
10555 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10556
10557 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
10558
10559 /* Free but don't flush the source register. */
10560 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10561 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10562
10563 return off;
10564}
10565
10566
10567#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10568 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10569
10570/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
10571DECL_INLINE_THROW(uint32_t)
10572iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10573{
10574 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10575 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10576
10577 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10578 kIemNativeGstSimdRegLdStSz_256,
10579 kIemNativeGstRegUse_ForFullWrite);
10580
10581 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10582
10583 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10584 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
10585
10586 /* Free but don't flush the source register. */
10587 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10588 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10589
10590 return off;
10591}
10592
10593
10594#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10595 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10596
10597/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
10598DECL_INLINE_THROW(uint32_t)
10599iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10600{
10601 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10602 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10603
10604 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10605 kIemNativeGstSimdRegLdStSz_256,
10606 kIemNativeGstRegUse_ForFullWrite);
10607
10608 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10609
10610 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10611 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10612
10613 /* Free but don't flush the source register. */
10614 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10615 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10616
10617 return off;
10618}
10619
10620
10621#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
10622 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
10623
10624/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
10625DECL_INLINE_THROW(uint32_t)
10626iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
10627{
10628 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10629 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10630
10631 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10632 kIemNativeGstSimdRegLdStSz_256,
10633 kIemNativeGstRegUse_ForFullWrite);
10634 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10635 kIemNativeGstSimdRegLdStSz_Low128,
10636 kIemNativeGstRegUse_ReadOnly);
10637 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10638
10639 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10640 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10641 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
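    /* Net effect (derived from the three emits above): dst[63:0] = u64Local,
       dst[127:64] = SrcHx[127:64], dst[255:128] = 0. */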
10642
10643 /* Free but don't flush the source and destination registers. */
10644 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10645 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10646 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10647
10648 return off;
10649}
10650
10651
10652#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
10653 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
10654
10655/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
10656DECL_INLINE_THROW(uint32_t)
10657iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
10658{
10659 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10660 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10661
10662 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10663 kIemNativeGstSimdRegLdStSz_256,
10664 kIemNativeGstRegUse_ForFullWrite);
10665 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10666 kIemNativeGstSimdRegLdStSz_Low128,
10667 kIemNativeGstRegUse_ReadOnly);
10668 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10669
10670 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10671 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
10672 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
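    /* Net effect (derived from the three emits above): dst[63:0] = SrcHx[63:0],
       dst[127:64] = u64Local, dst[255:128] = 0. */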
10673
10674 /* Free but don't flush the source and destination registers. */
10675 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10676 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10677 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10678
10679 return off;
10680}
10681
10682
10683#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
10684 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
10685
10686
10687/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
10688DECL_INLINE_THROW(uint32_t)
10689iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
10690{
10691 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10692 kIemNativeGstSimdRegLdStSz_Low128,
10693 kIemNativeGstRegUse_ForUpdate);
10694
10695 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
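    /* Example: a_bMask = 0x9 (bits 0 and 3 set) zeroes dwords 0 and 3 and leaves dwords 1 and 2 untouched. */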
10696 if (bImm8Mask & RT_BIT(0))
10697 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
10698 if (bImm8Mask & RT_BIT(1))
10699 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
10700 if (bImm8Mask & RT_BIT(2))
10701 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
10702 if (bImm8Mask & RT_BIT(3))
10703 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
10704
10705 /* Free but don't flush the destination register. */
10706 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10707
10708 return off;
10709}
10710
10711
10712#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
10713 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
10714
10715#define IEM_MC_FETCH_YREG_YMM(a_uYmmDst, a_iYRegSrc) \
10716 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_uYmmDst, a_iYRegSrc)
10717
10718/** Emits code for IEM_MC_FETCH_YREG_U256/IEM_MC_FETCH_YREG_YMM. */
10719DECL_INLINE_THROW(uint32_t)
10720iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
10721{
10722 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10723 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
10724
10725 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10726 kIemNativeGstSimdRegLdStSz_256,
10727 kIemNativeGstRegUse_ReadOnly);
10728 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10729
10730 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
10731
10732 /* Free but don't flush the source register. */
10733 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10734 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10735
10736 return off;
10737}
10738
10739
10740#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
10741 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
10742
10743#define IEM_MC_STORE_YREG_YMM_ZX_VLMAX(a_iYRegDst, a_uYmmSrc) \
10744 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_uYmmSrc)
10745
10746/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX/IEM_MC_STORE_YREG_YMM_ZX_VLMAX. */
10747DECL_INLINE_THROW(uint32_t)
10748iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
10749{
10750 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10751 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10752
10753 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10754 kIemNativeGstSimdRegLdStSz_256,
10755 kIemNativeGstRegUse_ForFullWrite);
10756    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10757
10758 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
10759
10760 /* Free but don't flush the source register. */
10761 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10762 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10763
10764 return off;
10765}
10766
10767
10768#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
10769 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
10770
10771
10772/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
10773DECL_INLINE_THROW(uint32_t)
10774iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
10775 uint8_t idxSrcVar, uint8_t iDwSrc)
10776{
10777 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10778 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10779
10780 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10781 iDwDst < 4
10782 ? kIemNativeGstSimdRegLdStSz_Low128
10783 : kIemNativeGstSimdRegLdStSz_High128,
10784 kIemNativeGstRegUse_ForUpdate);
10785    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10786 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10787
10788 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
10789 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
10790
10791 /* Free but don't flush the source register. */
10792 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10793 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10794 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10795
10796 return off;
10797}
10798
10799
10800#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
10801 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
10802
10803
10804/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
10805DECL_INLINE_THROW(uint32_t)
10806iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
10807 uint8_t idxSrcVar, uint8_t iQwSrc)
10808{
10809 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10810 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10811
10812 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10813 iQwDst < 2
10814 ? kIemNativeGstSimdRegLdStSz_Low128
10815 : kIemNativeGstSimdRegLdStSz_High128,
10816 kIemNativeGstRegUse_ForUpdate);
10817    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10818 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10819
10820 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
10821 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
10822
10823 /* Free but don't flush the source register. */
10824 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10825 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10826 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10827
10828 return off;
10829}
10830
10831
10832#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
10833 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
10834
10835
10836/** Emits code for IEM_MC_STORE_YREG_U64. */
10837DECL_INLINE_THROW(uint32_t)
10838iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
10839{
10840 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10841 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10842
10843 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10844 iQwDst < 2
10845 ? kIemNativeGstSimdRegLdStSz_Low128
10846 : kIemNativeGstSimdRegLdStSz_High128,
10847 kIemNativeGstRegUse_ForUpdate);
10848
10849 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10850
10851 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
10852
10853 /* Free but don't flush the source register. */
10854 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10855 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10856
10857 return off;
10858}
10859
10860
10861#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
10862 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
10863
10864/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
10865DECL_INLINE_THROW(uint32_t)
10866iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10867{
10868 RT_NOREF(pReNative, iYReg);
10869 /** @todo Needs to be implemented when support for AVX-512 is added. */
10870 return off;
10871}
10872
10873
10874
10875/*********************************************************************************************************************************
10876* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
10877*********************************************************************************************************************************/
10878
10879/**
10880 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
10881 */
10882DECL_INLINE_THROW(uint32_t)
10883iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs, uint8_t idxInstr)
10884{
10885    /* Grab the MXCSR register; it must not be call-volatile or we would end up freeing it when setting up the call below. */
10886 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
10887 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
10888 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
10889
10890#if 0 /* This is not required right now as the called helper will set up the SSE/AVX state if it is an assembly one. */
10891 /*
10892 * Need to do the FPU preparation.
10893 */
10894 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
10895#endif
10896
10897 /*
10898 * Do all the call setup and cleanup.
10899 */
10900 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS,
10901 false /*fFlushPendingWrites*/);
10902
10903 /*
10904 * Load the MXCSR register into the first argument and mask out the current exception flags.
10905 */
10906 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
10907 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
10908
10909 /*
10910 * Make the call.
10911 */
10912 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
10913
10914 /*
10915 * The updated MXCSR is in the return register, update exception status flags.
10916 *
10917 * The return register is marked allocated as a temporary because it is required for the
10918 * exception generation check below.
10919 */
10920 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG)));
10921 uint8_t const idxRegTmp = iemNativeRegMarkAllocated(pReNative, IEMNATIVE_CALL_RET_GREG, kIemNativeWhat_Tmp);
10922 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegTmp);
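    /* The helper was handed MXCSR with the exception flags masked out above, so this OR merges
       only the freshly raised exception flags back into the guest MXCSR value. */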
10923
10924#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
10925 /* Writeback the MXCSR register value (there is no delayed writeback for such registers at the moment). */
10926 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
10927#endif
10928
10929 /*
10930 * Make sure we don't have any outstanding guest register writes as we may
10931     * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
10932 */
10933 off = iemNativeRegFlushPendingWrites(pReNative, off);
10934
10935#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10936 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10937#else
10938 RT_NOREF(idxInstr);
10939#endif
10940
10941    /** @todo r=aeichner ANDN from BMI1 would save us a temporary and an additional instruction here, but I don't
10942     *        want to assume the availability of this instruction at the moment. */
10943 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
10944
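    /* The sequence below computes updatedMxCsr & ~((updatedMxCsr & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT)
       and tests the result against X86_MXCSR_XCPT_FLAGS, i.e. it checks for exception flags whose corresponding
       mask bit is clear and exits to the RaiseSseAvxFpRelated path if any such flag is set. */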
10945 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp2, idxRegTmp);
10946 /* tmp &= X86_MXCSR_XCPT_MASK */
10947 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
10948 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
10949 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
10950 /* tmp = ~tmp */
10951 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
10952 /* tmp &= mxcsr */
10953 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegTmp2);
10954 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
10955 kIemNativeLabelType_RaiseSseAvxFpRelated);
10956
10957 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
10958 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10959 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
10960
10961 return off;
10962}
10963
10964
10965#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
10966 off = iemNativeEmitCallSseAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
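/* Illustrative use only (hypothetical worker and local names, hedged): an MC block for a two-operand
   SSE floating-point instruction would invoke something along the lines of
       IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_somefp_u128, pDst, pSrc);
   the IEM_SSE_AIMPL_HIDDEN_ARGS slots are set up by iemNativeEmitCallSseAvxAImplCommon above. */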
10967
10968/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
10969DECL_INLINE_THROW(uint32_t)
10970iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
10971{
10972 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10973 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10974 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
10975}
10976
10977
10978#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
10979 off = iemNativeEmitCallSseAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
10980
10981/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
10982DECL_INLINE_THROW(uint32_t)
10983iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
10984 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
10985{
10986 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10987 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10988 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
10989 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
10990}
10991
10992
10993/*********************************************************************************************************************************
10994* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
10995*********************************************************************************************************************************/
10996
10997#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
10998 off = iemNativeEmitCallAvxAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
10999
11000/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
11001DECL_INLINE_THROW(uint32_t)
11002iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
11003{
11004 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11005 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11006 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
11007}
11008
11009
11010#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
11011 off = iemNativeEmitCallAvxAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
11012
11013/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
11014DECL_INLINE_THROW(uint32_t)
11015iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
11016 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
11017{
11018 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11019 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11020 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11021 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
11022}
11023
11024
11025#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
11026
11027
11028/*********************************************************************************************************************************
11029* Include instruction emitters. *
11030*********************************************************************************************************************************/
11031#include "target-x86/IEMAllN8veEmit-x86.h"
11032