source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@106097

Last change on this file since 106097 was 106097, checked in by vboxsync, 5 months ago

VMM/IEM: Liveness fix for MXCSR modifying in addps and friends. bugref:10652 bugref:10372

1/* $Id: IEMAllN8veRecompFuncs.h 106097 2024-09-19 14:27:50Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down configs here to avoid wasting time on unused configs.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81#if defined(IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS) && !defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
82# error "IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS requires IEMNATIVE_WITH_SIMD_REG_ALLOCATOR"
83#endif
84
85
86/*********************************************************************************************************************************
87* Code emitters for flushing pending guest register writes and sanity checks *
88*********************************************************************************************************************************/
89
90#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
91
92# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
93/**
94 * Updates IEMCPU::uPcUpdatingDebug.
95 */
96DECL_INLINE_THROW(uint32_t) iemNativeEmitPcDebugAdd(PIEMRECOMPILERSTATE pReNative, uint32_t off, int64_t offDisp, uint8_t cBits)
97{
98# ifdef RT_ARCH_AMD64
99 if (pReNative->Core.fDebugPcInitialized && cBits >= 32)
100 {
101 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
102 if ((int32_t)offDisp == offDisp || cBits != 64)
103 {
104 /* add [q]word [pVCpu->iem.s.uPcUpdatingDebug], imm32/imm8 */
105 if (cBits == 64)
106 pCodeBuf[off++] = X86_OP_REX_W;
107 pCodeBuf[off++] = (int8_t)offDisp == offDisp ? 0x83 : 0x81;
108 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
109 if ((int8_t)offDisp == offDisp)
110 pCodeBuf[off++] = (int8_t)offDisp;
111 else
112 {
113 *(int32_t *)&pCodeBuf[off] = (int32_t)offDisp;
114 off += sizeof(int32_t);
115 }
116 }
117 else
118 {
119 /* mov tmp0, imm64 */
120 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, offDisp);
121
122 /* add [pVCpu->iem.s.uPcUpdatingDebug], tmp0 */
123 if (cBits == 64)
124 pCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 >= 8 ? X86_OP_REX_R : 0);
125 else if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
126 pCodeBuf[off++] = X86_OP_REX_R;
127 pCodeBuf[off++] = 0x01;
128 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0 & 7,
129 RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
130 }
131 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
132 return off;
133 }
134# endif
135
136 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
137 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 12);
138
139 if (pReNative->Core.fDebugPcInitialized)
140 {
141 Log4(("uPcUpdatingDebug+=%ld cBits=%d off=%#x\n", offDisp, cBits, off));
142 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
143 }
144 else
145 {
146 Log4(("uPcUpdatingDebug=rip+%ld cBits=%d off=%#x\n", offDisp, cBits, off));
147 pReNative->Core.fDebugPcInitialized = true;
148 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
149 }
150
151 if (cBits == 64)
152 off = iemNativeEmitAddGprImmEx(pCodeBuf, off, idxTmpReg, offDisp, IEMNATIVE_REG_FIXED_TMP0);
153 else
154 {
155 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxTmpReg, (int32_t)offDisp, IEMNATIVE_REG_FIXED_TMP0);
156 if (cBits == 16)
157 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, UINT16_MAX);
158 }
159
160 off = iemNativeEmitStoreGprToVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug),
161 IEMNATIVE_REG_FIXED_TMP0);
162
163 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
164 iemNativeRegFreeTmp(pReNative, idxTmpReg);
165 return off;
166}
167
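/*
 * Illustrative sketch, not part of the recompiler: the immediate-size selection the
 * AMD64 path above performs.  Opcode 0x83 is the x86 group-1 form taking a
 * sign-extended imm8, 0x81 the form taking an imm32; anything wider is loaded into a
 * temporary register first.  The enum and function below are hypothetical and exist
 * only to spell out that decision.
 */
typedef enum { EXAMPLE_ADD_IMM8, EXAMPLE_ADD_IMM32, EXAMPLE_ADD_VIA_TMP_REG } EXAMPLEADDIMMENC;

DECLINLINE(EXAMPLEADDIMMENC) iemExamplePickAddImmEncoding(int64_t offDisp, uint8_t cBits)
{
    if ((int8_t)offDisp == offDisp)
        return EXAMPLE_ADD_IMM8;        /* add [mem], imm8  -> opcode 0x83 */
    if ((int32_t)offDisp == offDisp || cBits != 64)
        return EXAMPLE_ADD_IMM32;       /* add [mem], imm32 -> opcode 0x81 */
    return EXAMPLE_ADD_VIA_TMP_REG;     /* mov tmp, imm64; then add [mem], tmp */
}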
168
169# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
170DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
171{
172 /* Compare the shadow with the context value; they should match. */
173 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
174 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
175 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
176 return off;
177}
178# endif
179
180#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
181
182/**
183 * Flushes delayed write of a specific guest register.
184 *
185 * This must be called prior to calling CImpl functions and any helpers that use
186 * the guest state (like raising exceptions) and such.
187 *
188 * This optimization has not yet been implemented. The first target would be
189 * RIP updates, since these are the most common ones.
190 */
191DECL_INLINE_THROW(uint32_t)
192iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
193{
194#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
195 /* If for whatever reason it is possible to reference the PC register at some point, we need to do the writeback here first. */
196#endif
197
198#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
199#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
200 if ( enmClass == kIemNativeGstRegRef_EFlags
201 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
202 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
203#else
204 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
205#endif
206
207 if ( enmClass == kIemNativeGstRegRef_Gpr
208 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
209 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
210#endif
211
212#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
213 if ( enmClass == kIemNativeGstRegRef_XReg
214 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
215 {
216 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
217 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
218 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
219
220 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
221 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
222 }
223#endif
224 RT_NOREF(pReNative, enmClass, idxReg);
225 return off;
226}
227
228
229
230/*********************************************************************************************************************************
231* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
232*********************************************************************************************************************************/
233
234#undef IEM_MC_BEGIN /* unused */
235#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
236 { \
237 Assert(pReNative->Core.bmVars == 0); \
238 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
239 Assert(pReNative->Core.bmStack == 0); \
240 pReNative->fMc = (a_fMcFlags); \
241 pReNative->fCImpl = (a_fCImplFlags); \
242 pReNative->cArgsX = (a_cArgsIncludingHidden)
243
244/** We have to get to the end in recompilation mode, as otherwise we won't
245 * generate code for all the IEM_MC_IF_XXX branches. */
246#define IEM_MC_END() \
247 iemNativeVarFreeAll(pReNative); \
248 } return off
249
250
251
252/*********************************************************************************************************************************
253* Liveness Stubs *
254*********************************************************************************************************************************/
255
256#define IEM_MC_LIVENESS_GREG_INPUT(a_iGReg) ((void)0)
257#define IEM_MC_LIVENESS_GREG_CLOBBER(a_iGReg) ((void)0)
258#define IEM_MC_LIVENESS_GREG_MODIFY(a_iGReg) ((void)0)
259
260#define IEM_MC_LIVENESS_MREG_INPUT(a_iMReg) ((void)0)
261#define IEM_MC_LIVENESS_MREG_CLOBBER(a_iMReg) ((void)0)
262#define IEM_MC_LIVENESS_MREG_MODIFY(a_iMReg) ((void)0)
263
264#define IEM_MC_LIVENESS_XREG_INPUT(a_iXReg) ((void)0)
265#define IEM_MC_LIVENESS_XREG_CLOBBER(a_iXReg) ((void)0)
266#define IEM_MC_LIVENESS_XREG_MODIFY(a_iXReg) ((void)0)
267
268#define IEM_MC_LIVENESS_MXCSR_INPUT() ((void)0)
269#define IEM_MC_LIVENESS_MXCSR_CLOBBER() ((void)0)
270#define IEM_MC_LIVENESS_MXCSR_MODIFY() ((void)0)
271
272
273/*********************************************************************************************************************************
274* Native Emitter Support. *
275*********************************************************************************************************************************/
276
277#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
278
279#define IEM_MC_NATIVE_ELSE() } else {
280
281#define IEM_MC_NATIVE_ENDIF() } ((void)0)
282
283
284#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
285 off = a_fnEmitter(pReNative, off)
286
287#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
288 off = a_fnEmitter(pReNative, off, (a0))
289
290#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
291 off = a_fnEmitter(pReNative, off, (a0), (a1))
292
293#define IEM_MC_NATIVE_EMIT_2_EX(a_fnEmitter, a0, a1) \
294 off = a_fnEmitter(pReNative, off, pCallEntry->idxInstr, (a0), (a1))
295
296#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
297 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
298
299#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
300 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
301
302#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
303 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
304
305#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
306 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
307
308#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
309 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
310
311#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
312 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
313
314
315#ifndef RT_ARCH_AMD64
316# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
317#else
318/** @note This is a naive approach that ASSUMES that the register isn't
319 * allocated, so it only works safely for the first allocation(s) in
320 * an MC block. */
321# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
322 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
323
324DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg,
325 uint32_t off, bool fAllocated);
326
327DECL_INLINE_THROW(uint32_t)
328iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
329{
330 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
331 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
332 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
333
334# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
335 /* Must flush the register if it holds pending writes. */
336 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
337 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
338 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
339# endif
340
341 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off, false /*fAllocated*/);
342 return off;
343}
344
345#endif /* RT_ARCH_AMD64 */
346
347
348
349/*********************************************************************************************************************************
350* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX)                                             *
351*********************************************************************************************************************************/
352
353#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
354 pReNative->fMc = 0; \
355 pReNative->fCImpl = (a_fFlags); \
356 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, \
357 a_cbInstr) /** @todo not used ... */
358
359
360#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
361 pReNative->fMc = 0; \
362 pReNative->fCImpl = (a_fFlags); \
363 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
364
365DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
366 uint8_t idxInstr, uint64_t a_fGstShwFlush,
367 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
368{
369 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
370}
371
372
373#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
374 pReNative->fMc = 0; \
375 pReNative->fCImpl = (a_fFlags); \
376 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
377 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
378
379DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
380 uint8_t idxInstr, uint64_t a_fGstShwFlush,
381 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
382{
383 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
384}
385
386
387#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
388 pReNative->fMc = 0; \
389 pReNative->fCImpl = (a_fFlags); \
390 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
391 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
392
393DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
394 uint8_t idxInstr, uint64_t a_fGstShwFlush,
395 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
396 uint64_t uArg2)
397{
398 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
399}
400
401
402
403/*********************************************************************************************************************************
404* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
405*********************************************************************************************************************************/
406
407/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
408 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
409DECL_INLINE_THROW(uint32_t)
410iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
411{
412 /*
413 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
414 * return with a special status code and make the execution loop deal with
415 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
416 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
417 * could continue w/o interruption, it probably will drop into the
418 * debugger, so it is not worth the effort of trying to service it here and we
419 * just lump it in with the handling of the others.
420 *
421 * To simplify the code and the register state management even more (wrt the
422 * immediate in the AND operation), we always update the flags and skip the
423 * conditional jump associated with the extra check.
424 */
425 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
426 <= UINT32_MAX);
427#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
428 AssertMsg( pReNative->idxCurCall == 0
429 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
430 IEMLIVENESSBIT_IDX_EFL_OTHER)),
431 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
432 IEMLIVENESSBIT_IDX_EFL_OTHER)));
433#endif
434
435 /*
436 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
437 * any pending register writes must be flushed.
438 */
439 off = iemNativeRegFlushPendingWrites(pReNative, off);
440
441 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
442 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
443 true /*fSkipLivenessAssert*/);
444 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxEflReg,
445 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
446 kIemNativeLabelType_ReturnWithFlags);
447 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
448 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
449
450 /* Free but don't flush the EFLAGS register. */
451 iemNativeRegFreeTmp(pReNative, idxEflReg);
452
453 return off;
454}
455
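/*
 * Illustrative sketch, not part of the recompiler: the runtime test the code emitted
 * above performs, written as plain C.  The flag constants are the real ones used
 * above; the function and its boolean return convention are made up for illustration.
 * Only when it returns false does the emitted code fall through to the AND that
 * clears X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW before storing EFLAGS back to
 * cpum.GstCtx.eflags; otherwise the TB is left via the ReturnWithFlags exit.
 */
DECLINLINE(bool) iemExampleFinishNeedsReturnWithFlags(uint32_t fEFlags)
{
    return (fEFlags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)) != 0;
}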
456
457/** Helper for iemNativeEmitFinishInstructionWithStatus. */
458DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
459{
460 unsigned const offOpcodes = pCallEntry->offOpcode;
461 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
462 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
463 {
464 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
465 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
466 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
467 }
468 AssertFailedReturn(NIL_RTGCPHYS);
469}
470
471
472/** The VINF_SUCCESS dummy. */
473template<int const a_rcNormal, bool const a_fIsJump>
474DECL_FORCE_INLINE_THROW(uint32_t)
475iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
476 int32_t const offJump)
477{
478 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
479 if (a_rcNormal != VINF_SUCCESS)
480 {
481#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
482 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
483#else
484 RT_NOREF_PV(pCallEntry);
485#endif
486
487 /* As this code returns from the TB, any pending register writes must be flushed. */
488 off = iemNativeRegFlushPendingWrites(pReNative, off);
489
490 /*
491 * If we're in a conditional, mark the current branch as exiting so we
492 * can disregard its state when we hit the IEM_MC_ENDIF.
493 */
494 iemNativeMarkCurCondBranchAsExiting(pReNative);
495
496 /*
497 * Use the lookup table for getting to the next TB quickly.
498 * Note! In this code path there can only be one entry at present.
499 */
500 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
501 PCIEMTB const pTbOrg = pReNative->pTbOrg;
502 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
503 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
504
505#if 0
506 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
507 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
508 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
509 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
510 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
511
512 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreak);
513
514#else
515 /* Load the index as argument #1 for the helper call at the given label. */
516 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
517
518 /*
519 * Figure out the physical address of the current instruction and see
520 * whether the next instruction we're about to execute is in the same
521 * page, so we can optimistically skip TLB loading.
522 *
523 * - This is safe for all cases in FLAT mode.
524 * - In segmented modes it is complicated, given that a negative
525 * jump may underflow EIP and a forward jump may overflow or run into
526 * CS.LIM and trigger a #GP. The only thing we can get away with
527 * now at compile time is forward jumps w/o CS.LIM checks, since the
528 * lack of CS.LIM checks means we're good for the entire physical page
529 * we're executing on and another 15 bytes before we run into CS.LIM.
530 */
531 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
532# if 0 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. See also iemNativeHlpReturnBreakViaLookup. */
533 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS)
534# endif
535 )
536 {
537 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
538 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
539 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
540 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
541
542 {
543 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
544 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
545
546 /* Load the key lookup flags into the 2nd argument for the helper call.
547 - This is safe wrt CS limit checking since we're only here for FLAT modes.
548 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
549 interrupt shadow.
550 - The NMI inhibiting is more questionable, though... */
551 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
552 * Should we copy it into fExec to simplify this? OTOH, it's just a
553 * couple of extra instructions if EFLAGS are already in a register. */
554 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
555 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
556
557 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
558 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookup);
559 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithIrq);
560 }
561 }
562 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
563 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlb);
564 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq);
565#endif
566 }
567 return off;
568}
569
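/*
 * Illustrative sketch, not part of the recompiler: the same-page test above with
 * concrete numbers, assuming 4 KiB guest pages.  An instruction starting at page
 * offset 0xffe with cbOpcode=2 ends exactly at the page boundary and may still take
 * the TLB shortcut, whereas the 2-byte 'je -56h' starting at offset 0xfff (see the
 * comment above) straddles two pages, so the shortcut must be refused even if the
 * jump target lands on the current page.  The function name is made up.
 */
DECLINLINE(bool) iemExampleCanSkipTlbLoad(uint64_t GCPhysPcCurrent, uint8_t cbOpcode, int64_t offJump)
{
    uint64_t const GCPhysPcNext = GCPhysPcCurrent + cbOpcode + offJump;
    return (GCPhysPcNext >> 12) == (GCPhysPcCurrent >> 12)      /* target is on the same physical page */
        && 4096 - (GCPhysPcCurrent & 0xfff) >= cbOpcode;        /* current instruction doesn't cross into the next page */
}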
570
571#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
572 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
573 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
574
575#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
576 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
577 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
578 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
579
580/** Same as iemRegAddToRip64AndFinishingNoFlags. */
581DECL_INLINE_THROW(uint32_t)
582iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
583{
584#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
585# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
586 if (!pReNative->Core.offPc)
587 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
588# endif
589
590 /* Allocate a temporary PC register. */
591 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
592
593 /* Perform the addition and store the result. */
594 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
595 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
596
597 /* Free but don't flush the PC register. */
598 iemNativeRegFreeTmp(pReNative, idxPcReg);
599#endif
600
601#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
602 pReNative->Core.offPc += cbInstr;
603 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
604# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
605 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 64);
606 off = iemNativeEmitPcDebugCheck(pReNative, off);
607# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
608 off = iemNativePcAdjustCheck(pReNative, off);
609# endif
610 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
611#endif
612
613 return off;
614}
615
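/*
 * Illustrative sketch, not part of the recompiler: the idea behind
 * IEMNATIVE_WITH_DELAYED_PC_UPDATING as used above.  Instead of emitting a RIP store
 * after every recompiled instruction, instruction lengths are accumulated in
 * Core.offPc at compile time and a single add+store is emitted only when an
 * up-to-date RIP is really needed (branches, exceptions, TB exits).  The structure
 * and functions below are hypothetical and only model that bookkeeping.
 */
typedef struct EXAMPLEPCSTATE { uint64_t uGuestRip; uint64_t offPcPending; } EXAMPLEPCSTATE;

DECLINLINE(void) iemExampleAdvancePc(EXAMPLEPCSTATE *pState, uint8_t cbInstr)
{
    pState->offPcPending += cbInstr;             /* pure compile-time bookkeeping, no code emitted */
}

DECLINLINE(void) iemExampleFlushPc(EXAMPLEPCSTATE *pState)
{
    pState->uGuestRip   += pState->offPcPending; /* the single deferred add + store */
    pState->offPcPending = 0;
}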
616
617#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
618 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
619 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
620
621#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
622 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
623 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
624 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
625
626/** Same as iemRegAddToEip32AndFinishingNoFlags. */
627DECL_INLINE_THROW(uint32_t)
628iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
629{
630#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
631# ifdef IEMNATIVE_REG_FIXED_PC_DBG
632 if (!pReNative->Core.offPc)
633 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
634# endif
635
636 /* Allocate a temporary PC register. */
637 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
638
639 /* Perform the addition and store the result. */
640 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
641 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
642
643 /* Free but don't flush the PC register. */
644 iemNativeRegFreeTmp(pReNative, idxPcReg);
645#endif
646
647#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
648 pReNative->Core.offPc += cbInstr;
649 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
650# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
651 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 32);
652 off = iemNativeEmitPcDebugCheck(pReNative, off);
653# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
654 off = iemNativePcAdjustCheck(pReNative, off);
655# endif
656 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
657#endif
658
659 return off;
660}
661
662
663#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
664 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
665 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
666
667#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
668 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
669 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
670 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
671
672/** Same as iemRegAddToIp16AndFinishingNoFlags. */
673DECL_INLINE_THROW(uint32_t)
674iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
675{
676#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
677# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
678 if (!pReNative->Core.offPc)
679 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
680# endif
681
682 /* Allocate a temporary PC register. */
683 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
684
685 /* Perform the addition and store the result. */
686 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
687 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
688 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
689
690 /* Free but don't flush the PC register. */
691 iemNativeRegFreeTmp(pReNative, idxPcReg);
692#endif
693
694#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
695 pReNative->Core.offPc += cbInstr;
696 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
697# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
698 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 16);
699 off = iemNativeEmitPcDebugCheck(pReNative, off);
700# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
701 off = iemNativePcAdjustCheck(pReNative, off);
702# endif
703 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
704#endif
705
706 return off;
707}
708
709
710/*********************************************************************************************************************************
711* Common code for changing PC/RIP/EIP/IP. *
712*********************************************************************************************************************************/
713
714/**
715 * Emits code to check if the content of @a idxAddrReg is a canonical address,
716 * raising a \#GP(0) if it isn't.
717 *
718 * @returns New code buffer offset, UINT32_MAX on failure.
719 * @param pReNative The native recompile state.
720 * @param off The code buffer offset.
721 * @param idxAddrReg The host register with the address to check.
722 * @param idxInstr The current instruction.
723 */
724DECL_FORCE_INLINE_THROW(uint32_t)
725iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
726{
727 /*
728 * Make sure we don't have any outstanding guest register writes as we may
729 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
730 */
731 off = iemNativeRegFlushPendingWrites(pReNative, off);
732
733#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
734 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
735#else
736 RT_NOREF(idxInstr);
737#endif
738
739#ifdef RT_ARCH_AMD64
740 /*
741 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
742 * return raisexcpt();
743 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
744 */
745 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
746
747 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
748 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
749 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
750 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
751 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
752
753 iemNativeRegFreeTmp(pReNative, iTmpReg);
754
755#elif defined(RT_ARCH_ARM64)
756 /*
757 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
758 * return raisexcpt();
759 * ----
760 * mov x1, 0x800000000000
761 * add x1, x0, x1
762 * cmp xzr, x1, lsr 48
763 * b.ne .Lraisexcpt
764 */
765 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
766
767 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
768 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
769 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
770 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
771
772 iemNativeRegFreeTmp(pReNative, iTmpReg);
773
774#else
775# error "Port me"
776#endif
777 return off;
778}
779
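/*
 * Illustrative sketch, not part of the recompiler: what the AMD64 sequence above
 * computes.  With 48-bit virtual addressing an address is canonical when bits 63:48
 * are copies of bit 47, i.e. it lies in 0..0x00007FFFFFFFFFFF or in
 * 0xFFFF800000000000..0xFFFFFFFFFFFFFFFF.  Adding 0x8000 to the high dword folds
 * both halves onto a value whose bits 31:16 are zero exactly when the address is
 * canonical.  The function name is made up.
 */
DECLINLINE(bool) iemExampleIsCanonical(uint64_t uAddr)
{
    return (((uint32_t)(uAddr >> 32) + UINT32_C(0x8000)) >> 16) == 0;
    /* 0x00007FFFFFFFFFFF: (0x00007FFF + 0x8000) >> 16 == 0      -> canonical
       0x0000800000000000: (0x00008000 + 0x8000) >> 16 == 1      -> not canonical
       0xFFFF800000000000: (0xFFFF8000 + 0x8000) wraps to 0      -> canonical
       0xFFFF7FFFFFFFFFFF: (0xFFFF7FFF + 0x8000) >> 16 == 0xFFFF -> not canonical */
}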
780
781/**
782 * Emits code to check if the content of @a idxAddrReg is a canonical address,
783 * raising a \#GP(0) if it isn't.
784 *
785 * Caller makes sure everything is flushed, except maybe PC.
786 *
787 * @returns New code buffer offset, UINT32_MAX on failure.
788 * @param pReNative The native recompile state.
789 * @param off The code buffer offset.
790 * @param idxAddrReg The host register with the address to check.
791 * @param offDisp The relative displacement that has already been
792 * added to idxAddrReg and must be subtracted if
793 * raising a \#GP(0).
794 * @param idxInstr The current instruction.
795 */
796DECL_FORCE_INLINE_THROW(uint32_t)
797iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
798 uint8_t idxAddrReg, int64_t offDisp, uint8_t idxInstr)
799{
800#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
801 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
802#endif
803
804#ifdef RT_ARCH_AMD64
805 /*
806 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
807 * return raisexcpt();
808 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
809 */
810 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
811
812 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
813 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
814 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
815 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
816
817#elif defined(RT_ARCH_ARM64)
818 /*
819 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
820 * return raisexcpt();
821 * ----
822 * mov x1, 0x800000000000
823 * add x1, x0, x1
824 * cmp xzr, x1, lsr 48
825 * b.ne .Lraisexcpt
826 */
827 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
828
829 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
830 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
831 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
832#else
833# error "Port me"
834#endif
835
836 /* Jump to the #GP code (hoping static prediction considers forward branches as not-taken). */
837 uint32_t const offFixup1 = off;
838 off = iemNativeEmitJnzToFixed(pReNative, off, off /*8-bit jump suffices*/);
839
840 /* jump .Lnoexcept; Skip the #GP code. */
841 uint32_t const offFixup2 = off;
842 off = iemNativeEmitJmpToFixed(pReNative, off, off /*8-bit jump suffices*/);
843
844 /* .Lraisexcpt: */
845 iemNativeFixupFixedJump(pReNative, offFixup1, off);
846#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
847 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
848#else
849 RT_NOREF(idxInstr);
850#endif
851
852 /* Undo the PC adjustment and store the old PC value. */
853 off = iemNativeEmitSubGprImm(pReNative, off, idxAddrReg, offDisp, iTmpReg);
854 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxAddrReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
855
856 off = iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0, false /*fActuallyExitingTb*/);
857
858 /* .Lnoexcept: */
859 iemNativeFixupFixedJump(pReNative, offFixup2, off);
860
861 iemNativeRegFreeTmp(pReNative, iTmpReg);
862 return off;
863}
864
865
866/**
867 * Emits code to check if the content of @a idxAddrReg is a canonical address,
868 * raising a \#GP(0) if it isn't.
869 *
870 * Caller makes sure everything is flushed, except maybe PC.
871 *
872 * @returns New code buffer offset, UINT32_MAX on failure.
873 * @param pReNative The native recompile state.
874 * @param off The code buffer offset.
875 * @param idxAddrReg The host register with the address to check.
876 * @param idxOldPcReg Register holding the old PC that offPc is relative
877 * to if available, otherwise UINT8_MAX.
878 * @param idxInstr The current instruction.
879 */
880DECL_FORCE_INLINE_THROW(uint32_t)
881iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
882 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
883{
884#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
885 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
886#endif
887
888#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
889# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
890 if (!pReNative->Core.offPc)
891# endif
892 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
893#else
894 RT_NOREF(idxInstr);
895#endif
896
897#ifdef RT_ARCH_AMD64
898 /*
899 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
900 * return raisexcpt();
901 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
902 */
903 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
904
905 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
906 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
907 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
908 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
909
910#elif defined(RT_ARCH_ARM64)
911 /*
912 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
913 * return raisexcpt();
914 * ----
915 * mov x1, 0x800000000000
916 * add x1, x0, x1
917 * cmp xzr, x1, lsr 48
918 * b.ne .Lraisexcpt
919 */
920 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
921
922 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
923 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
924 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
925#else
926# error "Port me"
927#endif
928
929#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
930 if (pReNative->Core.offPc)
931 {
932 /** @todo On x86, it is said that conditional jumps forward are statically
933 * predicted as not taken, so this isn't a very good construct.
934 * Investigate whether it makes sense to invert it and add another
935 * jump. Also, find out wtf the static predictor does here on arm! */
936 uint32_t const offFixup = off;
937 off = iemNativeEmitJzToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
938
939 /* .Lraisexcpt: */
940# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
941 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
942# endif
943 /* We need to update cpum.GstCtx.rip. */
944 if (idxOldPcReg == UINT8_MAX)
945 {
946 idxOldPcReg = iTmpReg;
947 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
948 }
949 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
950 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
951
952 off = iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0, false /*fActuallyExitingTb*/);
953 iemNativeFixupFixedJump(pReNative, offFixup, off);
954 }
955 else
956#endif
957 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
958
959 iemNativeRegFreeTmp(pReNative, iTmpReg);
960
961 return off;
962}
963
964
965/**
966 * Emits code to check that the content of @a idxAddrReg is within the limit
967 * of CS, raising a \#GP(0) if it isn't.
968 *
969 * @returns New code buffer offset; throws VBox status code on error.
970 * @param pReNative The native recompile state.
971 * @param off The code buffer offset.
972 * @param idxAddrReg The host register (32-bit) with the address to
973 * check.
974 * @param idxInstr The current instruction.
975 */
976DECL_FORCE_INLINE_THROW(uint32_t)
977iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
978 uint8_t idxAddrReg, uint8_t idxInstr)
979{
980 /*
981 * Make sure we don't have any outstanding guest register writes as we may
982 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
983 */
984 off = iemNativeRegFlushPendingWrites(pReNative, off);
985
986#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
987 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
988#else
989 RT_NOREF(idxInstr);
990#endif
991
992 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
993 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
994 kIemNativeGstRegUse_ReadOnly);
995
996 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
997 off = iemNativeEmitJaTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
998
999 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
1000 return off;
1001}
1002
1003
1004
1005
1006/**
1007 * Emits code to check that the content of @a idxAddrReg is within the limit
1008 * of CS, raising a \#GP(0) if it isn't.
1009 *
1010 * Caller makes sure everything is flushed, except maybe PC.
1011 *
1012 * @returns New code buffer offset; throws VBox status code on error.
1013 * @param pReNative The native recompile state.
1014 * @param off The code buffer offset.
1015 * @param idxAddrReg The host register (32-bit) with the address to
1016 * check.
1017 * @param idxOldPcReg Register holding the old PC that offPc is relative
1018 * to if available, otherwise UINT8_MAX.
1019 * @param idxInstr The current instruction.
1020 */
1021DECL_FORCE_INLINE_THROW(uint32_t)
1022iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1023 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
1024{
1025#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1026 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
1027#endif
1028
1029#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1030# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1031 if (!pReNative->Core.offPc)
1032# endif
1033 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1034#else
1035 RT_NOREF(idxInstr);
1036#endif
1037
1038 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
1039 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
1040 kIemNativeGstRegUse_ReadOnly);
1041
1042 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
1043#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1044 if (pReNative->Core.offPc)
1045 {
1046 uint32_t const offFixup = off;
1047 off = iemNativeEmitJbeToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
1048
1049 /* Raising a GP(0), but first we need to update cpum.GstCtx.rip. */
1050 if (idxOldPcReg == UINT8_MAX)
1051 {
1052 idxOldPcReg = idxAddrReg;
1053 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1054 }
1055 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
1056 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1057# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1058 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1059# endif
1060 off = iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0, false /*fActuallyExitingTb*/);
1061 iemNativeFixupFixedJump(pReNative, offFixup, off);
1062 }
1063 else
1064#endif
1065 off = iemNativeEmitJaTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
1066
1067 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
1068 return off;
1069}
1070
1071
1072/*********************************************************************************************************************************
1073* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
1074*********************************************************************************************************************************/
1075
1076#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1077 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1078 (a_enmEffOpSize), pCallEntry->idxInstr); \
1079 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1080
1081#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1082 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1083 (a_enmEffOpSize), pCallEntry->idxInstr); \
1084 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1085 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1086
1087#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
1088 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1089 IEMMODE_16BIT, pCallEntry->idxInstr); \
1090 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1091
1092#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1093 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1094 IEMMODE_16BIT, pCallEntry->idxInstr); \
1095 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1096 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1097
1098#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
1099 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1100 IEMMODE_64BIT, pCallEntry->idxInstr); \
1101 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1102
1103#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1104 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1105 IEMMODE_64BIT, pCallEntry->idxInstr); \
1106 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1107 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1108
1109
1110#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1111 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1112 (a_enmEffOpSize), pCallEntry->idxInstr); \
1113 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1114
1115#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1116 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1117 (a_enmEffOpSize), pCallEntry->idxInstr); \
1118 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1119 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1120
1121#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG(a_i16, a_cbInstr, a_rcNormal) \
1122 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1123 IEMMODE_16BIT, pCallEntry->idxInstr); \
1124 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1125
1126#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1127 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1128 IEMMODE_16BIT, pCallEntry->idxInstr); \
1129 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1130 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1131
1132#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG(a_i32, a_cbInstr, a_rcNormal) \
1133 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1134 IEMMODE_64BIT, pCallEntry->idxInstr); \
1135 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1136
1137#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1138 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1139 IEMMODE_64BIT, pCallEntry->idxInstr); \
1140 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1141 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1142
1143/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
1144 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
1145 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
1146template<bool const a_fWithinPage>
1147DECL_INLINE_THROW(uint32_t)
1148iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1149 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1150{
1151 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
1152#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1153 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1154 if (a_fWithinPage && enmEffOpSize == IEMMODE_64BIT)
1155 {
1156 /* No #GP checking required, just update offPc and get on with it. */
1157 pReNative->Core.offPc += (int64_t)offDisp + cbInstr;
1158# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1159 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1160# endif
1161 }
1162 else
1163#endif
1164 {
1165 /* Flush all but PC iff we're doing a 64-bit update here and this isn't within a page. */
1166 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT && !a_fWithinPage))
1167 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1168
1169 /* Allocate a temporary PC register. */
1170 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1171 kIemNativeGstRegUse_ForUpdate);
1172
1173 /* Perform the addition. */
1174 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr + pReNative->Core.offPc);
1175
1176 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
1177 {
1178 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't.
1179 We can skip this if the target is within the same page. */
1180 if (!a_fWithinPage)
1181 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(pReNative, off, idxPcReg,
1182 (int64_t)offDisp + cbInstr, idxInstr);
1183 }
1184 else
1185 {
1186 /* Just truncate the result to 16-bit IP. */
1187 Assert(enmEffOpSize == IEMMODE_16BIT);
1188 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1189 }
1190
1191#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1192# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1193 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1194 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1195# endif
1196 /* Since we've already got the new PC value in idxPcReg, we can just as
1197 well write it out and reset offPc to zero. Otherwise, we'd need to use
1198 a copy of the shadow PC, which will cost another move instruction here. */
1199# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS)
1200 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
1201 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
1202 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
1203 Log4(("iemNativeEmitRip64RelativeJumpAndFinishingNoFlags: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u cCondDepth=%d\n",
1204 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped, pReNative->cCondDepth));
1205 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
1206# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1207 iemNativeDbgInfoAddNativeOffset(pReNative, off);
1208 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
1209# endif
1210# endif
1211 pReNative->Core.offPc = 0;
1212#endif
1213
1214 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1215
1216 /* Free but don't flush the PC register. */
1217 iemNativeRegFreeTmp(pReNative, idxPcReg);
1218 }
1219 return off;
1220}
1221
1222
1223#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1224 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1225 (a_enmEffOpSize), pCallEntry->idxInstr); \
1226 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1227
1228#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1229 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1230 (a_enmEffOpSize), pCallEntry->idxInstr); \
1231 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1232 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1233
1234#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
1235 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1236 IEMMODE_16BIT, pCallEntry->idxInstr); \
1237 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1238
1239#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1240 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1241 IEMMODE_16BIT, pCallEntry->idxInstr); \
1242 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1243 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1244
1245#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
1246 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1247 IEMMODE_32BIT, pCallEntry->idxInstr); \
1248 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1249
1250#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1251 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1252 IEMMODE_32BIT, pCallEntry->idxInstr); \
1253 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1254 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1255
1256
1257#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1258 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1259 (a_enmEffOpSize), pCallEntry->idxInstr); \
1260 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1261
1262#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1263 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1264 (a_enmEffOpSize), pCallEntry->idxInstr); \
1265 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1266 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1267
1268#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT(a_i16, a_cbInstr, a_rcNormal) \
1269 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1270 IEMMODE_16BIT, pCallEntry->idxInstr); \
1271 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1272
1273#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1274 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1275 IEMMODE_16BIT, pCallEntry->idxInstr); \
1276 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1277 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1278
1279#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT(a_i32, a_cbInstr, a_rcNormal) \
1280 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1281 IEMMODE_32BIT, pCallEntry->idxInstr); \
1282 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1283
1284#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1285 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1286 IEMMODE_32BIT, pCallEntry->idxInstr); \
1287 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1288 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1289
1290/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
1291 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
1292 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
1293template<bool const a_fFlat>
1294DECL_INLINE_THROW(uint32_t)
1295iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1296 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1297{
1298 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
1299#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1300 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1301#endif
1302
1303 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1304 if (!a_fFlat || enmEffOpSize == IEMMODE_16BIT)
1305 {
1306 off = iemNativeRegFlushPendingWrites(pReNative, off);
1307#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1308 Assert(pReNative->Core.offPc == 0);
1309#endif
1310 }
1311
1312 /* Allocate a temporary PC register. */
1313 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1314
1315 /* Perform the addition. */
1316 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr + (int32_t)pReNative->Core.offPc);
1321
1322 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
1323 if (enmEffOpSize == IEMMODE_16BIT)
1324 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1325
1326 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
1327 if (!a_fFlat)
1328 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1329
1330 /* Commit it. */
1331#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1332 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, enmEffOpSize == IEMMODE_32BIT ? 32 : 16);
1333 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1334#endif
1335
1336 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1337#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1338 pReNative->Core.offPc = 0;
1339#endif
1340
1341 /* Free but don't flush the PC register. */
1342 iemNativeRegFreeTmp(pReNative, idxPcReg);
1343
1344 return off;
1345}
1346
1347
1348#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
1349 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1350 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1351
1352#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
1353 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1354 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1355 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1356
1357#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
1358 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1359 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1360
1361#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1362 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1363 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1364 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1365
1366#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
1367 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1368 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1369
1370#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1371 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1372 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1373 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1374
1375/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
1376DECL_INLINE_THROW(uint32_t)
1377iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1378 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
1379{
1380 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1381 off = iemNativeRegFlushPendingWrites(pReNative, off);
1382
1383#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1384 Assert(pReNative->Core.offPc == 0);
1385 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1386#endif
1387
1388 /* Allocate a temporary PC register. */
1389 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1390
1391 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
1392 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
1393 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1394 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1395#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1396 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, 16);
1397 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1398#endif
1399 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1400
1401 /* Free but don't flush the PC register. */
1402 iemNativeRegFreeTmp(pReNative, idxPcReg);
1403
1404 return off;
1405}
1406
1407
1408
1409/*********************************************************************************************************************************
1410* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
1411*********************************************************************************************************************************/
1412
1413/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
1414#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
1415 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1416
1417/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
1418#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
1419 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1420
1421/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
1422#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
1423 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1424
1425/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
1426 * clears flags. */
1427#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
1428 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
1429 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1430
1431/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
1432 * clears flags. */
1433#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
1434 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
1435 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1436
1437/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
1438 * clears flags. */
1439#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
1440 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
1441 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1442
1443#undef IEM_MC_SET_RIP_U16_AND_FINISH
1444
1445
1446/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
1447#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
1448 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1449
1450/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
1451#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
1452 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1453
1454/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
1455 * clears flags. */
1456#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
1457 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
1458 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1459
1460/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
1461 * and clears flags. */
1462#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
1463 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
1464 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1465
1466#undef IEM_MC_SET_RIP_U32_AND_FINISH
1467
1468
1469/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
1470#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
1471 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1472
1473/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
1474 * and clears flags. */
1475#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
1476 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
1477 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1478
1479#undef IEM_MC_SET_RIP_U64_AND_FINISH
1480
1481
1482/** Same as iemRegRipJumpU16AndFinishNoFlags,
1483 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
1484DECL_INLINE_THROW(uint32_t)
1485iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
1486 uint8_t idxInstr, uint8_t cbVar)
1487{
1488 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1489 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1490
1491 /* If we can't rule out a #GP(0) below, flush all dirty registers except for
1492 PC, which will be handled specially by the two workers below if they raise a #GP. */
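 /* A #GP(0) is possible when a 64-bit target needs a canonical address check
 (variable wider than 32 bits) or a non-flat 32-bit/16-bit target needs a
 CS limit check. */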
1493 bool const fMayRaiseGp0 = (f64Bit && cbVar > sizeof(uint32_t)) || (!f64Bit && !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1494 uint8_t const idxOldPcReg = fMayRaiseGp0
1495 ? iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc)
1496 : UINT8_MAX;
1497 if (fMayRaiseGp0)
1498 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1499
1500 /* Get a register with the new PC loaded from idxVarPc.
1501 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1502 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
1503
1504 /* Check that the target is within CS.LIM / is canonical (may #GP(0) + exit TB). */
1505 if (fMayRaiseGp0)
1506 {
1507 if (f64Bit)
1508 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1509 else
1510 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1511 }
1512
1513 /* Store the result. */
1514 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1515
1516#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1517 pReNative->Core.offPc = 0;
1518 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1519# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1520 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1521 pReNative->Core.fDebugPcInitialized = true;
1522 Log4(("uPcUpdatingDebug=rip off=%#x\n", off));
1523# endif
1524#endif
1525
1526 if (idxOldPcReg != UINT8_MAX)
1527 iemNativeRegFreeTmp(pReNative, idxOldPcReg);
1528 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1529 /** @todo implicitly free the variable? */
1530
1531 return off;
1532}
1533
1534
1535
1536/*********************************************************************************************************************************
1537* Emitters for changing PC/RIP/EIP/IP with an indirect call (IEM_MC_IND_CALL_UXX_AND_FINISH) (requires stack emitters). *
1538*********************************************************************************************************************************/
1539
1540/** @todo These helpers naturally belong to the stack push API, but we already need them up here (we could of course move
1541 * them below the stack emitters, but then they wouldn't be close to the rest of the PC/RIP handling...). */
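/** Emits code for the 16-bit SP flavour of a stack push: subtracts cbMem from
 * the low 16 bits of idxRegRsp (leaving bits 63:16 alone) and loads the new
 * 16-bit stack pointer, zero extended, into idxRegEffSp. */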
1542DECL_FORCE_INLINE_THROW(uint32_t)
1543iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1544{
1545 /* Use16BitSp: */
1546#ifdef RT_ARCH_AMD64
1547 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1548 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1549#else
1550 /* sub regeff, regrsp, #cbMem */
1551 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
1552 /* and regeff, regeff, #0xffff */
1553 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1554 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
1555 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp into idxRegRsp bits 15:0, keeping the other RSP bits as is. */
1556 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
1557#endif
1558 return off;
1559}
1560
1561
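/** Emits code for the 32-bit ESP flavour of a stack push: performs a 32-bit
 * subtraction of cbMem on idxRegRsp and copies the result into idxRegEffSp. */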
1562DECL_FORCE_INLINE(uint32_t)
1563iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1564{
1565 /* Use32BitSp: */
1566 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
1567 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1568 return off;
1569}
1570
1571
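/** Emits code to push the RIP value in idxRegPc onto the guest stack, doing an
 * inline data TLB lookup with a TlbMiss fallback that calls pfnFunction, and
 * updating RSP/SP accordingly.
 *
 * The cBitsVarAndFlat parameter is packed with RT_MAKE_U32_FROM_U8 (see the
 * VBOX_STRICT asserts below): the 1st byte is the width of the pushed value in
 * bits, the 2nd byte is the flat-mode bit count (0 if not flat, otherwise 32
 * or 64), and a non-zero 3rd byte marks a segment register push. E.g. the
 * 64-bit indirect call emitter below uses:
 *     off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
 *                                     (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
 */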
1572DECL_INLINE_THROW(uint32_t)
1573iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
1574 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
1575{
1576 /*
1577 * Assert sanity.
1578 */
1579#ifdef VBOX_STRICT
1580 if (RT_BYTE2(cBitsVarAndFlat) != 0)
1581 {
1582 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
1583 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
1584 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
1585 Assert( pfnFunction
1586 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1587 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
1588 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1589 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
1590 : UINT64_C(0xc000b000a0009000) ));
1591 }
1592 else
1593 Assert( pfnFunction
1594 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
1595 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
1596 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
1597 : UINT64_C(0xc000b000a0009000) ));
1598#endif
1599
1600#ifdef VBOX_STRICT
1601 /*
1602 * Check that the fExec flags we've got make sense.
1603 */
1604 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1605#endif
1606
1607 /*
1608 * To keep things simple we have to commit any pending writes first as we
1609 * may end up making calls.
1610 */
1611 /** @todo we could postpone this till we make the call and reload the
1612 * registers after returning from the call. Not sure if that's sensible or
1613 * not, though. */
1614 off = iemNativeRegFlushPendingWrites(pReNative, off);
1615
1616 /*
1617 * First we calculate the new RSP and the effective stack pointer value.
1618 * For 64-bit mode and flat 32-bit these two are the same.
1619 * (Code structure is very similar to that of PUSH)
1620 */
1621 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
1622 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
1623 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
1624 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
1625 ? cbMem : sizeof(uint16_t);
1626 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
1627 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1628 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1629 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1630 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1631 if (cBitsFlat != 0)
1632 {
1633 Assert(idxRegEffSp == idxRegRsp);
1634 Assert(cBitsFlat == 32 || cBitsFlat == 64);
1635 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1636 if (cBitsFlat == 64)
1637 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1638 else
1639 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1640 }
1641 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1642 {
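 /* Not flat: test SS.ATTR.D to choose between 16-bit and 32-bit stack pointer
 arithmetic; the flavour matching the current CPU mode is emitted inline here
 and the other one is reached via the offFixupJumpToUseOtherBitSp jump below. */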
1643 Assert(idxRegEffSp != idxRegRsp);
1644 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1645 kIemNativeGstRegUse_ReadOnly);
1646#ifdef RT_ARCH_AMD64
1647 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1648#else
1649 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1650#endif
1651 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1652 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1653 offFixupJumpToUseOtherBitSp = off;
1654 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1655 {
1656 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1657 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1658 }
1659 else
1660 {
1661 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1662 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1663 }
1664 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1665 }
1666 /* SpUpdateEnd: */
1667 uint32_t const offLabelSpUpdateEnd = off;
1668
1669 /*
1670 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1671 * we're skipping lookup).
1672 */
1673 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1674 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
1675 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1676 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1677 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1678 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1679 : UINT32_MAX;
1680 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1681
1682
1683 if (!TlbState.fSkip)
1684 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1685 else
1686 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1687
1688 /*
1689 * Use16BitSp / Use32BitSp - the SP flavour not emitted inline above:
1690 */
1691 if (cBitsFlat == 0)
1692 {
1693#ifdef RT_ARCH_AMD64
1694 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1695#else
1696 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1697#endif
1698 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1699 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1700 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1701 else
1702 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1703 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1704 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1705 }
1706
1707 /*
1708 * TlbMiss:
1709 *
1710 * Call helper to do the pushing.
1711 */
1712 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1713
1714#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1715 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1716#else
1717 RT_NOREF(idxInstr);
1718#endif
1719
1720 /* Save variables in volatile registers. */
1721 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1722 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1723 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1724 | (RT_BIT_32(idxRegPc));
1725 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1726
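 /* Load the helper call arguments (ARG1 = effective stack pointer, ARG2 = value
 to store), taking care not to clobber idxRegPc or idxRegEffSp if either of
 them already lives in one of the argument registers. */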
1727 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1728 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1729 {
1730 /* Swap them using ARG0 as temp register: */
1731 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1732 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1733 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1734 }
1735 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1736 {
1737 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1738 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1739
1740 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1741 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1742 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1743 }
1744 else
1745 {
1746 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1747 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1748
1749 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1750 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1751 }
1752
1753 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1754 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1755
1756 /* Done setting up parameters, make the call. */
1757 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1758
1759 /* Restore variables and guest shadow registers to volatile registers. */
1760 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1761 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1762
1763#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1764 if (!TlbState.fSkip)
1765 {
1766 /* end of TlbMiss - Jump to the done label. */
1767 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1768 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1769
1770 /*
1771 * TlbLookup:
1772 */
1773 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
1774 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1775
1776 /*
1777 * Emit code to do the actual storing / fetching.
1778 */
1779 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1780# ifdef IEM_WITH_TLB_STATISTICS
1781 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1782 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1783# endif
1784 switch (cbMemAccess)
1785 {
1786 case 2:
1787 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1788 break;
1789 case 4:
1790 if (!fIsIntelSeg)
1791 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1792 else
1793 {
1794 /* Intel real mode segment push: the 10890XE adds the 2nd half of EFLAGS to a
1795 PUSH FS in real mode, so we have to try to emulate that here.
1796 We borrow the now unused idxReg1 from the TLB lookup code here. */
1797 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
1798 kIemNativeGstReg_EFlags);
1799 if (idxRegEfl != UINT8_MAX)
1800 {
1801#ifdef RT_ARCH_AMD64
1802 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
1803 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1804 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1805#else
1806 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
1807 off, TlbState.idxReg1, idxRegEfl,
1808 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1809#endif
1810 iemNativeRegFreeTmp(pReNative, idxRegEfl);
1811 }
1812 else
1813 {
1814 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
1815 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
1816 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1817 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1818 }
1819 /* ASSUMES the upper half of idxRegPc is ZERO. */
1820 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegPc);
1821 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
1822 }
1823 break;
1824 case 8:
1825 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1826 break;
1827 default:
1828 AssertFailed();
1829 }
1830
1831 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1832 TlbState.freeRegsAndReleaseVars(pReNative);
1833
1834 /*
1835 * TlbDone:
1836 *
1837 * Commit the new RSP value.
1838 */
1839 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1840 }
1841#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1842
1843#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1844 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
1845#endif
1846 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1847 if (idxRegEffSp != idxRegRsp)
1848 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1849
1850 return off;
1851}
1852
1853
1854/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1855#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1856 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1857
1858/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1859 * clears flags. */
1860#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1861 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1862 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1863
1864/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1865#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1866 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1867
1868/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1869 * clears flags. */
1870#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1871 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1872 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1873
1874#undef IEM_MC_IND_CALL_U16_AND_FINISH
1875
1876
1877/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1878#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1879 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1880
1881/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1882 * clears flags. */
1883#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1884 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1885 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1886
1887#undef IEM_MC_IND_CALL_U32_AND_FINISH
1888
1889
1890/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1891 * an extra parameter, for use in 64-bit code. */
1892#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1893 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1894
1895
1896/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1897 * an extra parameter, for use in 64-bit code and we need to check and clear
1898 * flags. */
1899#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1900 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1901 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1902
1903#undef IEM_MC_IND_CALL_U64_AND_FINISH
1904
1905/** Common worker for the IEM_MC_IND_CALL_U16/U32/U64_AND_FINISH_THREADED_PCxx
1906 * macros above (indirect call via a variable, no flags check). */
1907DECL_INLINE_THROW(uint32_t)
1908iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1909 uint8_t idxInstr, uint8_t cbVar)
1910{
1911 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1912 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1913
1914 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1915 off = iemNativeRegFlushPendingWrites(pReNative, off);
1916
1917#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1918 Assert(pReNative->Core.offPc == 0);
1919 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1920#endif
1921
1922 /* Get a register with the new PC loaded from idxVarPc.
1923 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1924 uint8_t const idxNewPcReg = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1925
1926 /* Check limit (may #GP(0) + exit TB). */
1927 if (!f64Bit)
1928/** @todo we can skip this test in FLAT 32-bit mode. */
1929 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1930 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1931 else if (cbVar > sizeof(uint32_t))
1932 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1933
1934#if 1
1935 /* Allocate a temporary PC register, we don't want it shadowed. */
1936 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1937 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1938#else
1939 /* Allocate a temporary PC register. */
1940 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1941 true /*fNoVolatileRegs*/);
1942#endif
1943
1944 /* Perform the addition and push the variable to the guest stack. */
1945 /** @todo Flat variants for PC32 variants. */
1946 switch (cbVar)
1947 {
1948 case sizeof(uint16_t):
1949 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1950 /* Truncate the result to 16-bit IP. */
1951 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1952 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1953 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1954 break;
1955 case sizeof(uint32_t):
1956 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1957 /** @todo In FLAT mode we can use the flat variant. */
1958 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1959 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1960 break;
1961 case sizeof(uint64_t):
1962 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1963 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1964 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1965 break;
1966 default:
1967 AssertFailed();
1968 }
1969
1970 /* RSP got changed, so flush the pending writes again. */
1971 off = iemNativeRegFlushPendingWrites(pReNative, off);
1972
1973 /* Store the result. */
1974 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1975#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1976 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1977 pReNative->Core.fDebugPcInitialized = true;
1978 Log4(("uPcUpdatingDebug=rip/indirect-call off=%#x\n", off));
1979#endif
1980
1981#if 1
1982 /* Need to transfer the shadow information to the new RIP register. */
1983 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxNewPcReg, kIemNativeGstReg_Pc, off);
1984#else
1985 /* Sync the new PC. */
1986 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxNewPcReg);
1987#endif
1988 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1989 iemNativeRegFreeTmp(pReNative, idxPcReg);
1990 /** @todo implicitly free the variable? */
1991
1992 return off;
1993}
1994
1995
1996/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1997 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1998#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1999 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
2000
2001/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2002 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
2003 * flags. */
2004#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
2005 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
2006 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2007
2008/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2009 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
2010#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
2011 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
2012
2013/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2014 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
2015 * flags. */
2016#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
2017 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
2018 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2019
2020/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2021 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
2022#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
2023 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
2024
2025/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2026 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
2027 * flags. */
2028#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
2029 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
2030 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2031
2032#undef IEM_MC_REL_CALL_S16_AND_FINISH
2033
2034/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2035 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2036DECL_INLINE_THROW(uint32_t)
2037iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
2038 uint8_t idxInstr)
2039{
2040 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2041 off = iemNativeRegFlushPendingWrites(pReNative, off);
2042
2043#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2044 Assert(pReNative->Core.offPc == 0);
2045 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2046#endif
2047
2048 /* Allocate a temporary PC register. */
2049 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2050 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
2051 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
2052
2053 /* Calculate the new RIP. */
2054 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2055 /* Truncate the result to 16-bit IP. */
2056 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
2057 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2058 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2059
2060 /* Truncate the result to 16-bit IP. */
2061 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
2062
2063 /* Check limit (may #GP(0) + exit TB). */
2064 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2065
2066 /* Perform the addition and push the variable to the guest stack. */
2067 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
2068 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
2069
2070 /* RSP got changed, so flush again. */
2071 off = iemNativeRegFlushPendingWrites(pReNative, off);
2072
2073 /* Store the result. */
2074 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2075#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2076 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2077 pReNative->Core.fDebugPcInitialized = true;
2078 Log4(("uPcUpdatingDebug=rip/rel-call-16 off=%#x offDisp=%d\n", off, offDisp));
2079#endif
2080
2081 /* Need to transfer the shadow information to the new RIP register. */
2082 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2083 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2084 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2085
2086 return off;
2087}
2088
2089
2090/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2091 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
2092#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
2093 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
2094
2095/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2096 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
2097 * flags. */
2098#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
2099 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
2100 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2101
2102#undef IEM_MC_REL_CALL_S32_AND_FINISH
2103
2104/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2105 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2106DECL_INLINE_THROW(uint32_t)
2107iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
2108 uint8_t idxInstr)
2109{
2110 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2111 off = iemNativeRegFlushPendingWrites(pReNative, off);
2112
2113#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2114 Assert(pReNative->Core.offPc == 0);
2115 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2116#endif
2117
2118 /* Allocate a temporary PC register. */
2119 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2120 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2121 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
2122
2123 /* Update the EIP to get the return address. */
2124 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2125
2126 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2127 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2128 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2129 /** @todo we can skip this test in FLAT 32-bit mode. */
2130 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2131
2132 /* Push the return address onto the guest stack. */
2133 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
2134 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
2135 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
2136
2137 /* RSP got changed, so flush the pending writes again. */
2138 off = iemNativeRegFlushPendingWrites(pReNative, off);
2139
2140 /* Store the result. */
2141 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2142#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2143 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2144 pReNative->Core.fDebugPcInitialized = true;
2145 Log4(("uPcUpdatingDebug=eip/rel-call-32 off=%#x offDisp=%d\n", off, offDisp));
2146#endif
2147
2148 /* Need to transfer the shadow information to the new RIP register. */
2149 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2150 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2151 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2152
2153 return off;
2154}
2155
2156
2157/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2158 * an extra parameter, for use in 64-bit code. */
2159#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
2160 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
2161
2162/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2163 * an extra parameter, for use in 64-bit code and we need to check and clear
2164 * flags. */
2165#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
2166 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
2167 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2168
2169#undef IEM_MC_REL_CALL_S64_AND_FINISH
2170
2171/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2172 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2173DECL_INLINE_THROW(uint32_t)
2174iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
2175 uint8_t idxInstr)
2176{
2177 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2178 off = iemNativeRegFlushPendingWrites(pReNative, off);
2179
2180#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2181 Assert(pReNative->Core.offPc == 0);
2182 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2183#endif
2184
2185 /* Allocate a temporary PC register. */
2186 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2187 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2188 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
2189
2190 /* Update the RIP to get the return address. */
2191 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
2192
2193 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2194 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
2195 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
2196 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2197
2198 /* Push the return address onto the guest stack. */
2199 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
2200 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
2201
2202 /* RSP got changed, so flush the pending writes again. */
2203 off = iemNativeRegFlushPendingWrites(pReNative, off);
2204
2205 /* Store the result. */
2206 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2207#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2208 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2209 pReNative->Core.fDebugPcInitialized = true;
2210 Log4(("uPcUpdatingDebug=rip/rel-call-64 off=%#x offDisp=%ld\n", off, offDisp));
2211#endif
2212
2213 /* Need to transfer the shadow information to the new RIP register. */
2214 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2215 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2216 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2217
2218 return off;
2219}
2220
2221
2222/*********************************************************************************************************************************
2223* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters). *
2224*********************************************************************************************************************************/
2225
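/** Emits code for the 16-bit SP flavour of the RETN stack pop: loads the
 * current 16-bit stack pointer into idxRegEffSp and advances the low 16 bits
 * of idxRegRsp by cbMem + cbPopAdd, leaving bits 63:16 alone.  The ARM64
 * variant uses idxRegTmp as scratch. */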
2226DECL_FORCE_INLINE_THROW(uint32_t)
2227iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2228 uint16_t cbPopAdd, uint8_t idxRegTmp)
2229{
2230 /* Use16BitSp: */
2231#ifdef RT_ARCH_AMD64
2232 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2233 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
2234 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
2235 RT_NOREF(idxRegTmp);
2236
2237#elif defined(RT_ARCH_ARM64)
2238 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
2239 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
2240 /* add tmp, regrsp, #cbMem */
2241 uint16_t const cbCombined = cbMem + cbPopAdd;
2242 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
2243 if (cbCombined >= RT_BIT_32(12))
2244 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
2245 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2246 /* and tmp, tmp, #0xffff */
2247 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2248 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
2249 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
2250 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
2251
2252#else
2253# error "Port me"
2254#endif
2255 return off;
2256}
2257
2258
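/** Emits code for the 32-bit ESP flavour of the RETN stack pop: copies the
 * current 32-bit stack pointer into idxRegEffSp and advances idxRegRsp by
 * cbMem + cbPopAdd using 32-bit arithmetic. */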
2259DECL_FORCE_INLINE_THROW(uint32_t)
2260iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2261 uint16_t cbPopAdd)
2262{
2263 /* Use32BitSp: */
2264 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2265 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
2266 return off;
2267}
2268
2269
2270/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
2271#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr) \
2272 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, IEMMODE_16BIT, pCallEntry->idxInstr)
2273
2274/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
2275#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2276 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
2277
2278/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
2279#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2280 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), true /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
2281
2282/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
2283 * clears flags. */
2284#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16Pop, a_cbInstr) \
2285 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr); \
2286 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2287
2288/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
2289 * clears flags. */
2290#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2291 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
2292 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2293
2294/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
2295 * clears flags. */
2296#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2297 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
2298 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2299
2300/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
2301DECL_INLINE_THROW(uint32_t)
2302iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPop, bool f64Bit,
2303 IEMMODE enmEffOpSize, uint8_t idxInstr)
2304{
2305 RT_NOREF(cbInstr);
2306
2307#ifdef VBOX_STRICT
2308 /*
2309 * Check that the fExec flags we've got make sense.
2310 */
2311 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
2312#endif
2313
2314 /*
2315 * To keep things simple we have to commit any pending writes first as we
2316 * may end up making calls.
2317 */
2318 off = iemNativeRegFlushPendingWrites(pReNative, off);
2319
2320 /*
2321 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
2322 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
2323 * directly as the effective stack pointer.
2324 * (Code structure is very similar to that of PUSH)
2325 *
2326 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
2327 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
2328 * aren't commonly used (or useful) and thus not in need of optimizing.
2329 *
2330 * Note! For non-flat modes the guest RSP is allocated for calculation rather than for
2331 * update, because the shadowed register would otherwise remain modified even if the
2332 * return address throws a \#GP(0) for being outside the CS limit, leaving a wrong stack
2333 * pointer value in the guest (see the near return testcase in bs3-cpu-basic-2). If no
2334 * exception is thrown, the shadowing is transferred to the new register returned by iemNativeRegAllocTmpForGuestReg() at the end.
2335 */
2336 uint8_t const cbMem = enmEffOpSize == IEMMODE_64BIT
2337 ? sizeof(uint64_t)
2338 : enmEffOpSize == IEMMODE_32BIT
2339 ? sizeof(uint32_t)
2340 : sizeof(uint16_t);
2341 bool const fFlat = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) && enmEffOpSize != IEMMODE_16BIT; /* see note */
2342 uintptr_t const pfnFunction = fFlat
2343 ? enmEffOpSize == IEMMODE_64BIT
2344 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
2345 : (uintptr_t)iemNativeHlpStackFlatFetchU32
2346 : enmEffOpSize == IEMMODE_32BIT
2347 ? (uintptr_t)iemNativeHlpStackFetchU32
2348 : (uintptr_t)iemNativeHlpStackFetchU16;
2349 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
2350 fFlat ? kIemNativeGstRegUse_ForUpdate
2351 : kIemNativeGstRegUse_Calculation,
2352 true /*fNoVolatileRegs*/);
2353 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
2354 /** @todo can do a better job picking the register here. For cbMem >= 4 this
2355 * will be the resulting register value. */
2356 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
2357
2358 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
2359 if (fFlat)
2360 Assert(idxRegEffSp == idxRegRsp);
2361 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
2362 {
2363 Assert(idxRegEffSp != idxRegRsp);
2364 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
2365 kIemNativeGstRegUse_ReadOnly);
2366#ifdef RT_ARCH_AMD64
2367 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2368#else
2369 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2370#endif
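        /* Test SS.D to see whether SP or ESP is used for the stack: the inline code
           below handles the width matching the effective operand size, while the
           conditional jump goes to the other-width update emitted further down. */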
2371 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
2372 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
2373 offFixupJumpToUseOtherBitSp = off;
2374 if (enmEffOpSize == IEMMODE_32BIT)
2375 {
2376 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
2377 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
2378 }
2379 else
2380 {
2381 Assert(enmEffOpSize == IEMMODE_16BIT);
2382 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
2383 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
2384 idxRegMemResult);
2385 }
2386 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2387 }
2388 /* SpUpdateEnd: */
2389 uint32_t const offLabelSpUpdateEnd = off;
2390
2391 /*
2392 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
2393 * we're skipping lookup).
2394 */
2395 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
2396 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
2397 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
2398 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
2399 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
2400 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
2401 : UINT32_MAX;
2402
2403 if (!TlbState.fSkip)
2404 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
2405 else
2406 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
2407
2408 /*
2409 * Use16BitSp:
2410 */
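    /* This is where the conditional jump above (offFixupJumpToUseOtherBitSp) lands
       when SS.D selects the other stack pointer width than the one handled inline
       before SpUpdateEnd. */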
2411 if (!fFlat)
2412 {
2413#ifdef RT_ARCH_AMD64
2414 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2415#else
2416 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2417#endif
2418 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
2419 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
2420 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
2421 idxRegMemResult);
2422 else
2423 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
2424 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
2425 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2426 }
2427
2428 /*
2429 * TlbMiss:
2430 *
2431 * Call helper to do the popping (fetching the return address).
2432 */
2433 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
2434
2435#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2436 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2437#else
2438 RT_NOREF(idxInstr);
2439#endif
2440
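    /* Exclude the TLB state registers, the result register and the effective-SP copy
       from the volatile-register save/restore around the helper call: the result
       register must not be clobbered by the restore, and the SP copy is only needed
       as the call argument. */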
2441 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
2442 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
2443 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
2444 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
2445
2446
2447 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
2448 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
2449 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
2450
2451 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
2452 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2453
2454 /* Done setting up parameters, make the call. */
2455 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
2456
2457 /* Move the return register content to idxRegMemResult. */
2458 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
2459 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
2460
2461 /* Restore variables and guest shadow registers to volatile registers. */
2462 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
2463 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
2464
2465#ifdef IEMNATIVE_WITH_TLB_LOOKUP
2466 if (!TlbState.fSkip)
2467 {
2468 /* end of TlbMiss - Jump to the done label. */
2469 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
2470 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
2471
2472 /*
2473 * TlbLookup:
2474 */
2475 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
2476 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
2477
2478 /*
2479 * Emit code to load the value: dereference the address in idxRegMemResult and store the loaded value back into idxRegMemResult.
2480 */
2481 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2482# ifdef IEM_WITH_TLB_STATISTICS
2483 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
2484 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
2485# endif
2486 switch (cbMem)
2487 {
2488 case 2:
2489 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2490 break;
2491 case 4:
2492 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2493 break;
2494 case 8:
2495 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2496 break;
2497 default:
2498 AssertFailed();
2499 }
2500
2501 TlbState.freeRegsAndReleaseVars(pReNative);
2502
2503 /*
2504 * TlbDone:
2505 *
2506 * Set the new RSP value (FLAT accesses need to calculate it first) and
2507 * commit the popped register value.
2508 */
2509 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
2510 }
2511#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
2512
2513 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
2514 if (!f64Bit)
2515/** @todo we can skip this test in FLAT 32-bit mode. */
2516 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2517 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2518 else if (enmEffOpSize == IEMMODE_64BIT)
2519 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2520
2521 /* Complete RSP calculation for FLAT mode. */
2522 if (idxRegEffSp == idxRegRsp)
2523 {
2524 if (enmEffOpSize == IEMMODE_64BIT)
2525 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPop);
2526 else
2527 {
2528 Assert(enmEffOpSize == IEMMODE_32BIT);
2529 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPop);
2530 }
2531 }
2532
2533 /* Commit the result and clear any current guest shadows for RIP. */
2534 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
2535 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2536 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
2537#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2538 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2539 pReNative->Core.fDebugPcInitialized = true;
2540 Log4(("uPcUpdatingDebug=rip/ret off=%#x\n", off));
2541#endif
2542
2543 /* Need to transfer the shadowing information to the host register containing the updated value now. */
2544 if (!fFlat)
2545 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
2546
2547 iemNativeRegFreeTmp(pReNative, idxRegRsp);
2548 if (idxRegEffSp != idxRegRsp)
2549 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
2550 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
2551 return off;
2552}
2553
2554
2555/*********************************************************************************************************************************
2556* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
2557*********************************************************************************************************************************/
2558
2559#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
2560 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2561
2562/**
2563 * Emits code to check if a \#NM exception should be raised.
2564 *
2565 * @returns New code buffer offset, UINT32_MAX on failure.
2566 * @param pReNative The native recompile state.
2567 * @param off The code buffer offset.
2568 * @param idxInstr The current instruction.
2569 */
2570DECL_INLINE_THROW(uint32_t)
2571iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2572{
2573#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2574 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
2575
2576 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
2577 {
2578#endif
2579 /*
2580 * Make sure we don't have any outstanding guest register writes as we may
2581 * raise an #NM and all guest registers must be up to date in CPUMCTX.
2582 */
2583 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2584 off = iemNativeRegFlushPendingWrites(pReNative, off);
2585
2586#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2587 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2588#else
2589 RT_NOREF(idxInstr);
2590#endif
2591
2592 /* Allocate a temporary CR0 register. */
2593 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2594 kIemNativeGstRegUse_ReadOnly);
2595
2596 /*
2597 * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
2598 * return raisexcpt();
2599 */
2600 /* Test and jump. */
2601 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS,
2602 kIemNativeLabelType_RaiseNm);
2603
2604 /* Free but don't flush the CR0 register. */
2605 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2606
2607#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2608 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
2609 }
2610 else
2611 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
2612#endif
2613
2614 return off;
2615}
2616
2617
2618#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
2619 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2620
2621/**
2622 * Emits code to check if a \#NM exception should be raised.
2623 *
2624 * @returns New code buffer offset, UINT32_MAX on failure.
2625 * @param pReNative The native recompile state.
2626 * @param off The code buffer offset.
2627 * @param idxInstr The current instruction.
2628 */
2629DECL_INLINE_THROW(uint32_t)
2630iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2631{
2632#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2633 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
2634
2635 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
2636 {
2637#endif
2638 /*
2639 * Make sure we don't have any outstanding guest register writes as we may
2640 * raise an #NM and all guest registers must be up to date in CPUMCTX.
2641 */
2642 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2643 off = iemNativeRegFlushPendingWrites(pReNative, off);
2644
2645#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2646 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2647#else
2648 RT_NOREF(idxInstr);
2649#endif
2650
2651 /* Allocate a temporary CR0 register. */
2652 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2653 kIemNativeGstRegUse_Calculation);
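    /* Note: allocated for calculation because the AND below modifies the register
       copy; the change must not be committed back to the guest CR0 value. */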
2654
2655 /*
2656 * if ((cr0 & (X86_CR0_MP | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))
2657 * return raisexcpt();
2658 */
2659 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2660 /* Test and jump. */
2661 off = iemNativeEmitTestIfGpr32EqualsImmAndTbExit(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS,
2662 kIemNativeLabelType_RaiseNm);
2663
2664 /* Free the CR0 register. */
2665 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2666
2667#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2668 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2669 }
2670 else
2671 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2672#endif
2673
2674 return off;
2675}
2676
2677
2678#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2679 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2680
2681/**
2682 * Emits code to check if a \#MF exception should be raised.
2683 *
2684 * @returns New code buffer offset, UINT32_MAX on failure.
2685 * @param pReNative The native recompile state.
2686 * @param off The code buffer offset.
2687 * @param idxInstr The current instruction.
2688 */
2689DECL_INLINE_THROW(uint32_t)
2690iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2691{
2692 /*
2693 * Make sure we don't have any outstanding guest register writes as we may
2694 * raise an #MF and all guest registers must be up to date in CPUMCTX.
2695 */
2696 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2697 off = iemNativeRegFlushPendingWrites(pReNative, off);
2698
2699#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2700 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2701#else
2702 RT_NOREF(idxInstr);
2703#endif
2704
2705 /* Allocate a temporary FSW register. */
2706 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
2707 kIemNativeGstRegUse_ReadOnly);
2708
2709 /*
2710 * if ((FSW & X86_FSW_ES) != 0)
2711 * return raisexcpt();
2712 */
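    /* X86_FSW_ES is the exception summary bit; it is set while an unmasked x87
       exception is pending, in which case we must raise #MF here. */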
2713 /* Test and jump. */
2714 off = iemNativeEmitTestBitInGprAndTbExitIfSet(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT, kIemNativeLabelType_RaiseMf);
2715
2716 /* Free but don't flush the FSW register. */
2717 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2718
2719 return off;
2720}
2721
2722
2723#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2724 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2725
2726/**
2727 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2728 *
2729 * @returns New code buffer offset, UINT32_MAX on failure.
2730 * @param pReNative The native recompile state.
2731 * @param off The code buffer offset.
2732 * @param idxInstr The current instruction.
2733 */
2734DECL_INLINE_THROW(uint32_t)
2735iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2736{
2737#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2738 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2739
2740 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2741 {
2742#endif
2743 /*
2744 * Make sure we don't have any outstanding guest register writes as we may
2745 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2746 */
2747 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2748 off = iemNativeRegFlushPendingWrites(pReNative, off);
2749
2750#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2751 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2752#else
2753 RT_NOREF(idxInstr);
2754#endif
2755
2756 /* Allocate a temporary CR0 and CR4 register. */
2757 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2758 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2759 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2760
2761 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
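    /* The AssertCompile above ensures the CR0 bits of interest and CR4.OSFXSR occupy
       distinct bit positions, so both checks can be folded into one register test. */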
2762#ifdef RT_ARCH_AMD64
2763 /*
2764 * We do a modified test here:
2765 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
2766 * else { goto RaiseSseRelated; }
2767 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
2768 * all targets except the 386; since the 386 doesn't support SSE
2769 * anyway, this should be a safe assumption.
2770 */
2771 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
2772 //pCodeBuf[off++] = 0xcc;
2773 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2774 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2775 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2776 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2777 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2778 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseSseRelated, kIemNativeInstrCond_ne);
2779
2780#elif defined(RT_ARCH_ARM64)
2781 /*
2782 * We do a modified test here:
2783 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2784 * else { goto RaiseSseRelated; }
2785 */
2786 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
2787 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2788 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2789 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2790 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2791 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2792 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2793 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2794 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2795 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2796 kIemNativeLabelType_RaiseSseRelated);
2797
2798#else
2799# error "Port me!"
2800#endif
2801
2802 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2803 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2804 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2805 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2806
2807#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2808 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2809 }
2810 else
2811 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2812#endif
2813
2814 return off;
2815}
2816
2817
2818#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2819 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2820
2821/**
2822 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2823 *
2824 * @returns New code buffer offset, UINT32_MAX on failure.
2825 * @param pReNative The native recompile state.
2826 * @param off The code buffer offset.
2827 * @param idxInstr The current instruction.
2828 */
2829DECL_INLINE_THROW(uint32_t)
2830iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2831{
2832#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2833 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2834
2835 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2836 {
2837#endif
2838 /*
2839 * Make sure we don't have any outstanding guest register writes as we may
2840 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2841 */
2842 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2843 off = iemNativeRegFlushPendingWrites(pReNative, off);
2844
2845#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2846 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2847#else
2848 RT_NOREF(idxInstr);
2849#endif
2850
2851 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2852 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2853 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2854 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2855 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2856
2857 /*
2858 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2859 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2860 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2861 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2862 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2863 * { likely }
2864 * else { goto RaiseAvxRelated; }
2865 */
2866#ifdef RT_ARCH_AMD64
2867 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2868 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
2869 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2870 ^ 0x1a) ) { likely }
2871 else { goto RaiseAvxRelated; } */
2872 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
2873 //pCodeBuf[off++] = 0xcc;
2874 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2875 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2876 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2877 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2878 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2879 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2880 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2881 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2882 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2883 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2884 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseAvxRelated, kIemNativeInstrCond_ne);
2885
2886#elif defined(RT_ARCH_ARM64)
2887 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
2888 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
2889 else { goto RaiseAvxRelated; } */
2890 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
2891 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2892 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2893 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2894 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2895 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2896 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2897 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2898 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2899 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2900 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2901 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2902 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2903 kIemNativeLabelType_RaiseAvxRelated);
2904
2905#else
2906# error "Port me!"
2907#endif
2908
2909 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2910 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2911 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2912 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2913#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2914 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2915 }
2916 else
2917 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2918#endif
2919
2920 return off;
2921}
2922
2923
2924#define IEM_MC_RAISE_DIVIDE_ERROR() \
2925 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
2926
2927/**
2928 * Emits code to raise a \#DE.
2929 *
2930 * @returns New code buffer offset, UINT32_MAX on failure.
2931 * @param pReNative The native recompile state.
2932 * @param off The code buffer offset.
2933 * @param idxInstr The current instruction.
2934 */
2935DECL_INLINE_THROW(uint32_t)
2936iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2937{
2938 /*
2939 * Make sure we don't have any outstanding guest register writes as we may raise a #DE (all guest registers must be up to date in CPUMCTX).
2940 */
2941 off = iemNativeRegFlushPendingWrites(pReNative, off);
2942
2943#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2944 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2945#else
2946 RT_NOREF(idxInstr);
2947#endif
2948
2949 /* raise \#DE exception unconditionally. */
2950 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseDe);
2951}
2952
2953
2954#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2955 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2956
2957/**
2958 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2959 *
2960 * @returns New code buffer offset, UINT32_MAX on failure.
2961 * @param pReNative The native recompile state.
2962 * @param off The code buffer offset.
2963 * @param idxInstr The current instruction.
2964 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2965 * @param cbAlign The alignment in bytes to check against.
2966 */
2967DECL_INLINE_THROW(uint32_t)
2968iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
2969 uint8_t idxVarEffAddr, uint8_t cbAlign)
2970{
2971 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2972 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2973
2974 /*
2975 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2976 */
2977 off = iemNativeRegFlushPendingWrites(pReNative, off);
2978
2979#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2980 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2981#else
2982 RT_NOREF(idxInstr);
2983#endif
2984
2985 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2986
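    /* A non-zero result of EffAddr & (cbAlign - 1) means the address is misaligned;
       this assumes cbAlign is a power of two, as alignment values are. */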
2987 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxVarReg, cbAlign - 1,
2988 kIemNativeLabelType_RaiseGp0);
2989
2990 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2991 return off;
2992}
2993
2994
2995/*********************************************************************************************************************************
2996* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2997*********************************************************************************************************************************/
2998
2999/**
3000 * Pushes an IEM_MC_IF_XXX onto the condition stack.
3001 *
3002 * @returns Pointer to the condition stack entry; longjmps with
3003 * VERR_IEM_COND_TOO_DEEPLY_NESTED when nested too deeply.
3004 */
3005DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
3006{
3007 uint32_t const idxStack = pReNative->cCondDepth;
3008 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
3009
3010 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
3011 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
3012
3013 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
3014 pEntry->fInElse = false;
3015 pEntry->fIfExitTb = false;
3016 pEntry->fElseExitTb = false;
3017 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
3018 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
3019
3020 return pEntry;
3021}
3022
3023
3024/**
3025 * Start of the if-block, snapshotting the register and variable state.
3026 */
3027DECL_INLINE_THROW(void)
3028iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
3029{
3030 Assert(offIfBlock != UINT32_MAX);
3031 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3032 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3033 Assert(!pEntry->fInElse);
3034
3035 /* Define the start of the IF block if requested or for disassembly purposes. */
3036 if (idxLabelIf != UINT32_MAX)
3037 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
3038#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3039 else
3040 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
3041#else
3042 RT_NOREF(offIfBlock);
3043#endif
3044
3045 /* Copy the initial state so we can restore it in the 'else' block. */
3046 pEntry->InitialState = pReNative->Core;
3047}
3048
3049
3050#define IEM_MC_ELSE() } while (0); \
3051 off = iemNativeEmitElse(pReNative, off); \
3052 do {
3053
3054/** Emits code related to IEM_MC_ELSE. */
3055DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3056{
3057 /* Check sanity and get the conditional stack entry. */
3058 Assert(off != UINT32_MAX);
3059 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3060 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3061 Assert(!pEntry->fInElse);
3062
3063 /* We can skip the dirty register flushing and the jump to the endif if
3064 the branch already jumped to a TB exit. */
3065 if (!pEntry->fIfExitTb)
3066 {
3067#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) && 0
3068 /* Writeback any dirty shadow registers. */
3069 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
3070 * in one of the branches and leave guest registers already dirty before the start of the if
3071 * block alone. */
3072 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3073#endif
3074
3075 /* Jump to the endif. */
3076 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
3077 }
3078# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3079 else
3080 Assert(pReNative->Core.offPc == 0);
3081# endif
3082
3083 /* Define the else label and enter the else part of the condition. */
3084 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3085 pEntry->fInElse = true;
3086
3087 /* Snapshot the core state so we can do a merge at the endif and restore
3088 the snapshot we took at the start of the if-block. */
3089 pEntry->IfFinalState = pReNative->Core;
3090 pReNative->Core = pEntry->InitialState;
3091
3092 return off;
3093}
3094
3095
3096#define IEM_MC_ENDIF() } while (0); \
3097 off = iemNativeEmitEndIf(pReNative, off)
3098
3099/** Emits code related to IEM_MC_ENDIF. */
3100DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3101{
3102 /* Check sanity and get the conditional stack entry. */
3103 Assert(off != UINT32_MAX);
3104 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3105 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3106
3107#if defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) && 0
3108 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3109#endif
3110
3111 /*
3112 * If either of the branches exited the TB, we can take the state from the
3113 * other branch and skip all the merging headache.
3114 */
3115 bool fDefinedLabels = false;
3116 if (pEntry->fElseExitTb || pEntry->fIfExitTb)
3117 {
3118#ifdef VBOX_STRICT
3119 Assert(pReNative->cCondDepth == 1); /* Assuming this only happens in simple conditional structures. */
3120 Assert(pEntry->fElseExitTb != pEntry->fIfExitTb); /* Assuming we don't have any code where both branches exit. */
3121 PCIEMNATIVECORESTATE const pExitCoreState = pEntry->fIfExitTb && pEntry->fInElse
3122 ? &pEntry->IfFinalState : &pReNative->Core;
3123# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3124 Assert(pExitCoreState->bmGstRegShadowDirty == 0);
3125# endif
3126# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3127 Assert(pExitCoreState->offPc == 0);
3128# endif
3129 RT_NOREF(pExitCoreState);
3130#endif
3131
3132 if (!pEntry->fIfExitTb)
3133 {
3134 Assert(pEntry->fInElse);
3135 pReNative->Core = pEntry->IfFinalState;
3136 }
3137 }
3138 else
3139 {
3140 /*
3141 * Now we have to find common ground with the core state at the end of the other
3142 * branch (the if-block's final state, or the initial state if there is no else).
3143 * Use the smallest common denominator and just drop anything that isn't the same in both states.
3144 */
3145 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
3146 * which is why we're doing this at the end of the else-block.
3147 * But we'd need more info about future for that to be worth the effort. */
3148 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
3149#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3150 AssertMsgStmt(pReNative->Core.offPc == pOther->offPc,
3151 ("Core.offPc=%#RX64 pOther->offPc=%#RX64\n", pReNative->Core.offPc, pOther->offPc),
3152 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3153#endif
3154
3155 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
3156 {
3157#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3158 /*
3159 * If the branches differ in which shadow registers are dirty, we flush the
3160 * registers that are only dirty in the current branch here and emit code
3161 * further down to flush those that are only dirty in the other branch.
3162 */
3163 uint64_t const fGstRegDirtyOther = pOther->bmGstRegShadowDirty;
3164 uint64_t const fGstRegDirtyThis = pReNative->Core.bmGstRegShadowDirty;
3165 uint64_t const fGstRegDirtyDiff = fGstRegDirtyOther ^ fGstRegDirtyThis;
3166 uint64_t const fGstRegDirtyHead = fGstRegDirtyThis & fGstRegDirtyDiff;
3167 uint64_t fGstRegDirtyTail = fGstRegDirtyOther & fGstRegDirtyDiff;
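            /* 'Head' = registers dirty only in the current branch (flushed right below);
               'tail' = registers dirty only in the other branch (flushed by the fixup
               code emitted after the shadow reconciliation loop, which may add to it). */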
3168 if (!fGstRegDirtyDiff)
3169 { /* likely */ }
3170 else
3171 {
3172 //uint64_t const fGstRegDirtyHead = pReNative->Core.bmGstRegShadowDirty & fGstRegDirtyDiff;
3173 if (fGstRegDirtyHead)
3174 {
3175 Log12(("iemNativeEmitEndIf: flushing dirty guest registers in current branch: %RX64\n", fGstRegDirtyHead));
3176 off = iemNativeRegFlushDirtyGuest(pReNative, off, fGstRegDirtyHead);
3177 }
3178 }
3179#endif
3180
3181 /*
3182 * Shadowed guest registers.
3183 *
3184 * We drop any shadows where the two states disagree about where
3185 * things are kept. We may end up flushing dirty more registers
3186 * here, if the two branches keeps things in different registers.
3187 */
3188 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
3189 if (fGstRegs)
3190 {
3191 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
3192 do
3193 {
3194 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3195 fGstRegs &= ~RT_BIT_64(idxGstReg);
3196
3197 uint8_t const idxCurHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3198 uint8_t const idxOtherHstReg = pOther->aidxGstRegShadows[idxGstReg];
3199 if ( idxCurHstReg != idxOtherHstReg
3200 || !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg)))
3201 {
3202#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3203 Log12(("iemNativeEmitEndIf: dropping gst %s (%d) from hst %s (other %d/%#RX64)\n",
3204 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3205 idxOtherHstReg, pOther->bmGstRegShadows));
3206#else
3207 Log12(("iemNativeEmitEndIf: dropping %s gst %s (%d) from hst %s (other %d/%#RX64/%s)\n",
3208 pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "_dirty_" : "clean",
3209 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3210 idxOtherHstReg, pOther->bmGstRegShadows,
3211 pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "dirty" : "clean"));
3212 if (pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3213 fGstRegDirtyTail |= RT_BIT_64(idxGstReg);
3214 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3215 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3216#endif
3217 iemNativeRegClearGstRegShadowingOne(pReNative, idxCurHstReg, (IEMNATIVEGSTREG)idxGstReg, off);
3218 }
3219 } while (fGstRegs);
3220 }
3221 else
3222 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
3223
3224#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3225 /*
3226 * Generate jumpy code for flushing dirty registers from the other
3227 * branch that aren't dirty in the current one.
3228 */
3229 if (!fGstRegDirtyTail)
3230 { /* likely */ }
3231 else
3232 {
3233 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEndIfOtherBranchDirty);
3234 Log12(("iemNativeEmitEndIf: Dirty register only in the other branch: %#RX64 - BAD!\n", fGstRegDirtyTail));
3235
3236 /* First the current branch has to jump over the dirty flushing from the other branch. */
3237 uint32_t const offFixup1 = off;
3238 off = iemNativeEmitJmpToFixed(pReNative, off, off + 10);
3239
3240 /* Put the endif and maybe else label here so the other branch ends up here. */
3241 if (!pEntry->fInElse)
3242 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3243 else
3244 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3245 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3246 fDefinedLabels = true;
3247
3248 /* Flush the dirty guest registers from the other branch. */
3249 while (fGstRegDirtyTail)
3250 {
3251 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegDirtyTail) - 1;
3252 fGstRegDirtyTail &= ~RT_BIT_64(idxGstReg);
3253 Log12(("iemNativeEmitEndIf: tail flushing %s (%d) from other branch %d (cur %d/%#RX64)\n",
3254 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, pOther->aidxGstRegShadows[idxGstReg],
3255 pReNative->Core.aidxGstRegShadows[idxGstReg], pReNative->Core.bmGstRegShadows));
3256
3257 off = iemNativeRegFlushPendingWriteEx(pReNative, off, (PIEMNATIVECORESTATE)pOther, (IEMNATIVEGSTREG)idxGstReg);
3258
3259 /* Mismatching shadowing should've been dropped in the previous step already. */
3260 Assert( !(pReNative->Core.bmGstRegShadows & RT_BIT_64(idxGstReg))
3261 || pReNative->Core.aidxGstRegShadows[idxGstReg] == pOther->aidxGstRegShadows[idxGstReg]);
3262 }
3263
3264 /* Here is the actual endif label, fixup the above jump to land here. */
3265 iemNativeFixupFixedJump(pReNative, offFixup1, off);
3266 }
3267#endif
3268
3269 /*
3270 * Check variables next. For now we must require them to be identical
3271 * or stuff we can recreate. (No code is emitted here.)
3272 */
3273 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
3274#ifdef VBOX_STRICT
3275 uint32_t const offAssert = off;
3276#endif
3277 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
3278 if (fVars)
3279 {
3280 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
3281 do
3282 {
3283 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
3284 fVars &= ~RT_BIT_32(idxVar);
3285
3286 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
3287 {
3288 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
3289 continue;
3290 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3291 {
3292 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3293 if (idxHstReg != UINT8_MAX)
3294 {
3295 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3296 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3297 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
3298 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3299 }
3300 continue;
3301 }
3302 }
3303 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
3304 continue;
3305
3306 /* Irreconcilable, so drop it. */
3307 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3308 if (idxHstReg != UINT8_MAX)
3309 {
3310 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3311 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3312 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
3313 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3314 }
3315 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3316 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
3317 } while (fVars);
3318 }
3319 Assert(off == offAssert);
3320
3321 /*
3322 * Finally, check that the host register allocations matches.
3323 */
3324 AssertMsgStmt((pReNative->Core.bmHstRegs & (pReNative->Core.bmHstRegs ^ pOther->bmHstRegs)) == 0,
3325 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
3326 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
3327 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3328 }
3329 }
3330
3331 /*
3332 * Define the endif label and maybe the else one if we're still in the 'if' part.
3333 */
3334 if (!fDefinedLabels)
3335 {
3336 if (!pEntry->fInElse)
3337 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3338 else
3339 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3340 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3341 }
3342
3343 /* Pop the conditional stack. */
3344 pReNative->cCondDepth -= 1;
3345
3346 return off;
3347}
3348
3349
3350#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
3351 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
3352 do {
3353
3354/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
3355DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
3356{
3357 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3358 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3359
3360 /* Get the eflags. */
3361 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3362 kIemNativeGstRegUse_ReadOnly);
3363
3364 /* Test and jump. */
3365 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3366
3367 /* Free but don't flush the EFlags register. */
3368 iemNativeRegFreeTmp(pReNative, idxEflReg);
3369
3370 /* Make a copy of the core state now as we start the if-block. */
3371 iemNativeCondStartIfBlock(pReNative, off);
3372
3373 return off;
3374}
3375
3376
3377#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
3378 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
3379 do {
3380
3381/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
3382DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
3383{
3384 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3385 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3386
3387 /* Get the eflags. */
3388 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3389 kIemNativeGstRegUse_ReadOnly);
3390
3391 /* Test and jump. */
3392 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3393
3394 /* Free but don't flush the EFlags register. */
3395 iemNativeRegFreeTmp(pReNative, idxEflReg);
3396
3397 /* Make a copy of the core state now as we start the if-block. */
3398 iemNativeCondStartIfBlock(pReNative, off);
3399
3400 return off;
3401}
3402
3403
3404#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
3405 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
3406 do {
3407
3408/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
3409DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
3410{
3411 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3412 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3413
3414 /* Get the eflags. */
3415 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3416 kIemNativeGstRegUse_ReadOnly);
3417
3418 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3419 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3420
3421 /* Test and jump. */
3422 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3423
3424 /* Free but don't flush the EFlags register. */
3425 iemNativeRegFreeTmp(pReNative, idxEflReg);
3426
3427 /* Make a copy of the core state now as we start the if-block. */
3428 iemNativeCondStartIfBlock(pReNative, off);
3429
3430 return off;
3431}
3432
3433
3434#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
3435 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
3436 do {
3437
3438/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
3439DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
3440{
3441 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3442 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3443
3444 /* Get the eflags. */
3445 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3446 kIemNativeGstRegUse_ReadOnly);
3447
3448 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3449 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3450
3451 /* Test and jump. */
3452 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3453
3454 /* Free but don't flush the EFlags register. */
3455 iemNativeRegFreeTmp(pReNative, idxEflReg);
3456
3457 /* Make a copy of the core state now as we start the if-block. */
3458 iemNativeCondStartIfBlock(pReNative, off);
3459
3460 return off;
3461}
3462
3463
3464#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
3465 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
3466 do {
3467
3468#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
3469 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
3470 do {
3471
3472/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
3473DECL_INLINE_THROW(uint32_t)
3474iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3475 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
3476{
3477 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
3478 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3479
3480 /* Get the eflags. */
3481 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3482 kIemNativeGstRegUse_ReadOnly);
3483
3484 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
3485 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
3486
3487 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
3488 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
3489 Assert(iBitNo1 != iBitNo2);
3490
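    /* Strategy: isolate the first flag bit, shift it to the second flag's bit position
       and XOR it with EFLAGS; bit iBitNo2 of the result is then set exactly when the
       two flags differ. */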
3491#ifdef RT_ARCH_AMD64
3492 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
3493
3494 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3495 if (iBitNo1 > iBitNo2)
3496 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3497 else
3498 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3499 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3500
3501#elif defined(RT_ARCH_ARM64)
3502 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3503 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3504
3505 /* and tmpreg, eflreg, #1<<iBitNo1 */
3506 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3507
3508 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3509 if (iBitNo1 > iBitNo2)
3510 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3511 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3512 else
3513 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3514 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3515
3516 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3517
3518#else
3519# error "Port me"
3520#endif
3521
3522 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3523 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3524 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3525
3526 /* Free but don't flush the EFlags and tmp registers. */
3527 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3528 iemNativeRegFreeTmp(pReNative, idxEflReg);
3529
3530 /* Make a copy of the core state now as we start the if-block. */
3531 iemNativeCondStartIfBlock(pReNative, off);
3532
3533 return off;
3534}
3535
3536
3537#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
3538 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
3539 do {
3540
3541#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
3542 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
3543 do {
3544
3545/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
3546 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
3547DECL_INLINE_THROW(uint32_t)
3548iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
3549 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
3550{
3551 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
3552 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3553
3554 /* We need an if-block label for the inverted variant. */
3555 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
3556 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
3557
3558 /* Get the eflags. */
3559 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3560 kIemNativeGstRegUse_ReadOnly);
3561
3562 /* Translate the flag masks to bit numbers. */
3563 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3564 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3565
3566 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
3567 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
3568 Assert(iBitNo1 != iBitNo);
3569
3570 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
3571 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
3572 Assert(iBitNo2 != iBitNo);
3573 Assert(iBitNo2 != iBitNo1);
3574
3575#ifdef RT_ARCH_AMD64
3576 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
3577#elif defined(RT_ARCH_ARM64)
3578 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3579#endif
3580
3581 /* Check for the lone bit first. */
3582 if (!fInverted)
3583 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3584 else
3585 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
3586
3587 /* Then extract and compare the other two bits. */
3588#ifdef RT_ARCH_AMD64
3589 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3590 if (iBitNo1 > iBitNo2)
3591 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3592 else
3593 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3594 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3595
3596#elif defined(RT_ARCH_ARM64)
3597 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3598
3599 /* and tmpreg, eflreg, #1<<iBitNo1 */
3600 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3601
3602 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3603 if (iBitNo1 > iBitNo2)
3604 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3605 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3606 else
3607 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3608 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3609
3610 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3611
3612#else
3613# error "Port me"
3614#endif
3615
3616 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3617 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3618 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3619
3620 /* Free but don't flush the EFlags and tmp registers. */
3621 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3622 iemNativeRegFreeTmp(pReNative, idxEflReg);
3623
3624 /* Make a copy of the core state now as we start the if-block. */
3625 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
3626
3627 return off;
3628}
3629
3630
3631#define IEM_MC_IF_CX_IS_NZ() \
3632 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
3633 do {
3634
3635/** Emits code for IEM_MC_IF_CX_IS_NZ. */
3636DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3637{
3638 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3639
3640 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3641 kIemNativeGstRegUse_ReadOnly);
3642 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
3643 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3644
3645 iemNativeCondStartIfBlock(pReNative, off);
3646 return off;
3647}
3648
3649
3650#define IEM_MC_IF_ECX_IS_NZ() \
3651 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
3652 do {
3653
3654#define IEM_MC_IF_RCX_IS_NZ() \
3655 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
3656 do {
3657
3658/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
3659DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3660{
3661 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3662
3663 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3664 kIemNativeGstRegUse_ReadOnly);
3665 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
3666 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3667
3668 iemNativeCondStartIfBlock(pReNative, off);
3669 return off;
3670}
3671
3672
3673#define IEM_MC_IF_CX_IS_NOT_ONE() \
3674 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
3675 do {
3676
3677/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
3678DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3679{
3680 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3681
3682 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3683 kIemNativeGstRegUse_ReadOnly);
3684#ifdef RT_ARCH_AMD64
3685 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3686#else
3687 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3688 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3689 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3690#endif
3691 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3692
3693 iemNativeCondStartIfBlock(pReNative, off);
3694 return off;
3695}
3696
3697
3698#define IEM_MC_IF_ECX_IS_NOT_ONE() \
3699 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
3700 do {
3701
3702#define IEM_MC_IF_RCX_IS_NOT_ONE() \
3703 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
3704 do {
3705
3706/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
3707DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3708{
3709 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3710
3711 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3712 kIemNativeGstRegUse_ReadOnly);
3713 if (f64Bit)
3714 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3715 else
3716 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3717 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3718
3719 iemNativeCondStartIfBlock(pReNative, off);
3720 return off;
3721}
3722
3723
3724#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3725 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
3726 do {
3727
3728#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3729 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
3730 do {
3731
3732/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
3733 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3734DECL_INLINE_THROW(uint32_t)
3735iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
3736{
3737 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3738 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3739
3740 /* We have to load both RCX and EFLAGS before we can start branching,
3741 otherwise we'll end up in the else-block with an inconsistent
3742 register allocator state.
3743 Doing EFLAGS first as it's more likely to be loaded, right? */
3744 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3745 kIemNativeGstRegUse_ReadOnly);
3746 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3747 kIemNativeGstRegUse_ReadOnly);
3748
3749 /** @todo we could reduce this to a single branch instruction by spending a
3750 * temporary register and some setnz stuff. Not sure if loops are
3751 * worth it. */
3752 /* Check CX. */
3753#ifdef RT_ARCH_AMD64
3754 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3755#else
3756 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3757 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3758 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3759#endif
3760
3761 /* Check the EFlags bit. */
3762 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3763 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3764 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3765 !fCheckIfSet /*fJmpIfSet*/);
3766
3767 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3768 iemNativeRegFreeTmp(pReNative, idxEflReg);
3769
3770 iemNativeCondStartIfBlock(pReNative, off);
3771 return off;
3772}
3773
3774
3775#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3776 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
3777 do {
3778
3779#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3780 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
3781 do {
3782
3783#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3784 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
3785 do {
3786
3787#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3788 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
3789 do {
3790
3791/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3792 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3793 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3794 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3795DECL_INLINE_THROW(uint32_t)
3796iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3797 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
3798{
3799 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3800 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3801
3802 /* We have to load both RCX and EFLAGS before we can start branching,
3803 otherwise we'll end up in the else-block with an inconsistent
3804 register allocator state.
3805 Doing EFLAGS first as it's more likely to be loaded, right? */
3806 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3807 kIemNativeGstRegUse_ReadOnly);
3808 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3809 kIemNativeGstRegUse_ReadOnly);
3810
3811 /** @todo we could reduce this to a single branch instruction by spending a
3812 * temporary register and some setnz stuff. Not sure if loops are
3813 * worth it. */
3814 /* Check RCX/ECX. */
3815 if (f64Bit)
3816 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3817 else
3818 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3819
3820 /* Check the EFlags bit. */
3821 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3822 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3823 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3824 !fCheckIfSet /*fJmpIfSet*/);
3825
3826 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3827 iemNativeRegFreeTmp(pReNative, idxEflReg);
3828
3829 iemNativeCondStartIfBlock(pReNative, off);
3830 return off;
3831}
3832
3833
3834#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3835 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3836 do {
3837
3838/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3839DECL_INLINE_THROW(uint32_t)
3840iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3841{
3842 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3843
3844 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3845 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3846 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3847 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3848
3849 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3850
3851 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3852
3853 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3854
3855 iemNativeCondStartIfBlock(pReNative, off);
3856 return off;
3857}
3858
3859
3860#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3861 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3862 do {
3863
3864/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3865DECL_INLINE_THROW(uint32_t)
3866iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3867{
3868 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3869 Assert(iGReg < 16);
3870
3871 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3872 kIemNativeGstRegUse_ReadOnly);
3873
3874 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3875
3876 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3877
3878 iemNativeCondStartIfBlock(pReNative, off);
3879 return off;
3880}
3881
3882
3883
3884/*********************************************************************************************************************************
3885* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3886*********************************************************************************************************************************/
3887
3888#define IEM_MC_NOREF(a_Name) \
3889 RT_NOREF_PV(a_Name)
3890
3891#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3892 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3893
3894#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3895 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3896
3897#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3898 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3899
3900#define IEM_MC_LOCAL(a_Type, a_Name) \
3901 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3902
3903#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3904 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3905
3906#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3907 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
3908
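/*
 * Editor's sketch (illustrative, not part of the original source): inside a
 * generated native recompiler function body the macros above simply turn the
 * MC variable/argument declarations into index allocations, e.g.
 *
 *      IEM_MC_LOCAL(uint32_t, u32Dst);
 *      IEM_MC_ARG(uint16_t,   u16Src, 1);
 *
 * expands to
 *
 *      uint8_t const u32Dst = iemNativeVarAlloc(pReNative, sizeof(uint32_t));
 *      uint8_t const u16Src = iemNativeArgAlloc(pReNative, 1, sizeof(uint16_t));
 *
 * i.e. the C identifiers hold packed variable indexes rather than the values
 * themselves; the values live in host registers and/or stack slots managed by
 * the variable code.  (The u32Dst/u16Src names are made up for the example.)
 */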
3909
3910/**
3911 * Sets the host register for @a idxVarRc to @a idxReg.
3912 *
3913 * Any guest register shadowing will be implicitly dropped by this call.
3914 *
3915 * The variable must not have any register associated with it (causes
3916 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3917 * implied.
3918 *
3919 * @returns idxReg
3920 * @param pReNative The recompiler state.
3921 * @param idxVar The variable.
3922 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3923 * @param off For recording in debug info.
3924 * @param fAllocated Whether @a idxReg is already marked as allocated in bmHstRegs (true) or still free (false).
3925 *
3926 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
3927 */
3928DECL_INLINE_THROW(uint8_t)
3929iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off, bool fAllocated)
3930{
3931 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3932 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3933 Assert(!pVar->fRegAcquired);
3934 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3935 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
3936 AssertStmt(RT_BOOL(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)) == fAllocated,
3937 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3938
3939 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3940 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3941
3942 iemNativeVarSetKindToStack(pReNative, idxVar);
3943 pVar->idxReg = idxReg;
3944
3945 return idxReg;
3946}
3947
3948
3949/**
3950 * Convenience wrapper around iemNativeVarRegisterSet() that also marks the host register as acquired for the variable.
3951 */
3952DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
3953 uint8_t idxReg, uint32_t *poff)
3954{
3955 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff, false /*fAllocated*/);
3956 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
3957 return idxReg;
3958}
3959
3960
3961/**
3962 * This is called by IEM_MC_END() to clean up all variables.
3963 */
3964DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
3965{
3966 uint32_t const bmVars = pReNative->Core.bmVars;
3967 if (bmVars != 0)
3968 iemNativeVarFreeAllSlow(pReNative, bmVars);
3969 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
3970 Assert(pReNative->Core.bmStack == 0);
3971}
3972
3973
3974#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
3975
3976/**
3977 * This is called by IEM_MC_FREE_LOCAL.
3978 */
3979DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3980{
3981 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3982 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
3983 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3984}
3985
3986
3987#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
3988
3989/**
3990 * This is called by IEM_MC_FREE_ARG.
3991 */
3992DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3993{
3994 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3995 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
3996 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3997}
3998
3999
4000#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
4001
4002/**
4003 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
4004 */
4005DECL_INLINE_THROW(uint32_t)
4006iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
4007{
4008 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
4009 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
4010 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4011 Assert( pVarDst->cbVar == sizeof(uint16_t)
4012 || pVarDst->cbVar == sizeof(uint32_t));
4013
4014 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
4015 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
4016 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
4017 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
4018 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4019
4020 Assert(pVarDst->cbVar < pVarSrc->cbVar);
4021
4022 /*
4023 * Special case for immediates.
4024 */
4025 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
4026 {
4027 switch (pVarDst->cbVar)
4028 {
4029 case sizeof(uint16_t):
4030 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
4031 break;
4032 case sizeof(uint32_t):
4033 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
4034 break;
4035 default: AssertFailed(); break;
4036 }
4037 }
4038 else
4039 {
4040 /*
4041 * The generic solution for now.
4042 */
4043 /** @todo optimize this by having the python script make sure the source
4044 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
4045 * statement. Then we could just transfer the register assignments. */
4046 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
4047 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
4048 switch (pVarDst->cbVar)
4049 {
4050 case sizeof(uint16_t):
4051 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
4052 break;
4053 case sizeof(uint32_t):
4054 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
4055 break;
4056 default: AssertFailed(); break;
4057 }
4058 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
4059 iemNativeVarRegisterRelease(pReNative, idxVarDst);
4060 }
4061 return off;
4062}
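/*
 * Editor's note (illustrative example, not from the original source): for an
 * immediate source the assignment above is pure compile-time truncation, e.g.
 * a 32-bit immediate source holding 0x12345678 assigned to a 16-bit destination
 * simply turns the destination into the constant 0x5678 without emitting any
 * native code; only the stack/register path emits an actual 16/32-bit move.
 */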
4063
4064
4065
4066/*********************************************************************************************************************************
4067* Emitters for IEM_MC_CALL_CIMPL_XXX *
4068*********************************************************************************************************************************/
4069
4070/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
4071DECL_INLINE_THROW(uint32_t)
4072iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
4073 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
4074
4075{
4076 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
4077
4078#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4079 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
4080       when a call clobbers any of the relevant control registers. */
4081# if 1
4082 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
4083 {
4084 /* Likely as long as call+ret are done via cimpl. */
4085 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
4086 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
4087 }
4088 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
4089 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4090 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
4091 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4092 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
4093 else
4094 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4095 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4096 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4097
4098# else
4099 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
4100 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4101 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
4102 pReNative->fSimdRaiseXcptChecksEmitted = 0;
4103 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
4104 || pfnCImpl == (uintptr_t)iemCImpl_callf
4105 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
4106 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
4107 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4108 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4109 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4110# endif
4111
4112# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
4113 /* Mark the host floating point control register as not synced if MXCSR is modified. */
4114 if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_MxCsr))
4115 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
4116# endif
4117#endif
4118
4119 /*
4120 * Do all the call setup and cleanup.
4121 */
4122 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
4123
4124 /*
4125 * Load the two or three hidden arguments.
4126 */
4127#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4128 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4129 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4130 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
4131#else
4132 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4133 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
4134#endif
4135
4136 /*
4137 * Make the call and check the return code.
4138 *
4139 * Shadow PC copies are always flushed here, other stuff depends on flags.
4140     * Segment and general purpose registers are explicitly flushed via the
4141 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
4142 * macros.
4143 */
4144 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
4145#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4146 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4147#endif
4148 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
4149 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
4150 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
4151 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
4152
4153#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
4154 pReNative->Core.fDebugPcInitialized = false;
4155 Log4(("fDebugPcInitialized=false cimpl off=%#x (v1)\n", off));
4156#endif
4157
4158 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
4159}
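/*
 * Editor's sketch of the call sequence emitted by the common worker above
 * (simplified, non-strict/non-Windows layout; register names follow the
 * IEMNATIVE_CALL_ARGx_GREG convention used in the code):
 *
 *      arg0 = pVCpu                  ; from IEMNATIVE_REG_FIXED_PVMCPU
 *      arg1 = cbInstr                ; instruction length for PC updating
 *      arg2..argN = explicit MC args ; set up by iemNativeEmitCallCommon()
 *      call pfnCImpl
 *      <check the returned VBOXSTRICTRC and branch to the status pass-up path>
 *
 * On Windows/AMD64 strict-RC builds an extra hidden first argument pointing at
 * a stack slot for the VBOXSTRICTRC return value is passed, shifting pVCpu and
 * cbInstr to arg1/arg2, as the #if block above shows.
 */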
4160
4161
4162#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
4163 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
4164
4165/** Emits code for IEM_MC_CALL_CIMPL_1. */
4166DECL_INLINE_THROW(uint32_t)
4167iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4168 uintptr_t pfnCImpl, uint8_t idxArg0)
4169{
4170 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4171 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
4172}
4173
4174
4175#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
4176 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
4177
4178/** Emits code for IEM_MC_CALL_CIMPL_2. */
4179DECL_INLINE_THROW(uint32_t)
4180iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4181 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
4182{
4183 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4184 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4185 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
4186}
4187
4188
4189#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
4190 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4191 (uintptr_t)a_pfnCImpl, a0, a1, a2)
4192
4193/** Emits code for IEM_MC_CALL_CIMPL_3. */
4194DECL_INLINE_THROW(uint32_t)
4195iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4196 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4197{
4198 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4199 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4200 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4201 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
4202}
4203
4204
4205#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
4206 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4207 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
4208
4209/** Emits code for IEM_MC_CALL_CIMPL_4. */
4210DECL_INLINE_THROW(uint32_t)
4211iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4212 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4213{
4214 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4215 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4216 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4217 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4218 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
4219}
4220
4221
4222#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
4223 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4224 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
4225
4226/** Emits code for IEM_MC_CALL_CIMPL_5. */
4227DECL_INLINE_THROW(uint32_t)
4228iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4229 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
4230{
4231 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4232 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4233 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4234 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4235 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
4236 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
4237}
4238
4239
4240/** Recompiler debugging: Flush guest register shadow copies. */
4241#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
4242
4243
4244
4245/*********************************************************************************************************************************
4246* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
4247*********************************************************************************************************************************/
4248
4249/**
4250 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
4251 */
4252DECL_INLINE_THROW(uint32_t)
4253iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4254 uintptr_t pfnAImpl, uint8_t cArgs)
4255{
4256 if (idxVarRc != UINT8_MAX)
4257 {
4258 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
4259 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
4260 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
4261 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
4262 }
4263
4264 /*
4265 * Do all the call setup and cleanup.
4266 *
4267     * Only pending guest register writes in call-volatile registers need to be flushed here,
4268     * as assembly helpers can't throw and don't access anything living in CPUMCTX; they only
4269     * access their parameters. Call-volatile registers are always flushed by
4270     * iemNativeEmitCallCommon() regardless of the fFlushPendingWrites parameter.
4271 */
4272 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
4273
4274 /*
4275 * Make the call and update the return code variable if we've got one.
4276 */
4277 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
4278 if (idxVarRc != UINT8_MAX)
4279 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off, false /*fAllocated*/);
4280
4281 return off;
4282}
4283
4284
4285
4286#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
4287 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
4288
4289#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
4290 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
4291
4292/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
4293DECL_INLINE_THROW(uint32_t)
4294iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
4295{
4296 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
4297}
4298
4299
4300#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
4301 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
4302
4303#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
4304 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
4305
4306/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
4307DECL_INLINE_THROW(uint32_t)
4308iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
4309{
4310 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4311 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
4312}
4313
4314
4315#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
4316 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
4317
4318#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
4319 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
4320
4321/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
4322DECL_INLINE_THROW(uint32_t)
4323iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4324 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
4325{
4326 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4327 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4328 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
4329}
4330
4331
4332#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
4333 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
4334
4335#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
4336 IEM_MC_LOCAL(a_rcType, a_rc); \
4337 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
4338
4339/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
4340DECL_INLINE_THROW(uint32_t)
4341iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4342 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4343{
4344 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4345 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4346 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4347 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
4348}
4349
4350
4351#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
4352 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4353
4354#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
4355 IEM_MC_LOCAL(a_rcType, a_rc); \
4356 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4357
4358/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
4359DECL_INLINE_THROW(uint32_t)
4360iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4361 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4362{
4363 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4364 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4365 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4366 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
4367 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
4368}
4369
4370
4371
4372/*********************************************************************************************************************************
4373* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
4374*********************************************************************************************************************************/
4375
4376#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
4377 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
4378
4379#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4380 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
4381
4382#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4383 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
4384
4385#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4386 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
4387
4388
4389/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
4390 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
4391DECL_INLINE_THROW(uint32_t)
4392iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
4393{
4394 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4395 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4396 Assert(iGRegEx < 20);
4397
4398 /* Same discussion as in iemNativeEmitFetchGregU16 */
4399 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4400 kIemNativeGstRegUse_ReadOnly);
4401
4402 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4403 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4404
4405 /* The value is zero-extended to the full 64-bit host register width. */
4406 if (iGRegEx < 16)
4407 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4408 else
4409 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4410
4411 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4412 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4413 return off;
4414}
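/*
 * Editor's note on the iGRegEx encoding used by the 8-bit fetch/store helpers
 * (inferred from the Assert(iGRegEx < 20) and the (iGRegEx & 15) masking, so
 * treat it as an illustration rather than a spec): values 0..15 address the
 * low byte of GPR 0..15, while 16..19 address the legacy high-byte registers
 * AH, CH, DH and BH of GPRs 0..3, which is why the code switches to the
 * *Gpr8Hi emitters for iGRegEx >= 16.
 */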
4415
4416
4417#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4418 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
4419
4420#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4421 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
4422
4423#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4424 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
4425
4426/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
4427DECL_INLINE_THROW(uint32_t)
4428iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
4429{
4430 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4431 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4432 Assert(iGRegEx < 20);
4433
4434 /* Same discussion as in iemNativeEmitFetchGregU16 */
4435 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4436 kIemNativeGstRegUse_ReadOnly);
4437
4438 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4439 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4440
4441 if (iGRegEx < 16)
4442 {
4443 switch (cbSignExtended)
4444 {
4445 case sizeof(uint16_t):
4446 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4447 break;
4448 case sizeof(uint32_t):
4449 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4450 break;
4451 case sizeof(uint64_t):
4452 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4453 break;
4454 default: AssertFailed(); break;
4455 }
4456 }
4457 else
4458 {
4459 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4460 switch (cbSignExtended)
4461 {
4462 case sizeof(uint16_t):
4463 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4464 break;
4465 case sizeof(uint32_t):
4466 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4467 break;
4468 case sizeof(uint64_t):
4469 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4470 break;
4471 default: AssertFailed(); break;
4472 }
4473 }
4474
4475 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4476 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4477 return off;
4478}
4479
4480
4481
4482#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
4483 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
4484
4485#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
4486 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4487
4488#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
4489 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4490
4491/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
4492DECL_INLINE_THROW(uint32_t)
4493iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4494{
4495 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4496 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4497 Assert(iGReg < 16);
4498
4499 /*
4500 * We can either just load the low 16-bit of the GPR into a host register
4501 * for the variable, or we can do so via a shadow copy host register. The
4502 * latter will avoid having to reload it if it's being stored later, but
4503 * will waste a host register if it isn't touched again. Since we don't
4504     * know what's going to happen, we choose the latter for now.
4505 */
4506 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4507 kIemNativeGstRegUse_ReadOnly);
4508
4509 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4510 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4511 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4512 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4513
4514 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4515 return off;
4516}
4517
4518#define IEM_MC_FETCH_GREG_I16(a_i16Dst, a_iGReg) \
4519 off = iemNativeEmitFetchGregI16(pReNative, off, a_i16Dst, a_iGReg)
4520
4521/** Emits code for IEM_MC_FETCH_GREG_I16. */
4522DECL_INLINE_THROW(uint32_t)
4523iemNativeEmitFetchGregI16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4524{
4525 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4526 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(int16_t));
4527 Assert(iGReg < 16);
4528
4529 /*
4530 * We can either just load the low 16-bit of the GPR into a host register
4531 * for the variable, or we can do so via a shadow copy host register. The
4532 * latter will avoid having to reload it if it's being stored later, but
4533 * will waste a host register if it isn't touched again. Since we don't
4534     * know what's going to happen, we choose the latter for now.
4535 */
4536 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4537 kIemNativeGstRegUse_ReadOnly);
4538
4539 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4540 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4541#ifdef RT_ARCH_AMD64
4542 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4543#elif defined(RT_ARCH_ARM64) /* Note! There are no 16-bit registers on ARM64; we emulate them using 32-bit registers, which requires sign extension here. */
4544 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4545#endif
4546 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4547
4548 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4549 return off;
4550}
4551
4552
4553#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
4554 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4555
4556#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
4557 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4558
4559/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
4560DECL_INLINE_THROW(uint32_t)
4561iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
4562{
4563 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4564 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4565 Assert(iGReg < 16);
4566
4567 /*
4568 * We can either just load the low 16-bit of the GPR into a host register
4569 * for the variable, or we can do so via a shadow copy host register. The
4570 * latter will avoid having to reload it if it's being stored later, but
4571 * will waste a host register if it isn't touched again. Since we don't
4572     * know what's going to happen, we choose the latter for now.
4573 */
4574 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4575 kIemNativeGstRegUse_ReadOnly);
4576
4577 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4578 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4579 if (cbSignExtended == sizeof(uint32_t))
4580 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4581 else
4582 {
4583 Assert(cbSignExtended == sizeof(uint64_t));
4584 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4585 }
4586 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4587
4588 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4589 return off;
4590}
4591
4592
4593#define IEM_MC_FETCH_GREG_I32(a_i32Dst, a_iGReg) \
4594 off = iemNativeEmitFetchGregU32(pReNative, off, a_i32Dst, a_iGReg, sizeof(uint32_t))
4595
4596#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
4597 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
4598
4599#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
4600 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
4601
4602/** Emits code for IEM_MC_FETCH_GREG_U32, IEM_MC_FETCH_GREG_I32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
4603DECL_INLINE_THROW(uint32_t)
4604iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4605{
4606 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4607 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4608 Assert(iGReg < 16);
4609
4610 /*
4611     * We can either just load the low 32-bit of the GPR into a host register
4612 * for the variable, or we can do so via a shadow copy host register. The
4613 * latter will avoid having to reload it if it's being stored later, but
4614 * will waste a host register if it isn't touched again. Since we don't
4615     * know what's going to happen, we choose the latter for now.
4616 */
4617 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4618 kIemNativeGstRegUse_ReadOnly);
4619
4620 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4621 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4622 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4623 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4624
4625 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4626 return off;
4627}
4628
4629
4630#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
4631 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
4632
4633/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
4634DECL_INLINE_THROW(uint32_t)
4635iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4636{
4637 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4638 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4639 Assert(iGReg < 16);
4640
4641 /*
4642 * We can either just load the low 32-bit of the GPR into a host register
4643 * for the variable, or we can do so via a shadow copy host register. The
4644 * latter will avoid having to reload it if it's being stored later, but
4645 * will waste a host register if it isn't touched again. Since we don't
4646 * know what going to happen, we choose the latter for now.
4647     * know what's going to happen, we choose the latter for now.
4648 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4649 kIemNativeGstRegUse_ReadOnly);
4650
4651 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4652 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4653 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4654 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4655
4656 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4657 return off;
4658}
4659
4660
4661#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
4662 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4663
4664#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
4665 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4666
4667/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
4668 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
4669DECL_INLINE_THROW(uint32_t)
4670iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4671{
4672 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4673 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4674 Assert(iGReg < 16);
4675
4676 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4677 kIemNativeGstRegUse_ReadOnly);
4678
4679 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4680 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4681 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
4682 /** @todo name the register a shadow one already? */
4683 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4684
4685 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4686 return off;
4687}
4688
4689
4690#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4691#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
4692 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
4693
4694/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
4695DECL_INLINE_THROW(uint32_t)
4696iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
4697{
4698 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4699 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4700 Assert(iGRegLo < 16 && iGRegHi < 16);
4701
4702 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4703 kIemNativeGstRegUse_ReadOnly);
4704 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4705 kIemNativeGstRegUse_ReadOnly);
4706
4707 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4708 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
4709 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
4710 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
4711
4712 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4713 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4714 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4715 return off;
4716}
4717#endif
4718
4719
4720/*********************************************************************************************************************************
4721* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
4722*********************************************************************************************************************************/
4723
4724#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
4725 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
4726
4727/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
4728DECL_INLINE_THROW(uint32_t)
4729iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
4730{
4731 Assert(iGRegEx < 20);
4732 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4733 kIemNativeGstRegUse_ForUpdate);
4734#ifdef RT_ARCH_AMD64
4735 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4736
4737 /* To the lowest byte of the register: mov r8, imm8 */
4738 if (iGRegEx < 16)
4739 {
4740 if (idxGstTmpReg >= 8)
4741 pbCodeBuf[off++] = X86_OP_REX_B;
4742 else if (idxGstTmpReg >= 4)
4743 pbCodeBuf[off++] = X86_OP_REX;
4744 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4745 pbCodeBuf[off++] = u8Value;
4746 }
4747    /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
4748 else if (idxGstTmpReg < 4)
4749 {
4750 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
4751 pbCodeBuf[off++] = u8Value;
4752 }
4753 else
4754 {
4755 /* ror reg64, 8 */
4756 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4757 pbCodeBuf[off++] = 0xc1;
4758 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4759 pbCodeBuf[off++] = 8;
4760
4761 /* mov reg8, imm8 */
4762 if (idxGstTmpReg >= 8)
4763 pbCodeBuf[off++] = X86_OP_REX_B;
4764 else if (idxGstTmpReg >= 4)
4765 pbCodeBuf[off++] = X86_OP_REX;
4766 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4767 pbCodeBuf[off++] = u8Value;
4768
4769 /* rol reg64, 8 */
4770 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4771 pbCodeBuf[off++] = 0xc1;
4772 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4773 pbCodeBuf[off++] = 8;
4774 }
4775
4776#elif defined(RT_ARCH_ARM64)
4777 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4778 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4779 if (iGRegEx < 16)
4780 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4781 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4782 else
4783 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4784 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4785 iemNativeRegFreeTmp(pReNative, idxImmReg);
4786
4787#else
4788# error "Port me!"
4789#endif
4790
4791 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4792
4793#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4794 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4795#endif
4796
4797 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4798 return off;
4799}
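/*
 * Editor's worked example for the AMD64 rotate trick above (illustrative; the
 * concrete register and value are picked for the example): storing 0x5A to AH
 * when the guest RAX is shadowed in host r9 cannot use a high-byte encoding
 * (ah/ch/dh/bh only exist for the first four legacy registers), so the emitter
 * rotates the byte into reach instead:
 *
 *      ror r9, 8        ; bits 15:8 (the guest AH) now sit in bits 7:0
 *      mov r9b, 0x5A    ; plain low-byte immediate store
 *      rol r9, 8        ; restore the original byte order
 *
 * The ARM64 path needs no such trick since bfi can deposit into bits 15:8
 * directly.
 */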
4800
4801
4802#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4803 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4804
4805/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4806DECL_INLINE_THROW(uint32_t)
4807iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4808{
4809 Assert(iGRegEx < 20);
4810 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4811
4812 /*
4813 * If it's a constant value (unlikely) we treat this as a
4814 * IEM_MC_STORE_GREG_U8_CONST statement.
4815 */
4816 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4817 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4818 { /* likely */ }
4819 else
4820 {
4821 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4822 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4823 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4824 }
4825
4826 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4827 kIemNativeGstRegUse_ForUpdate);
4828 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4829
4830#ifdef RT_ARCH_AMD64
4831 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4832 if (iGRegEx < 16)
4833 {
4834 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4835 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4836 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4837 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4838 pbCodeBuf[off++] = X86_OP_REX;
4839 pbCodeBuf[off++] = 0x8a;
4840 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4841 }
4842    /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
4843 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4844 {
4845 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4846 pbCodeBuf[off++] = 0x8a;
4847 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4848 }
4849 else
4850 {
4851 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4852
4853 /* ror reg64, 8 */
4854 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4855 pbCodeBuf[off++] = 0xc1;
4856 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4857 pbCodeBuf[off++] = 8;
4858
4859 /* mov reg8, reg8(r/m) */
4860 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4861 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4862 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4863 pbCodeBuf[off++] = X86_OP_REX;
4864 pbCodeBuf[off++] = 0x8a;
4865 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4866
4867 /* rol reg64, 8 */
4868 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4869 pbCodeBuf[off++] = 0xc1;
4870 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4871 pbCodeBuf[off++] = 8;
4872 }
4873
4874#elif defined(RT_ARCH_ARM64)
4875 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4876 or
4877 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4878 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4879 if (iGRegEx < 16)
4880 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4881 else
4882 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4883
4884#else
4885# error "Port me!"
4886#endif
4887 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4888
4889 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4890
4891#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4892 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4893#endif
4894 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4895 return off;
4896}
4897
4898
4899
4900#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4901 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4902
4903/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4904DECL_INLINE_THROW(uint32_t)
4905iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4906{
4907 Assert(iGReg < 16);
4908 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4909 kIemNativeGstRegUse_ForUpdate);
4910#ifdef RT_ARCH_AMD64
4911 /* mov reg16, imm16 */
4912 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4913 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4914 if (idxGstTmpReg >= 8)
4915 pbCodeBuf[off++] = X86_OP_REX_B;
4916 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4917 pbCodeBuf[off++] = RT_BYTE1(uValue);
4918 pbCodeBuf[off++] = RT_BYTE2(uValue);
4919
4920#elif defined(RT_ARCH_ARM64)
4921 /* movk xdst, #uValue, lsl #0 */
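    /* MOVK replaces only bits 15:0 and leaves the rest of the register untouched,
       which matches the 16-bit GPR store semantics we need here. */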
4922 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4923 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
4924
4925#else
4926# error "Port me!"
4927#endif
4928
4929 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4930
4931#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4932 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4933#endif
4934 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4935 return off;
4936}
4937
4938
4939#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4940 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4941
4942/** Emits code for IEM_MC_STORE_GREG_U16. */
4943DECL_INLINE_THROW(uint32_t)
4944iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4945{
4946 Assert(iGReg < 16);
4947 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4948
4949 /*
4950 * If it's a constant value (unlikely) we treat this as a
4951 * IEM_MC_STORE_GREG_U16_CONST statement.
4952 */
4953 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4954 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4955 { /* likely */ }
4956 else
4957 {
4958 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4959 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4960 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
4961 }
4962
4963 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4964 kIemNativeGstRegUse_ForUpdate);
4965
4966#ifdef RT_ARCH_AMD64
4967 /* mov reg16, reg16 or [mem16] */
4968 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4969 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4970 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
4971 {
4972 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
4973 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
4974 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
4975 pbCodeBuf[off++] = 0x8b;
4976 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
4977 }
4978 else
4979 {
4980 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
4981 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
4982 if (idxGstTmpReg >= 8)
4983 pbCodeBuf[off++] = X86_OP_REX_R;
4984 pbCodeBuf[off++] = 0x8b;
4985 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
4986 }
4987
4988#elif defined(RT_ARCH_ARM64)
4989 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
4990 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4991 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4992 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
4993 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4994
4995#else
4996# error "Port me!"
4997#endif
4998
4999 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5000
5001#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5002 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5003#endif
5004 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5005 return off;
5006}
5007
5008
5009#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
5010 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
5011
5012/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
5013DECL_INLINE_THROW(uint32_t)
5014iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
5015{
5016 Assert(iGReg < 16);
5017 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5018 kIemNativeGstRegUse_ForFullWrite);
5019 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5020#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5021 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5022#endif
5023 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5024 return off;
5025}
5026
5027
5028#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
5029 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
5030
5031#define IEM_MC_STORE_GREG_I32(a_iGReg, a_i32Value) \
5032 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_i32Value)
5033
5034/** Emits code for IEM_MC_STORE_GREG_U32/IEM_MC_STORE_GREG_I32. */
5035DECL_INLINE_THROW(uint32_t)
5036iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5037{
5038 Assert(iGReg < 16);
5039 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5040
5041 /*
5042 * If it's a constant value (unlikely) we treat this as a
5043 * IEM_MC_STORE_GREG_U32_CONST statement.
5044 */
5045 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5046 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5047 { /* likely */ }
5048 else
5049 {
5050 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5051 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5052 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
5053 }
5054
5055 /*
5056     * For the rest we allocate a guest register for the variable and write
5057 * it to the CPUMCTX structure.
5058 */
5059 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5060#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5061 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5062#else
5063 RT_NOREF(idxVarReg);
5064#endif
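    /* The 32-bit value variable is expected to already be zero extended to 64 bits
       (32-bit GPR writes clear bits 63:32 on x86-64); the strict-build check below
       asserts this. */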
5065#ifdef VBOX_STRICT
5066 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
5067#endif
5068 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5069 return off;
5070}
5071
5072
5073#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
5074 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
5075
5076/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
5077DECL_INLINE_THROW(uint32_t)
5078iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
5079{
5080 Assert(iGReg < 16);
5081 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5082 kIemNativeGstRegUse_ForFullWrite);
5083 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5084#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5085 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5086#endif
5087 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5088 return off;
5089}
5090
5091
5092#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
5093 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
5094
5095#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
5096 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
5097
5098/** Emits code for IEM_MC_STORE_GREG_U64/IEM_MC_STORE_GREG_I64. */
5099DECL_INLINE_THROW(uint32_t)
5100iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5101{
5102 Assert(iGReg < 16);
5103 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5104
5105 /*
5106 * If it's a constant value (unlikely) we treat this as a
5107 * IEM_MC_STORE_GREG_U64_CONST statement.
5108 */
5109 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5110 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5111 { /* likely */ }
5112 else
5113 {
5114 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5115 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5116 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
5117 }
5118
5119 /*
5120     * For the rest we allocate a guest register for the variable and write
5121 * it to the CPUMCTX structure.
5122 */
5123 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5124#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5125 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5126#else
5127 RT_NOREF(idxVarReg);
5128#endif
5129 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5130 return off;
5131}
5132
5133
5134#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
5135 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
5136
5137/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
5138DECL_INLINE_THROW(uint32_t)
5139iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
5140{
5141 Assert(iGReg < 16);
5142 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5143 kIemNativeGstRegUse_ForUpdate);
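    /* A 32-bit move of the register onto itself zero-extends into bits 63:32 on
       both AMD64 and ARM64, which is all that is needed here. */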
5144 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
5145#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5146 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5147#endif
5148 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5149 return off;
5150}
5151
5152
5153#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5154#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
5155 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
5156
5157/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
5158DECL_INLINE_THROW(uint32_t)
5159iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
5160{
5161 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5162 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
5163 Assert(iGRegLo < 16 && iGRegHi < 16);
5164
5165 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
5166 kIemNativeGstRegUse_ForFullWrite);
5167 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
5168 kIemNativeGstRegUse_ForFullWrite);
5169
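    /* The 128-bit value variable is held in a host SIMD register; copy its low and
       high 64-bit halves into the two guest GPR copies. */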
5170 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5171 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
5172 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
5173 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
5174
5175 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
5176 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
5177 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
5178 return off;
5179}
5180#endif
5181
5182
5183/*********************************************************************************************************************************
5184* General purpose register manipulation (add, sub). *
5185*********************************************************************************************************************************/
5186
5187#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
5188    off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
5189
5190/** Emits code for IEM_MC_ADD_GREG_U16. */
5191DECL_INLINE_THROW(uint32_t)
5192iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
5193{
5194 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5195 kIemNativeGstRegUse_ForUpdate);
5196
5197#ifdef RT_ARCH_AMD64
5198 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5199 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5200 if (idxGstTmpReg >= 8)
5201 pbCodeBuf[off++] = X86_OP_REX_B;
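    /* Prefer the 'inc' form when adding 1; it is two bytes shorter than the
       add-with-imm16 form. */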
5202 if (uAddend == 1)
5203 {
5204 pbCodeBuf[off++] = 0xff; /* inc */
5205 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5206 }
5207 else
5208 {
5209 pbCodeBuf[off++] = 0x81;
5210 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5211 pbCodeBuf[off++] = uAddend;
5212 pbCodeBuf[off++] = 0;
5213 }
5214
5215#else
5216 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5217 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5218
5219    /* add tmp, gstgrp, uAddend */
5220 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
5221
5222 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
5223 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5224
5225 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5226#endif
5227
5228 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5229
5230#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5231 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5232#endif
5233
5234 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5235 return off;
5236}
5237
5238
5239#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
5240 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5241
5242#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
5243 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5244
5245/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
5246DECL_INLINE_THROW(uint32_t)
5247iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
5248{
5249 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5250 kIemNativeGstRegUse_ForUpdate);
5251
5252#ifdef RT_ARCH_AMD64
5253 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5254 if (f64Bit)
5255 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5256 else if (idxGstTmpReg >= 8)
5257 pbCodeBuf[off++] = X86_OP_REX_B;
5258 if (uAddend == 1)
5259 {
5260 pbCodeBuf[off++] = 0xff; /* inc */
5261 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5262 }
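    /* Addends 2..127 fit the sign-extended imm8 form (opcode 0x83); larger values
       (uAddend is at most 255) need the imm32 form (opcode 0x81). */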
5263 else if (uAddend < 128)
5264 {
5265 pbCodeBuf[off++] = 0x83; /* add */
5266 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5267 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5268 }
5269 else
5270 {
5271 pbCodeBuf[off++] = 0x81; /* add */
5272 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5273 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5274 pbCodeBuf[off++] = 0;
5275 pbCodeBuf[off++] = 0;
5276 pbCodeBuf[off++] = 0;
5277 }
5278
5279#else
5280    /* add gstgrp, gstgrp, uAddend */
5281 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5282 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
5283
5284#endif
5285
5286 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5287
5288#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5289 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5290#endif
5291
5292 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5293 return off;
5294}
5295
5296
5297
5298#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5299 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5300
5301/** Emits code for IEM_MC_SUB_GREG_U16. */
5302DECL_INLINE_THROW(uint32_t)
5303iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
5304{
5305 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5306 kIemNativeGstRegUse_ForUpdate);
5307
5308#ifdef RT_ARCH_AMD64
5309 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5310 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5311 if (idxGstTmpReg >= 8)
5312 pbCodeBuf[off++] = X86_OP_REX_B;
5313 if (uSubtrahend == 1)
5314 {
5315 pbCodeBuf[off++] = 0xff; /* dec */
5316 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5317 }
5318 else
5319 {
5320 pbCodeBuf[off++] = 0x81;
5321 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5322 pbCodeBuf[off++] = uSubtrahend;
5323 pbCodeBuf[off++] = 0;
5324 }
5325
5326#else
5327 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5328 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5329
5330 /* sub tmp, gstgrp, uSubtrahend */
5331 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
5332
5333 /* bfi w1, w2, 0, 16 - moves bits 15:0 from tmpreg2 to tmpreg. */
5334 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5335
5336 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5337#endif
5338
5339 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5340
5341#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5342 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5343#endif
5344
5345 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5346 return off;
5347}
5348
5349
5350#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
5351 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5352
5353#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
5354 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5355
5356/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
5357DECL_INLINE_THROW(uint32_t)
5358iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
5359{
5360 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5361 kIemNativeGstRegUse_ForUpdate);
5362
5363#ifdef RT_ARCH_AMD64
5364 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5365 if (f64Bit)
5366 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5367 else if (idxGstTmpReg >= 8)
5368 pbCodeBuf[off++] = X86_OP_REX_B;
5369 if (uSubtrahend == 1)
5370 {
5371 pbCodeBuf[off++] = 0xff; /* dec */
5372 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5373 }
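    /* Likewise, subtrahends 2..127 fit the sign-extended imm8 form (0x83); larger
       values need the imm32 form (0x81). */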
5374 else if (uSubtrahend < 128)
5375 {
5376 pbCodeBuf[off++] = 0x83; /* sub */
5377 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5378 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5379 }
5380 else
5381 {
5382 pbCodeBuf[off++] = 0x81; /* sub */
5383 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5384 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5385 pbCodeBuf[off++] = 0;
5386 pbCodeBuf[off++] = 0;
5387 pbCodeBuf[off++] = 0;
5388 }
5389
5390#else
5391 /* sub tmp, gstgrp, uSubtrahend */
5392 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5393 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
5394
5395#endif
5396
5397 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5398
5399#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5400 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5401#endif
5402
5403 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5404 return off;
5405}
5406
5407
5408#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
5409 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5410
5411#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
5412 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5413
5414#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
5415 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5416
5417#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
5418 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5419
5420/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
5421DECL_INLINE_THROW(uint32_t)
5422iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5423{
5424#ifdef VBOX_STRICT
5425 switch (cbMask)
5426 {
5427 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5428 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5429 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5430 case sizeof(uint64_t): break;
5431 default: AssertFailedBreak();
5432 }
5433#endif
5434
5435 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5436 kIemNativeGstRegUse_ForUpdate);
5437
5438 switch (cbMask)
5439 {
5440 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5441 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
5442 break;
5443 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
5444 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
5445 break;
5446 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5447 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5448 break;
5449 case sizeof(uint64_t):
5450 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
5451 break;
5452 default: AssertFailedBreak();
5453 }
5454
5455 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5456
5457#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5458 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5459#endif
5460
5461 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5462 return off;
5463}
5464
5465
5466#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
5467 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5468
5469#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
5470 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5471
5472#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
5473 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5474
5475#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
5476 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5477
5478/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
5479DECL_INLINE_THROW(uint32_t)
5480iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5481{
5482#ifdef VBOX_STRICT
5483 switch (cbMask)
5484 {
5485 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5486 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5487 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5488 case sizeof(uint64_t): break;
5489 default: AssertFailedBreak();
5490 }
5491#endif
5492
5493 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5494 kIemNativeGstRegUse_ForUpdate);
5495
5496 switch (cbMask)
5497 {
5498 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5499 case sizeof(uint16_t):
5500 case sizeof(uint64_t):
5501 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
5502 break;
5503 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5504 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5505 break;
5506 default: AssertFailedBreak();
5507 }
5508
5509 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5510
5511#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5512 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5513#endif
5514
5515 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5516 return off;
5517}
5518
5519
5520/*********************************************************************************************************************************
5521* Local/Argument variable manipulation (add, sub, and, or). *
5522*********************************************************************************************************************************/
5523
5524#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
5525 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5526
5527#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
5528 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5529
5530#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
5531 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5532
5533#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
5534 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5535
5536
5537#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
5538 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
5539
5540#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
5541 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
5542
5543#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
5544 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
5545
5546/** Emits code for AND'ing a local and a constant value. */
5547DECL_INLINE_THROW(uint32_t)
5548iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5549{
5550#ifdef VBOX_STRICT
5551 switch (cbMask)
5552 {
5553 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5554 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5555 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5556 case sizeof(uint64_t): break;
5557 default: AssertFailedBreak();
5558 }
5559#endif
5560
5561 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5562 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5563
5564 if (cbMask <= sizeof(uint32_t))
5565 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
5566 else
5567 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
5568
5569 iemNativeVarRegisterRelease(pReNative, idxVar);
5570 return off;
5571}
5572
5573
5574#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
5575 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5576
5577#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
5578 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5579
5580#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
5581 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5582
5583#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
5584 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5585
5586/** Emits code for OR'ing a local and a constant value. */
5587DECL_INLINE_THROW(uint32_t)
5588iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5589{
5590#ifdef VBOX_STRICT
5591 switch (cbMask)
5592 {
5593 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5594 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5595 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5596 case sizeof(uint64_t): break;
5597 default: AssertFailedBreak();
5598 }
5599#endif
5600
5601 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5602 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5603
5604 if (cbMask <= sizeof(uint32_t))
5605 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
5606 else
5607 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
5608
5609 iemNativeVarRegisterRelease(pReNative, idxVar);
5610 return off;
5611}
5612
5613
5614#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
5615 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
5616
5617#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
5618 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
5619
5620#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
5621 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
5622
5623/** Emits code for reversing the byte order in a local value. */
5624DECL_INLINE_THROW(uint32_t)
5625iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
5626{
5627 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5628 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5629
5630 switch (cbLocal)
5631 {
5632 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
5633 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
5634 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
5635 default: AssertFailedBreak();
5636 }
5637
5638 iemNativeVarRegisterRelease(pReNative, idxVar);
5639 return off;
5640}
5641
5642
5643#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
5644 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5645
5646#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
5647 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5648
5649#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
5650 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5651
5652/** Emits code for shifting left a local value. */
5653DECL_INLINE_THROW(uint32_t)
5654iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5655{
5656#ifdef VBOX_STRICT
5657 switch (cbLocal)
5658 {
5659 case sizeof(uint8_t): Assert(cShift < 8); break;
5660 case sizeof(uint16_t): Assert(cShift < 16); break;
5661 case sizeof(uint32_t): Assert(cShift < 32); break;
5662 case sizeof(uint64_t): Assert(cShift < 64); break;
5663 default: AssertFailedBreak();
5664 }
5665#endif
5666
5667 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5668 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5669
5670 if (cbLocal <= sizeof(uint32_t))
5671 {
5672 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
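        /* For 8/16-bit locals, mask off whatever was shifted above the local's
           width so the value stays truncated to its declared size. */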
5673 if (cbLocal < sizeof(uint32_t))
5674 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
5675 cbLocal == sizeof(uint16_t)
5676 ? UINT32_C(0xffff)
5677 : UINT32_C(0xff));
5678 }
5679 else
5680 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
5681
5682 iemNativeVarRegisterRelease(pReNative, idxVar);
5683 return off;
5684}
5685
5686
5687#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
5688 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5689
5690#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
5691 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5692
5693#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
5694 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5695
5696/** Emits code for arithmetically shifting a local value right. */
5697DECL_INLINE_THROW(uint32_t)
5698iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5699{
5700#ifdef VBOX_STRICT
5701 switch (cbLocal)
5702 {
5703 case sizeof(int8_t): Assert(cShift < 8); break;
5704 case sizeof(int16_t): Assert(cShift < 16); break;
5705 case sizeof(int32_t): Assert(cShift < 32); break;
5706 case sizeof(int64_t): Assert(cShift < 64); break;
5707 default: AssertFailedBreak();
5708 }
5709#endif
5710
5711 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5712 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5713
5714 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
5715 if (cbLocal == sizeof(uint8_t))
5716 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
5717 else if (cbLocal == sizeof(uint16_t))
5718 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
5719
5720 if (cbLocal <= sizeof(uint32_t))
5721 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
5722 else
5723 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
5724
5725 iemNativeVarRegisterRelease(pReNative, idxVar);
5726 return off;
5727}
5728
5729
5730#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
5731 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
5732
5733#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
5734 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
5735
5736#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
5737 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
5738
5739/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
5740DECL_INLINE_THROW(uint32_t)
5741iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
5742{
5743 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
5744 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
5745 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5746 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5747
5748 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5749 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
5750
5751 /* Need to sign extend the value. */
5752 if (cbLocal <= sizeof(uint32_t))
5753 {
5754/** @todo ARM64: In case of boredom, the extended add instruction can do the
5755 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
5756 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5757
5758 switch (cbLocal)
5759 {
5760 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
5761 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
5762 default: AssertFailed();
5763 }
5764
5765 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
5766 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5767 }
5768 else
5769 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
5770
5771 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
5772 iemNativeVarRegisterRelease(pReNative, idxVar);
5773 return off;
5774}
5775
5776
5777
5778/*********************************************************************************************************************************
5779* EFLAGS *
5780*********************************************************************************************************************************/
5781
5782#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
5783# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
5784#else
5785# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
5786 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
5787
5788DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
5789{
5790 if (fEflOutput)
5791 {
5792 PVMCPUCC const pVCpu = pReNative->pVCpu;
5793# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5794 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5795 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5796 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5797# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5798 if (fEflOutput & (a_fEfl)) \
5799 { \
5800 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5801 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5802 else \
5803 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5804 } else do { } while (0)
5805# else
5806 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
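        /* Rough classification (the liveness bit definitions are authoritative):
           a flag is 'clobbered' when the following code writes it without reading
           it first, and 'delayable' when only a potential call might consume it. */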
5807 IEMLIVENESSBIT const LivenessClobbered =
5808 {
5809 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5810 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_POTENTIAL_CALL].bm64
5811 | pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5812 | pLivenessEntry->aBits[IEMLIVENESS_BIT_CALL].bm64)
5813 };
5814 IEMLIVENESSBIT const LivenessDelayable =
5815 {
5816 pLivenessEntry->aBits[IEMLIVENESS_BIT_POTENTIAL_CALL].bm64
5817 & pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5818 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5819 | pLivenessEntry->aBits[IEMLIVENESS_BIT_CALL].bm64)
5820 };
5821# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5822 if (fEflOutput & (a_fEfl)) \
5823 { \
5824 if (LivenessClobbered.a_fLivenessMember) \
5825 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5826 else if (LivenessDelayable.a_fLivenessMember) \
5827 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5828 else \
5829 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5830 } else do { } while (0)
5831# endif
5832 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5833 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5834 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5835 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5836 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5837 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5838 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5839# undef CHECK_FLAG_AND_UPDATE_STATS
5840 }
5841 RT_NOREF(fEflInput);
5842}
5843#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5844
5845#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5846#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5847 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
5848
5849/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5850DECL_INLINE_THROW(uint32_t)
5851iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
5852 uint32_t fEflInput, uint32_t fEflOutput)
5853{
5854 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5855 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5856 RT_NOREF(fEflInput, fEflOutput);
5857
5858#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5859# ifdef VBOX_STRICT
5860 if ( pReNative->idxCurCall != 0
5861 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
5862 {
5863 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5864 uint32_t const fBoth = fEflInput | fEflOutput;
5865# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
5866 AssertMsg( !(fBoth & (a_fElfConst)) \
5867 || (!(fEflInput & (a_fElfConst)) \
5868 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5869 : !(fEflOutput & (a_fElfConst)) \
5870 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5871 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5872 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5873 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5874 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5875 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5876 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5877 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5878 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5879 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5880# undef ASSERT_ONE_EFL
5881 }
5882# endif
5883#endif
5884
5885 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5886
5887 /** @todo This could be prettier...*/
5888 /** @todo Also, the shadowing+liveness handling of EFlags is currently
5889 * problematic, but I'll try tackle that soon (@bugref{10720}). */
5890 PCIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarEFlags)];
5891 Assert(pVar->enmKind == kIemNativeVarKind_Invalid || pVar->enmKind == kIemNativeVarKind_Stack);
5892 Assert(pVar->idxReg == UINT8_MAX);
5893 if (pVar->uArgNo >= IEMNATIVE_CALL_ARG_GREG_COUNT)
5894 {
5895        /** @todo We could use kIemNativeGstRegUse_ReadOnly here when fEflOutput is
5896         *        zero, but since iemNativeVarRegisterSet clears the shadowing,
5897         *        that's counterproductive... */
5898 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5899 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
5900 true /** @todo EFlags shadowing+liveness weirdness (@bugref{10720}). */);
5901 iemNativeVarRegisterSet(pReNative, idxVarEFlags, idxGstReg, off, true /*fAllocated*/);
5902 }
5903 else
5904 {
5905 /* Register argument variable: Avoid assertions in generic call code and load it the traditional way. */
5906 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
5907 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_EFlags);
5908 if (idxGstReg != UINT8_MAX)
5909 {
5910 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstReg);
5911 iemNativeRegFreeTmp(pReNative, idxGstReg);
5912 }
5913 else
5914 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxVarReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5915 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5916 }
5917 return off;
5918}
5919
5920
5921
5922/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5923 * start using it with custom native code emission (inlining assembly
5924 * instruction helpers). */
5925#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5926#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5927 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5928 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
5929
5930#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5931#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5932 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5933 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
5934
5935/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
5936DECL_INLINE_THROW(uint32_t)
5937iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
5938 bool fUpdateSkipping)
5939{
5940 RT_NOREF(fEflOutput);
5941 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
5942 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5943
5944#ifdef VBOX_STRICT
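    /* Sanity checks: break if the committed value has the reserved must-be-one
       bit (bit 1) clear, or any of the reserved must-be-zero bits set. */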
5945 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
5946 uint32_t offFixup = off;
5947 off = iemNativeEmitJnzToFixed(pReNative, off, off);
5948 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
5949 iemNativeFixupFixedJump(pReNative, offFixup, off);
5950
5951 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
5952 offFixup = off;
5953 off = iemNativeEmitJzToFixed(pReNative, off, off);
5954 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
5955 iemNativeFixupFixedJump(pReNative, offFixup, off);
5956
5957    /** @todo validate that only bits in the fEflOutput mask changed. */
5958#endif
5959
5960#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5961 if (fUpdateSkipping)
5962 {
5963 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5964 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5965 else
5966 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5967 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5968 }
5969#else
5970 RT_NOREF_PV(fUpdateSkipping);
5971#endif
5972
5973 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5974 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
5975 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5976 return off;
5977}
5978
5979
5980typedef enum IEMNATIVEMITEFLOP
5981{
5982 kIemNativeEmitEflOp_Set,
5983 kIemNativeEmitEflOp_Clear,
5984 kIemNativeEmitEflOp_Flip
5985} IEMNATIVEMITEFLOP;
5986
5987#define IEM_MC_SET_EFL_BIT(a_fBit) \
5988 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Set>(pReNative, off, a_fBit)
5989
5990#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
5991 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Clear>(pReNative, off, a_fBit)
5992
5993#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
5994 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Flip>(pReNative, off, a_fBit)
5995
5996/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
5997template<IEMNATIVEMITEFLOP const a_enmOp>
5998DECL_INLINE_THROW(uint32_t) iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflBit)
5999{
6000 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
6001 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
6002 true /*fSkipLivenessAssert*/); /** @todo proper liveness / eflags fix */
6003
6004 /* Using 'if constexpr' forces code elimination in debug builds with VC. */
6005 if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Set)
6006 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
6007 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Clear)
6008 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~fEflBit);
6009 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Flip)
6010 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
6011 else
6012 AssertCompile( a_enmOp == kIemNativeEmitEflOp_Set /* AssertCompile(false) works with VC 2019 but not clang 15. */
6013 || a_enmOp == kIemNativeEmitEflOp_Clear
6014 || a_enmOp == kIemNativeEmitEflOp_Flip);
6015
6016 /** @todo No delayed writeback for EFLAGS right now. */
6017 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
6018
6019 /* Free but don't flush the EFLAGS register. */
6020 iemNativeRegFreeTmp(pReNative, idxEflReg);
6021
6022 return off;
6023}
6024
6025
6026/*********************************************************************************************************************************
6027* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
6028*********************************************************************************************************************************/
6029
6030#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
6031 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
6032
6033#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
6034 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
6035
6036#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
6037 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
6038
6039
6040/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
6041 * IEM_MC_FETCH_SREG_ZX_U64. */
6042DECL_INLINE_THROW(uint32_t)
6043iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
6044{
6045 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6046 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
6047 Assert(iSReg < X86_SREG_COUNT);
6048
6049 /*
6050     * For now, we will not create a shadow copy of a selector. The rationale
6051     * is that since we do not recompile the popping and loading of segment
6052     * registers and that the IEM_MC_FETCH_SREG_U* MCs are only used for
6053     * pushing and moving to registers, there is only a small chance that the
6054     * shadow copy will be accessed again before the register is reloaded.  One
6055     * scenario would be nested calls in 16-bit code, but I doubt it's worth
6056     * the extra register pressure atm.
6057     *
6058     * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
6059     * and iemNativeVarRegisterAcquire for a load scenario. We only have the
6060     * store scenario covered at present (r160730).
6061 */
6062 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6063 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
6064 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
6065 iemNativeVarRegisterRelease(pReNative, idxDstVar);
6066 return off;
6067}
6068
6069
6070
6071/*********************************************************************************************************************************
6072* Register references. *
6073*********************************************************************************************************************************/
6074
6075#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
6076 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
6077
6078#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
6079 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
6080
6081/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
6082DECL_INLINE_THROW(uint32_t)
6083iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
6084{
6085 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
6086 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6087 Assert(iGRegEx < 20);
6088
6089 if (iGRegEx < 16)
6090 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6091 else
6092 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
6093
6094 /* If we've delayed writing back the register value, flush it now. */
6095 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6096
6097 /* If it's not a const reference we need to flush the shadow copy of the register now. */
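    /* (Whoever takes the reference may write through it, which would leave any
       host register shadowing this GPR stale.) */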
6098 if (!fConst)
6099 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
6100
6101 return off;
6102}
6103
6104#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
6105 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
6106
6107#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
6108 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
6109
6110#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
6111 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
6112
6113#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
6114 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
6115
6116#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
6117 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
6118
6119#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
6120 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
6121
6122#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
6123 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
6124
6125#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
6126 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
6127
6128#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
6129 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
6130
6131#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
6132 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
6133
6134/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
6135DECL_INLINE_THROW(uint32_t)
6136iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
6137{
6138 Assert(iGReg < 16);
6139 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
6140 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6141
6142 /* If we've delayed writing back the register value, flush it now. */
6143 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
6144
6145 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6146 if (!fConst)
6147 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
6148
6149 return off;
6150}
6151
6152
6153#undef IEM_MC_REF_EFLAGS /* should not be used. */
6154#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
6155 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
6156 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
6157
6158/** Handles IEM_MC_REF_EFLAGS_EX. */
6159DECL_INLINE_THROW(uint32_t)
6160iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
6161{
6162 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
6163 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6164
6165#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6166 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
6167
6168 /* Updating the skipping according to the outputs is a little early, but
6169 we don't have any other hooks for references atm. */
6170 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6171 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6172 else if (fEflOutput & X86_EFL_STATUS_BITS)
6173 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
6174 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6175#else
6176 RT_NOREF(fEflInput, fEflOutput);
6177#endif
6178
6179 /* If we've delayed writing back the register value, flush it now. */
6180 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
6181
6182 /* If there is a shadow copy of guest EFLAGS, flush it now. */
6183 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
6184
6185 return off;
6186}
6187
6188
6189/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
6190 * different code from the threaded recompiler, that may become helpful. For now
6191 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
6192#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
6193
6194
6195#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
6196 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
6197
6198#define IEM_MC_REF_XREG_XMM(a_puXmmDst, a_iXReg) \
6199 off = iemNativeEmitRefXregXxx(pReNative, off, a_puXmmDst, a_iXReg, false /*fConst*/)
6200
6201#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
6202 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
6203
6204#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
6205 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
6206
6207#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6208/* Just being paranoid here. */
6209# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
6210AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
6211AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
6212AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
6213AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
6214# endif
6215AssertCompileMemberOffset(X86XMMREG, au64, 0);
6216AssertCompileMemberOffset(X86XMMREG, au32, 0);
6217AssertCompileMemberOffset(X86XMMREG, ar64, 0);
6218AssertCompileMemberOffset(X86XMMREG, ar32, 0);
6219
6220# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
6221 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
6222# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
6223 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
6224# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
6225 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
6226# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
6227 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
6228#endif
6229
6230/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
6231DECL_INLINE_THROW(uint32_t)
6232iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
6233{
6234 Assert(iXReg < 16);
6235 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
6236 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6237
6238 /* If we've delayed writing back the register value, flush it now. */
6239 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
6240
6241#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6242 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6243 if (!fConst)
6244 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
6245#else
6246 RT_NOREF(fConst);
6247#endif
6248
6249 return off;
6250}
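/* Illustrative expansion (hypothetical variable name pu128Dst):
   IEM_MC_REF_XREG_U128(pu128Dst, 3) becomes
       off = iemNativeEmitRefXregXxx(pReNative, off, pu128Dst, 3, false /*fConst*/);
   yielding a writable kIemNativeGstRegRef_XReg reference to XMM3. */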
6251
6252
6253
6254/*********************************************************************************************************************************
6255* Effective Address Calculation *
6256*********************************************************************************************************************************/
6257#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
6258 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
6259
6260/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
6261 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
6262DECL_INLINE_THROW(uint32_t)
6263iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6264 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
6265{
6266 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6267
6268 /*
6269 * Handle the disp16 form with no registers first.
6270 *
6271 * Convert to an immediate value, as that'll delay the register allocation
6272 * and assignment till the memory access / call / whatever and we can use
6273 * a more appropriate register (or none at all).
6274 */
6275 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
6276 {
6277 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
6278 return off;
6279 }
6280
6281 /* Determine the displacement. */
6282 uint16_t u16EffAddr;
6283 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6284 {
6285 case 0: u16EffAddr = 0; break;
6286 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
6287 case 2: u16EffAddr = u16Disp; break;
6288 default: AssertFailedStmt(u16EffAddr = 0);
6289 }
6290
6291 /* Determine the registers involved. */
6292 uint8_t idxGstRegBase;
6293 uint8_t idxGstRegIndex;
6294 switch (bRm & X86_MODRM_RM_MASK)
6295 {
6296 case 0:
6297 idxGstRegBase = X86_GREG_xBX;
6298 idxGstRegIndex = X86_GREG_xSI;
6299 break;
6300 case 1:
6301 idxGstRegBase = X86_GREG_xBX;
6302 idxGstRegIndex = X86_GREG_xDI;
6303 break;
6304 case 2:
6305 idxGstRegBase = X86_GREG_xBP;
6306 idxGstRegIndex = X86_GREG_xSI;
6307 break;
6308 case 3:
6309 idxGstRegBase = X86_GREG_xBP;
6310 idxGstRegIndex = X86_GREG_xDI;
6311 break;
6312 case 4:
6313 idxGstRegBase = X86_GREG_xSI;
6314 idxGstRegIndex = UINT8_MAX;
6315 break;
6316 case 5:
6317 idxGstRegBase = X86_GREG_xDI;
6318 idxGstRegIndex = UINT8_MAX;
6319 break;
6320 case 6:
6321 idxGstRegBase = X86_GREG_xBP;
6322 idxGstRegIndex = UINT8_MAX;
6323 break;
6324#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
6325 default:
6326#endif
6327 case 7:
6328 idxGstRegBase = X86_GREG_xBX;
6329 idxGstRegIndex = UINT8_MAX;
6330 break;
6331 }
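 /* (Recap of the 16-bit ModR/M forms resolved above: rm=0 BX+SI, 1 BX+DI, 2 BP+SI,
     3 BP+DI, 4 SI, 5 DI, 6 BP (or disp16 when mod=0, handled at the top), 7 BX.) */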
6332
6333 /*
6334 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
6335 */
6336 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6337 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6338 kIemNativeGstRegUse_ReadOnly);
6339 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
6340 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6341 kIemNativeGstRegUse_ReadOnly)
6342 : UINT8_MAX;
6343#ifdef RT_ARCH_AMD64
6344 if (idxRegIndex == UINT8_MAX)
6345 {
6346 if (u16EffAddr == 0)
6347 {
6348 /* movzx ret, base */
6349 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
6350 }
6351 else
6352 {
6353 /* lea ret32, [base64 + disp32] */
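 /* Worked example with hypothetical register assignments: for idxRegRet=ecx,
    idxRegBase=rbx and u16EffAddr=0x0010 the bytes emitted below are
    8d 8b 10 00 00 00, i.e. lea ecx, [rbx+0x10]; a REX prefix is only added
    when one of the registers is r8..r15. */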
6354 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6355 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6356 if (idxRegRet >= 8 || idxRegBase >= 8)
6357 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6358 pbCodeBuf[off++] = 0x8d;
6359 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6360 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
6361 else
6362 {
6363 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
6364 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6365 }
6366 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6367 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6368 pbCodeBuf[off++] = 0;
6369 pbCodeBuf[off++] = 0;
6370 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6371
6372 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6373 }
6374 }
6375 else
6376 {
6377 /* lea ret32, [index64 + base64 (+ disp32)] */
6378 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6379 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6380 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6381 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6382 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6383 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6384 pbCodeBuf[off++] = 0x8d;
6385 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
6386 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6387 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
6388 if (bMod == X86_MOD_MEM4)
6389 {
6390 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6391 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6392 pbCodeBuf[off++] = 0;
6393 pbCodeBuf[off++] = 0;
6394 }
6395 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6396 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6397 }
6398
6399#elif defined(RT_ARCH_ARM64)
6400 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6401 if (u16EffAddr == 0)
6402 {
6403 if (idxRegIndex == UINT8_MAX)
6404 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
6405 else
6406 {
6407 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
6408 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6409 }
6410 }
6411 else
6412 {
6413 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
6414 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
6415 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
6416 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6417 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
6418 else
6419 {
6420 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
6421 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6422 }
6423 if (idxRegIndex != UINT8_MAX)
6424 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
6425 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6426 }
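 /* (The +/-4095 limits checked above match the 12-bit unsigned immediate field of the
     A64 ADD/SUB (immediate) instructions; larger displacements are materialized with
     MOVZ and then added as a register operand.) */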
6427
6428#else
6429# error "port me"
6430#endif
6431
6432 if (idxRegIndex != UINT8_MAX)
6433 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6434 iemNativeRegFreeTmp(pReNative, idxRegBase);
6435 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6436 return off;
6437}
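/* Illustrative decode (hypothetical operands): a_bRm=0x47 means mod=1, rm=7, which the
   code above resolves to BX with a disp8 taken from a_u16Disp; with a_u16Disp=0x10 the
   emitted code computes (uint16_t)(BX + 0x10) into the result variable. */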
6438
6439
6440#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
6441 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
6442
6443/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
6444 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
6445DECL_INLINE_THROW(uint32_t)
6446iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6447 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
6448{
6449 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6450
6451 /*
6452 * Handle the disp32 form with no registers first.
6453 *
6454 * Convert to an immediate value, as that'll delay the register allocation
6455 * and assignment till the memory access / call / whatever and we can use
6456 * a more appropriate register (or none at all).
6457 */
6458 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6459 {
6460 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
6461 return off;
6462 }
6463
6464 /* Calculate the fixed displacement (more on this below for SIB.B=4 and SIB.B=5). */
6465 uint32_t u32EffAddr = 0;
6466 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6467 {
6468 case 0: break;
6469 case 1: u32EffAddr = (int8_t)u32Disp; break;
6470 case 2: u32EffAddr = u32Disp; break;
6471 default: AssertFailed();
6472 }
6473
6474 /* Get the register (or SIB) value. */
6475 uint8_t idxGstRegBase = UINT8_MAX;
6476 uint8_t idxGstRegIndex = UINT8_MAX;
6477 uint8_t cShiftIndex = 0;
6478 switch (bRm & X86_MODRM_RM_MASK)
6479 {
6480 case 0: idxGstRegBase = X86_GREG_xAX; break;
6481 case 1: idxGstRegBase = X86_GREG_xCX; break;
6482 case 2: idxGstRegBase = X86_GREG_xDX; break;
6483 case 3: idxGstRegBase = X86_GREG_xBX; break;
6484 case 4: /* SIB */
6485 {
6486 /* index w/ scaling. */
6487 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6488 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6489 {
6490 case 0: idxGstRegIndex = X86_GREG_xAX; break;
6491 case 1: idxGstRegIndex = X86_GREG_xCX; break;
6492 case 2: idxGstRegIndex = X86_GREG_xDX; break;
6493 case 3: idxGstRegIndex = X86_GREG_xBX; break;
6494 case 4: cShiftIndex = 0; /*no index*/ break;
6495 case 5: idxGstRegIndex = X86_GREG_xBP; break;
6496 case 6: idxGstRegIndex = X86_GREG_xSI; break;
6497 case 7: idxGstRegIndex = X86_GREG_xDI; break;
6498 }
6499
6500 /* base */
6501 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
6502 {
6503 case 0: idxGstRegBase = X86_GREG_xAX; break;
6504 case 1: idxGstRegBase = X86_GREG_xCX; break;
6505 case 2: idxGstRegBase = X86_GREG_xDX; break;
6506 case 3: idxGstRegBase = X86_GREG_xBX; break;
6507 case 4:
6508 idxGstRegBase = X86_GREG_xSP;
6509 u32EffAddr += uSibAndRspOffset >> 8;
6510 break;
6511 case 5:
6512 if ((bRm & X86_MODRM_MOD_MASK) != 0)
6513 idxGstRegBase = X86_GREG_xBP;
6514 else
6515 {
6516 Assert(u32EffAddr == 0);
6517 u32EffAddr = u32Disp;
6518 }
6519 break;
6520 case 6: idxGstRegBase = X86_GREG_xSI; break;
6521 case 7: idxGstRegBase = X86_GREG_xDI; break;
6522 }
6523 break;
6524 }
6525 case 5: idxGstRegBase = X86_GREG_xBP; break;
6526 case 6: idxGstRegBase = X86_GREG_xSI; break;
6527 case 7: idxGstRegBase = X86_GREG_xDI; break;
6528 }
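 /* (SIB recap for the above: bits 7:6 = scale, 5:3 = index (100b = no index),
     2:0 = base; base=101b with mod=0 means disp32 and no base register, while
     base=100b selects ESP and picks up the extra offset from uSibAndRspOffset.) */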
6529
6530 /*
6531 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6532 * the start of the function.
6533 */
6534 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6535 {
6536 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
6537 return off;
6538 }
6539
6540 /*
6541 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6542 */
6543 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6544 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6545 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6546 kIemNativeGstRegUse_ReadOnly);
6547 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6548 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6549 kIemNativeGstRegUse_ReadOnly);
6550
6551 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6552 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6553 {
6554 idxRegBase = idxRegIndex;
6555 idxRegIndex = UINT8_MAX;
6556 }
6557
6558#ifdef RT_ARCH_AMD64
6559 if (idxRegIndex == UINT8_MAX)
6560 {
6561 if (u32EffAddr == 0)
6562 {
6563 /* mov ret, base */
6564 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6565 }
6566 else
6567 {
6568 /* lea ret32, [base64 + disp32] */
6569 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6570 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6571 if (idxRegRet >= 8 || idxRegBase >= 8)
6572 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6573 pbCodeBuf[off++] = 0x8d;
6574 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6575 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6576 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6577 else
6578 {
6579 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6580 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6581 }
6582 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6583 if (bMod == X86_MOD_MEM4)
6584 {
6585 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6586 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6587 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6588 }
6589 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6590 }
6591 }
6592 else
6593 {
6594 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6595 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6596 if (idxRegBase == UINT8_MAX)
6597 {
6598 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
6599 if (idxRegRet >= 8 || idxRegIndex >= 8)
6600 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6601 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6602 pbCodeBuf[off++] = 0x8d;
6603 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6604 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6605 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6606 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6607 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6608 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6609 }
6610 else
6611 {
6612 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6613 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6614 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6615 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6616 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6617 pbCodeBuf[off++] = 0x8d;
6618 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6619 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6620 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6621 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6622 if (bMod != X86_MOD_MEM0)
6623 {
6624 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6625 if (bMod == X86_MOD_MEM4)
6626 {
6627 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6628 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6629 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6630 }
6631 }
6632 }
6633 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6634 }
6635
6636#elif defined(RT_ARCH_ARM64)
6637 if (u32EffAddr == 0)
6638 {
6639 if (idxRegIndex == UINT8_MAX)
6640 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6641 else if (idxRegBase == UINT8_MAX)
6642 {
6643 if (cShiftIndex == 0)
6644 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
6645 else
6646 {
6647 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6648 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
6649 }
6650 }
6651 else
6652 {
6653 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6654 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6655 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6656 }
6657 }
6658 else
6659 {
6660 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
6661 {
6662 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6663 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
6664 }
6665 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
6666 {
6667 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6668 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6669 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
6670 }
6671 else
6672 {
6673 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
6674 if (idxRegBase != UINT8_MAX)
6675 {
6676 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6677 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6678 }
6679 }
6680 if (idxRegIndex != UINT8_MAX)
6681 {
6682 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6683 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6684 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6685 }
6686 }
6687
6688#else
6689# error "port me"
6690#endif
6691
6692 if (idxRegIndex != UINT8_MAX)
6693 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6694 if (idxRegBase != UINT8_MAX)
6695 iemNativeRegFreeTmp(pReNative, idxRegBase);
6696 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6697 return off;
6698}
6699
6700
6701#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6702 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6703 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6704
6705#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6706 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6707 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6708
6709#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6710 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6711 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
6712
6713/**
6714 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
6715 *
6716 * @returns New code buffer offset.
6717 * @param pReNative The native recompile state.
6718 * @param off The current code buffer offset.
6719 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
6720 * bit 4 to REX.X. The two bits are part of the
6721 * REG sub-field, which isn't needed in this
6722 * function.
6723 * @param uSibAndRspOffset Two parts:
6724 * - The first 8 bits make up the SIB byte.
6725 * - The next 8 bits are the fixed RSP/ESP offset
6726 * in case of a pop [xSP].
6727 * @param u32Disp The displacement byte/word/dword, if any.
6728 * @param cbInstr The size of the fully decoded instruction. Used
6729 * for RIP relative addressing.
6730 * @param idxVarRet The result variable number.
6731 * @param f64Bit Whether to use a 64-bit or 32-bit address size
6732 * when calculating the address.
6733 *
6734 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
6735 */
6736DECL_INLINE_THROW(uint32_t)
6737iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
6738 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
6739{
6740 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6741
6742 /*
6743 * Special case the rip + disp32 form first.
6744 */
6745 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6746 {
6747 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6748 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
6749 kIemNativeGstRegUse_ReadOnly);
6750 if (f64Bit)
6751 {
6752#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6753 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr + (int64_t)pReNative->Core.offPc;
6754#else
6755 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
6756#endif
6757#ifdef RT_ARCH_AMD64
6758 if ((int32_t)offFinalDisp == offFinalDisp)
6759 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
6760 else
6761 {
6762 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
6763 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
6764 }
6765#else
6766 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6767#endif
6768 }
6769 else
6770 {
6771# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6772 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr + (int32_t)pReNative->Core.offPc;
6773# else
6774 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr;
6775# endif
6776 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6777 }
6778 iemNativeRegFreeTmp(pReNative, idxRegPc);
6779 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6780 return off;
6781 }
6782
6783 /* Calculate the fixed displacement (more on this below for SIB.B=4 and SIB.B=5). */
6784 int64_t i64EffAddr = 0;
6785 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6786 {
6787 case 0: break;
6788 case 1: i64EffAddr = (int8_t)u32Disp; break;
6789 case 2: i64EffAddr = (int32_t)u32Disp; break;
6790 default: AssertFailed();
6791 }
6792
6793 /* Get the register (or SIB) value. */
6794 uint8_t idxGstRegBase = UINT8_MAX;
6795 uint8_t idxGstRegIndex = UINT8_MAX;
6796 uint8_t cShiftIndex = 0;
6797 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6798 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6799 else /* SIB: */
6800 {
6801 /* index w/ scaling. */
6802 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6803 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6804 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6805 if (idxGstRegIndex == 4)
6806 {
6807 /* no index */
6808 cShiftIndex = 0;
6809 idxGstRegIndex = UINT8_MAX;
6810 }
6811
6812 /* base */
6813 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6814 if (idxGstRegBase == 4)
6815 {
6816 /* pop [rsp] hack */
6817 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
6818 }
6819 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6820 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6821 {
6822 /* mod=0 and base=5 -> disp32, no base reg. */
6823 Assert(i64EffAddr == 0);
6824 i64EffAddr = (int32_t)u32Disp;
6825 idxGstRegBase = UINT8_MAX;
6826 }
6827 }
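 /* (Summary of the folding above: bRmEx bit 3 carries REX.B and extends the rm/base
     register number, bit 4 carries REX.X and extends the index; index 4 means no index,
     and base 5 or 13 with mod=0 means disp32 without a base register.) */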
6828
6829 /*
6830 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6831 * the start of the function.
6832 */
6833 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6834 {
6835 if (f64Bit)
6836 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6837 else
6838 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6839 return off;
6840 }
6841
6842 /*
6843 * Now emit code that calculates:
6844 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6845 * or if !f64Bit:
6846 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6847 */
6848 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6849 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6850 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6851 kIemNativeGstRegUse_ReadOnly);
6852 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6853 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6854 kIemNativeGstRegUse_ReadOnly);
6855
6856 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6857 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6858 {
6859 idxRegBase = idxRegIndex;
6860 idxRegIndex = UINT8_MAX;
6861 }
6862
6863#ifdef RT_ARCH_AMD64
6864 uint8_t bFinalAdj;
6865 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6866 bFinalAdj = 0; /* likely */
6867 else
6868 {
6869 /* pop [rsp] with a problematic disp32 value. Split out the
6870 RSP offset and add it separately afterwards (bFinalAdj). */
6871 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
6872 Assert(idxGstRegBase == X86_GREG_xSP);
6873 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6874 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6875 Assert(bFinalAdj != 0);
6876 i64EffAddr -= bFinalAdj;
6877 Assert((int32_t)i64EffAddr == i64EffAddr);
6878 }
6879 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6880//pReNative->pInstrBuf[off++] = 0xcc;
6881
6882 if (idxRegIndex == UINT8_MAX)
6883 {
6884 if (u32EffAddr == 0)
6885 {
6886 /* mov ret, base */
6887 if (f64Bit)
6888 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6889 else
6890 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6891 }
6892 else
6893 {
6894 /* lea ret, [base + disp32] */
6895 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6896 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6897 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
6898 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6899 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6900 | (f64Bit ? X86_OP_REX_W : 0);
6901 pbCodeBuf[off++] = 0x8d;
6902 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6903 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6904 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6905 else
6906 {
6907 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6908 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6909 }
6910 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6911 if (bMod == X86_MOD_MEM4)
6912 {
6913 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6914 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6915 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6916 }
6917 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6918 }
6919 }
6920 else
6921 {
6922 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6923 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6924 if (idxRegBase == UINT8_MAX)
6925 {
6926 /* lea ret, [(index64 << cShiftIndex) + disp32] */
6927 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
6928 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6929 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6930 | (f64Bit ? X86_OP_REX_W : 0);
6931 pbCodeBuf[off++] = 0x8d;
6932 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6933 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6934 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6935 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6936 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6937 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6938 }
6939 else
6940 {
6941 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6942 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6943 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6944 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6945 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6946 | (f64Bit ? X86_OP_REX_W : 0);
6947 pbCodeBuf[off++] = 0x8d;
6948 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6949 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6950 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6951 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6952 if (bMod != X86_MOD_MEM0)
6953 {
6954 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6955 if (bMod == X86_MOD_MEM4)
6956 {
6957 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6958 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6959 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6960 }
6961 }
6962 }
6963 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6964 }
6965
6966 if (!bFinalAdj)
6967 { /* likely */ }
6968 else
6969 {
6970 Assert(f64Bit);
6971 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
6972 }
6973
6974#elif defined(RT_ARCH_ARM64)
6975 if (i64EffAddr == 0)
6976 {
6977 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6978 if (idxRegIndex == UINT8_MAX)
6979 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
6980 else if (idxRegBase != UINT8_MAX)
6981 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6982 f64Bit, false /*fSetFlags*/, cShiftIndex);
6983 else
6984 {
6985 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
6986 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
6987 }
6988 }
6989 else
6990 {
6991 if (f64Bit)
6992 { /* likely */ }
6993 else
6994 i64EffAddr = (int32_t)i64EffAddr;
6995
6996 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
6997 {
6998 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6999 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
7000 }
7001 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
7002 {
7003 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7004 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
7005 }
7006 else
7007 {
7008 if (f64Bit)
7009 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
7010 else
7011 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
7012 if (idxRegBase != UINT8_MAX)
7013 {
7014 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7015 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
7016 }
7017 }
7018 if (idxRegIndex != UINT8_MAX)
7019 {
7020 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7021 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
7022 f64Bit, false /*fSetFlags*/, cShiftIndex);
7023 }
7024 }
7025
7026#else
7027# error "port me"
7028#endif
7029
7030 if (idxRegIndex != UINT8_MAX)
7031 iemNativeRegFreeTmp(pReNative, idxRegIndex);
7032 if (idxRegBase != UINT8_MAX)
7033 iemNativeRegFreeTmp(pReNative, idxRegBase);
7034 iemNativeVarRegisterRelease(pReNative, idxVarRet);
7035 return off;
7036}
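/* Illustrative decode (hypothetical operands): a_bRmEx=0x04 selects the SIB path with
   mod=0; with a_uSibAndRspOffset=0xc1 (scale=3, index=RAX, base=RCX) and no displacement
   the code above emits the equivalent of idxRegRet = RCX + (RAX << 3). */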
7037
7038
7039/*********************************************************************************************************************************
7040* Memory fetches and stores common *
7041*********************************************************************************************************************************/
7042
7043typedef enum IEMNATIVEMITMEMOP
7044{
7045 kIemNativeEmitMemOp_Store = 0,
7046 kIemNativeEmitMemOp_Fetch,
7047 kIemNativeEmitMemOp_Fetch_Zx_U16,
7048 kIemNativeEmitMemOp_Fetch_Zx_U32,
7049 kIemNativeEmitMemOp_Fetch_Zx_U64,
7050 kIemNativeEmitMemOp_Fetch_Sx_U16,
7051 kIemNativeEmitMemOp_Fetch_Sx_U32,
7052 kIemNativeEmitMemOp_Fetch_Sx_U64
7053} IEMNATIVEMITMEMOP;
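/* The Zx/Sx variants fetch cbMem bytes and zero- respectively sign-extend the result to
   the width given in the name (e.g. Fetch_Sx_U64 sign-extends the fetched value to 64 bits). */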
7054
7055/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
7056 * as well as IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
7057 * (the FLAT variants with iSegReg = UINT8_MAX). */
7058DECL_INLINE_THROW(uint32_t)
7059iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
7060 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAlignMaskAndCtl, IEMNATIVEMITMEMOP enmOp,
7061 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
7062{
7063 /*
7064 * Assert sanity.
7065 */
7066 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7067 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7068 Assert( enmOp != kIemNativeEmitMemOp_Store
7069 || pVarValue->enmKind == kIemNativeVarKind_Immediate
7070 || pVarValue->enmKind == kIemNativeVarKind_Stack);
7071 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7072 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
7073 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
7074 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
7075 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7076 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
7077#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7078 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
7079 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
7080#else
7081 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
7082#endif
7083 Assert(!(fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)));
7084 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7085#ifdef VBOX_STRICT
7086 if (iSegReg == UINT8_MAX)
7087 {
7088 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7089 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7090 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7091 switch (cbMem)
7092 {
7093 case 1:
7094 Assert( pfnFunction
7095 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
7096 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7097 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7098 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7099 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7100 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
7101 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
7102 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
7103 : UINT64_C(0xc000b000a0009000) ));
7104 Assert(!fAlignMaskAndCtl);
7105 break;
7106 case 2:
7107 Assert( pfnFunction
7108 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
7109 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7110 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7111 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7112 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
7113 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
7114 : UINT64_C(0xc000b000a0009000) ));
7115 Assert(fAlignMaskAndCtl <= 1);
7116 break;
7117 case 4:
7118 Assert( pfnFunction
7119 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
7120 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7121 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7122 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
7123 : UINT64_C(0xc000b000a0009000) ));
7124 Assert(fAlignMaskAndCtl <= 3);
7125 break;
7126 case 8:
7127 Assert( pfnFunction
7128 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
7129 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
7130 : UINT64_C(0xc000b000a0009000) ));
7131 Assert(fAlignMaskAndCtl <= 7);
7132 break;
7133#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7134 case sizeof(RTUINT128U):
7135 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7136 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
7137 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7138 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
7139 || ( enmOp == kIemNativeEmitMemOp_Store
7140 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7141 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
7142 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7143 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7144 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
7145 : fAlignMaskAndCtl <= 15);
7146 break;
7147 case sizeof(RTUINT256U):
7148 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7149 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
7150 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
7151 || ( enmOp == kIemNativeEmitMemOp_Store
7152 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
7153 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
7154 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx
7155 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx
7156 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
7157 : fAlignMaskAndCtl <= 31);
7158 break;
7159#endif
7160 }
7161 }
7162 else
7163 {
7164 Assert(iSegReg < 6);
7165 switch (cbMem)
7166 {
7167 case 1:
7168 Assert( pfnFunction
7169 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
7170 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
7171 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7172 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7173 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7174 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
7175 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
7176 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
7177 : UINT64_C(0xc000b000a0009000) ));
7178 Assert(!fAlignMaskAndCtl);
7179 break;
7180 case 2:
7181 Assert( pfnFunction
7182 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
7183 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
7184 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7185 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7186 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
7187 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
7188 : UINT64_C(0xc000b000a0009000) ));
7189 Assert(fAlignMaskAndCtl <= 1);
7190 break;
7191 case 4:
7192 Assert( pfnFunction
7193 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
7194 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
7195 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
7196 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
7197 : UINT64_C(0xc000b000a0009000) ));
7198 Assert(fAlignMaskAndCtl <= 3);
7199 break;
7200 case 8:
7201 Assert( pfnFunction
7202 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
7203 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
7204 : UINT64_C(0xc000b000a0009000) ));
7205 Assert(fAlignMaskAndCtl <= 7);
7206 break;
7207#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7208 case sizeof(RTUINT128U):
7209 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7210 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
7211 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7212 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
7213 || ( enmOp == kIemNativeEmitMemOp_Store
7214 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7215 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
7216 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7217 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7218 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
7219 : fAlignMaskAndCtl <= 15);
7220 break;
7221 case sizeof(RTUINT256U):
7222 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7223 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
7224 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
7225 || ( enmOp == kIemNativeEmitMemOp_Store
7226 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
7227 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
7228 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx
7229 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx
7230 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
7231 : fAlignMaskAndCtl <= 31);
7232 break;
7233#endif
7234 }
7235 }
7236#endif
7237
7238#ifdef VBOX_STRICT
7239 /*
7240 * Check that the fExec flags we've got make sense.
7241 */
7242 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7243#endif
7244
7245 /*
7246 * To keep things simple we have to commit any pending writes first as we
7247 * may end up making calls.
7248 */
7249 /** @todo we could postpone this till we make the call and reload the
7250 * registers after returning from the call. Not sure if that's sensible or
7251 * not, though. */
7252#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7253 off = iemNativeRegFlushPendingWrites(pReNative, off);
7254#else
7255 /* The program counter is treated differently for now. */
7256 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
7257#endif
7258
7259#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7260 /*
7261 * Move/spill/flush stuff out of call-volatile registers.
7262 * This is the easy way out. We could contain this to the tlb-miss branch
7263 * by saving and restoring active stuff here.
7264 */
7265 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7266#endif
7267
7268 /*
7269 * Define labels and allocate the result register (trying for the return
7270 * register if we can).
7271 */
7272 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7273#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7274 uint8_t idxRegValueFetch = UINT8_MAX;
7275
7276 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7277 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7278 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
7279 else
7280 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7281 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7282 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7283 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7284#else
7285 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7286 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7287 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7288 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7289#endif
7290 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
7291
7292#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7293 uint8_t idxRegValueStore = UINT8_MAX;
7294
7295 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7296 idxRegValueStore = !TlbState.fSkip
7297 && enmOp == kIemNativeEmitMemOp_Store
7298 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7299 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7300 : UINT8_MAX;
7301 else
7302 idxRegValueStore = !TlbState.fSkip
7303 && enmOp == kIemNativeEmitMemOp_Store
7304 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7305 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7306 : UINT8_MAX;
7307
7308#else
7309 uint8_t const idxRegValueStore = !TlbState.fSkip
7310 && enmOp == kIemNativeEmitMemOp_Store
7311 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7312 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7313 : UINT8_MAX;
7314#endif
7315 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7316 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7317 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7318 : UINT32_MAX;
7319
7320 /*
7321 * Jump to the TLB lookup code.
7322 */
7323 if (!TlbState.fSkip)
7324 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
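 /* (Resulting layout: the jump above goes forward to the TlbLookup block emitted further
     down; the TlbMiss helper-call path follows inline here and ends with a jump to TlbDone;
     on a TLB hit the lookup code falls through into the inline load/store code just before
     the TlbDone label.) */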
7325
7326 /*
7327 * TlbMiss:
7328 *
7329 * Call helper to do the fetching.
7330 * We flush all guest register shadow copies here.
7331 */
7332 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
7333
7334#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7335 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7336#else
7337 RT_NOREF(idxInstr);
7338#endif
7339
7340#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7341 if (pReNative->Core.offPc)
7342 {
7343 /*
7344 * Update the program counter but restore it at the end of the TlbMiss branch.
7345 * This should allow delaying more program counter updates for the TlbLookup and hit paths,
7346 * which are hopefully much more frequent, reducing the number of memory accesses.
7347 */
7348 /* Allocate a temporary PC register. */
7349/** @todo r=bird: This would technically need to be done up front as it's a register allocation. */
7350 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7351 kIemNativeGstRegUse_ForUpdate);
7352
7353 /* Perform the addition and store the result. */
7354 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7355 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7356# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
7357 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
7358# endif
7359
7360 /* Free and flush the PC register. */
7361 iemNativeRegFreeTmp(pReNative, idxPcReg);
7362 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7363 }
7364#endif
7365
7366#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7367 /* Save variables in volatile registers. */
7368 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7369 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
7370 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
7371 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7372#endif
7373
7374 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
7375 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
7376#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7377 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7378 {
7379 /*
7380 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
7381 *
7382 * @note A host register was assigned to the variable for the TlbLookup case above. It
7383 * must not be freed here, or the value will not be synced back into that register
7384 * further down the road because the variable would no longer know it has a register assigned.
7385 *
7386 * @note For loads it is not required to sync what is in the assigned register with the stack slot
7387 * as it will be overwritten anyway.
7388 */
7389 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7390 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
7391 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
7392 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7393 }
7394 else
7395#endif
7396 if (enmOp == kIemNativeEmitMemOp_Store)
7397 {
7398 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7399 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
7400#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7401 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7402#else
7403 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
7404 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7405#endif
7406 }
7407
7408 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
7409 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
7410#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7411 fVolGregMask);
7412#else
7413 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
7414#endif
7415
7416 if (iSegReg != UINT8_MAX)
7417 {
7418 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
7419 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
7420 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
7421 }
7422
7423 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7424 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7425
7426 /* Done setting up parameters, make the call. */
7427 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7428
7429 /*
7430 * Put the result in the right register if this is a fetch.
7431 */
7432 if (enmOp != kIemNativeEmitMemOp_Store)
7433 {
7434#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7435 if ( cbMem == sizeof(RTUINT128U)
7436 || cbMem == sizeof(RTUINT256U))
7437 {
7438 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
7439
7440 /* Sync the value on the stack with the host register assigned to the variable. */
7441 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
7442 }
7443 else
7444#endif
7445 {
7446 Assert(idxRegValueFetch == pVarValue->idxReg);
7447 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
7448 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
7449 }
7450 }
7451
7452#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7453 /* Restore variables and guest shadow registers to volatile registers. */
7454 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7455 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7456#endif
7457
7458#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7459 if (pReNative->Core.offPc)
7460 {
7461 /*
7462 * Time to restore the program counter to its original value.
7463 */
7464 /* Allocate a temporary PC register. */
7465 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7466 kIemNativeGstRegUse_ForUpdate);
7467
7468 /* Restore the original value. */
7469 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7470 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7471
7472 /* Free and flush the PC register. */
7473 iemNativeRegFreeTmp(pReNative, idxPcReg);
7474 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7475 }
7476#endif
7477
7478#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7479 if (!TlbState.fSkip)
7480 {
7481 /* end of TlbMiss - Jump to the done label. */
7482 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7483 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7484
7485 /*
7486 * TlbLookup:
7487 */
7488 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl,
7489 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
7490 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
7491
7492 /*
7493 * Emit code to do the actual storing / fetching.
7494 */
7495 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7496# ifdef IEM_WITH_TLB_STATISTICS
7497 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7498 enmOp == kIemNativeEmitMemOp_Store
7499 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
7500 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
7501# endif
7502 switch (enmOp)
7503 {
7504 case kIemNativeEmitMemOp_Store:
7505 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
7506 {
7507 switch (cbMem)
7508 {
7509 case 1:
7510 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7511 break;
7512 case 2:
7513 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7514 break;
7515 case 4:
7516 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7517 break;
7518 case 8:
7519 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7520 break;
7521#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7522 case sizeof(RTUINT128U):
7523 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7524 break;
7525 case sizeof(RTUINT256U):
7526 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7527 break;
7528#endif
7529 default:
7530 AssertFailed();
7531 }
7532 }
7533 else
7534 {
7535 switch (cbMem)
7536 {
7537 case 1:
7538 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
7539 idxRegMemResult, TlbState.idxReg1);
7540 break;
7541 case 2:
7542 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7543 idxRegMemResult, TlbState.idxReg1);
7544 break;
7545 case 4:
7546 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7547 idxRegMemResult, TlbState.idxReg1);
7548 break;
7549 case 8:
7550 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
7551 idxRegMemResult, TlbState.idxReg1);
7552 break;
7553 default:
7554 AssertFailed();
7555 }
7556 }
7557 break;
7558
7559 case kIemNativeEmitMemOp_Fetch:
7560 case kIemNativeEmitMemOp_Fetch_Zx_U16:
7561 case kIemNativeEmitMemOp_Fetch_Zx_U32:
7562 case kIemNativeEmitMemOp_Fetch_Zx_U64:
7563 switch (cbMem)
7564 {
7565 case 1:
7566 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7567 break;
7568 case 2:
7569 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7570 break;
7571 case 4:
7572 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7573 break;
7574 case 8:
7575 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7576 break;
7577#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7578 case sizeof(RTUINT128U):
7579 /*
7580 * No need to sync the register back to the stack here; the generic variable handling
7581 * code does that whenever a variable has a register assigned and the stack must be accessed.
7582 */
7583 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7584 break;
7585 case sizeof(RTUINT256U):
7586 /*
7587 * No need to sync the register back to the stack here; the generic variable handling
7588 * code does that whenever a variable has a register assigned and the stack must be accessed.
7589 */
7590 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7591 break;
7592#endif
7593 default:
7594 AssertFailed();
7595 }
7596 break;
7597
7598 case kIemNativeEmitMemOp_Fetch_Sx_U16:
7599 Assert(cbMem == 1);
7600 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7601 break;
7602
7603 case kIemNativeEmitMemOp_Fetch_Sx_U32:
7604 Assert(cbMem == 1 || cbMem == 2);
7605 if (cbMem == 1)
7606 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7607 else
7608 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7609 break;
7610
7611 case kIemNativeEmitMemOp_Fetch_Sx_U64:
7612 switch (cbMem)
7613 {
7614 case 1:
7615 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7616 break;
7617 case 2:
7618 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7619 break;
7620 case 4:
7621 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7622 break;
7623 default:
7624 AssertFailed();
7625 }
7626 break;
7627
7628 default:
7629 AssertFailed();
7630 }
7631
7632 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7633
7634 /*
7635 * TlbDone:
7636 */
7637 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7638
7639 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7640
7641# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7642 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7643 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7644# endif
7645 }
7646#else
7647 RT_NOREF(fAlignMaskAndCtl, idxLabelTlbMiss);
7648#endif
7649
7650 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
7651 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7652 return off;
7653}
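/*
 * Rough shape of the code the emitter above produces when the TLB lookup is
 * not skipped (an illustrative summary only; the exact sequence depends on the
 * host architecture and the IEMNATIVE_WITH_* build options):
 *
 *            jmp     TlbLookup
 *      TlbMiss:
 *            ; save active variables and guest shadows living in volatile regs
 *            ; load pVCpu, the guest address (and the value for stores) into
 *            ; the call argument registers
 *            call    pfnFunction                 ; e.g. iemNativeHlpMemFetchDataU32
 *            ; for fetches: copy the return register into idxRegValueFetch
 *            ; restore the saved variables and guest shadows
 *            jmp     TlbDone
 *      TlbLookup:
 *            ; inline TLB probe, branching to TlbMiss on a miss or access problem
 *            ; inline load/store through the translated address in idxRegMemResult
 *      TlbDone:
 */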
7654
7655
7656
7657/*********************************************************************************************************************************
7658* Memory fetches (IEM_MEM_FETCH_XXX). *
7659*********************************************************************************************************************************/
7660
7661/* 8-bit segmented: */
7662#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
7663 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
7664 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7665 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7666
7667#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7668 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7669 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7670 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7671
7672#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7673 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7674 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7675 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7676
7677#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7678 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7679 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7680 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7681
7682#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7683 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7684 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7685 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7686
7687#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7688 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7689 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7690 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7691
7692#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7693 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7694 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7695 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7696
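/*
 * The _ZX_ / _SX_ variants above differ only in how the fetched byte is widened
 * into the destination register.  A minimal C sketch of the intended semantics
 * (illustration only; these helpers are not part of the recompiler and the
 * names are made up for this example):
 */
DECL_FORCE_INLINE(uint64_t) iemExampleFetchU8ZxU64(uint8_t bFetched)
{
    return bFetched;                    /* zero-extended: the upper 56 bits become 0 */
}

DECL_FORCE_INLINE(int64_t) iemExampleFetchU8SxU64(uint8_t bFetched)
{
    return (int8_t)bFetched;            /* sign-extended: bit 7 is replicated upwards */
}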
7697/* 16-bit segmented: */
7698#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7699 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7700 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7701 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7702
7703#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7704 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7705 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7706 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7707
7708#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7709 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7710 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7711 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7712
7713#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7714 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7715 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7716 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7717
7718#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7719 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7720 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7721 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7722
7723#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7724 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7725 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7726 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7727
7728
7729/* 32-bit segmented: */
7730#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7731 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7732 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7733 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7734
7735#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7736 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7737 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7738 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7739
7740#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7741 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7742 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7743 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7744
7745#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7746 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7747 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7748 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7749
7750#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
7751 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
7752 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7753 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7754
7755#define IEM_MC_FETCH_MEM_I16_DISP(a_i16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7756 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
7757 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7758 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7759
7760#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
7761 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
7762 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7763 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7764
7765#define IEM_MC_FETCH_MEM_I32_DISP(a_i32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7766 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
7767 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7768 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7769
7770#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
7771 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
7772 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7773 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7774
7775AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
7776#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
7777 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
7778 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7779 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7780
7781
7782/* 64-bit segmented: */
7783#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7784 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7785 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7786 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7787
7788AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
7789#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
7790 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
7791 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7792 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7793
7794
7795/* 8-bit flat: */
7796#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
7797 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
7798 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7799 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7800
7801#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
7802 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7803 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7804 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7805
7806#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
7807 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7808 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7809 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7810
7811#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
7812 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7813 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7814 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7815
7816#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
7817 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7818 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7819 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7820
7821#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
7822 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7823 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7824 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7825
7826#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7827 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7828 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7829 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7830
7831
7832/* 16-bit flat: */
7833#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7834 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7835 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7836 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7837
7838#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7839 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7840 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7841 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7842
7843#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
7844 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7845 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7846 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7847
7848#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
7849 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7850 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7851 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7852
7853#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
7854 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7855 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7856 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7857
7858#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
7859 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7860 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7861 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7862
7863/* 32-bit flat: */
7864#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7865 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7866 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7867 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7868
7869#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
7870 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7871 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7872 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7873
7874#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
7875 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7876 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7877 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7878
7879#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
7880 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7881 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7882 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7883
7884#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
7885 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7886 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7887 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7888
7889#define IEM_MC_FETCH_MEM_FLAT_I16_DISP(a_i16Dst, a_GCPtrMem, a_offDisp) \
7890 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7891 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7892 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7893
7894#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
7895 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7896 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7897 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7898
7899#define IEM_MC_FETCH_MEM_FLAT_I32_DISP(a_i32Dst, a_GCPtrMem, a_offDisp) \
7900 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7901 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7902 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7903
7904#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
7905 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
7906 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7907 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7908
7909#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
7910 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
7911 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7912 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7913
7914
7915/* 64-bit flat: */
7916#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7917 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7918 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7919 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7920
7921#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
7922 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
7923 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7924 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7925
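/*
 * Note on the fAlignMaskAndCtl argument used throughout this section: the low
 * bits hold a plain alignment mask (sizeof(type) - 1 for natural alignment),
 * while the SIMD variants below additionally OR in IEM_MEMMAP_F_ALIGN_GP and/or
 * IEM_MEMMAP_F_ALIGN_SSE to request the stricter alignment fault behaviour of
 * the SSE/AVX aligned-access instructions.
 */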
7926#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7927/* 128-bit segmented: */
7928#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
7929 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7930 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7931 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
7932
7933#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
7934 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7935 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7936 kIemNativeEmitMemOp_Fetch, \
7937 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7938
7939AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7940#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
7941 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, sizeof(X86XMMREG), \
7942 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7943 kIemNativeEmitMemOp_Fetch, \
7944 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7945
7946#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7947 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7948 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7949 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7950
7951#define IEM_MC_FETCH_MEM_XMM_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7952 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7953 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7954 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7955
7956
7957/* 128-bit flat: */
7958#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
7959 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7960 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7961 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
7962
7963#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
7964 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7965 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7966 kIemNativeEmitMemOp_Fetch, \
7967 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7968
7969#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
7970 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, sizeof(X86XMMREG), \
7971 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7972 kIemNativeEmitMemOp_Fetch, \
7973 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7974
7975#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
7976 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7977 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7978 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7979
7980#define IEM_MC_FETCH_MEM_FLAT_XMM_NO_AC(a_uXmmDst, a_GCPtrMem) \
7981 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
7982 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7983 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7984
7985/* 256-bit segmented: */
7986#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
7987 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7988 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7989 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7990
7991#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7992 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7993 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7994 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7995
7996#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
7997 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
7998 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7999 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
8000
8001#define IEM_MC_FETCH_MEM_YMM_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
8002 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
8003 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
8004 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8005
8006
8007/* 256-bit flat: */
8008#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
8009 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
8010 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
8011 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8012
8013#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
8014 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
8015 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
8016 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8017
8018#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
8019 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
8020 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
8021 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
8022
8023#define IEM_MC_FETCH_MEM_FLAT_YMM_NO_AC(a_uYmmDst, a_GCPtrMem) \
8024 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uYmmDst, UINT8_MAX, a_GCPtrMem, \
8025 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
8026 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8027
8028#endif
8029
8030
8031/*********************************************************************************************************************************
8032* Memory stores (IEM_MEM_STORE_XXX). *
8033*********************************************************************************************************************************/
8034
8035#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
8036 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
8037 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
8038 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8039
8040#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
8041 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
8042 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
8043 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8044
8045#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
8046 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
8047 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
8048 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8049
8050#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
8051 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
8052 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
8053 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8054
8055
8056#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
8057 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
8058 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
8059 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8060
8061#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
8062 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
8063 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
8064 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8065
8066#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
8067 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
8068 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
8069 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8070
8071#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
8072 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
8073 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
8074 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8075
8076
8077#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
8078 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8079 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8080
8081#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
8082 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8083 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8084
8085#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
8086 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8087 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8088
8089#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
8090 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8091 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8092
8093
8094#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
8095 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8096 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8097
8098#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
8099 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8100 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8101
8102#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
8103 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8104 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8105
8106#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
8107 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8108 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8109
8110/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
8111 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
8112DECL_INLINE_THROW(uint32_t)
8113iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
8114 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
8115{
8116 /*
8117 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
8118 * to do the grunt work.
8119 */
8120 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
8121 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
8122 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
8123 pfnFunction, idxInstr);
8124 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
8125 return off;
8126}
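/*
 * Illustrative expansion (for exposition only; 'GCPtrEff' stands for whatever
 * address variable the caller passes): IEM_MC_STORE_MEM_U16_CONST(X86_SREG_DS,
 * GCPtrEff, 0x1234) boils down to a 2-byte immediate store, roughly:
 *
 *      uint8_t const idxVarConst = iemNativeVarAllocConst(pReNative, sizeof(uint16_t), 0x1234);
 *      off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConst, X86_SREG_DS, GCPtrEff,
 *                                                 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store,
 *                                                 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr);
 *      iemNativeVarFreeLocal(pReNative, idxVarConst);
 */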
8127
8128
8129#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8130# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
8131 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8132 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8133 kIemNativeEmitMemOp_Store, \
8134 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
8135
8136# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
8137 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
8138 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
8139 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
8140
8141# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
8142 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
8143 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
8144 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
8145
8146# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
8147 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
8148 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
8149 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8150
8151
8152# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
8153 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8154 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8155 kIemNativeEmitMemOp_Store, \
8156 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
8157
8158# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
8159 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
8160 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
8161 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
8162
8163# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
8164 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
8165 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
8166 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
8167
8168# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
8169 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
8170 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
8171 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8172#endif
8173
8174
8175
8176/*********************************************************************************************************************************
8177* Stack Accesses. *
8178*********************************************************************************************************************************/
8179/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
8180#define IEM_MC_PUSH_U16(a_u16Value) \
8181 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
8182 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
8183#define IEM_MC_PUSH_U32(a_u32Value) \
8184 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
8185 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
8186#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
8187 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
8188 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
8189#define IEM_MC_PUSH_U64(a_u64Value) \
8190 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
8191 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
8192
8193#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
8194 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
8195 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8196#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
8197 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
8198 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
8199#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
8200 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
8201 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
8202
8203#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
8204 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
8205 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8206#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
8207 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
8208 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
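/*
 * A minimal sketch of how the packed cBitsVarAndFlat value built by the
 * RT_MAKE_U32_FROM_U8() invocations above is taken apart again; it mirrors the
 * decoding done in iemNativeEmitStackPush below (the function and variable
 * names here are purely illustrative and not used anywhere else):
 */
DECL_FORCE_INLINE(void) iemExampleDecodeStackPushParam(void)
{
    uint32_t const cBitsVarAndFlat = RT_MAKE_U32_FROM_U8(32, 32, 1, 0);  /* as in IEM_MC_FLAT32_PUSH_U32_SREG */
    uint8_t  const cbMem           = RT_BYTE1(cBitsVarAndFlat) / 8;      /* operand size in bytes: 4 */
    uint8_t  const cBitsFlat       = RT_BYTE2(cBitsVarAndFlat);          /* 0, 32 or 64; non-zero means a flat stack */
    bool     const fIsSegReg       = RT_BYTE3(cBitsVarAndFlat) != 0;     /* set for the _SREG variants */
    RT_NOREF(cbMem, cBitsFlat, fIsSegReg);
}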
8209
8210
8211/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
8212DECL_INLINE_THROW(uint32_t)
8213iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
8214 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
8215{
8216 /*
8217 * Assert sanity.
8218 */
8219 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
8220 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
8221#ifdef VBOX_STRICT
8222 if (RT_BYTE2(cBitsVarAndFlat) != 0)
8223 {
8224 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8225 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8226 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8227 Assert( pfnFunction
8228 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8229 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
8230 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
8231 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8232 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
8233 : UINT64_C(0xc000b000a0009000) ));
8234 }
8235 else
8236 Assert( pfnFunction
8237 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
8238 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
8239 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
8240 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
8241 : UINT64_C(0xc000b000a0009000) ));
8242#endif
8243
8244#ifdef VBOX_STRICT
8245 /*
8246 * Check that the fExec flags we've got make sense.
8247 */
8248 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8249#endif
8250
8251 /*
8252 * To keep things simple we have to commit any pending writes first as we
8253 * may end up making calls.
8254 */
8255 /** @todo we could postpone this till we make the call and reload the
8256 * registers after returning from the call. Not sure if that's sensible or
8257 * not, though. */
8258 off = iemNativeRegFlushPendingWrites(pReNative, off);
8259
8260 /*
8261 * First we calculate the new RSP and the effective stack pointer value.
8262 * For 64-bit mode and flat 32-bit these two are the same.
8263 * (Code structure is very similar to that of PUSH)
8264 */
8265 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
8266 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
8267 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
8268 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
8269 ? cbMem : sizeof(uint16_t);
8270 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
8271 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8272 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8273 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8274 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8275 if (cBitsFlat != 0)
8276 {
8277 Assert(idxRegEffSp == idxRegRsp);
8278 Assert(cBitsFlat == 32 || cBitsFlat == 64);
8279 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8280 if (cBitsFlat == 64)
8281 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
8282 else
8283 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
8284 }
8285 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8286 {
8287 Assert(idxRegEffSp != idxRegRsp);
8288 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8289 kIemNativeGstRegUse_ReadOnly);
8290#ifdef RT_ARCH_AMD64
8291 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8292#else
8293 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8294#endif
8295 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8296 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8297 offFixupJumpToUseOtherBitSp = off;
8298 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8299 {
8300 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8301 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8302 }
8303 else
8304 {
8305 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8306 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8307 }
8308 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8309 }
8310 /* SpUpdateEnd: */
8311 uint32_t const offLabelSpUpdateEnd = off;
8312
8313 /*
8314 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
8315 * we're skipping lookup).
8316 */
8317 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8318 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
8319 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8320 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8321 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8322 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8323 : UINT32_MAX;
8324 uint8_t const idxRegValue = !TlbState.fSkip
8325 && pVarValue->enmKind != kIemNativeVarKind_Immediate
8326 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
8327 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
8328 : UINT8_MAX;
8329 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
8330
8331
8332 if (!TlbState.fSkip)
8333 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8334 else
8335 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8336
8337 /*
8338 * Use16BitSp:
8339 */
8340 if (cBitsFlat == 0)
8341 {
8342#ifdef RT_ARCH_AMD64
8343 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8344#else
8345 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8346#endif
8347 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8348 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8349 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8350 else
8351 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8352 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8353 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8354 }
8355
8356 /*
8357 * TlbMiss:
8358 *
8359 * Call helper to do the pushing.
8360 */
8361 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8362
8363#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8364 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8365#else
8366 RT_NOREF(idxInstr);
8367#endif
8368
8369 /* Save variables in volatile registers. */
8370 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8371 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8372 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
8373 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
8374 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8375
8376 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
8377 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
8378 {
8379 /* Swap them using ARG0 as temp register: */
8380 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
8381 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
8382 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
8383 }
8384 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
8385 {
8386 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
8387 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
8388 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8389
8390 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
8391 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8392 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8393 }
8394 else
8395 {
8396 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
8397 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8398
8399 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
8400 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
8401 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
8402 }
8403
8404 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8405 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8406
8407 /* Done setting up parameters, make the call. */
8408 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8409
8410 /* Restore variables and guest shadow registers to volatile registers. */
8411 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8412 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8413
8414#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8415 if (!TlbState.fSkip)
8416 {
8417 /* end of TlbMiss - Jump to the done label. */
8418 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8419 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8420
8421 /*
8422 * TlbLookup:
8423 */
8424 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
8425 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8426
8427 /*
8428 * Emit code to do the actual storing / fetching.
8429 */
8430 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
8431# ifdef IEM_WITH_TLB_STATISTICS
8432 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8433 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8434# endif
8435 if (idxRegValue != UINT8_MAX)
8436 {
8437 switch (cbMemAccess)
8438 {
8439 case 2:
8440 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8441 break;
8442 case 4:
8443 if (!fIsIntelSeg)
8444 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8445 else
8446 {
7447 /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
7448 PUSH FS in real mode, so we have to try to emulate that here.
7449 We borrow the now unused idxReg1 from the TLB lookup code for this. */
8450 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
8451 kIemNativeGstReg_EFlags);
8452 if (idxRegEfl != UINT8_MAX)
8453 {
7454#ifdef RT_ARCH_AMD64
8455 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
8456 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8457 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8458#else
8459 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
8460 off, TlbState.idxReg1, idxRegEfl,
8461 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8462#endif
8463 iemNativeRegFreeTmp(pReNative, idxRegEfl);
8464 }
8465 else
8466 {
8467 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
8468 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
8469 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8470 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8471 }
8472 /* ASSUMES the upper half of idxRegValue is ZERO. */
8473 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
8474 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
8475 }
8476 break;
8477 case 8:
8478 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8479 break;
8480 default:
8481 AssertFailed();
8482 }
8483 }
8484 else
8485 {
8486 switch (cbMemAccess)
8487 {
8488 case 2:
8489 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
8490 idxRegMemResult, TlbState.idxReg1);
8491 break;
8492 case 4:
8493 Assert(!fIsSegReg);
8494 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
8495 idxRegMemResult, TlbState.idxReg1);
8496 break;
8497 case 8:
8498 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
8499 break;
8500 default:
8501 AssertFailed();
8502 }
8503 }
8504
8505 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8506 TlbState.freeRegsAndReleaseVars(pReNative);
8507
8508 /*
8509 * TlbDone:
8510 *
8511 * Commit the new RSP value.
8512 */
8513 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8514 }
8515#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8516
8517#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8518 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
8519#endif
8520 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8521 if (idxRegEffSp != idxRegRsp)
8522 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8523
8524 /* The value variable is implicitly flushed. */
8525 if (idxRegValue != UINT8_MAX)
8526 iemNativeVarRegisterRelease(pReNative, idxVarValue);
8527 iemNativeVarFreeLocal(pReNative, idxVarValue);
8528
8529 return off;
8530}
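/*
 * Compared with the generic store path earlier in this file, the push emitter
 * above computes the effective stack address itself (including the 16-bit SS
 * wrap-around case), handles the Intel real-mode PUSH Sreg quirk inline, and,
 * when delayed register writeback is not enabled, only commits the updated RSP
 * to CPUMCTX after the store has been emitted.
 */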
8531
8532
8533
8534/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
8535#define IEM_MC_POP_GREG_U16(a_iGReg) \
8536 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
8537 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
8538#define IEM_MC_POP_GREG_U32(a_iGReg) \
8539 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
8540 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
8541#define IEM_MC_POP_GREG_U64(a_iGReg) \
8542 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
8543 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
8544
8545#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
8546 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
8547 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8548#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
8549 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
8550 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
8551
8552#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
8553 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
8554 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8555#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
8556 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
8557 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
8558
8559
8560DECL_FORCE_INLINE_THROW(uint32_t)
8561iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
8562 uint8_t idxRegTmp)
8563{
8564 /* Use16BitSp: */
8565#ifdef RT_ARCH_AMD64
8566 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8567 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
8568 RT_NOREF(idxRegTmp);
8569#else
8570 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
8571 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
8572 /* add tmp, regrsp, #cbMem */
8573 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
8574 /* and tmp, tmp, #0xffff */
8575 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
8576 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
8577 /* bfi regrsp, tmp, #0, #16 - copies bits 15:0 from tmp into RSP bits 15:0, keeping the other RSP bits as is. */
8578 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
8579#endif
8580 return off;
8581}
8582
8583
8584DECL_FORCE_INLINE(uint32_t)
8585iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
8586{
8587 /* Use32BitSp: */
8588 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8589 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
8590 return off;
8591}
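/*
 * A minimal C sketch of the RSP update the Use16Sp/Use32Sp pop helpers above
 * emit native code for (illustration only; these functions are not used by the
 * recompiler).  The 16-bit variant wraps SP within the low word while leaving
 * RSP[63:16] untouched; the 32-bit variant leaves a zero-extended ESP behind:
 */
DECL_FORCE_INLINE(uint64_t) iemExamplePopUpdateRsp16(uint64_t uRsp, uint8_t cbMem, uint16_t *puEffSp)
{
    *puEffSp = (uint16_t)uRsp;                                            /* effective SP = low 16 bits of RSP */
    return (uRsp & ~(uint64_t)UINT16_MAX) | (uint16_t)(uRsp + cbMem);     /* add with 16-bit wrap-around */
}

DECL_FORCE_INLINE(uint64_t) iemExamplePopUpdateRsp32(uint64_t uRsp, uint8_t cbMem, uint32_t *puEffSp)
{
    *puEffSp = (uint32_t)uRsp;                                            /* effective SP = ESP */
    return (uint32_t)(uRsp + cbMem);                                      /* upper half of RSP ends up zero */
}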
8592
8593
8594/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
8595DECL_INLINE_THROW(uint32_t)
8596iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
8597 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
8598{
8599 /*
8600 * Assert sanity.
8601 */
8602 Assert(idxGReg < 16);
8603#ifdef VBOX_STRICT
8604 if (RT_BYTE2(cBitsVarAndFlat) != 0)
8605 {
8606 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8607 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8608 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8609 Assert( pfnFunction
8610 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8611 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
8612 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8613 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
8614 : UINT64_C(0xc000b000a0009000) ));
8615 }
8616 else
8617 Assert( pfnFunction
8618 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
8619 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
8620 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
8621 : UINT64_C(0xc000b000a0009000) ));
8622#endif
8623
8624#ifdef VBOX_STRICT
8625 /*
8626 * Check that the fExec flags we've got make sense.
8627 */
8628 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8629#endif
8630
8631 /*
8632 * To keep things simple we have to commit any pending writes first as we
8633 * may end up making calls.
8634 */
8635 off = iemNativeRegFlushPendingWrites(pReNative, off);
8636
8637 /*
8638 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
8639 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
8640 * directly as the effective stack pointer.
8641 * (Code structure is very similar to that of PUSH)
8642 */
8643 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
8644 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
8645 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8646 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8647 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8648 /** @todo can do a better job picking the register here. For cbMem >= 4 this
8649 * will be the resulting register value. */
8650 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
8651
8652 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8653 if (cBitsFlat != 0)
8654 {
8655 Assert(idxRegEffSp == idxRegRsp);
8656 Assert(cBitsFlat == 32 || cBitsFlat == 64);
8657 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8658 }
8659 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8660 {
8661 Assert(idxRegEffSp != idxRegRsp);
8662 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8663 kIemNativeGstRegUse_ReadOnly);
8664#ifdef RT_ARCH_AMD64
8665 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8666#else
8667 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8668#endif
8669 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8670 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8671 offFixupJumpToUseOtherBitSp = off;
8672 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8673 {
8674/** @todo can skip idxRegRsp updating when popping ESP. */
8675 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8676 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8677 }
8678 else
8679 {
8680 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8681 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8682 }
8683 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8684 }
8685 /* SpUpdateEnd: */
8686 uint32_t const offLabelSpUpdateEnd = off;
8687
8688 /*
8689 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
8690 * we're skipping lookup).
8691 */
8692 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8693 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
8694 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8695 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8696 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8697 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8698 : UINT32_MAX;
8699
8700 if (!TlbState.fSkip)
8701 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8702 else
8703 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8704
8705 /*
8706 * Use16BitSp / Use32BitSp (whichever variant the conditional jump above targets):
8707 */
8708 if (cBitsFlat == 0)
8709 {
8710#ifdef RT_ARCH_AMD64
8711 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8712#else
8713 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8714#endif
8715 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8716 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8717 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8718 else
8719 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8720 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8721 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8722 }
8723
8724 /*
8725 * TlbMiss:
8726 *
8727 * Call helper to do the popping.
8728 */
8729 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8730
8731#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8732 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8733#else
8734 RT_NOREF(idxInstr);
8735#endif
8736
8737 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8738 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8739 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
8740 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8741
8742
8743 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
8744 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8745 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8746
8747 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8748 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8749
8750 /* Done setting up parameters, make the call. */
8751 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8752
8753 /* Move the return register content to idxRegMemResult. */
8754 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8755 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8756
8757 /* Restore variables and guest shadow registers to volatile registers. */
8758 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8759 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8760
8761#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8762 if (!TlbState.fSkip)
8763 {
8764 /* end of TlbMiss - Jump to the done label. */
8765 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8766 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8767
8768 /*
8769 * TlbLookup:
8770 */
8771 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
8772 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8773
8774 /*
8775 * Emit code to load the value (the address in idxRegMemResult is replaced by the value read from it).
8776 */
8777 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8778# ifdef IEM_WITH_TLB_STATISTICS
8779 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8780 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8781# endif
8782 switch (cbMem)
8783 {
8784 case 2:
8785 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8786 break;
8787 case 4:
8788 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8789 break;
8790 case 8:
8791 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8792 break;
8793 default:
8794 AssertFailed();
8795 }
8796
8797 TlbState.freeRegsAndReleaseVars(pReNative);
8798
8799 /*
8800 * TlbDone:
8801 *
8802 * Set the new RSP value (FLAT accesses needs to calculate it first) and
8803 * commit the popped register value.
8804 */
8805 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8806 }
8807#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8808
8809 if (idxGReg != X86_GREG_xSP)
8810 {
8811 /* Set the register. */
8812 if (cbMem >= sizeof(uint32_t))
8813 {
8814#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8815 AssertMsg( pReNative->idxCurCall == 0
8816 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
8817 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName,
8818 iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
8819#endif
8820 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
8821#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8822 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
8823#endif
8824#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8825 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
8826 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8827#endif
8828 }
8829 else
8830 {
8831 Assert(cbMem == sizeof(uint16_t));
8832 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
8833 kIemNativeGstRegUse_ForUpdate);
8834 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
8835#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8836 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8837#endif
8838 iemNativeRegFreeTmp(pReNative, idxRegDst);
8839 }
8840
8841 /* Complete RSP calculation for FLAT mode. */
8842 if (idxRegEffSp == idxRegRsp)
8843 {
8844 if (cBitsFlat == 64)
8845 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8846 else
8847 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8848 }
8849 }
8850 else
8851 {
8852 /* We're popping RSP, ESP or SP. Only the latter needs a bit of extra work, of course. */
8853 if (cbMem == sizeof(uint64_t))
8854 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
8855 else if (cbMem == sizeof(uint32_t))
8856 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
8857 else
8858 {
8859 if (idxRegEffSp == idxRegRsp)
8860 {
8861 if (cBitsFlat == 64)
8862 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8863 else
8864 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8865 }
8866 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
8867 }
8868 }
8869
8870#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8871 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
8872#endif
8873
8874 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8875 if (idxRegEffSp != idxRegRsp)
8876 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8877 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8878
8879 return off;
8880}
8881
8882
8883
8884/*********************************************************************************************************************************
8885* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
8886*********************************************************************************************************************************/
8887
8888#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8889 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8890 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8891 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
8892
8893#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8894 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8895 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8896 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
8897
8898#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8899 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8900 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8901 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
8902
8903#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8904 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8905 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8906 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
8907
8908
8909#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8910 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8911 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8912 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
8913
8914#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8915 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8916 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8917 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
8918
8919#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8920 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8921 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8922 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8923
8924#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8925 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8926 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8927 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
8928
8929#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8930 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
8931 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8932 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8933
8934
8935#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8936 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8937 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8938 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
8939
8940#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8941 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8942 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8943 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
8944
8945#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8946 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8947 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8948 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8949
8950#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8951 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8952 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8953 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
8954
8955#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8956 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
8957 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8958 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8959
8960
8961#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8962 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8963 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8964 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
8965
8966#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8967 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8968 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8969 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
8970#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8971 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8972 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8973 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8974
8975#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8976 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8977 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8978 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
8979
8980#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8981 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
8982 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8983 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8984
8985
8986#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8987 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8988 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8989 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
8990
8991#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8992 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8993 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
8994 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
8995
8996
8997#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8998 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8999 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9000 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
9001
9002#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9003 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9004 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9005 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
9006
9007#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9008 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9009 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9010 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
9011
9012#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9013 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9014 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9015 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
9016
9017
9018
9019#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9020 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9021 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
9022 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
9023
9024#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9025 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9026 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
9027 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
9028
9029#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9030 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9031 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
9032 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
9033
9034#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9035 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9036 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
9037 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
9038
9039
9040#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9041 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9042 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9043 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
9044
9045#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9046 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9047 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9048 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
9049
9050#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9051 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9052 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9053 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9054
9055#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9056 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9057 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9058 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
9059
9060#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
9061 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
9062 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9063 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9064
9065
9066#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9067 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9068 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9069 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
9070
9071#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9072 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9073 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9074 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
9075
9076#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9077 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9078 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9079 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9080
9081#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9082 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9083 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9084 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
9085
9086#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
9087 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
9088 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9089 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9090
9091
9092#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9093 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9094 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9095 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
9096
9097#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9098 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9099 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9100 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
9101
9102#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9103 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9104 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9105 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9106
9107#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9108 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9109 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9110 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
9111
9112#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
9113 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
9114 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9115 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9116
9117
9118#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
9119 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
9120 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9121 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
9122
9123#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
9124 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
9125 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
9126 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
9127
9128
9129#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9130 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9131 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9132 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
9133
9134#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9135 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9136 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9137 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
9138
9139#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9140 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9141 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9142 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
9143
9144#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9145 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9146 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9147 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
9148
9149
9150DECL_INLINE_THROW(uint32_t)
9151iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
9152 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint32_t fAlignMaskAndCtl,
9153 uintptr_t pfnFunction, uint8_t idxInstr)
9154{
9155 /*
9156 * Assert sanity.
9157 */
9158 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
9159 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
9160 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
9161 && pVarMem->cbVar == sizeof(void *),
9162 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9163
9164 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9165 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9166 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
9167 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
9168 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9169
9170 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
9171 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
9172 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
9173 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
9174 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9175
9176 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
9177
9178 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
9179
9180#ifdef VBOX_STRICT
9181# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
9182 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
9183 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
9184 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
9185 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
9186# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
9187 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
9188 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
9189 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
9190
9191 if (iSegReg == UINT8_MAX)
9192 {
9193 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
9194 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
9195 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
9196 switch (cbMem)
9197 {
9198 case 1:
9199 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8));
9200 Assert(!fAlignMaskAndCtl);
9201 break;
9202 case 2:
9203 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16));
9204 Assert(fAlignMaskAndCtl < 2);
9205 break;
9206 case 4:
9207 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32));
9208 Assert(fAlignMaskAndCtl < 4);
9209 break;
9210 case 8:
9211 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64));
9212 Assert(fAlignMaskAndCtl < 8);
9213 break;
9214 case 10:
9215 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
9216 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
9217 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9218 Assert(fAlignMaskAndCtl < 8);
9219 break;
9220 case 16:
9221 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128));
9222 Assert(fAlignMaskAndCtl < 16);
9223 break;
9224# if 0
9225 case 32:
9226 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256));
9227 Assert(fAlignMaskAndCtl < 32);
9228 break;
9229 case 64:
9230 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512));
9231 Assert(fAlignMaskAndCtl < 64);
9232 break;
9233# endif
9234 default: AssertFailed(); break;
9235 }
9236 }
9237 else
9238 {
9239 Assert(iSegReg < 6);
9240 switch (cbMem)
9241 {
9242 case 1:
9243 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8));
9244 Assert(!fAlignMaskAndCtl);
9245 break;
9246 case 2:
9247 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16));
9248 Assert(fAlignMaskAndCtl < 2);
9249 break;
9250 case 4:
9251 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32));
9252 Assert(fAlignMaskAndCtl < 4);
9253 break;
9254 case 8:
9255 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64));
9256 Assert(fAlignMaskAndCtl < 8);
9257 break;
9258 case 10:
9259 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
9260 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
9261 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9262 Assert(fAlignMaskAndCtl < 8);
9263 break;
9264 case 16:
9265 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128));
9266 Assert(fAlignMaskAndCtl < 16);
9267 break;
9268# if 0
9269 case 32:
9270 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256));
9271 Assert(fAlignMaskAndCtl < 32);
9272 break;
9273 case 64:
9274 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512));
9275 Assert(fAlignMaskAndCtl < 64);
9276 break;
9277# endif
9278 default: AssertFailed(); break;
9279 }
9280 }
9281# undef IEM_MAP_HLP_FN
9282# undef IEM_MAP_HLP_FN_NO_AT
9283#endif
9284
9285#ifdef VBOX_STRICT
9286 /*
9287 * Check that the fExec flags we've got make sense.
9288 */
9289 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
9290#endif
9291
9292 /*
9293 * To keep things simple we have to commit any pending writes first as we
9294 * may end up making calls.
9295 */
9296 off = iemNativeRegFlushPendingWrites(pReNative, off);
9297
9298#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9299 /*
9300 * Move/spill/flush stuff out of call-volatile registers.
9301 * This is the easy way out. We could contain this to the tlb-miss branch
9302 * by saving and restoring active stuff here.
9303 */
9304 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
9305 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
9306#endif
9307
9308 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
9309 while the tlb-miss codepath will temporarily put it on the stack.
9310 Set the type to stack here so we don't need to do it twice below. */
9311 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
9312 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
9313 /** @todo use a tmp register from TlbState, since they'll be free after tlb
9314 * lookup is done. */
9315
9316 /*
9317 * Define labels and allocate the result register (trying for the return
9318 * register if we can).
9319 */
9320 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
9321 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
9322 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
9323 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
9324 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
9325 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
9326 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
9327 : UINT32_MAX;
9328
9329 /*
9330 * Jump to the TLB lookup code.
9331 */
9332 if (!TlbState.fSkip)
9333 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
9334
9335 /*
9336 * TlbMiss:
9337 *
9338 * Call helper to do the fetching.
9339 * We flush all guest register shadow copies here.
9340 */
9341 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
9342
9343#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
9344 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9345#else
9346 RT_NOREF(idxInstr);
9347#endif
9348
9349#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9350 /* Save variables in volatile registers. */
9351 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
9352 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
9353#endif
9354
9355 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
9356 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
9357#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9358 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
9359#else
9360 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9361#endif
9362
9363 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
9364 if (iSegReg != UINT8_MAX)
9365 {
9366 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
9367 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
9368 }
9369
9370 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
9371 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
9372 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
9373
9374 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9375 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9376
9377 /* Done setting up parameters, make the call. */
9378 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
9379
9380 /*
9381 * Put the output in the right registers.
9382 */
9383 Assert(idxRegMemResult == pVarMem->idxReg);
9384 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
9385 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
9386
9387#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9388 /* Restore variables and guest shadow registers to volatile registers. */
9389 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
9390 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
9391#endif
9392
9393 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
9394 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
9395
9396#ifdef IEMNATIVE_WITH_TLB_LOOKUP
9397 if (!TlbState.fSkip)
9398 {
9399 /* End of TlbMiss - jump to the done label. */
9400 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
9401 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
9402
9403 /*
9404 * TlbLookup:
9405 */
9406 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl, fAccess,
9407 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
9408# ifdef IEM_WITH_TLB_STATISTICS
9409 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
9410 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
9411# endif
9412
9413 /* [idxVarUnmapInfo] = 0; */
9414 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
9415
9416 /*
9417 * TlbDone:
9418 */
9419 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
9420
9421 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
9422
9423# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9424 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
9425 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9426# endif
9427 }
9428#else
9429 RT_NOREF(fAccess, fAlignMaskAndCtl, idxLabelTlbMiss);
9430#endif
9431
9432 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9433 iemNativeVarRegisterRelease(pReNative, idxVarMem);
9434
9435 return off;
9436}
9437
9438
9439#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
9440 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
9441 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
9442
9443#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
9444 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
9445 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
9446
9447#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
9448 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
9449 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
9450
9451#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
9452 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
9453 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
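/* Usage note (illustrative only, the variable names below are made up): in a microcode block the map
   and the commit+unmap come in pairs of the same access type, roughly like
       IEM_MC_MEM_MAP_U16_RW(pu16Dst, bUnmapInfo, iEffSeg, GCPtrEffDst);
       ... operate on *pu16Dst ...
       IEM_MC_MEM_COMMIT_AND_UNMAP_RW(bUnmapInfo);
   which expands to iemNativeEmitMemMapCommon above followed by iemNativeEmitMemCommitAndUnmap below. */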
9454
9455DECL_INLINE_THROW(uint32_t)
9456iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
9457 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
9458{
9459 /*
9460 * Assert sanity.
9461 */
9462 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9463#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
9464 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9465#endif
9466 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
9467 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
9468 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
9469#ifdef VBOX_STRICT
9470 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
9471 {
9472 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
9473 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
9474 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
9475 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
9476 case IEM_ACCESS_TYPE_WRITE:
9477 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
9478 case IEM_ACCESS_TYPE_READ:
9479 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
9480 default: AssertFailed();
9481 }
9482#else
9483 RT_NOREF(fAccess);
9484#endif
9485
9486 /*
9487 * To keep things simple we have to commit any pending writes first as we
9488 * may end up making calls (there shouldn't be any at this point, so this
9489 * is just for consistency).
9490 */
9491 /** @todo we could postpone this till we make the call and reload the
9492 * registers after returning from the call. Not sure if that's sensible or
9493 * not, though. */
9494 off = iemNativeRegFlushPendingWrites(pReNative, off);
9495
9496 /*
9497 * Move/spill/flush stuff out of call-volatile registers.
9498 *
9499 * We exclude any register holding the bUnmapInfo variable, as we'll be
9500 * checking it after returning from the call and will free it afterwards.
9501 */
9502 /** @todo save+restore active registers and maybe guest shadows in miss
9503 * scenario. */
9504 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
9505 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
9506
9507 /*
9508 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
9509 * to call the unmap helper function.
9510 *
9511 * The likelihood of it being zero is higher than for the TLB hit when doing
9512 * the mapping, as a TLB miss for a well aligned and unproblematic memory
9513 * access should also end up with a mapping that won't need special unmapping.
9514 */
9515 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
9516 * should speed up things for the pure interpreter as well when TLBs
9517 * are enabled. */
9518#ifdef RT_ARCH_AMD64
9519 if (pVarUnmapInfo->idxReg == UINT8_MAX)
9520 {
9521 /* test byte [rbp - xxx], 0ffh */
9522 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9523 pbCodeBuf[off++] = 0xf6;
9524 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
9525 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9526 pbCodeBuf[off++] = 0xff;
9527 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9528 }
9529 else
9530#endif
9531 {
9532 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
9533 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
9534 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
9535 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9536 }
9537 uint32_t const offJmpFixup = off;
9538 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
9539
9540 /*
9541 * Call the unmap helper function.
9542 */
9543#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
9544 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9545#else
9546 RT_NOREF(idxInstr);
9547#endif
9548
9549 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
9550 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
9551 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9552
9553 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9554 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9555
9556 /* Done setting up parameters, make the call. */
9557 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
9558
9559 /* The bUnmapInfo variable is implicitly freed by these MCs. */
9560 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
9561
9562 /*
9563 * Done, just fixup the jump for the non-call case.
9564 */
9565 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
9566
9567 return off;
9568}
9569
9570
9571
9572/*********************************************************************************************************************************
9573* State and Exceptions *
9574*********************************************************************************************************************************/
9575
9576#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9577#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9578
9579#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9580#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9581#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9582
9583#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9584#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9585#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9586
9587
9588DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
9589{
9590#ifndef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
9591 RT_NOREF(pReNative, fForChange);
9592#else
9593 if ( !(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED)
9594 && fForChange)
9595 {
9596# ifdef RT_ARCH_AMD64
9597
9598 /* Need to save the host MXCSR the first time, and clear the exception flags. */
9599 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9600 {
9601 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9602
9603 /* stmxcsr */
9604 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9605 pbCodeBuf[off++] = X86_OP_REX_B;
9606 pbCodeBuf[off++] = 0x0f;
9607 pbCodeBuf[off++] = 0xae;
9608 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 3, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9609 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9610 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9611 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9612 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9613 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9614
9615 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9616 }
9617
9618 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
9619 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
9620
9621 /*
9622 * Mask all exceptions and clear the exception status flags in the guest MXCSR value,
9623 * then load the result into the host MXCSR, taking a detour through memory here
9624 * because ldmxcsr/stmxcsr don't support a register source/target (sigh).
9625 */
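 /* Roughly equivalent host-side operation of the next few emits (illustrative only):
        uRegMxcsrTmp = (guest MXCSR | X86_MXCSR_XCPT_MASK) & ~X86_MXCSR_XCPT_FLAGS;
        ldmxcsr [pVCpu->iem.s.uRegMxcsrTmp]     ; load the masked value into the host MXCSR */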
9626 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr);
9627 off = iemNativeEmitOrGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, X86_MXCSR_XCPT_MASK);
9628 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, ~X86_MXCSR_XCPT_FLAGS);
9629 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9630
9631 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9632
9633 /* ldmxcsr */
9634 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9635 pbCodeBuf[off++] = X86_OP_REX_B;
9636 pbCodeBuf[off++] = 0x0f;
9637 pbCodeBuf[off++] = 0xae;
9638 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 2, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9639 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9640 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9641 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9642 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9643 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9644
9645 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9646 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9647
9648# elif defined(RT_ARCH_ARM64)
9649 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
9650
9651 /* Need to save the host floating point control register the first time, clear FPSR. */
9652 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9653 {
9654 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9655 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(ARMV8_A64_REG_XZR, ARMV8_AARCH64_SYSREG_FPSR);
9656 pu32CodeBuf[off++] = Armv8A64MkInstrMrs(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9657 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9658 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9659 }
9660
9661 /*
9662 * Translate MXCSR to FPCR.
9663 *
9664 * Unfortunately we can't emulate the exact behavior of MXCSR as we can't take
9665 * FEAT_AFP on arm64 for granted (my M2 MacBook doesn't have it). So we can't map
9666 * MXCSR.DAZ to FPCR.FIZ and MXCSR.FZ to FPCR.FZ with FPCR.AH being set.
9667 * We can only use FPCR.FZ which will flush inputs _and_ output de-normals to zero.
9668 */
9669 /** @todo Check the host supported flags (needs additional work to get the host features from CPUM)
9670 * and implement alternate handling if FEAT_AFP is present. */
9671 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
9672
9673 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
9674
9675 /* First make sure that there is nothing set for the upper 16-bits (X86_MXCSR_MM, which we don't emulate right now). */
9676 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegTmp, idxRegMxCsr);
9677
9678 /* If either MXCSR.FZ or MXCSR.DAZ is set FPCR.FZ will be set. */
9679 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(IEMNATIVE_REG_FIXED_TMP0, idxRegTmp, X86_MXCSR_DAZ_BIT, 1);
9680 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegTmp, X86_MXCSR_FZ_BIT);
9681 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(idxRegTmp, idxRegTmp, IEMNATIVE_REG_FIXED_TMP0);
9682 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegTmp, idxRegTmp, ARMV8_FPCR_FZ_BIT);
9683
9684 /*
9685 * Init the rounding mode, the layout differs between MXCSR.RM[14:13] and FPCR.RMode[23:22]:
9686 *
9687 * Value MXCSR FPCR
9688 * 0 RN RN
9689 * 1 R- R+
9690 * 2 R+ R-
9691 * 3 RZ RZ
9692 *
9693 * Conversion can be achieved by switching bit positions
9694 */
9695 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT);
9696 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 14, 1);
9697 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT + 1);
9698 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 13, 1);
9699
9700 /* Write the value to FPCR. */
9701 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9702
9703 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9704 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9705 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9706# else
9707# error "Port me"
9708# endif
9709 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
9710 }
9711#endif
9712 return off;
9713}
9714
9715
9716
9717/*********************************************************************************************************************************
9718* Emitters for FPU related operations. *
9719*********************************************************************************************************************************/
9720
9721#define IEM_MC_FETCH_FCW(a_u16Fcw) \
9722 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
9723
9724/** Emits code for IEM_MC_FETCH_FCW. */
9725DECL_INLINE_THROW(uint32_t)
9726iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9727{
9728 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9729 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9730
9731 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9732
9733 /* Allocate a temporary FCW register. */
9734 /** @todo eliminate extra register */
9735 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
9736 kIemNativeGstRegUse_ReadOnly);
9737
9738 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
9739
9740 /* Free but don't flush the FCW register. */
9741 iemNativeRegFreeTmp(pReNative, idxFcwReg);
9742 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9743
9744 return off;
9745}
9746
9747
9748#define IEM_MC_FETCH_FSW(a_u16Fsw) \
9749 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
9750
9751/** Emits code for IEM_MC_FETCH_FSW. */
9752DECL_INLINE_THROW(uint32_t)
9753iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9754{
9755 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9756 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9757
9758 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
9759 /* Allocate a temporary FSW register. */
9760 /** @todo eliminate extra register */
9761 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
9762 kIemNativeGstRegUse_ReadOnly);
9763
9764 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
9765
9766 /* Free but don't flush the FSW register. */
9767 iemNativeRegFreeTmp(pReNative, idxFswReg);
9768 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9769
9770 return off;
9771}
9772
9773
9774
9775#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9776
9777
9778/*********************************************************************************************************************************
9779* Emitters for SSE/AVX specific operations. *
9780*********************************************************************************************************************************/
9781
9782#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
9783 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
9784
9785/** Emits code for IEM_MC_COPY_XREG_U128. */
9786DECL_INLINE_THROW(uint32_t)
9787iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
9788{
9789 /* This is a nop if the source and destination registers are the same. */
9790 if (iXRegDst != iXRegSrc)
9791 {
9792 /* Allocate destination and source register. */
9793 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
9794 kIemNativeGstSimdRegLdStSz_Low128,
9795 kIemNativeGstRegUse_ForFullWrite);
9796 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
9797 kIemNativeGstSimdRegLdStSz_Low128,
9798 kIemNativeGstRegUse_ReadOnly);
9799
9800 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9801
9802 /* Free but don't flush the source and destination registers. */
9803 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9804 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9805 }
9806
9807 return off;
9808}
9809
9810
9811#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
9812 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
9813
9814/** Emits code for IEM_MC_FETCH_XREG_U128. */
9815DECL_INLINE_THROW(uint32_t)
9816iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
9817{
9818 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9819 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9820
9821 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9822 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9823
9824 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9825
9826 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9827
9828 /* Free but don't flush the source register. */
9829 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9830 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9831
9832 return off;
9833}
9834
9835
9836#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
9837 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
9838
9839#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
9840 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
9841
9842 /** Emits code for IEM_MC_FETCH_XREG_U64/IEM_MC_FETCH_XREG_R64. */
9843DECL_INLINE_THROW(uint32_t)
9844iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
9845{
9846 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9847 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9848
9849 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9850 kIemNativeGstSimdRegLdStSz_Low128,
9851 kIemNativeGstRegUse_ReadOnly);
9852
9853 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9854 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9855
9856 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9857
9858 /* Free but don't flush the source register. */
9859 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9860 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9861
9862 return off;
9863}
9864
9865
9866#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
9867 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
9868
9869#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
9870 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
9871
9872/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
9873DECL_INLINE_THROW(uint32_t)
9874iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
9875{
9876 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9877 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9878
9879 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9880 kIemNativeGstSimdRegLdStSz_Low128,
9881 kIemNativeGstRegUse_ReadOnly);
9882
9883 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9884 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9885
9886 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9887
9888 /* Free but don't flush the source register. */
9889 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9890 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9891
9892 return off;
9893}
9894
9895
9896#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
9897 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
9898
9899/** Emits code for IEM_MC_FETCH_XREG_U16. */
9900DECL_INLINE_THROW(uint32_t)
9901iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
9902{
9903 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9904 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9905
9906 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9907 kIemNativeGstSimdRegLdStSz_Low128,
9908 kIemNativeGstRegUse_ReadOnly);
9909
9910 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9911 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9912
9913 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
9914
9915 /* Free but don't flush the source register. */
9916 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9917 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9918
9919 return off;
9920}
9921
9922
9923#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
9924 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
9925
9926/** Emits code for IEM_MC_FETCH_XREG_U8. */
9927DECL_INLINE_THROW(uint32_t)
9928iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
9929{
9930 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9931 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
9932
9933 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9934 kIemNativeGstSimdRegLdStSz_Low128,
9935 kIemNativeGstRegUse_ReadOnly);
9936
9937 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9938 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9939
9940 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
9941
9942 /* Free but don't flush the source register. */
9943 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9944 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9945
9946 return off;
9947}
9948
9949
9950#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
9951 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
9952
9953AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
9954#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
9955 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
9956
9957
9958/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
9959DECL_INLINE_THROW(uint32_t)
9960iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9961{
9962 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9963 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9964
9965 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9966 kIemNativeGstSimdRegLdStSz_Low128,
9967 kIemNativeGstRegUse_ForFullWrite);
9968 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9969
9970 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9971
9972 /* Free but don't flush the destination register. */
9973 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9974 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9975
9976 return off;
9977}
9978
9979
9980#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
9981 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
9982
9983#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
9984 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
9985
9986#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
9987 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
9988
9989#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
9990 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
9991
9992#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
9993 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
9994
9995#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
9996 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
9997
9998 /** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8 as well as IEM_MC_STORE_XREG_R32/IEM_MC_STORE_XREG_R64. */
9999DECL_INLINE_THROW(uint32_t)
10000iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
10001 uint8_t cbLocal, uint8_t iElem)
10002{
10003 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10004 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
10005
10006#ifdef VBOX_STRICT
10007 switch (cbLocal)
10008 {
10009 case sizeof(uint64_t): Assert(iElem < 2); break;
10010 case sizeof(uint32_t): Assert(iElem < 4); break;
10011 case sizeof(uint16_t): Assert(iElem < 8); break;
10012 case sizeof(uint8_t): Assert(iElem < 16); break;
10013 default: AssertFailed();
10014 }
10015#endif
10016
10017 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10018 kIemNativeGstSimdRegLdStSz_Low128,
10019 kIemNativeGstRegUse_ForUpdate);
10020 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
10021
10022 switch (cbLocal)
10023 {
10024 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10025 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10026 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10027 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10028 default: AssertFailed();
10029 }
10030
10031 /* Free but don't flush the destination register. */
10032 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10033 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10034
10035 return off;
10036}
10037
10038
10039#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
10040 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
10041
10042/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
10043DECL_INLINE_THROW(uint32_t)
10044iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10045{
10046 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10047 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10048
10049 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10050 kIemNativeGstSimdRegLdStSz_Low128,
10051 kIemNativeGstRegUse_ForUpdate);
10052 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
10053
10054 /* Zero the vector register first, then store the 64-bit value to the lower 64 bits. */
10055 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10056 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10057
10058 /* Free but don't flush the destination register. */
10059 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10060 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10061
10062 return off;
10063}
10064
10065
10066#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
10067 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
10068
10069/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
10070DECL_INLINE_THROW(uint32_t)
10071iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10072{
10073 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10074 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10075
10076 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10077 kIemNativeGstSimdRegLdStSz_Low128,
10078 kIemNativeGstRegUse_ForUpdate);
10079 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
10080
10081 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
10082 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10083 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10084
10085 /* Free but don't flush the destination register. */
10086 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10087 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10088
10089 return off;
10090}
10091
10092
10093#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
10094 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
10095
10096/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
10097DECL_INLINE_THROW(uint32_t)
10098iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
10099 uint8_t idxSrcVar, uint8_t iDwSrc)
10100{
10101 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10102 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10103
10104 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10105 kIemNativeGstSimdRegLdStSz_Low128,
10106 kIemNativeGstRegUse_ForUpdate);
10107 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10108
10109 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
10110 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
10111
10112 /* Free but don't flush the destination register. */
10113 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10114 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10115
10116 return off;
10117}
10118
10119
10120#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10121 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10122
10123/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
10124DECL_INLINE_THROW(uint32_t)
10125iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10126{
10127 /*
10128 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10129 * if iYRegDst gets allocated first for the full write it won't load the
10130 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
10131 * duplicated from the already allocated host register for iYRegDst containing
10132 * garbage. This will be caught by the guest register value checking in debug
10133 * builds.
10134 */
10135 if (iYRegDst != iYRegSrc)
10136 {
10137 /* Allocate destination and source register. */
10138 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10139 kIemNativeGstSimdRegLdStSz_256,
10140 kIemNativeGstRegUse_ForFullWrite);
10141 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10142 kIemNativeGstSimdRegLdStSz_Low128,
10143 kIemNativeGstRegUse_ReadOnly);
10144
10145 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10146 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10147
10148 /* Free but don't flush the source and destination registers. */
10149 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10150 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10151 }
10152 else
10153 {
10154 /* This effectively only clears the upper 128 bits of the register. */
10155 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10156 kIemNativeGstSimdRegLdStSz_High128,
10157 kIemNativeGstRegUse_ForFullWrite);
10158
10159 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10160
10161 /* Free but don't flush the destination register. */
10162 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10163 }
10164
10165 return off;
10166}
10167
10168
10169#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10170 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10171
10172/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
10173DECL_INLINE_THROW(uint32_t)
10174iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10175{
10176 /*
10177 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10178 * if iYRegDst gets allocated first for the full write it won't load the
10179 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
10180 * duplicated from the already allocated host register for iYRegDst containing
10181 * garbage. This will be caught by the guest register value checking in debug
10182 * builds. iYRegSrc == iYRegDst would effectively only clear the upper 256 bits
10183 * of a ZMM register, which we don't support yet, so this is just a nop.
10184 */
10185 if (iYRegDst != iYRegSrc)
10186 {
10187 /* Allocate destination and source register. */
10188 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10189 kIemNativeGstSimdRegLdStSz_256,
10190 kIemNativeGstRegUse_ReadOnly);
10191 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10192 kIemNativeGstSimdRegLdStSz_256,
10193 kIemNativeGstRegUse_ForFullWrite);
10194
10195 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10196
10197 /* Free but don't flush the source and destination registers. */
10198 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10199 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10200 }
10201
10202 return off;
10203}
10204
10205
10206#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
10207 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
10208
10209/** Emits code for IEM_MC_FETCH_YREG_U128. */
10210DECL_INLINE_THROW(uint32_t)
10211iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
10212{
10213 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10214 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
10215
10216 Assert(iDQWord <= 1);
10217 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10218 iDQWord == 1
10219 ? kIemNativeGstSimdRegLdStSz_High128
10220 : kIemNativeGstSimdRegLdStSz_Low128,
10221 kIemNativeGstRegUse_ReadOnly);
10222
10223 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10224 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10225
10226 if (iDQWord == 1)
10227 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10228 else
10229 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10230
10231 /* Free but don't flush the source register. */
10232 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10233 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10234
10235 return off;
10236}
10237
10238
10239#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
10240 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
10241
10242/** Emits code for IEM_MC_FETCH_YREG_U64. */
10243DECL_INLINE_THROW(uint32_t)
10244iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
10245{
10246 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10247 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10248
10249 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10250 iQWord >= 2
10251 ? kIemNativeGstSimdRegLdStSz_High128
10252 : kIemNativeGstSimdRegLdStSz_Low128,
10253 kIemNativeGstRegUse_ReadOnly);
10254
10255 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10256 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10257
10258 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
10259
10260 /* Free but don't flush the source register. */
10261 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10262 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10263
10264 return off;
10265}
10266
10267
10268#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
10269 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
10270
10271/** Emits code for IEM_MC_FETCH_YREG_U32. */
10272DECL_INLINE_THROW(uint32_t)
10273iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
10274{
10275 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10276 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10277
10278 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10279 iDWord >= 4
10280 ? kIemNativeGstSimdRegLdStSz_High128
10281 : kIemNativeGstSimdRegLdStSz_Low128,
10282 kIemNativeGstRegUse_ReadOnly);
10283
10284 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10285 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10286
10287 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
10288
10289 /* Free but don't flush the source register. */
10290 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10291 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10292
10293 return off;
10294}
10295
10296
10297#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
10298 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
10299
10300/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
10301DECL_INLINE_THROW(uint32_t)
10302iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10303{
10304 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10305 kIemNativeGstSimdRegLdStSz_High128,
10306 kIemNativeGstRegUse_ForFullWrite);
10307
10308 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10309
10310 /* Free but don't flush the register. */
10311 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10312
10313 return off;
10314}
10315
10316
10317#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
10318 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
10319
10320/** Emits code for IEM_MC_STORE_YREG_U128. */
10321DECL_INLINE_THROW(uint32_t)
10322iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
10323{
10324 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10325 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10326
10327 Assert(iDQword <= 1);
10328 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10329 iDQword == 0
10330 ? kIemNativeGstSimdRegLdStSz_Low128
10331 : kIemNativeGstSimdRegLdStSz_High128,
10332 kIemNativeGstRegUse_ForFullWrite);
10333
10334 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10335
10336 if (iDQword == 0)
10337 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10338 else
10339 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
10340
10341 /* Free but don't flush the destination register. */
10342 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10343 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10344
10345 return off;
10346}
10347
10348
10349#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10350 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10351
10352/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
10353DECL_INLINE_THROW(uint32_t)
10354iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10355{
10356 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10357 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10358
10359 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10360 kIemNativeGstSimdRegLdStSz_256,
10361 kIemNativeGstRegUse_ForFullWrite);
10362
10363 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10364
10365 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10366 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10367
10368 /* Free but don't flush the destination register. */
10369 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10370 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10371
10372 return off;
10373}
10374
10375
10376#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
10377 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
10378
10379/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
10380DECL_INLINE_THROW(uint32_t)
10381iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10382{
10383 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10384 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10385
10386 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10387 kIemNativeGstSimdRegLdStSz_256,
10388 kIemNativeGstRegUse_ForFullWrite);
10389
10390 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10391
10392 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10393 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10394
10395 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10396 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10397
10398 return off;
10399}
10400
10401
10402#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
10403 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
10404
10405/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
10406DECL_INLINE_THROW(uint32_t)
10407iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10408{
10409 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10410 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10411
10412 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10413 kIemNativeGstSimdRegLdStSz_256,
10414 kIemNativeGstRegUse_ForFullWrite);
10415
10416 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10417
10418 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10419 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10420
10421 /* Free but don't flush the destination register. */
10422 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10423 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10424
10425 return off;
10426}
10427
10428
10429#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
10430 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
10431
10432/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
10433DECL_INLINE_THROW(uint32_t)
10434iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10435{
10436 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10437 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10438
10439 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10440 kIemNativeGstSimdRegLdStSz_256,
10441 kIemNativeGstRegUse_ForFullWrite);
10442
10443 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10444
10445 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10446 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10447
10448 /* Free but don't flush the destination register. */
10449 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10450 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10451
10452 return off;
10453}
10454
10455
10456#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
10457 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
10458
10459/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
10460DECL_INLINE_THROW(uint32_t)
10461iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10462{
10463 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10464 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10465
10466 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10467 kIemNativeGstSimdRegLdStSz_256,
10468 kIemNativeGstRegUse_ForFullWrite);
10469
10470 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10471
10472 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10473 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10474
10475 /* Free but don't flush the destination register. */
10476 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10477 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10478
10479 return off;
10480}
10481
10482
10483#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
10484 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
10485
10486/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
10487DECL_INLINE_THROW(uint32_t)
10488iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10489{
10490 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10491 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10492
10493 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10494 kIemNativeGstSimdRegLdStSz_256,
10495 kIemNativeGstRegUse_ForFullWrite);
10496
10497 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10498
10499 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10500
10501 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10502 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10503
10504 return off;
10505}
10506
10507
10508#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
10509 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
10510
10511/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
10512DECL_INLINE_THROW(uint32_t)
10513iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10514{
10515 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10516 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10517
10518 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10519 kIemNativeGstSimdRegLdStSz_256,
10520 kIemNativeGstRegUse_ForFullWrite);
10521
10522 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10523
10524 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10525
10526 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10527 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10528
10529 return off;
10530}
10531
10532
10533#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10534 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10535
10536/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
10537DECL_INLINE_THROW(uint32_t)
10538iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10539{
10540 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10541 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10542
10543 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10544 kIemNativeGstSimdRegLdStSz_256,
10545 kIemNativeGstRegUse_ForFullWrite);
10546
10547 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10548
10549 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10550
10551 /* Free but don't flush the destination register. */
10552 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10553 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10554
10555 return off;
10556}
10557
10558
10559#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10560 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10561
10562/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
10563DECL_INLINE_THROW(uint32_t)
10564iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10565{
10566 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10567 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10568
10569 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10570 kIemNativeGstSimdRegLdStSz_256,
10571 kIemNativeGstRegUse_ForFullWrite);
10572
10573 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10574
10575 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10576
10577 /* Free but don't flush the destination register. */
10578 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10579 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10580
10581 return off;
10582}
10583
10584
10585#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10586 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10587
10588/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
10589DECL_INLINE_THROW(uint32_t)
10590iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10591{
10592 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10593 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10594
10595 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10596 kIemNativeGstSimdRegLdStSz_256,
10597 kIemNativeGstRegUse_ForFullWrite);
10598
10599 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10600
10601 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
10602
10603 /* Free but don't flush the destination register. */
10604 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10605 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10606
10607 return off;
10608}
10609
10610
10611#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10612 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10613
10614/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
10615DECL_INLINE_THROW(uint32_t)
10616iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10617{
10618 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10619 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10620
10621 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10622 kIemNativeGstSimdRegLdStSz_256,
10623 kIemNativeGstRegUse_ForFullWrite);
10624
10625 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10626
10627 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10628 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
10629
10630 /* Free but don't flush the destination register. */
10631 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10632 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10633
10634 return off;
10635}
10636
10637
10638#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10639 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10640
10641/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
10642DECL_INLINE_THROW(uint32_t)
10643iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10644{
10645 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10646 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10647
10648 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10649 kIemNativeGstSimdRegLdStSz_256,
10650 kIemNativeGstRegUse_ForFullWrite);
10651
10652 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10653
10654 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10655 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10656
10657 /* Free but don't flush the destination register. */
10658 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10659 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10660
10661 return off;
10662}
10663
10664
10665#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
10666 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
10667
10668/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
10669DECL_INLINE_THROW(uint32_t)
10670iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
10671{
10672 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10673 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10674
10675 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10676 kIemNativeGstSimdRegLdStSz_256,
10677 kIemNativeGstRegUse_ForFullWrite);
10678 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10679 kIemNativeGstSimdRegLdStSz_Low128,
10680 kIemNativeGstRegUse_ReadOnly);
10681 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10682
10683 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10684 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10685 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10686
10687 /* Free but don't flush the source and destination registers. */
10688 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10689 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10690 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10691
10692 return off;
10693}
10694
10695
10696#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
10697 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
10698
10699/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
10700DECL_INLINE_THROW(uint32_t)
10701iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
10702{
10703 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10704 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10705
10706 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10707 kIemNativeGstSimdRegLdStSz_256,
10708 kIemNativeGstRegUse_ForFullWrite);
10709 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10710 kIemNativeGstSimdRegLdStSz_Low128,
10711 kIemNativeGstRegUse_ReadOnly);
10712 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10713
10714 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10715 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
10716 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10717
10718 /* Free but don't flush the source and destination registers. */
10719 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10720 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10721 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10722
10723 return off;
10724}
10725
10726
10727#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
10728 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
10729
10730
10731/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
10732DECL_INLINE_THROW(uint32_t)
10733iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
10734{
10735 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10736 kIemNativeGstSimdRegLdStSz_Low128,
10737 kIemNativeGstRegUse_ForUpdate);
10738
10739 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
10740 if (bImm8Mask & RT_BIT(0))
10741 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
10742 if (bImm8Mask & RT_BIT(1))
10743 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
10744 if (bImm8Mask & RT_BIT(2))
10745 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
10746 if (bImm8Mask & RT_BIT(3))
10747 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
10748
10749 /* Free but don't flush the destination register. */
10750 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10751
10752 return off;
10753}
10754
10755
10756#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
10757 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
10758
10759#define IEM_MC_FETCH_YREG_YMM(a_uYmmDst, a_iYRegSrc) \
10760 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_uYmmDst, a_iYRegSrc)
10761
10762/** Emits code for IEM_MC_FETCH_YREG_U256/IEM_MC_FETCH_YREG_YMM. */
10763DECL_INLINE_THROW(uint32_t)
10764iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
10765{
10766 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10767 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
10768
10769 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10770 kIemNativeGstSimdRegLdStSz_256,
10771 kIemNativeGstRegUse_ReadOnly);
10772 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10773
10774 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
10775
10776 /* Free but don't flush the source register. */
10777 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10778 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10779
10780 return off;
10781}
10782
10783
10784#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
10785 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
10786
10787#define IEM_MC_STORE_YREG_YMM_ZX_VLMAX(a_iYRegDst, a_uYmmSrc) \
10788 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_uYmmSrc)
10789
10790/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX/IEM_MC_STORE_YREG_YMM_ZX_VLMAX. */
10791DECL_INLINE_THROW(uint32_t)
10792iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
10793{
10794 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10795 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10796
10797 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10798 kIemNativeGstSimdRegLdStSz_256,
10799 kIemNativeGstRegUse_ForFullWrite);
10800 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10801
10802 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
10803
10804 /* Free but don't flush the destination register. */
10805 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10806 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10807
10808 return off;
10809}
10810
10811
10812#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
10813 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
10814
10815
10816/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
10817DECL_INLINE_THROW(uint32_t)
10818iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
10819 uint8_t idxSrcVar, uint8_t iDwSrc)
10820{
10821 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10822 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10823
10824 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10825 iDwDst < 4
10826 ? kIemNativeGstSimdRegLdStSz_Low128
10827 : kIemNativeGstSimdRegLdStSz_High128,
10828 kIemNativeGstRegUse_ForUpdate);
10829 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10830 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10831
10832 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
10833 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
10834
10835 /* Free but don't flush the destination register, and free the temporary register. */
10836 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10837 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10838 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10839
10840 return off;
10841}
10842
10843
10844#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
10845 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
10846
10847
10848/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
10849DECL_INLINE_THROW(uint32_t)
10850iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
10851 uint8_t idxSrcVar, uint8_t iQwSrc)
10852{
10853 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10854 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10855
10856 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10857 iQwDst < 2
10858 ? kIemNativeGstSimdRegLdStSz_Low128
10859 : kIemNativeGstSimdRegLdStSz_High128,
10860 kIemNativeGstRegUse_ForUpdate);
10861 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10862 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10863
10864 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
10865 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
10866
10867 /* Free but don't flush the destination register, and free the temporary register. */
10868 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10869 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10870 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10871
10872 return off;
10873}
10874
10875
10876#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
10877 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
10878
10879
10880/** Emits code for IEM_MC_STORE_YREG_U64. */
10881DECL_INLINE_THROW(uint32_t)
10882iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
10883{
10884 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10885 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10886
10887 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10888 iQwDst < 2
10889 ? kIemNativeGstSimdRegLdStSz_Low128
10890 : kIemNativeGstSimdRegLdStSz_High128,
10891 kIemNativeGstRegUse_ForUpdate);
10892
10893 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10894
10895 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
10896
10897 /* Free but don't flush the destination register. */
10898 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10899 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10900
10901 return off;
10902}
10903
10904
10905#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
10906 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
10907
10908/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
10909DECL_INLINE_THROW(uint32_t)
10910iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10911{
10912 RT_NOREF(pReNative, iYReg);
10913 /** @todo Needs to be implemented when support for AVX-512 is added. */
10914 return off;
10915}
10916
10917
10918
10919/*********************************************************************************************************************************
10920* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
10921*********************************************************************************************************************************/
10922
10923/**
10924 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
10925 */
10926DECL_INLINE_THROW(uint32_t)
10927iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs, uint8_t idxInstr)
10928{
10929 /* Grab the MXCSR register; it must not be call volatile or we would end up freeing it when setting up the call below. */
10930 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
10931 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
10932 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
10933
10934#if 0 /* This is not required right now as the called helper will set up the SSE/AVX state if it is an assembly one. */
10935 /*
10936 * Need to do the FPU preparation.
10937 */
10938 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
10939#endif
10940
10941 /*
10942 * Do all the call setup and cleanup.
10943 */
10944 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS,
10945 false /*fFlushPendingWrites*/);
10946
10947 /*
10948 * Load the MXCSR register into the first argument and mask out the current exception flags.
10949 */
10950 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
10951 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
10952
10953 /*
10954 * Make the call.
10955 */
10956 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
10957
10958 /*
10959 * The updated MXCSR is in the return register; update the exception status flags.
10960 *
10961 * The return register is marked allocated as a temporary because it is required for the
10962 * exception generation check below.
10963 */
10964 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG)));
10965 uint8_t const idxRegTmp = iemNativeRegMarkAllocated(pReNative, IEMNATIVE_CALL_RET_GREG, kIemNativeWhat_Tmp);
10966 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegTmp);
10967
10968#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
10969 /* Write back the MXCSR register value (there is no delayed writeback for such registers at the moment). */
10970 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
10971#endif
10972
10973 /*
10974 * Make sure we don't have any outstanding guest register writes as we may
10975 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
10976 */
10977 off = iemNativeRegFlushPendingWrites(pReNative, off);
10978
10979#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10980 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10981#else
10982 RT_NOREF(idxInstr);
10983#endif
10984
10985 /** @todo r=aeichner ANDN from BMI1 would save us a temporary and an additional instruction here, but I don't
10986 * want to assume the existence of this instruction at the moment. */
10987 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
10988
10989 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp2, idxRegTmp);
10990 /* tmp &= X86_MXCSR_XCPT_MASK */
10991 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
10992 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
10993 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
10994 /* tmp = ~tmp */
10995 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
10996 /* tmp &= mxcsr */
10997 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegTmp2);
10998 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
10999 kIemNativeLabelType_RaiseSseAvxFpRelated);
11000
11001 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
11002 iemNativeRegFreeTmp(pReNative, idxRegTmp);
11003 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
11004
11005 return off;
11006}
11007
11008
11009#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
11010 off = iemNativeEmitCallSseAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
11011
11012/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
11013DECL_INLINE_THROW(uint32_t)
11014iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
11015{
11016 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11017 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11018 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
11019}
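
/* Usage sketch (illustrative only; the real call sites live in the instruction decoder files and
 * the worker name below is just an example):
 *     IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_addps_u128, puDst, puSrc);
 * where puDst/puSrc are the visible arguments asserted above and the hidden argument (the MXCSR
 * input) is loaded by iemNativeEmitCallSseAvxAImplCommon. */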
11020
11021
11022#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
11023 off = iemNativeEmitCallSseAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
11024
11025/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
11026DECL_INLINE_THROW(uint32_t)
11027iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
11028 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
11029{
11030 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11031 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11032 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11033 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
11034}
11035
11036
11037/*********************************************************************************************************************************
11038* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
11039*********************************************************************************************************************************/
11040
11041#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
11042 off = iemNativeEmitCallAvxAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
11043
11044/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
11045DECL_INLINE_THROW(uint32_t)
11046iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
11047{
11048 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11049 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11050 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
11051}
11052
11053
11054#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
11055 off = iemNativeEmitCallAvxAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
11056
11057/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
11058DECL_INLINE_THROW(uint32_t)
11059iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
11060 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
11061{
11062 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11063 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11064 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11065 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
11066}
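
/* The AVX wrappers mirror the SSE ones above: they share iemNativeEmitCallSseAvxAImplCommon and
 * merely assert the visible argument indices against IEM_AVX_AIMPL_HIDDEN_ARGS instead. A hedged
 * usage sketch (placeholder names, not taken from this file):
 *     IEM_MC_CALL_AVX_AIMPL_3(a_pfnWorker, puDst, puSrc1, puSrc2);
 */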
11067
11068
11069#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
11070
11071
11072/*********************************************************************************************************************************
11073* Include instruction emitters. *
11074*********************************************************************************************************************************/
11075#include "target-x86/IEMAllN8veEmit-x86.h"
11076