VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@ 106090

Last change on this file: revision 106090, checked in by vboxsync, 2 months ago

VMM/IEM: More liveness work for delayed eflags updating. bugref:10720 bugref:10372

1/* $Id: IEMAllN8veRecompFuncs.h 106090 2024-09-19 09:13:54Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down the configs here to avoid wasting time on unused configs.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81#if defined(IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS) && !defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
82# error "IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS requires IEMNATIVE_WITH_SIMD_REG_ALLOCATOR"
83#endif
84
85
86/*********************************************************************************************************************************
87* Code emitters for flushing pending guest register writes and sanity checks *
88*********************************************************************************************************************************/
89
90#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
91
92# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
93/**
94 * Updates IEMCPU::uPcUpdatingDebug.
95 */
96DECL_INLINE_THROW(uint32_t) iemNativeEmitPcDebugAdd(PIEMRECOMPILERSTATE pReNative, uint32_t off, int64_t offDisp, uint8_t cBits)
97{
98# ifdef RT_ARCH_AMD64
99 if (pReNative->Core.fDebugPcInitialized && cBits >= 32)
100 {
101 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
102 if ((int32_t)offDisp == offDisp || cBits != 64)
103 {
104 /* add [q]word [pVCpu->iem.s.uPcUpdatingDebug], imm32/imm8 */
105 if (cBits == 64)
106 pCodeBuf[off++] = X86_OP_REX_W;
107 pCodeBuf[off++] = (int8_t)offDisp == offDisp ? 0x83 : 0x81;
108 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
109 if ((int8_t)offDisp == offDisp)
110 pCodeBuf[off++] = (int8_t)offDisp;
111 else
112 {
113 *(int32_t *)&pCodeBuf[off] = (int32_t)offDisp;
114 off += sizeof(int32_t);
115 }
116 }
117 else
118 {
119 /* mov tmp0, imm64 */
120 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, offDisp);
121
122 /* add [pVCpu->iem.s.uPcUpdatingDebug], tmp0 */
123 if (cBits == 64)
124 pCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 >= 8 ? X86_OP_REX_R : 0);
125 else if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
126 pCodeBuf[off++] = X86_OP_REX_R;
127 pCodeBuf[off++] = 0x01;
128 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0 & 7,
129 RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
130 }
131 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
132 return off;
133 }
134# endif
135
136 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
137 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 12);
138
139 if (pReNative->Core.fDebugPcInitialized)
140 {
141 Log4(("uPcUpdatingDebug+=%ld cBits=%d off=%#x\n", offDisp, cBits, off));
142 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
143 }
144 else
145 {
146 Log4(("uPcUpdatingDebug=rip+%ld cBits=%d off=%#x\n", offDisp, cBits, off));
147 pReNative->Core.fDebugPcInitialized = true;
148 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
149 }
150
151 if (cBits == 64)
152 off = iemNativeEmitAddGprImmEx(pCodeBuf, off, idxTmpReg, offDisp, IEMNATIVE_REG_FIXED_TMP0);
153 else
154 {
155 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxTmpReg, (int32_t)offDisp, IEMNATIVE_REG_FIXED_TMP0);
156 if (cBits == 16)
157 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, UINT16_MAX);
158 }
159
160 off = iemNativeEmitStoreGprToVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug),
161 IEMNATIVE_REG_FIXED_TMP0);
162
163 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
164 iemNativeRegFreeTmp(pReNative, idxTmpReg);
165 return off;
166}
167
168
169# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
170DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
171{
172 /* Compare the shadow with the context value; they should match. */
173 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
174 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
175 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
176 return off;
177}
178# endif
179
180#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
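
/* Note on the debug helpers above: with IEMNATIVE_WITH_DELAYED_PC_UPDATING the
 * recompiler accumulates instruction lengths in pReNative->Core.offPc instead of
 * storing RIP after every instruction, and iemNativeEmitPcDebugAdd keeps a
 * parallel running total in iem.s.uPcUpdatingDebug (seeded from cpum.GstCtx.rip
 * on first use) so the delayed value can be cross-checked by the
 * iemNativeEmitPcDebugCheck calls emitted by the finishing emitters below. */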
181
182/**
183 * Flushes delayed write of a specific guest register.
184 *
185 * This must be called prior to calling CImpl functions and any helpers that use
186 * the guest state (like raising exceptions) and such.
187 *
188 * This optimization has not yet been implemented. The first target would be
189 * RIP updates, since these are the most common ones.
190 */
191DECL_INLINE_THROW(uint32_t)
192iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
193{
194#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
195 /* If, for whatever reason, it is possible to reference the PC register at some point, we need to do the writeback here first. */
196#endif
197
198#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
199#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
200 if ( enmClass == kIemNativeGstRegRef_EFlags
201 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
202 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
203#else
204 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
205#endif
206
207 if ( enmClass == kIemNativeGstRegRef_Gpr
208 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
209 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
210#endif
211
212#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
213 if ( enmClass == kIemNativeGstRegRef_XReg
214 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
215 {
216 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
217 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
218 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
219
220 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
221 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
222 }
223#endif
224 RT_NOREF(pReNative, enmClass, idxReg);
225 return off;
226}
227
228
229
230/*********************************************************************************************************************************
231* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
232*********************************************************************************************************************************/
233
234#undef IEM_MC_BEGIN /* unused */
235#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
236 { \
237 Assert(pReNative->Core.bmVars == 0); \
238 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
239 Assert(pReNative->Core.bmStack == 0); \
240 pReNative->fMc = (a_fMcFlags); \
241 pReNative->fCImpl = (a_fCImplFlags); \
242 pReNative->cArgsX = (a_cArgsIncludingHidden)
243
244/** We have to get to the end in recompilation mode, as otherwise we won't
245 * generate code for all the IEM_MC_IF_XXX branches. */
246#define IEM_MC_END() \
247 iemNativeVarFreeAll(pReNative); \
248 } return off
249
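/* Illustrative only (the flag value is a typical example, not tied to any
 * particular instruction): a recompiled MC block in the generated functions has
 * the shape
 *     IEM_MC_BEGIN_EX(IEM_MC_F_64BIT, 0, 0);
 *         ... IEM_MC_XXX statements ...
 *     IEM_MC_END();
 * which, with the definitions above, expands to a plain C block that asserts a
 * clean variable/stack state, records a_fMcFlags/a_fCImplFlags/a_cArgsIncludingHidden
 * on entry, and frees all variables before doing 'return off'. */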
250
251
252/*********************************************************************************************************************************
253* Liveness Stubs *
254*********************************************************************************************************************************/
255
256#define IEM_MC_LIVENESS_GREG_INPUT(a_iGReg) ((void)0)
257#define IEM_MC_LIVENESS_GREG_CLOBBER(a_iGReg) ((void)0)
258#define IEM_MC_LIVENESS_GREG_MODIFY(a_iGReg) ((void)0)
259
260#define IEM_MC_LIVENESS_MREG_INPUT(a_iMReg) ((void)0)
261#define IEM_MC_LIVENESS_MREG_CLOBBER(a_iMReg) ((void)0)
262#define IEM_MC_LIVENESS_MREG_MODIFY(a_iMReg) ((void)0)
263
264#define IEM_MC_LIVENESS_XREG_INPUT(a_iXReg) ((void)0)
265#define IEM_MC_LIVENESS_XREG_CLOBBER(a_iXReg) ((void)0)
266#define IEM_MC_LIVENESS_XREG_MODIFY(a_iXReg) ((void)0)
267
268
269
270/*********************************************************************************************************************************
271* Native Emitter Support. *
272*********************************************************************************************************************************/
273
274#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
275
276#define IEM_MC_NATIVE_ELSE() } else {
277
278#define IEM_MC_NATIVE_ENDIF() } ((void)0)
279
280
281#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
282 off = a_fnEmitter(pReNative, off)
283
284#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
285 off = a_fnEmitter(pReNative, off, (a0))
286
287#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
288 off = a_fnEmitter(pReNative, off, (a0), (a1))
289
290#define IEM_MC_NATIVE_EMIT_2_EX(a_fnEmitter, a0, a1) \
291 off = a_fnEmitter(pReNative, off, pCallEntry->idxInstr, (a0), (a1))
292
293#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
294 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
295
296#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
297 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
298
299#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
300 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
301
302#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
303 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
304
305#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
306 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
307
308#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
309 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
310
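/* Hypothetical usage sketch (iemNativeEmit_example_r_r is a stand-in name, not a
 * real emitter): an MC block can gate a native emitter on the host architecture
 * and keep the generic path as a fallback:
 *     IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
 *         IEM_MC_NATIVE_EMIT_2(iemNativeEmit_example_r_r, idxVarDst, idxVarSrc);
 *     IEM_MC_NATIVE_ELSE()
 *         ... fallback IEM_MC_XXX statements ...
 *     IEM_MC_NATIVE_ENDIF();
 * The IF/ELSE/ENDIF macros supply the braces themselves, so the bodies are just
 * statement sequences. */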
311
312#ifndef RT_ARCH_AMD64
313# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
314#else
315/** @note This is a naive approach that ASSUMES that the register isn't
316 * allocated, so it only works safely for the first allocation(s) in
317 * an MC block. */
318# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
319 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
320
321DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg,
322 uint32_t off, bool fAllocated);
323
324DECL_INLINE_THROW(uint32_t)
325iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
326{
327 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
328 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
329 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
330
331# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
332 /* Must flush the register if it holds pending writes. */
333 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
334 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
335 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
336# endif
337
338 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off, false /*fAllocated*/);
339 return off;
340}
341
342#endif /* RT_ARCH_AMD64 */
343
344
345
346/*********************************************************************************************************************************
347* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
348*********************************************************************************************************************************/
349
350#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
351 pReNative->fMc = 0; \
352 pReNative->fCImpl = (a_fFlags); \
353 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, \
354 a_cbInstr) /** @todo not used ... */
355
356
357#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
358 pReNative->fMc = 0; \
359 pReNative->fCImpl = (a_fFlags); \
360 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
361
362DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
363 uint8_t idxInstr, uint64_t a_fGstShwFlush,
364 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
365{
366 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
367}
368
369
370#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
371 pReNative->fMc = 0; \
372 pReNative->fCImpl = (a_fFlags); \
373 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
374 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
375
376DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
377 uint8_t idxInstr, uint64_t a_fGstShwFlush,
378 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
379{
380 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
381}
382
383
384#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
385 pReNative->fMc = 0; \
386 pReNative->fCImpl = (a_fFlags); \
387 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
388 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
389
390DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
391 uint8_t idxInstr, uint64_t a_fGstShwFlush,
392 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
393 uint64_t uArg2)
394{
395 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
396}
397
398
399
400/*********************************************************************************************************************************
401* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
402*********************************************************************************************************************************/
403
404/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
405 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
406DECL_INLINE_THROW(uint32_t)
407iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
408{
409 /*
410 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
411 * return with a special status code and make the execution loop deal with
412 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
413 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
414 * could continue w/o interruption, it probably will drop into the
415 * debugger, so it is not worth the effort of trying to service it here; we
416 * just lump it in with the handling of the others.
417 *
418 * To simplify the code and the register state management even more (wrt
419 * the immediate in the AND operation), we always update the flags and skip
420 * the extra check and the associated conditional jump.
421 */
422 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
423 <= UINT32_MAX);
424#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
425 AssertMsg( pReNative->idxCurCall == 0
426 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
427 IEMLIVENESSBIT_IDX_EFL_OTHER)),
428 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
429 IEMLIVENESSBIT_IDX_EFL_OTHER)));
430#endif
431
432 /*
433 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
434 * any pending register writes must be flushed.
435 */
436 off = iemNativeRegFlushPendingWrites(pReNative, off);
437
438 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
439 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
440 true /*fSkipLivenessAssert*/);
441 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxEflReg,
442 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
443 kIemNativeLabelType_ReturnWithFlags);
444 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
445 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
446
447 /* Free but don't flush the EFLAGS register. */
448 iemNativeRegFreeTmp(pReNative, idxEflReg);
449
450 return off;
451}
452
453
454/** Helper for iemNativeEmitFinishInstructionWithStatus. */
455DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
456{
457 unsigned const offOpcodes = pCallEntry->offOpcode;
458 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
459 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
460 {
461 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
462 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
463 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
464 }
465 AssertFailedReturn(NIL_RTGCPHYS);
466}
467
468
469/** The VINF_SUCCESS dummy. */
470template<int const a_rcNormal, bool const a_fIsJump>
471DECL_FORCE_INLINE_THROW(uint32_t)
472iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
473 int32_t const offJump)
474{
475 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
476 if (a_rcNormal != VINF_SUCCESS)
477 {
478#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
479 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
480#else
481 RT_NOREF_PV(pCallEntry);
482#endif
483
484 /* As this code returns from the TB, any pending register writes must be flushed. */
485 off = iemNativeRegFlushPendingWrites(pReNative, off);
486
487 /*
488 * If we're in a conditional, mark the current branch as exiting so we
489 * can disregard its state when we hit the IEM_MC_ENDIF.
490 */
491 iemNativeMarkCurCondBranchAsExiting(pReNative);
492
493 /*
494 * Use the lookup table for getting to the next TB quickly.
495 * Note! In this code path there can only be one entry at present.
496 */
497 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
498 PCIEMTB const pTbOrg = pReNative->pTbOrg;
499 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
500 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
501
502#if 0
503 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
504 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
505 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
506 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
507 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
508
509 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreak);
510
511#else
512 /* Load the index as argument #1 for the helper call at the given label. */
513 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
514
515 /*
516 * Figure out the physical address of the current instruction and see
517 * whether the next instruction we're about to execute is in the same
518 * page, so we can optimistically skip TLB loading.
519 *
520 * - This is safe for all cases in FLAT mode.
521 * - In segmented modes it is complicated, given that a negative
522 * jump may underflow EIP and a forward jump may overflow or run into
523 * CS.LIM, triggering a #GP. The only thing we can get away with
524 * now at compile time is forward jumps w/o CS.LIM checks, since the
525 * lack of CS.LIM checks means we're good for the entire physical page
526 * we're executing on and another 15 bytes before we run into CS.LIM.
527 */
528 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
529# if 0 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. See also iemNativeHlpReturnBreakViaLookup. */
530 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS)
531# endif
532 )
533 {
534 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
535 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
536 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
537 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
538
539 {
540 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
541 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
542
543 /* Load the key lookup flags into the 2nd argument for the helper call.
544 - This is safe wrt CS limit checking since we're only here for FLAT modes.
545 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
546 interrupt shadow.
547 - The NMI inhibiting is more questionable, though... */
548 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
549 * Should we copy it into fExec to simplify this? OTOH, it's just a
550 * couple of extra instructions if EFLAGS are already in a register. */
551 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
552 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
553
554 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
555 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookup);
556 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithIrq);
557 }
558 }
559 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
560 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlb);
561 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq);
562#endif
563 }
564 return off;
565}
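
/* For the non-VINF_SUCCESS path above: the plain ...ViaLookup/...ViaLookupWithIrq
 * exits are used when the guest-physical address of the next instruction could be
 * computed at compile time (FLAT mode, same page), letting the helper skip the
 * TLB work, while the ...WithTlb variants are used otherwise. The ...WithIrq
 * variants are chosen when pReNative->idxLastCheckIrqCallNo is UINT32_MAX,
 * i.e. presumably when no IRQ check has been emitted for this TB yet. */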
566
567
568#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
569 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
570 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
571
572#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
573 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
574 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
575 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
576
577/** Same as iemRegAddToRip64AndFinishingNoFlags. */
578DECL_INLINE_THROW(uint32_t)
579iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
580{
581#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
582# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
583 if (!pReNative->Core.offPc)
584 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
585# endif
586
587 /* Allocate a temporary PC register. */
588 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
589
590 /* Perform the addition and store the result. */
591 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
592 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
593
594 /* Free but don't flush the PC register. */
595 iemNativeRegFreeTmp(pReNative, idxPcReg);
596#endif
597
598#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
599 pReNative->Core.offPc += cbInstr;
600 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
601# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
602 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 64);
603 off = iemNativeEmitPcDebugCheck(pReNative, off);
604# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
605 off = iemNativePcAdjustCheck(pReNative, off);
606# endif
607 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
608#endif
609
610 return off;
611}
612
613
614#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
615 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
616 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
617
618#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
619 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
620 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
621 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
622
623/** Same as iemRegAddToEip32AndFinishingNoFlags. */
624DECL_INLINE_THROW(uint32_t)
625iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
626{
627#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
628# ifdef IEMNATIVE_REG_FIXED_PC_DBG
629 if (!pReNative->Core.offPc)
630 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
631# endif
632
633 /* Allocate a temporary PC register. */
634 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
635
636 /* Perform the addition and store the result. */
637 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
638 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
639
640 /* Free but don't flush the PC register. */
641 iemNativeRegFreeTmp(pReNative, idxPcReg);
642#endif
643
644#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
645 pReNative->Core.offPc += cbInstr;
646 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
647# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
648 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 32);
649 off = iemNativeEmitPcDebugCheck(pReNative, off);
650# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
651 off = iemNativePcAdjustCheck(pReNative, off);
652# endif
653 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
654#endif
655
656 return off;
657}
658
659
660#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
661 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
662 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
663
664#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
665 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
666 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
667 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
668
669/** Same as iemRegAddToIp16AndFinishingNoFlags. */
670DECL_INLINE_THROW(uint32_t)
671iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
672{
673#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
674# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
675 if (!pReNative->Core.offPc)
676 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
677# endif
678
679 /* Allocate a temporary PC register. */
680 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
681
682 /* Perform the addition and store the result. */
683 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
684 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
685 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
686
687 /* Free but don't flush the PC register. */
688 iemNativeRegFreeTmp(pReNative, idxPcReg);
689#endif
690
691#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
692 pReNative->Core.offPc += cbInstr;
693 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
694# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
695 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 16);
696 off = iemNativeEmitPcDebugCheck(pReNative, off);
697# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
698 off = iemNativePcAdjustCheck(pReNative, off);
699# endif
700 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
701#endif
702
703 return off;
704}
705
706
707/*********************************************************************************************************************************
708* Common code for changing PC/RIP/EIP/IP. *
709*********************************************************************************************************************************/
710
711/**
712 * Emits code to check if the content of @a idxAddrReg is a canonical address,
713 * raising a \#GP(0) if it isn't.
714 *
715 * @returns New code buffer offset, UINT32_MAX on failure.
716 * @param pReNative The native recompile state.
717 * @param off The code buffer offset.
718 * @param idxAddrReg The host register with the address to check.
719 * @param idxInstr The current instruction.
720 */
721DECL_FORCE_INLINE_THROW(uint32_t)
722iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
723{
724 /*
725 * Make sure we don't have any outstanding guest register writes as we may
727 * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
727 */
728 off = iemNativeRegFlushPendingWrites(pReNative, off);
729
730#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
731 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
732#else
733 RT_NOREF(idxInstr);
734#endif
735
736#ifdef RT_ARCH_AMD64
737 /*
738 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
739 * return raisexcpt();
740 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
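 * Worked example of the arithmetic: 0x00007fffffffffff has high dword 0x00007fff,
 * + 0x8000 = 0x0000ffff, >> 16 = 0, so canonical; 0x0000800000000000 has high
 * dword 0x00008000, + 0x8000 = 0x00010000, >> 16 = 1, so #GP(0);
 * 0xffff800000000000 has high dword 0xffff8000, + 0x8000 wraps to 0, >> 16 = 0,
 * so canonical again.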
741 */
742 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
743
744 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
745 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
746 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
747 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
748 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
749
750 iemNativeRegFreeTmp(pReNative, iTmpReg);
751
752#elif defined(RT_ARCH_ARM64)
753 /*
754 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
755 * return raisexcpt();
756 * ----
757 * mov x1, 0x800000000000
758 * add x1, x0, x1
759 * cmp xzr, x1, lsr 48
760 * b.ne .Lraisexcpt
761 */
762 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
763
764 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
765 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
766 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
767 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
768
769 iemNativeRegFreeTmp(pReNative, iTmpReg);
770
771#else
772# error "Port me"
773#endif
774 return off;
775}
776
777
778/**
779 * Emits code to check if the content of @a idxAddrReg is a canonical address,
780 * raising a \#GP(0) if it isn't.
781 *
782 * Caller makes sure everything is flushed, except maybe PC.
783 *
784 * @returns New code buffer offset, UINT32_MAX on failure.
785 * @param pReNative The native recompile state.
786 * @param off The code buffer offset.
787 * @param idxAddrReg The host register with the address to check.
788 * @param offDisp The relative displacement that has already been
789 * added to idxAddrReg and must be subtracted if
790 * raising a \#GP(0).
791 * @param idxInstr The current instruction.
792 */
793DECL_FORCE_INLINE_THROW(uint32_t)
794iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
795 uint8_t idxAddrReg, int64_t offDisp, uint8_t idxInstr)
796{
797#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
798 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
799#endif
800
801#ifdef RT_ARCH_AMD64
802 /*
803 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
804 * return raisexcpt();
805 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
806 */
807 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
808
809 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
810 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
811 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
812 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
813
814#elif defined(RT_ARCH_ARM64)
815 /*
816 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
817 * return raisexcpt();
818 * ----
819 * mov x1, 0x800000000000
820 * add x1, x0, x1
821 * cmp xzr, x1, lsr 48
822 * b.ne .Lraisexcpt
823 */
824 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
825
826 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
827 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
828 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
829#else
830# error "Port me"
831#endif
832
833 /* Jump to the #GP code (hoping static prediction considers forward branches as not-taken). */
834 uint32_t const offFixup1 = off;
835 off = iemNativeEmitJnzToFixed(pReNative, off, off /*8-bit jump suffices*/);
836
837 /* jump .Lnoexcept; Skip the #GP code. */
838 uint32_t const offFixup2 = off;
839 off = iemNativeEmitJmpToFixed(pReNative, off, off /*8-bit jump suffices*/);
840
841 /* .Lraisexcpt: */
842 iemNativeFixupFixedJump(pReNative, offFixup1, off);
843#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
844 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
845#else
846 RT_NOREF(idxInstr);
847#endif
848
849 /* Undo the PC adjustment and store the old PC value. */
850 off = iemNativeEmitSubGprImm(pReNative, off, idxAddrReg, offDisp, iTmpReg);
851 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxAddrReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
852
853 off = iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0, false /*fActuallyExitingTb*/);
854
855 /* .Lnoexcept: */
856 iemNativeFixupFixedJump(pReNative, offFixup2, off);
857
858 iemNativeRegFreeTmp(pReNative, iTmpReg);
859 return off;
860}
861
862
863/**
864 * Emits code to check if the content of @a idxAddrReg is a canonical address,
865 * raising a \#GP(0) if it isn't.
866 *
867 * Caller makes sure everything is flushed, except maybe PC.
868 *
869 * @returns New code buffer offset, UINT32_MAX on failure.
870 * @param pReNative The native recompile state.
871 * @param off The code buffer offset.
872 * @param idxAddrReg The host register with the address to check.
873 * @param idxOldPcReg Register holding the old PC that offPc is relative
874 * to if available, otherwise UINT8_MAX.
875 * @param idxInstr The current instruction.
876 */
877DECL_FORCE_INLINE_THROW(uint32_t)
878iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
879 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
880{
881#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
882 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
883#endif
884
885#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
886# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
887 if (!pReNative->Core.offPc)
888# endif
889 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
890#else
891 RT_NOREF(idxInstr);
892#endif
893
894#ifdef RT_ARCH_AMD64
895 /*
896 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
897 * return raisexcpt();
898 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
899 */
900 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
901
902 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
903 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
904 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
905 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
906
907#elif defined(RT_ARCH_ARM64)
908 /*
909 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
910 * return raisexcpt();
911 * ----
912 * mov x1, 0x800000000000
913 * add x1, x0, x1
914 * cmp xzr, x1, lsr 48
915 * b.ne .Lraisexcpt
916 */
917 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
918
919 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
920 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
921 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
922#else
923# error "Port me"
924#endif
925
926#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
927 if (pReNative->Core.offPc)
928 {
929 /** @todo On x86, it is said that conditional jumps forward are statically
930 * predicted as not taken, so this isn't a very good construct.
931 * Investigate whether it makes sense to invert it and add another
932 * jump. Also, find out wtf the static predictor does here on arm! */
933 uint32_t const offFixup = off;
934 off = iemNativeEmitJzToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
935
936 /* .Lraisexcpt: */
937# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
938 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
939# endif
940 /* We need to update cpum.GstCtx.rip. */
941 if (idxOldPcReg == UINT8_MAX)
942 {
943 idxOldPcReg = iTmpReg;
944 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
945 }
946 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
947 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
948
949 off = iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0, false /*fActuallyExitingTb*/);
950 iemNativeFixupFixedJump(pReNative, offFixup, off);
951 }
952 else
953#endif
954 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
955
956 iemNativeRegFreeTmp(pReNative, iTmpReg);
957
958 return off;
959}
960
961
962/**
963 * Emits code to check that the content of @a idxAddrReg is within the limit
964 * of CS, raising a \#GP(0) if it isn't.
965 *
966 * @returns New code buffer offset; throws VBox status code on error.
967 * @param pReNative The native recompile state.
968 * @param off The code buffer offset.
969 * @param idxAddrReg The host register (32-bit) with the address to
970 * check.
971 * @param idxInstr The current instruction.
972 */
973DECL_FORCE_INLINE_THROW(uint32_t)
974iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
975 uint8_t idxAddrReg, uint8_t idxInstr)
976{
977 /*
978 * Make sure we don't have any outstanding guest register writes as we may
979 * raise an #GP(0) and all guest registers must be up to date in CPUMCTX.
980 */
981 off = iemNativeRegFlushPendingWrites(pReNative, off);
982
983#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
984 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
985#else
986 RT_NOREF(idxInstr);
987#endif
988
989 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
990 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
991 kIemNativeGstRegUse_ReadOnly);
992
993 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
994 off = iemNativeEmitJaTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
995
996 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
997 return off;
998}
999
1000
1001
1002
1003/**
1004 * Emits code to check that the content of @a idxAddrReg is within the limit
1005 * of CS, raising a \#GP(0) if it isn't.
1006 *
1007 * Caller makes sure everything is flushed, except maybe PC.
1008 *
1009 * @returns New code buffer offset; throws VBox status code on error.
1010 * @param pReNative The native recompile state.
1011 * @param off The code buffer offset.
1012 * @param idxAddrReg The host register (32-bit) with the address to
1013 * check.
1014 * @param idxOldPcReg Register holding the old PC that offPc is relative
1015 * to if available, otherwise UINT8_MAX.
1016 * @param idxInstr The current instruction.
1017 */
1018DECL_FORCE_INLINE_THROW(uint32_t)
1019iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1020 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
1021{
1022#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1023 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
1024#endif
1025
1026#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1027# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1028 if (!pReNative->Core.offPc)
1029# endif
1030 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1031#else
1032 RT_NOREF(idxInstr);
1033#endif
1034
1035 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
1036 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
1037 kIemNativeGstRegUse_ReadOnly);
1038
1039 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
1040#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1041 if (pReNative->Core.offPc)
1042 {
1043 uint32_t const offFixup = off;
1044 off = iemNativeEmitJbeToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
1045
1046 /* Raising a GP(0), but first we need to update cpum.GstCtx.rip. */
1047 if (idxOldPcReg == UINT8_MAX)
1048 {
1049 idxOldPcReg = idxAddrReg;
1050 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1051 }
1052 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
1053 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1054# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1055 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1056# endif
1057 off = iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0, false /*fActuallyExitingTb*/);
1058 iemNativeFixupFixedJump(pReNative, offFixup, off);
1059 }
1060 else
1061#endif
1062 off = iemNativeEmitJaTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
1063
1064 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
1065 return off;
1066}
1067
1068
1069/*********************************************************************************************************************************
1070* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
1071*********************************************************************************************************************************/
1072
1073#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1074 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1075 (a_enmEffOpSize), pCallEntry->idxInstr); \
1076 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1077
1078#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1079 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1080 (a_enmEffOpSize), pCallEntry->idxInstr); \
1081 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1082 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1083
1084#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
1085 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1086 IEMMODE_16BIT, pCallEntry->idxInstr); \
1087 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1088
1089#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1090 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1091 IEMMODE_16BIT, pCallEntry->idxInstr); \
1092 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1093 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1094
1095#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
1096 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1097 IEMMODE_64BIT, pCallEntry->idxInstr); \
1098 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1099
1100#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1101 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1102 IEMMODE_64BIT, pCallEntry->idxInstr); \
1103 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1104 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1105
1106
1107#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1108 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1109 (a_enmEffOpSize), pCallEntry->idxInstr); \
1110 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1111
1112#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1113 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1114 (a_enmEffOpSize), pCallEntry->idxInstr); \
1115 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1116 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1117
1118#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG(a_i16, a_cbInstr, a_rcNormal) \
1119 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1120 IEMMODE_16BIT, pCallEntry->idxInstr); \
1121 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1122
1123#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1124 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1125 IEMMODE_16BIT, pCallEntry->idxInstr); \
1126 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1127 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1128
1129#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG(a_i32, a_cbInstr, a_rcNormal) \
1130 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1131 IEMMODE_64BIT, pCallEntry->idxInstr); \
1132 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1133
1134#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1135 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1136 IEMMODE_64BIT, pCallEntry->idxInstr); \
1137 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1138 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1139
1140/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
1141 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
1142 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
1143template<bool const a_fWithinPage>
1144DECL_INLINE_THROW(uint32_t)
1145iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1146 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1147{
1148 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
1149#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1150 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1151 if (a_fWithinPage && enmEffOpSize == IEMMODE_64BIT)
1152 {
1153 /* No #GP checking required, just update offPc and get on with it. */
1154 pReNative->Core.offPc += (int64_t)offDisp + cbInstr;
1155# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1156 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1157# endif
1158 }
1159 else
1160#endif
1161 {
1162 /* Flush all but PC iff we're doing a 64-bit update here and this isn't within a page. */
1163 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT && !a_fWithinPage))
1164 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1165
1166 /* Allocate a temporary PC register. */
1167 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1168 kIemNativeGstRegUse_ForUpdate);
1169
1170 /* Perform the addition. */
1171 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr + pReNative->Core.offPc);
1172
1173 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
1174 {
1175 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't.
1176 We can skip this if the target is within the same page. */
1177 if (!a_fWithinPage)
1178 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(pReNative, off, idxPcReg,
1179 (int64_t)offDisp + cbInstr, idxInstr);
1180 }
1181 else
1182 {
1183 /* Just truncate the result to 16-bit IP. */
1184 Assert(enmEffOpSize == IEMMODE_16BIT);
1185 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1186 }
1187
1188#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1189# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1190 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1191 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1192# endif
1193 /* Since we've already got the new PC value in idxPcReg, we can just as
1194 well write it out and reset offPc to zero. Otherwise, we'd need to use
1195 a copy of the shadow PC, which would cost another move instruction here. */
1196# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS)
1197 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
1198 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
1199 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
1200 Log4(("iemNativeEmitRip64RelativeJumpAndFinishingNoFlags: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u cCondDepth=%d\n",
1201 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped, pReNative->cCondDepth));
1202 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
1203# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1204 iemNativeDbgInfoAddNativeOffset(pReNative, off);
1205 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
1206# endif
1207# endif
1208 pReNative->Core.offPc = 0;
1209#endif
1210
1211 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1212
1213 /* Free but don't flush the PC register. */
1214 iemNativeRegFreeTmp(pReNative, idxPcReg);
1215 }
1216 return off;
1217}
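
/* The a_fWithinPage=true instantiation above backs the ..._INTRAPG macro variants,
 * which are used when the jump target is known to stay on the current page: for
 * 64-bit operand size the canonical-address check is then skipped entirely and
 * only offPc (plus the optional debug mirror) is updated. */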
1218
1219
1220#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1221 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1222 (a_enmEffOpSize), pCallEntry->idxInstr); \
1223 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1224
1225#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1226 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1227 (a_enmEffOpSize), pCallEntry->idxInstr); \
1228 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1229 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1230
1231#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
1232 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1233 IEMMODE_16BIT, pCallEntry->idxInstr); \
1234 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1235
1236#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1237 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1238 IEMMODE_16BIT, pCallEntry->idxInstr); \
1239 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1240 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1241
1242#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
1243 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1244 IEMMODE_32BIT, pCallEntry->idxInstr); \
1245 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1246
1247#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1248 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1249 IEMMODE_32BIT, pCallEntry->idxInstr); \
1250 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1251 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1252
1253
1254#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1255 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1256 (a_enmEffOpSize), pCallEntry->idxInstr); \
1257 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1258
1259#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1260 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1261 (a_enmEffOpSize), pCallEntry->idxInstr); \
1262 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1263 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1264
1265#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT(a_i16, a_cbInstr, a_rcNormal) \
1266 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1267 IEMMODE_16BIT, pCallEntry->idxInstr); \
1268 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1269
1270#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1271 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1272 IEMMODE_16BIT, pCallEntry->idxInstr); \
1273 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1274 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1275
1276#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT(a_i32, a_cbInstr, a_rcNormal) \
1277 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1278 IEMMODE_32BIT, pCallEntry->idxInstr); \
1279 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1280
1281#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1282 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1283 IEMMODE_32BIT, pCallEntry->idxInstr); \
1284 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1285 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1286
1287/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
1288 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
1289 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
1290template<bool const a_fFlat>
1291DECL_INLINE_THROW(uint32_t)
1292iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1293 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1294{
1295 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
1296#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1297 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1298#endif
1299
1300 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1301 if (!a_fFlat || enmEffOpSize == IEMMODE_16BIT)
1302 {
1303 off = iemNativeRegFlushPendingWrites(pReNative, off);
1304#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1305 Assert(pReNative->Core.offPc == 0);
1306#endif
1307 }
1308
1309 /* Allocate a temporary PC register. */
1310 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1311
1312 /* Perform the addition. */
1314    off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr + (int32_t)pReNative->Core.offPc);
1318
1319 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
1320 if (enmEffOpSize == IEMMODE_16BIT)
1321 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1322
1323 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
1324 if (!a_fFlat)
1325 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1326
1327 /* Commit it. */
1328#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1329 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, enmEffOpSize == IEMMODE_32BIT ? 32 : 16);
1330 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1331#endif
1332
1333 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1334#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1335 pReNative->Core.offPc = 0;
1336#endif
1337
1338 /* Free but don't flush the PC register. */
1339 iemNativeRegFreeTmp(pReNative, idxPcReg);
1340
1341 return off;
1342}
1343
1344
1345#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
1346 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1347 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1348
1349#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
1350 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1351 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1352 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1353
1354#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
1355 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1356 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1357
1358#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1359 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1360 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1361 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1362
1363#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
1364 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1365 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1366
1367#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1368 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1369 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1370 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1371
1372/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
1373DECL_INLINE_THROW(uint32_t)
1374iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1375 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
1376{
1377 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1378 off = iemNativeRegFlushPendingWrites(pReNative, off);
1379
1380#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1381 Assert(pReNative->Core.offPc == 0);
1382 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1383#endif
1384
1385 /* Allocate a temporary PC register. */
1386 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1387
1388 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
1389 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
1390 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1391 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1392#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1393 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, 16);
1394 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1395#endif
1396 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1397
1398 /* Free but don't flush the PC register. */
1399 iemNativeRegFreeTmp(pReNative, idxPcReg);
1400
1401 return off;
1402}
1403
1404
1405
1406/*********************************************************************************************************************************
1407*   Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH).                               *
1408*********************************************************************************************************************************/
1409
1410/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
1411#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
1412 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1413
1414/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
1415#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
1416 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1417
1418/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
1419#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
1420 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1421
1422/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
1423 * clears flags. */
1424#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
1425 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
1426 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1427
1428/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
1429 * clears flags. */
1430#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
1431 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
1432 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1433
1434/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
1435 * clears flags. */
1436#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
1437 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
1438 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1439
1440#undef IEM_MC_SET_RIP_U16_AND_FINISH
1441
1442
1443/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
1444#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
1445 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1446
1447/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
1448#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
1449 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1450
1451/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
1452 * clears flags. */
1453#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
1454 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
1455 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1456
1457/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
1458 * and clears flags. */
1459#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
1460 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
1461 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1462
1463#undef IEM_MC_SET_RIP_U32_AND_FINISH
1464
1465
1466/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
1467#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
1468 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1469
1470/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
1471 * and clears flags. */
1472#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
1473 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
1474 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1475
1476#undef IEM_MC_SET_RIP_U64_AND_FINISH
1477
1478
1479/** Same as iemRegRipJumpU16AndFinishNoFlags,
1480 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
1481DECL_INLINE_THROW(uint32_t)
1482iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
1483 uint8_t idxInstr, uint8_t cbVar)
1484{
1485 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1486 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1487
1488    /* If we can't rule out a #GP(0) below, flush all dirty registers except for
1489       PC, which will be handled specially by the two workers below if they raise a GP. */
1490 bool const fMayRaiseGp0 = (f64Bit && cbVar > sizeof(uint32_t)) || (!f64Bit && !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
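    /* (I.e. a #GP(0) is only possible when jumping to a full 64-bit target, which may be
       non-canonical, or, outside flat mode, to a 16/32-bit target that may exceed CS.LIM;
       see the canonical / CS.LIM checks further down.) */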
1491 uint8_t const idxOldPcReg = fMayRaiseGp0
1492 ? iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc)
1493 : UINT8_MAX;
1494 if (fMayRaiseGp0)
1495 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1496
1497 /* Get a register with the new PC loaded from idxVarPc.
1498       Note! This ASSUMES that the high bits of the GPR are zeroed. */
1499 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
1500
1501 /* Check that the target is within CS.LIM / is canonical (may #GP(0) + exit TB). */
1502 if (fMayRaiseGp0)
1503 {
1504 if (f64Bit)
1505 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1506 else
1507 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1508 }
1509
1510 /* Store the result. */
1511 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1512
1513#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1514 pReNative->Core.offPc = 0;
1515 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1516# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1517 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1518 pReNative->Core.fDebugPcInitialized = true;
1519 Log4(("uPcUpdatingDebug=rip off=%#x\n", off));
1520# endif
1521#endif
1522
1523 if (idxOldPcReg != UINT8_MAX)
1524 iemNativeRegFreeTmp(pReNative, idxOldPcReg);
1525 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1526    /** @todo implicitly free the variable? */
1527
1528 return off;
1529}
1530
1531
1532
1533/*********************************************************************************************************************************
1534*   Emitters for changing PC/RIP/EIP/IP with an indirect call (IEM_MC_IND_CALL_UXX_AND_FINISH) (requires stack emitters).    *
1535*********************************************************************************************************************************/
1536
1537/** @todo These helpers naturally belong to the stack push API, but we already need them up here (we could of course move
1538 *        them below the stack emitters, but then they wouldn't be close to the rest of the PC/RIP handling...). */
1539DECL_FORCE_INLINE_THROW(uint32_t)
1540iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1541{
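    /* What the emitted code does at guest level, roughly (descriptive sketch only, not emitted literally):
           idxRegEffSp     = (uint16_t)(idxRegRsp - cbMem);    // zero-extended effective store address
           idxRegRsp[15:0] = idxRegEffSp[15:0];                // only SP is updated, RSP[63:16] is preserved
     */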
1542 /* Use16BitSp: */
1543#ifdef RT_ARCH_AMD64
1544 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1545 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1546#else
1547 /* sub regeff, regrsp, #cbMem */
1548 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
1549 /* and regeff, regeff, #0xffff */
1550 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1551 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
1552    /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp to idxRegRsp bits 15:0, keeping the rest of idxRegRsp as is. */
1553 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
1554#endif
1555 return off;
1556}
1557
1558
1559DECL_FORCE_INLINE(uint32_t)
1560iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1561{
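    /* With a 32-bit stack pointer, ESP is simply decremented by cbMem and used directly as
       the effective store address. */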
1562 /* Use32BitSp: */
1563 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
1564 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1565 return off;
1566}
1567
1568
1569DECL_INLINE_THROW(uint32_t)
1570iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
1571 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
1572{
1573 /*
1574 * Assert sanity.
1575 */
1576#ifdef VBOX_STRICT
1577 if (RT_BYTE2(cBitsVarAndFlat) != 0)
1578 {
1579 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
1580 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
1581 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
1582 Assert( pfnFunction
1583 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1584 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
1585 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1586 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
1587 : UINT64_C(0xc000b000a0009000) ));
1588 }
1589 else
1590 Assert( pfnFunction
1591 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
1592 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
1593 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
1594 : UINT64_C(0xc000b000a0009000) ));
1595#endif
1596
1597#ifdef VBOX_STRICT
1598 /*
1599 * Check that the fExec flags we've got make sense.
1600 */
1601 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1602#endif
1603
1604 /*
1605 * To keep things simple we have to commit any pending writes first as we
1606 * may end up making calls.
1607 */
1608 /** @todo we could postpone this till we make the call and reload the
1609 * registers after returning from the call. Not sure if that's sensible or
1610 * not, though. */
1611 off = iemNativeRegFlushPendingWrites(pReNative, off);
1612
1613 /*
1614 * First we calculate the new RSP and the effective stack pointer value.
1615 * For 64-bit mode and flat 32-bit these two are the same.
1616 * (Code structure is very similar to that of PUSH)
1617 */
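    /* Reading of the cBitsVarAndFlat packing as decoded below: RT_BYTE1 = width in bits of the
       value being pushed, RT_BYTE2 = flat-mode address width in bits (0 when not flat),
       RT_BYTE3 = non-zero for segment register pushes. */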
1618 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
1619 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
1620 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
1621 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
1622 ? cbMem : sizeof(uint16_t);
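    /* Note: reading of the above - for an Intel segment push outside 16-bit mode only 16 bits are
       actually accessed, while in 16-bit (real) mode the full 32 bits are written with the upper
       half merged from EFLAGS; see the fIsIntelSeg handling in the TlbLookup store code below. */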
1623 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
1624 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1625 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1626 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1627 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1628 if (cBitsFlat != 0)
1629 {
1630 Assert(idxRegEffSp == idxRegRsp);
1631 Assert(cBitsFlat == 32 || cBitsFlat == 64);
1632 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1633 if (cBitsFlat == 64)
1634 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1635 else
1636 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1637 }
1638 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1639 {
1640 Assert(idxRegEffSp != idxRegRsp);
1641 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1642 kIemNativeGstRegUse_ReadOnly);
1643#ifdef RT_ARCH_AMD64
1644 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1645#else
1646 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1647#endif
1648 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1649 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1650 offFixupJumpToUseOtherBitSp = off;
1651 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1652 {
1653 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1654 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1655 }
1656 else
1657 {
1658 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1659 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1660 }
1661 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1662 }
1663 /* SpUpdateEnd: */
1664 uint32_t const offLabelSpUpdateEnd = off;
1665
1666 /*
1667 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1668 * we're skipping lookup).
1669 */
1670 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1671 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
1672 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1673 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1674 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1675 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1676 : UINT32_MAX;
1677 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1678
1679
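    /* Code layout note: below comes first the alternate-SP-width tail of the stack pointer update
       (target of the fixup above), then the TlbMiss path that calls a helper, and, when TLB lookup
       is enabled, the TlbLookup + inline store path; the paths rejoin at TlbDone. */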
1680 if (!TlbState.fSkip)
1681 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1682 else
1683 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1684
1685 /*
1686     * Use16BitSp / Use32BitSp: the SS.ATTR.D dependent fallback jumped to via the fixup above.
1687 */
1688 if (cBitsFlat == 0)
1689 {
1690#ifdef RT_ARCH_AMD64
1691 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1692#else
1693 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1694#endif
1695 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1696 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1697 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1698 else
1699 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1700 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1701 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1702 }
1703
1704 /*
1705 * TlbMiss:
1706 *
1707 * Call helper to do the pushing.
1708 */
1709 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1710
1711#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1712 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1713#else
1714 RT_NOREF(idxInstr);
1715#endif
1716
1717 /* Save variables in volatile registers. */
1718 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1719 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1720 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1721 | (RT_BIT_32(idxRegPc));
1722 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1723
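    /* Set up the helper call arguments: ARG1 = effective stack pointer, ARG2 = the value to push
       (the PC).  The ordering / swapping below avoids overwriting a source register that already
       lives in one of the destination argument registers. */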
1724 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1725 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1726 {
1727 /* Swap them using ARG0 as temp register: */
1728 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1729 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1730 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1731 }
1732 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1733 {
1734 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1735 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1736
1737 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1738 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1739 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1740 }
1741 else
1742 {
1743 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1744 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1745
1746 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1747 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1748 }
1749
1750 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1751 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1752
1753 /* Done setting up parameters, make the call. */
1754 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
1755
1756 /* Restore variables and guest shadow registers to volatile registers. */
1757 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1758 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1759
1760#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1761 if (!TlbState.fSkip)
1762 {
1763 /* end of TlbMiss - Jump to the done label. */
1764 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1765 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1766
1767 /*
1768 * TlbLookup:
1769 */
1770 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
1771 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1772
1773 /*
1774 * Emit code to do the actual storing / fetching.
1775 */
1776 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1777# ifdef IEM_WITH_TLB_STATISTICS
1778 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1779 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1780# endif
1781 switch (cbMemAccess)
1782 {
1783 case 2:
1784 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1785 break;
1786 case 4:
1787 if (!fIsIntelSeg)
1788 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1789 else
1790 {
1791                    /* Intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
1792                       PUSH FS in real mode, so we have to try to emulate that here.
1793 We borrow the now unused idxReg1 from the TLB lookup code here. */
1794 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
1795 kIemNativeGstReg_EFlags);
1796 if (idxRegEfl != UINT8_MAX)
1797 {
1798#ifdef RT_ARCH_AMD64
1799 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
1800 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1801 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1802#else
1803 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
1804 off, TlbState.idxReg1, idxRegEfl,
1805 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1806#endif
1807 iemNativeRegFreeTmp(pReNative, idxRegEfl);
1808 }
1809 else
1810 {
1811 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
1812 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
1813 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
1814 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
1815 }
1816 /* ASSUMES the upper half of idxRegPc is ZERO. */
1817 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegPc);
1818 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
1819 }
1820 break;
1821 case 8:
1822 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1823 break;
1824 default:
1825 AssertFailed();
1826 }
1827
1828 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1829 TlbState.freeRegsAndReleaseVars(pReNative);
1830
1831 /*
1832 * TlbDone:
1833 *
1834 * Commit the new RSP value.
1835 */
1836 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1837 }
1838#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1839
1840#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1841 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
1842#endif
1843 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1844 if (idxRegEffSp != idxRegRsp)
1845 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1846
1847 return off;
1848}
1849
1850
1851/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1852#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1853 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1854
1855/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1856 * clears flags. */
1857#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1858 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1859 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1860
1861/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1862#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1863 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1864
1865/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1866 * clears flags. */
1867#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1868 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1869 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1870
1871#undef IEM_MC_IND_CALL_U16_AND_FINISH
1872
1873
1874/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1875#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1876 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1877
1878/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1879 * clears flags. */
1880#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1881 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1882 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1883
1884#undef IEM_MC_IND_CALL_U32_AND_FINISH
1885
1886
1887/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1888 * an extra parameter, for use in 64-bit code. */
1889#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1890 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1891
1892
1893/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1894 * an extra parameter, for use in 64-bit code and we need to check and clear
1895 * flags. */
1896#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1897 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1898 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1899
1900#undef IEM_MC_IND_CALL_U64_AND_FINISH
1901
1902/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1903 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1904DECL_INLINE_THROW(uint32_t)
1905iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1906 uint8_t idxInstr, uint8_t cbVar)
1907{
1908 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1909 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1910
1911 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1912 off = iemNativeRegFlushPendingWrites(pReNative, off);
1913
1914#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1915 Assert(pReNative->Core.offPc == 0);
1916 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1917#endif
1918
1919 /* Get a register with the new PC loaded from idxVarPc.
1920       Note! This ASSUMES that the high bits of the GPR are zeroed. */
1921 uint8_t const idxNewPcReg = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1922
1923 /* Check limit (may #GP(0) + exit TB). */
1924 if (!f64Bit)
1925/** @todo we can skip this test in FLAT 32-bit mode. */
1926 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1927 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1928 else if (cbVar > sizeof(uint32_t))
1929 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1930
1931#if 1
1932 /* Allocate a temporary PC register, we don't want it shadowed. */
1933 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1934 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1935#else
1936 /* Allocate a temporary PC register. */
1937 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1938 true /*fNoVolatileRegs*/);
1939#endif
1940
1941 /* Perform the addition and push the variable to the guest stack. */
1942 /** @todo Flat variants for PC32 variants. */
1943 switch (cbVar)
1944 {
1945 case sizeof(uint16_t):
1946 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1947 /* Truncate the result to 16-bit IP. */
1948 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1949 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
1950 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1951 break;
1952 case sizeof(uint32_t):
1953 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1954 /** @todo In FLAT mode we can use the flat variant. */
1955 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
1956 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1957 break;
1958 case sizeof(uint64_t):
1959 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1960 off = iemNativeEmitStackPushRip(pReNative, off, idxPcReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
1961 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1962 break;
1963 default:
1964 AssertFailed();
1965 }
1966
1967    /* RSP got changed, so flush the pending writes again. */
1968 off = iemNativeRegFlushPendingWrites(pReNative, off);
1969
1970 /* Store the result. */
1971 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1972#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1973 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1974 pReNative->Core.fDebugPcInitialized = true;
1975 Log4(("uPcUpdatingDebug=rip/indirect-call off=%#x\n", off));
1976#endif
1977
1978#if 1
1979 /* Need to transfer the shadow information to the new RIP register. */
1980 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxNewPcReg, kIemNativeGstReg_Pc, off);
1981#else
1982 /* Sync the new PC. */
1983 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxNewPcReg);
1984#endif
1985 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1986 iemNativeRegFreeTmp(pReNative, idxPcReg);
1987    /** @todo implicitly free the variable? */
1988
1989 return off;
1990}
1991
1992
1993/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1994 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1995#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1996 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1997
1998/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1999 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
2000 * flags. */
2001#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
2002 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
2003 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2004
2005/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2006 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
2007#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
2008 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
2009
2010/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2011 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
2012 * flags. */
2013#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
2014 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
2015 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2016
2017/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2018 * an extra parameter, for use in 64-bit code. */
2019#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
2020 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
2021
2022/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
2023 * an extra parameter, for use in 64-bit code and we need to check and clear
2024 * flags. */
2025#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
2026 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
2027 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2028
2029#undef IEM_MC_REL_CALL_S16_AND_FINISH
2030
2031/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2032 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2033DECL_INLINE_THROW(uint32_t)
2034iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
2035 uint8_t idxInstr)
2036{
2037 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2038 off = iemNativeRegFlushPendingWrites(pReNative, off);
2039
2040#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2041 Assert(pReNative->Core.offPc == 0);
2042 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2043#endif
2044
2045 /* Allocate a temporary PC register. */
2046 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2047 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
2048 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
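    /* idxPcRegOld is used to form the 16-bit return address that gets pushed, while
       idxPcRegNew receives the call target (return address + displacement). */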
2049
2050    /* Calculate the return address (old IP + cbInstr) and from it the new IP. */
2051 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2052 /* Truncate the result to 16-bit IP. */
2053 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
2054 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2055 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2056
2057 /* Truncate the result to 16-bit IP. */
2058 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
2059
2060 /* Check limit (may #GP(0) + exit TB). */
2061 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2062
2063    /* Push the return address onto the guest stack. */
2064 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(16, 0, 0, 0),
2065 (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
2066
2067 /* RSP got changed, so flush again. */
2068 off = iemNativeRegFlushPendingWrites(pReNative, off);
2069
2070 /* Store the result. */
2071 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2072#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2073 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2074 pReNative->Core.fDebugPcInitialized = true;
2075 Log4(("uPcUpdatingDebug=rip/rel-call-16 off=%#x offDisp=%d\n", off, offDisp));
2076#endif
2077
2078 /* Need to transfer the shadow information to the new RIP register. */
2079 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2080 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2081 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2082
2083 return off;
2084}
2085
2086
2087/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2088 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
2089#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
2090 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
2091
2092/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2093 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
2094 * flags. */
2095#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
2096 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
2097 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2098
2099#undef IEM_MC_REL_CALL_S32_AND_FINISH
2100
2101/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2102 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2103DECL_INLINE_THROW(uint32_t)
2104iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
2105 uint8_t idxInstr)
2106{
2107 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2108 off = iemNativeRegFlushPendingWrites(pReNative, off);
2109
2110#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2111 Assert(pReNative->Core.offPc == 0);
2112 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2113#endif
2114
2115 /* Allocate a temporary PC register. */
2116 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2117 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2118 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
2119
2120 /* Update the EIP to get the return address. */
2121 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2122
2123    /* Load the address, add the displacement and check it against the CS limit, raising #GP(0) + exit TB if it's outside. */
2124 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2125 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2126 /** @todo we can skip this test in FLAT 32-bit mode. */
2127 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2128
2129    /* Push the return address onto the guest stack. */
2130 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
2131 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(32, 0, 0, 0),
2132 (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
2133
2134    /* RSP got changed, so flush the pending writes again. */
2135 off = iemNativeRegFlushPendingWrites(pReNative, off);
2136
2137 /* Store the result. */
2138 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2139#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2140 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2141 pReNative->Core.fDebugPcInitialized = true;
2142 Log4(("uPcUpdatingDebug=eip/rel-call-32 off=%#x offDisp=%d\n", off, offDisp));
2143#endif
2144
2145 /* Need to transfer the shadow information to the new RIP register. */
2146 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2147 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2148 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2149
2150 return off;
2151}
2152
2153
2154/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2155 * an extra parameter, for use in 64-bit code. */
2156#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
2157 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
2158
2159/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2160 * an extra parameter, for use in 64-bit code and we need to check and clear
2161 * flags. */
2162#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
2163 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
2164 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2165
2166#undef IEM_MC_REL_CALL_S64_AND_FINISH
2167
2168/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2169 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2170DECL_INLINE_THROW(uint32_t)
2171iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
2172 uint8_t idxInstr)
2173{
2174 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2175 off = iemNativeRegFlushPendingWrites(pReNative, off);
2176
2177#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2178 Assert(pReNative->Core.offPc == 0);
2179 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2180#endif
2181
2182 /* Allocate a temporary PC register. */
2183 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2184 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2185 uint8_t const idxPcRegNew = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
2186
2187 /* Update the RIP to get the return address. */
2188 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
2189
2190 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2191 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
2192 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
2193 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2194
2195    /* Push the return address onto the guest stack. */
2196 off = iemNativeEmitStackPushRip(pReNative, off, idxPcRegOld, RT_MAKE_U32_FROM_U8(64, 64, 0, 0),
2197 (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
2198
2199    /* RSP got changed, so flush the pending writes again. */
2200 off = iemNativeRegFlushPendingWrites(pReNative, off);
2201
2202 /* Store the result. */
2203 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2204#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2205 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2206 pReNative->Core.fDebugPcInitialized = true;
2207    Log4(("uPcUpdatingDebug=rip/rel-call-64 off=%#x offDisp=%RI64\n", off, offDisp));
2208#endif
2209
2210 /* Need to transfer the shadow information to the new RIP register. */
2211 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2212 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2213 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2214
2215 return off;
2216}
2217
2218
2219/*********************************************************************************************************************************
2220*   Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters).     *
2221*********************************************************************************************************************************/
2222
2223DECL_FORCE_INLINE_THROW(uint32_t)
2224iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2225 uint16_t cbPopAdd, uint8_t idxRegTmp)
2226{
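    /* What the emitted code does at guest level, roughly (descriptive sketch only, not emitted literally):
           idxRegEffSp     = (uint16_t)idxRegRsp;                           // address the return address is read from
           idxRegRsp[15:0] = (uint16_t)(idxRegRsp + cbMem + cbPopAdd);      // SP advanced past it plus the RETN Iw bytes
     */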
2227 /* Use16BitSp: */
2228#ifdef RT_ARCH_AMD64
2229 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2230 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
2231 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
2232 RT_NOREF(idxRegTmp);
2233
2234#elif defined(RT_ARCH_ARM64)
2235 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
2236 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
2237    /* add tmp, regrsp, #(cbMem + cbPopAdd) - done in up to two adds if the sum needs more than 12 bits */
2238 uint16_t const cbCombined = cbMem + cbPopAdd;
2239 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
2240 if (cbCombined >= RT_BIT_32(12))
2241 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
2242 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2243 /* and tmp, tmp, #0xffff */
2244 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2245 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
2246    /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
2247 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
2248
2249#else
2250# error "Port me"
2251#endif
2252 return off;
2253}
2254
2255
2256DECL_FORCE_INLINE_THROW(uint32_t)
2257iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2258 uint16_t cbPopAdd)
2259{
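    /* 32-bit stack pointer: ESP is used directly as the pop address and then advanced by
       cbMem + cbPopAdd (the return address size plus the RETN immediate). */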
2260 /* Use32BitSp: */
2261 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2262 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
2263 return off;
2264}
2265
2266
2267/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
2268#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr) \
2269 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, IEMMODE_16BIT, pCallEntry->idxInstr)
2270
2271/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
2272#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2273 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), false /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
2274
2275/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
2276#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2277 off = iemNativeEmitRetn(pReNative, off, (a_cbInstr), (a_u16Pop), true /*f64Bit*/, (a_enmEffOpSize), pCallEntry->idxInstr)
2278
2279/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
2280 * clears flags. */
2281#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16Pop, a_cbInstr) \
2282 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_u16Pop, a_cbInstr); \
2283 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2284
2285/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
2286 * clears flags. */
2287#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2288 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
2289 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2290
2291/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
2292 * clears flags. */
2293#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16Pop, a_cbInstr, a_enmEffOpSize) \
2294 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_u16Pop, a_cbInstr, a_enmEffOpSize); \
2295 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2296
2297/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
2298DECL_INLINE_THROW(uint32_t)
2299iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPop, bool f64Bit,
2300 IEMMODE enmEffOpSize, uint8_t idxInstr)
2301{
2302 RT_NOREF(cbInstr);
2303
2304#ifdef VBOX_STRICT
2305 /*
2306 * Check that the fExec flags we've got make sense.
2307 */
2308 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
2309#endif
2310
2311 /*
2312 * To keep things simple we have to commit any pending writes first as we
2313 * may end up making calls.
2314 */
2315 off = iemNativeRegFlushPendingWrites(pReNative, off);
2316
2317 /*
2318 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
2319 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
2320 * directly as the effective stack pointer.
2321 * (Code structure is very similar to that of PUSH)
2322 *
2323 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
2324 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
2325 * aren't commonly used (or useful) and thus not in need of optimizing.
2326 *
2327     * Note! For non-flat modes the guest RSP is not allocated for update but rather for calculation,
2328     *       as the shadowed register would otherwise remain modified even if the return address throws
2329     *       a \#GP(0) due to being outside the CS limit, causing a wrong stack pointer value in the guest
2330     *       (see the near return testcase in bs3-cpu-basic-2). If no exception is thrown, the shadowing is
2331     *       transferred to the new register returned by iemNativeRegAllocTmpForGuestReg() at the end.
2332 */
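    /* Layout of the code emitted below: the SP update for the current CPU mode,
       a jump to TlbLookup (or straight to TlbMiss when the lookup is skipped),
       the alternate-width SP update (UseOtherBitSp), the TlbMiss helper call,
       the inline TlbLookup load, and finally the common tail that validates the
       return address and commits RSP and RIP. */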
2333 uint8_t const cbMem = enmEffOpSize == IEMMODE_64BIT
2334 ? sizeof(uint64_t)
2335 : enmEffOpSize == IEMMODE_32BIT
2336 ? sizeof(uint32_t)
2337 : sizeof(uint16_t);
2338 bool const fFlat = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) && enmEffOpSize != IEMMODE_16BIT; /* see note */
2339 uintptr_t const pfnFunction = fFlat
2340 ? enmEffOpSize == IEMMODE_64BIT
2341 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
2342 : (uintptr_t)iemNativeHlpStackFlatFetchU32
2343 : enmEffOpSize == IEMMODE_32BIT
2344 ? (uintptr_t)iemNativeHlpStackFetchU32
2345 : (uintptr_t)iemNativeHlpStackFetchU16;
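    /* Helper selection above: 64-bit and FLAT 32-bit returns use the flat fetch
       helpers; everything else, including o16 returns in FLAT mode (see the note
       above), goes through the segmented SS-relative fetch helpers. */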
2346 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
2347 fFlat ? kIemNativeGstRegUse_ForUpdate
2348 : kIemNativeGstRegUse_Calculation,
2349 true /*fNoVolatileRegs*/);
2350 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
2351 /** @todo can do a better job picking the register here. For cbMem >= 4 this
2352 * will be the resulting register value. */
2353 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
2354
2355 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
2356 if (fFlat)
2357 Assert(idxRegEffSp == idxRegRsp);
2358 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
2359 {
2360 Assert(idxRegEffSp != idxRegRsp);
2361 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
2362 kIemNativeGstRegUse_ReadOnly);
2363#ifdef RT_ARCH_AMD64
2364 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2365#else
2366 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2367#endif
2368 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
2369 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
2370 offFixupJumpToUseOtherBitSp = off;
2371 if (enmEffOpSize == IEMMODE_32BIT)
2372 {
2373 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
2374 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
2375 }
2376 else
2377 {
2378 Assert(enmEffOpSize == IEMMODE_16BIT);
2379 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
2380 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
2381 idxRegMemResult);
2382 }
2383 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2384 }
2385 /* SpUpdateEnd: */
2386 uint32_t const offLabelSpUpdateEnd = off;
2387
2388 /*
2389      * Okay, now prepare for the TLB lookup and jump to the TlbLookup code
2390      * (or to TlbMiss if we're skipping the lookup).
2391 */
2392 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
2393 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
2394 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
2395 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
2396 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
2397 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
2398 : UINT32_MAX;
2399
2400 if (!TlbState.fSkip)
2401 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
2402 else
2403 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
2404
2405 /*
2406      * UseOtherBitSp (the SP size not matching the current CPU mode):
2407 */
2408 if (!fFlat)
2409 {
2410#ifdef RT_ARCH_AMD64
2411 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2412#else
2413 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2414#endif
2415 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
2416 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
2417 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop,
2418 idxRegMemResult);
2419 else
2420 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPop);
2421 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
2422 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2423 }
2424
2425 /*
2426 * TlbMiss:
2427 *
2428      * Call helper to do the stack fetch.
2429 */
2430 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
2431
2432#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2433 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2434#else
2435 RT_NOREF(idxInstr);
2436#endif
2437
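    /* Registers that need not be saved across the helper call: the TLB state
       registers, the result register (overwritten with the helper return value
       below) and the effective SP copy (only consumed as the call argument). */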
2438 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
2439 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
2440 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
2441 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
2442
2443
2444 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
2445 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
2446 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
2447
2448 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
2449 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2450
2451 /* Done setting up parameters, make the call. */
2452 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
2453
2454 /* Move the return register content to idxRegMemResult. */
2455 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
2456 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
2457
2458 /* Restore variables and guest shadow registers to volatile registers. */
2459 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
2460 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
2461
2462#ifdef IEMNATIVE_WITH_TLB_LOOKUP
2463 if (!TlbState.fSkip)
2464 {
2465 /* end of TlbMiss - Jump to the done label. */
2466 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
2467 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
2468
2469 /*
2470 * TlbLookup:
2471 */
2472 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
2473 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
2474
2475 /*
2476          * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
2477 */
2478 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2479# ifdef IEM_WITH_TLB_STATISTICS
2480 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
2481 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
2482# endif
2483 switch (cbMem)
2484 {
2485 case 2:
2486 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2487 break;
2488 case 4:
2489 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2490 break;
2491 case 8:
2492 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2493 break;
2494 default:
2495 AssertFailed();
2496 }
2497
2498 TlbState.freeRegsAndReleaseVars(pReNative);
2499
2500 /*
2501 * TlbDone:
2502 *
2503      * Set the new RSP value (FLAT accesses need to calculate it first) and
2504 * commit the popped register value.
2505 */
2506 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
2507 }
2508#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
2509
2510 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
2511 if (!f64Bit)
2512/** @todo we can skip this test in FLAT 32-bit mode. */
2513 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2514 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2515 else if (enmEffOpSize == IEMMODE_64BIT)
2516 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2517
2518 /* Complete RSP calculation for FLAT mode. */
2519 if (idxRegEffSp == idxRegRsp)
2520 {
2521 if (enmEffOpSize == IEMMODE_64BIT)
2522 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPop);
2523 else
2524 {
2525 Assert(enmEffOpSize == IEMMODE_32BIT);
2526 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPop);
2527 }
2528 }
2529
2530 /* Commit the result and clear any current guest shadows for RIP. */
2531 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
2532 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2533 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
2534#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2535 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2536 pReNative->Core.fDebugPcInitialized = true;
2537 Log4(("uPcUpdatingDebug=rip/ret off=%#x\n", off));
2538#endif
2539
2540 /* Need to transfer the shadowing information to the host register containing the updated value now. */
2541 if (!fFlat)
2542 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
2543
2544 iemNativeRegFreeTmp(pReNative, idxRegRsp);
2545 if (idxRegEffSp != idxRegRsp)
2546 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
2547 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
2548 return off;
2549}
2550
2551
2552/*********************************************************************************************************************************
2553* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
2554*********************************************************************************************************************************/
2555
2556#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
2557 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2558
2559/**
2560 * Emits code to check if a \#NM exception should be raised.
2561 *
2562 * @returns New code buffer offset, UINT32_MAX on failure.
2563 * @param pReNative The native recompile state.
2564 * @param off The code buffer offset.
2565 * @param idxInstr The current instruction.
2566 */
2567DECL_INLINE_THROW(uint32_t)
2568iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2569{
2570#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2571 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
2572
2573 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
2574 {
2575#endif
2576 /*
2577 * Make sure we don't have any outstanding guest register writes as we may
2578      * raise an #NM and all guest registers must be up to date in CPUMCTX.
2579 */
2580 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2581 off = iemNativeRegFlushPendingWrites(pReNative, off);
2582
2583#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2584 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2585#else
2586 RT_NOREF(idxInstr);
2587#endif
2588
2589 /* Allocate a temporary CR0 register. */
2590 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2591 kIemNativeGstRegUse_ReadOnly);
2592
2593 /*
2594          * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
2595 * return raisexcpt();
2596 */
2597 /* Test and jump. */
2598 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS,
2599 kIemNativeLabelType_RaiseNm);
2600
2601 /* Free but don't flush the CR0 register. */
2602 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2603
2604#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2605 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
2606 }
2607 else
2608 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
2609#endif
2610
2611 return off;
2612}
2613
2614
2615#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
2616 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2617
2618/**
2619 * Emits code to check if a \#NM exception should be raised for WAIT/FWAIT (CR0.MP and CR0.TS both set).
2620 *
2621 * @returns New code buffer offset, UINT32_MAX on failure.
2622 * @param pReNative The native recompile state.
2623 * @param off The code buffer offset.
2624 * @param idxInstr The current instruction.
2625 */
2626DECL_INLINE_THROW(uint32_t)
2627iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2628{
2629#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2630 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
2631
2632 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
2633 {
2634#endif
2635 /*
2636 * Make sure we don't have any outstanding guest register writes as we may
2637      * raise an #NM and all guest registers must be up to date in CPUMCTX.
2638 */
2639 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2640 off = iemNativeRegFlushPendingWrites(pReNative, off);
2641
2642#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2643 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2644#else
2645 RT_NOREF(idxInstr);
2646#endif
2647
2648 /* Allocate a temporary CR0 register. */
2649 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2650 kIemNativeGstRegUse_Calculation);
2651
2652 /*
2653          * if ((cr0 & (X86_CR0_MP | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))
2654 * return raisexcpt();
2655 */
2656 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2657 /* Test and jump. */
2658 off = iemNativeEmitTestIfGpr32EqualsImmAndTbExit(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS,
2659 kIemNativeLabelType_RaiseNm);
2660
2661 /* Free the CR0 register. */
2662 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2663
2664#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2665 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2666 }
2667 else
2668 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2669#endif
2670
2671 return off;
2672}
2673
2674
2675#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2676 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2677
2678/**
2679 * Emits code to check if a \#MF exception should be raised.
2680 *
2681 * @returns New code buffer offset, UINT32_MAX on failure.
2682 * @param pReNative The native recompile state.
2683 * @param off The code buffer offset.
2684 * @param idxInstr The current instruction.
2685 */
2686DECL_INLINE_THROW(uint32_t)
2687iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2688{
2689 /*
2690 * Make sure we don't have any outstanding guest register writes as we may
2691      * raise an #MF and all guest registers must be up to date in CPUMCTX.
2692 */
2693 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2694 off = iemNativeRegFlushPendingWrites(pReNative, off);
2695
2696#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2697 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2698#else
2699 RT_NOREF(idxInstr);
2700#endif
2701
2702 /* Allocate a temporary FSW register. */
2703 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
2704 kIemNativeGstRegUse_ReadOnly);
2705
2706 /*
2707      * if ((FSW & X86_FSW_ES) != 0)
2708 * return raisexcpt();
2709 */
2710 /* Test and jump. */
2711 off = iemNativeEmitTestBitInGprAndTbExitIfSet(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT, kIemNativeLabelType_RaiseMf);
2712
2713 /* Free but don't flush the FSW register. */
2714 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2715
2716 return off;
2717}
2718
2719
2720#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2721 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2722
2723/**
2724 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2725 *
2726 * @returns New code buffer offset, UINT32_MAX on failure.
2727 * @param pReNative The native recompile state.
2728 * @param off The code buffer offset.
2729 * @param idxInstr The current instruction.
2730 */
2731DECL_INLINE_THROW(uint32_t)
2732iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2733{
2734#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2735 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2736
2737 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2738 {
2739#endif
2740 /*
2741 * Make sure we don't have any outstanding guest register writes as we may
2742      * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2743 */
2744 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2745 off = iemNativeRegFlushPendingWrites(pReNative, off);
2746
2747#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2748 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2749#else
2750 RT_NOREF(idxInstr);
2751#endif
2752
2753 /* Allocate a temporary CR0 and CR4 register. */
2754 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2755 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2756 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2757
2758 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2759#ifdef RT_ARCH_AMD64
2760 /*
2761      * We do a modified test here:
2762      *      if (!((((cr4 & X86_CR4_OSFXSR) | cr0) & (X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR)) ^ X86_CR4_OSFXSR))
2763      *      { likely } else { goto RaiseSseRelated; }
2764      * This ASSUMES that CR0[bit 9] is always zero. This is the case on
2765      * all targets except the 386, which doesn't support SSE anyway, so
2766      * this should be a safe assumption.
2767 */
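        /* Worked example: with CR4.OSFXSR=1, CR0.EM=0 and CR0.TS=0 the masked
           value equals X86_CR4_OSFXSR, the XOR yields zero and we fall through;
           any other combination leaves a bit set and we exit via RaiseSseRelated. */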
2768 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
2769 //pCodeBuf[off++] = 0xcc;
2770 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2771 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2772 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2773 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2774 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2775 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseSseRelated, kIemNativeInstrCond_ne);
2776
2777#elif defined(RT_ARCH_ARM64)
2778 /*
2779 * We do a modified test here:
2780 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2781 * else { goto RaiseSseRelated; }
2782 */
2783 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
2784 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2785 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2786 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2787 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2788 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2789 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2790 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2791 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2792 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2793 kIemNativeLabelType_RaiseSseRelated);
2794
2795#else
2796# error "Port me!"
2797#endif
2798
2799 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2800 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2801 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2802 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2803
2804#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2805 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2806 }
2807 else
2808 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2809#endif
2810
2811 return off;
2812}
2813
2814
2815#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2816 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2817
2818/**
2819 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2820 *
2821 * @returns New code buffer offset, UINT32_MAX on failure.
2822 * @param pReNative The native recompile state.
2823 * @param off The code buffer offset.
2824 * @param idxInstr The current instruction.
2825 */
2826DECL_INLINE_THROW(uint32_t)
2827iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2828{
2829#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2830 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2831
2832 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2833 {
2834#endif
2835 /*
2836 * Make sure we don't have any outstanding guest register writes as we may
2837      * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2838 */
2839 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2840 off = iemNativeRegFlushPendingWrites(pReNative, off);
2841
2842#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2843 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2844#else
2845 RT_NOREF(idxInstr);
2846#endif
2847
2848 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2849 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2850 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2851 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2852 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2853
2854 /*
2855 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2856 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2857 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2858 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2859 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2860 * { likely }
2861 * else { goto RaiseAvxRelated; }
2862 */
2863#ifdef RT_ARCH_AMD64
2864 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2865                | (((cr4 >> X86_CR4_OSXSAVE_BIT) & 1) << 1)
2866 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2867 ^ 0x1a) ) { likely }
2868 else { goto RaiseAvxRelated; } */
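    /* The expected 'all good' value is 0x1a: with the bit layout built below
       (CR0.TS in bit 0, CR4.OSXSAVE in bit 1, SSE in bit 3, YMM in bit 4) we want
       TS clear and the other three set, i.e. ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2 == 0x1a. */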
2869 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
2870 //pCodeBuf[off++] = 0xcc;
2871 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2872 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2873 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2874 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2875 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2876 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2877 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2878 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2879 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2880 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2881 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_RaiseAvxRelated, kIemNativeInstrCond_ne);
2882
2883#elif defined(RT_ARCH_ARM64)
2884     /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSXSAVE_BIT) & 1)) ^ 7) << 1)
2885 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
2886 else { goto RaiseAvxRelated; } */
2887 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
2888 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2889 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2890 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2891 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2892 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2893 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2894 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2895 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2896 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2897 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2898 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2899 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
2900 kIemNativeLabelType_RaiseAvxRelated);
2901
2902#else
2903# error "Port me!"
2904#endif
2905
2906 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2907 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2908 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2909 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2910#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2911 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2912 }
2913 else
2914 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2915#endif
2916
2917 return off;
2918}
2919
2920
2921#define IEM_MC_RAISE_DIVIDE_ERROR() \
2922 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
2923
2924/**
2925 * Emits code to raise a \#DE.
2926 *
2927 * @returns New code buffer offset, UINT32_MAX on failure.
2928 * @param pReNative The native recompile state.
2929 * @param off The code buffer offset.
2930 * @param idxInstr The current instruction.
2931 */
2932DECL_INLINE_THROW(uint32_t)
2933iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2934{
2935 /*
2936      * Make sure we don't have any outstanding guest register writes as we may
2937      * raise a #DE and all guest registers must be up to date in CPUMCTX. */
2938 off = iemNativeRegFlushPendingWrites(pReNative, off);
2939
2940#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2941 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2942#else
2943 RT_NOREF(idxInstr);
2944#endif
2945
2946 /* raise \#DE exception unconditionally. */
2947 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_RaiseDe);
2948}
2949
2950
2951#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2952 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2953
2954/**
2955 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2956 *
2957 * @returns New code buffer offset, UINT32_MAX on failure.
2958 * @param pReNative The native recompile state.
2959 * @param off The code buffer offset.
2960 * @param idxInstr The current instruction.
2961 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2962 * @param cbAlign The alignment in bytes to check against.
2963 */
2964DECL_INLINE_THROW(uint32_t)
2965iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
2966 uint8_t idxVarEffAddr, uint8_t cbAlign)
2967{
2968 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2969 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2970
2971 /*
2972 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2973 */
2974 off = iemNativeRegFlushPendingWrites(pReNative, off);
2975
2976#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2977 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2978#else
2979 RT_NOREF(idxInstr);
2980#endif
2981
2982 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2983
2984 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxVarReg, cbAlign - 1,
2985 kIemNativeLabelType_RaiseGp0);
2986
2987 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2988 return off;
2989}
2990
2991
2992/*********************************************************************************************************************************
2993* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2994*********************************************************************************************************************************/
2995
2996/**
2997 * Pushes an IEM_MC_IF_XXX onto the condition stack.
2998 *
2999 * @returns Pointer to the condition stack entry on success, NULL on failure
3000 * (too many nestings)
3001 */
3002DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
3003{
3004 uint32_t const idxStack = pReNative->cCondDepth;
3005 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
3006
3007 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
3008 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
3009
3010 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
3011 pEntry->fInElse = false;
3012 pEntry->fIfExitTb = false;
3013 pEntry->fElseExitTb = false;
3014 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
3015 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
3016
3017 return pEntry;
3018}
3019
3020
3021/**
3022 * Start of the if-block, snapshotting the register and variable state.
3023 */
3024DECL_INLINE_THROW(void)
3025iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
3026{
3027 Assert(offIfBlock != UINT32_MAX);
3028 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3029 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3030 Assert(!pEntry->fInElse);
3031
3032     /* Define the start of the IF block if requested or for disassembly purposes. */
3033 if (idxLabelIf != UINT32_MAX)
3034 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
3035#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3036 else
3037 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
3038#else
3039 RT_NOREF(offIfBlock);
3040#endif
3041
3042 /* Copy the initial state so we can restore it in the 'else' block. */
3043 pEntry->InitialState = pReNative->Core;
3044}
3045
3046
3047#define IEM_MC_ELSE() } while (0); \
3048 off = iemNativeEmitElse(pReNative, off); \
3049 do {
3050
3051/** Emits code related to IEM_MC_ELSE. */
3052DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3053{
3054 /* Check sanity and get the conditional stack entry. */
3055 Assert(off != UINT32_MAX);
3056 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3057 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3058 Assert(!pEntry->fInElse);
3059
3060     /* We can skip the dirty register flushing and the jump to the endif if
3061        the branch already jumped to a TB exit. */
3062 if (!pEntry->fIfExitTb)
3063 {
3064#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) && 0
3065 /* Writeback any dirty shadow registers. */
3066 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
3067 * in one of the branches and leave guest registers already dirty before the start of the if
3068 * block alone. */
3069 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3070#endif
3071
3072 /* Jump to the endif. */
3073 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
3074 }
3075# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3076 else
3077 Assert(pReNative->Core.offPc == 0);
3078# endif
3079
3080 /* Define the else label and enter the else part of the condition. */
3081 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3082 pEntry->fInElse = true;
3083
3084 /* Snapshot the core state so we can do a merge at the endif and restore
3085 the snapshot we took at the start of the if-block. */
3086 pEntry->IfFinalState = pReNative->Core;
3087 pReNative->Core = pEntry->InitialState;
3088
3089 return off;
3090}
3091
3092
3093#define IEM_MC_ENDIF() } while (0); \
3094 off = iemNativeEmitEndIf(pReNative, off)
3095
3096/** Emits code related to IEM_MC_ENDIF. */
3097DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3098{
3099 /* Check sanity and get the conditional stack entry. */
3100 Assert(off != UINT32_MAX);
3101 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3102 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3103
3104#if defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) && 0
3105 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3106#endif
3107
3108 /*
3109 * If either of the branches exited the TB, we can take the state from the
3110 * other branch and skip all the merging headache.
3111 */
3112 bool fDefinedLabels = false;
3113 if (pEntry->fElseExitTb || pEntry->fIfExitTb)
3114 {
3115#ifdef VBOX_STRICT
3116 Assert(pReNative->cCondDepth == 1); /* Assuming this only happens in simple conditional structures. */
3117         Assert(pEntry->fElseExitTb != pEntry->fIfExitTb); /* Assuming we don't have any code where both branches exit. */
3118 PCIEMNATIVECORESTATE const pExitCoreState = pEntry->fIfExitTb && pEntry->fInElse
3119 ? &pEntry->IfFinalState : &pReNative->Core;
3120# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3121 Assert(pExitCoreState->bmGstRegShadowDirty == 0);
3122# endif
3123# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3124 Assert(pExitCoreState->offPc == 0);
3125# endif
3126 RT_NOREF(pExitCoreState);
3127#endif
3128
3129 if (!pEntry->fIfExitTb)
3130 {
3131 Assert(pEntry->fInElse);
3132 pReNative->Core = pEntry->IfFinalState;
3133 }
3134 }
3135 else
3136 {
3137 /*
3138          * Now we have to find common ground with the core state at the end of
3139          * the other branch. Use the lowest common denominator and just drop
3140          * anything that isn't the same in both states.
3141 */
3142 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
3143 * which is why we're doing this at the end of the else-block.
3144 * But we'd need more info about future for that to be worth the effort. */
3145 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
3146#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3147 AssertMsgStmt(pReNative->Core.offPc == pOther->offPc,
3148 ("Core.offPc=%#RX64 pOther->offPc=%#RX64\n", pReNative->Core.offPc, pOther->offPc),
3149 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3150#endif
3151
3152 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
3153 {
3154#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3155 /*
3156              * If the branches differ in their dirty shadow registers, we flush the
3157              * registers that are dirty only in the current branch here and flush the
3158              * ones that are dirty only in the other branch in the tail code further down.
3159 */
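             /* Example: if EAX is dirty only in this branch and EBX only in the
                other one, EAX is written back right here while EBX is written back
                in the small tail block further down, which only the other branch
                ever reaches. */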
3160 uint64_t const fGstRegDirtyOther = pOther->bmGstRegShadowDirty;
3161 uint64_t const fGstRegDirtyThis = pReNative->Core.bmGstRegShadowDirty;
3162 uint64_t const fGstRegDirtyDiff = fGstRegDirtyOther ^ fGstRegDirtyThis;
3163 uint64_t const fGstRegDirtyHead = fGstRegDirtyThis & fGstRegDirtyDiff;
3164 uint64_t fGstRegDirtyTail = fGstRegDirtyOther & fGstRegDirtyDiff;
3165 if (!fGstRegDirtyDiff)
3166 { /* likely */ }
3167 else
3168 {
3169 //uint64_t const fGstRegDirtyHead = pReNative->Core.bmGstRegShadowDirty & fGstRegDirtyDiff;
3170 if (fGstRegDirtyHead)
3171 {
3172 Log12(("iemNativeEmitEndIf: flushing dirty guest registers in current branch: %RX64\n", fGstRegDirtyHead));
3173 off = iemNativeRegFlushDirtyGuest(pReNative, off, fGstRegDirtyHead);
3174 }
3175 }
3176#endif
3177
3178 /*
3179 * Shadowed guest registers.
3180 *
3181              * We drop any shadows where the two states disagree about where
3182              * things are kept.  We may end up flushing more dirty registers
3183              * here if the two branches keep things in different host registers.
3184 */
3185 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
3186 if (fGstRegs)
3187 {
3188 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
3189 do
3190 {
3191 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3192 fGstRegs &= ~RT_BIT_64(idxGstReg);
3193
3194 uint8_t const idxCurHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3195 uint8_t const idxOtherHstReg = pOther->aidxGstRegShadows[idxGstReg];
3196 if ( idxCurHstReg != idxOtherHstReg
3197 || !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg)))
3198 {
3199#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3200 Log12(("iemNativeEmitEndIf: dropping gst %s (%d) from hst %s (other %d/%#RX64)\n",
3201 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3202 idxOtherHstReg, pOther->bmGstRegShadows));
3203#else
3204 Log12(("iemNativeEmitEndIf: dropping %s gst %s (%d) from hst %s (other %d/%#RX64/%s)\n",
3205 pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "_dirty_" : "clean",
3206 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3207 idxOtherHstReg, pOther->bmGstRegShadows,
3208 pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "dirty" : "clean"));
3209 if (pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3210 fGstRegDirtyTail |= RT_BIT_64(idxGstReg);
3211 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3212 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3213#endif
3214 iemNativeRegClearGstRegShadowingOne(pReNative, idxCurHstReg, (IEMNATIVEGSTREG)idxGstReg, off);
3215 }
3216 } while (fGstRegs);
3217 }
3218 else
3219 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
3220
3221#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3222 /*
3223 * Generate jumpy code for flushing dirty registers from the other
3224 * branch that aren't dirty in the current one.
3225 */
3226 if (!fGstRegDirtyTail)
3227 { /* likely */ }
3228 else
3229 {
3230 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEndIfOtherBranchDirty);
3231 Log12(("iemNativeEmitEndIf: Dirty register only in the other branch: %#RX64 - BAD!\n", fGstRegDirtyTail));
3232
3233 /* First the current branch has to jump over the dirty flushing from the other branch. */
3234 uint32_t const offFixup1 = off;
3235 off = iemNativeEmitJmpToFixed(pReNative, off, off + 10);
3236
3237 /* Put the endif and maybe else label here so the other branch ends up here. */
3238 if (!pEntry->fInElse)
3239 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3240 else
3241 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3242 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3243 fDefinedLabels = true;
3244
3245 /* Flush the dirty guest registers from the other branch. */
3246 while (fGstRegDirtyTail)
3247 {
3248 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegDirtyTail) - 1;
3249 fGstRegDirtyTail &= ~RT_BIT_64(idxGstReg);
3250 Log12(("iemNativeEmitEndIf: tail flushing %s (%d) from other branch %d (cur %d/%#RX64)\n",
3251 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, pOther->aidxGstRegShadows[idxGstReg],
3252 pReNative->Core.aidxGstRegShadows[idxGstReg], pReNative->Core.bmGstRegShadows));
3253
3254 off = iemNativeRegFlushPendingWriteEx(pReNative, off, (PIEMNATIVECORESTATE)pOther, (IEMNATIVEGSTREG)idxGstReg);
3255
3256 /* Mismatching shadowing should've been dropped in the previous step already. */
3257 Assert( !(pReNative->Core.bmGstRegShadows & RT_BIT_64(idxGstReg))
3258 || pReNative->Core.aidxGstRegShadows[idxGstReg] == pOther->aidxGstRegShadows[idxGstReg]);
3259 }
3260
3261 /* Here is the actual endif label, fixup the above jump to land here. */
3262 iemNativeFixupFixedJump(pReNative, offFixup1, off);
3263 }
3264#endif
3265
3266 /*
3267              * Check variables next. For now we require them to be identical or
3268              * something we can recreate. (No code is emitted here.)
3269 */
3270 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
3271#ifdef VBOX_STRICT
3272 uint32_t const offAssert = off;
3273#endif
3274 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
3275 if (fVars)
3276 {
3277 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
3278 do
3279 {
3280 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
3281 fVars &= ~RT_BIT_32(idxVar);
3282
3283 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
3284 {
3285 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
3286 continue;
3287 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3288 {
3289 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3290 if (idxHstReg != UINT8_MAX)
3291 {
3292 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3293 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3294 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
3295 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3296 }
3297 continue;
3298 }
3299 }
3300 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
3301 continue;
3302
3303 /* Irreconcilable, so drop it. */
3304 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3305 if (idxHstReg != UINT8_MAX)
3306 {
3307 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3308 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3309 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
3310 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3311 }
3312 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3313 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
3314 } while (fVars);
3315 }
3316 Assert(off == offAssert);
3317
3318 /*
3319 * Finally, check that the host register allocations matches.
3320 */
3321 AssertMsgStmt((pReNative->Core.bmHstRegs & (pReNative->Core.bmHstRegs ^ pOther->bmHstRegs)) == 0,
3322 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
3323 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
3324 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3325 }
3326 }
3327
3328 /*
3329 * Define the endif label and maybe the else one if we're still in the 'if' part.
3330 */
3331 if (!fDefinedLabels)
3332 {
3333 if (!pEntry->fInElse)
3334 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3335 else
3336 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3337 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3338 }
3339
3340     /* Pop the conditional stack. */
3341 pReNative->cCondDepth -= 1;
3342
3343 return off;
3344}
3345
3346
3347#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
3348 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
3349 do {
3350
3351/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
3352DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
3353{
3354 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3355 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3356
3357 /* Get the eflags. */
3358 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3359 kIemNativeGstRegUse_ReadOnly);
3360
3361 /* Test and jump. */
3362 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3363
3364 /* Free but don't flush the EFlags register. */
3365 iemNativeRegFreeTmp(pReNative, idxEflReg);
3366
3367 /* Make a copy of the core state now as we start the if-block. */
3368 iemNativeCondStartIfBlock(pReNative, off);
3369
3370 return off;
3371}
3372
3373
3374#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
3375 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
3376 do {
3377
3378/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
3379DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
3380{
3381 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3382 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3383
3384 /* Get the eflags. */
3385 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3386 kIemNativeGstRegUse_ReadOnly);
3387
3388 /* Test and jump. */
3389 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3390
3391 /* Free but don't flush the EFlags register. */
3392 iemNativeRegFreeTmp(pReNative, idxEflReg);
3393
3394 /* Make a copy of the core state now as we start the if-block. */
3395 iemNativeCondStartIfBlock(pReNative, off);
3396
3397 return off;
3398}
3399
3400
3401#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
3402 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
3403 do {
3404
3405/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
3406DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
3407{
3408 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3409 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3410
3411 /* Get the eflags. */
3412 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3413 kIemNativeGstRegUse_ReadOnly);
3414
3415 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3416 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3417
3418 /* Test and jump. */
3419 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3420
3421 /* Free but don't flush the EFlags register. */
3422 iemNativeRegFreeTmp(pReNative, idxEflReg);
3423
3424 /* Make a copy of the core state now as we start the if-block. */
3425 iemNativeCondStartIfBlock(pReNative, off);
3426
3427 return off;
3428}
3429
3430
3431#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
3432 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
3433 do {
3434
3435/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
3436DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
3437{
3438 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3439 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3440
3441 /* Get the eflags. */
3442 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3443 kIemNativeGstRegUse_ReadOnly);
3444
3445 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3446 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3447
3448 /* Test and jump. */
3449 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3450
3451 /* Free but don't flush the EFlags register. */
3452 iemNativeRegFreeTmp(pReNative, idxEflReg);
3453
3454 /* Make a copy of the core state now as we start the if-block. */
3455 iemNativeCondStartIfBlock(pReNative, off);
3456
3457 return off;
3458}
3459
3460
3461#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
3462 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
3463 do {
3464
3465#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
3466 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
3467 do {
3468
3469/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
3470DECL_INLINE_THROW(uint32_t)
3471iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3472 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
3473{
3474 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
3475 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3476
3477 /* Get the eflags. */
3478 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3479 kIemNativeGstRegUse_ReadOnly);
3480
3481 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
3482 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
3483
3484 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
3485 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
3486 Assert(iBitNo1 != iBitNo2);
3487
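    /* Strategy for both architectures: isolate flag bit #1, shift it onto the
       position of flag bit #2 and XOR it with EFLAGS; bit #2 of the temporary
       register is then set exactly when the two flags differ. */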
3488#ifdef RT_ARCH_AMD64
3489 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
3490
3491 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3492 if (iBitNo1 > iBitNo2)
3493 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3494 else
3495 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3496 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3497
3498#elif defined(RT_ARCH_ARM64)
3499 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3500 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3501
3502 /* and tmpreg, eflreg, #1<<iBitNo1 */
3503 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3504
3505     /* eor tmpreg, eflreg, tmpreg, LSL/LSR #abs(iBitNo2 - iBitNo1) */
3506 if (iBitNo1 > iBitNo2)
3507 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3508 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3509 else
3510 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3511 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3512
3513 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3514
3515#else
3516# error "Port me"
3517#endif
3518
3519 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3520 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3521 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3522
3523 /* Free but don't flush the EFlags and tmp registers. */
3524 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3525 iemNativeRegFreeTmp(pReNative, idxEflReg);
3526
3527 /* Make a copy of the core state now as we start the if-block. */
3528 iemNativeCondStartIfBlock(pReNative, off);
3529
3530 return off;
3531}
3532
3533
3534#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
3535 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
3536 do {
3537
3538#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
3539 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
3540 do {
3541
3542/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
3543 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
3544DECL_INLINE_THROW(uint32_t)
3545iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
3546 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
3547{
3548 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
3549 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3550
3551     /* We need an if-block label for the inverted variant. */
3552 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
3553 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
3554
3555 /* Get the eflags. */
3556 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3557 kIemNativeGstRegUse_ReadOnly);
3558
3559 /* Translate the flag masks to bit numbers. */
3560 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3561 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3562
3563 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
3564 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
3565 Assert(iBitNo1 != iBitNo);
3566
3567 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
3568 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
3569 Assert(iBitNo2 != iBitNo);
3570 Assert(iBitNo2 != iBitNo1);
3571
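    /* This uses the same bit-aligning XOR trick as iemNativeEmitIfEflagsTwoBitsEqual
       above, preceded by a short-circuit test of the lone flag bit. */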
3572#ifdef RT_ARCH_AMD64
3573 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
3574#elif defined(RT_ARCH_ARM64)
3575 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3576#endif
3577
3578 /* Check for the lone bit first. */
3579 if (!fInverted)
3580 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3581 else
3582 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
3583
3584 /* Then extract and compare the other two bits. */
3585#ifdef RT_ARCH_AMD64
3586 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3587 if (iBitNo1 > iBitNo2)
3588 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3589 else
3590 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3591 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3592
3593#elif defined(RT_ARCH_ARM64)
3594 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3595
3596 /* and tmpreg, eflreg, #1<<iBitNo1 */
3597 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3598
3599     /* eor tmpreg, eflreg, tmpreg, LSL/LSR #abs(iBitNo2 - iBitNo1) */
3600 if (iBitNo1 > iBitNo2)
3601 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3602 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3603 else
3604 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3605 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3606
3607 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3608
3609#else
3610# error "Port me"
3611#endif
3612
3613 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3614 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3615 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3616
3617 /* Free but don't flush the EFlags and tmp registers. */
3618 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3619 iemNativeRegFreeTmp(pReNative, idxEflReg);
3620
3621 /* Make a copy of the core state now as we start the if-block. */
3622 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
3623
3624 return off;
3625}
3626
3627
3628#define IEM_MC_IF_CX_IS_NZ() \
3629 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
3630 do {
3631
3632/** Emits code for IEM_MC_IF_CX_IS_NZ. */
3633DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3634{
3635 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3636
3637 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3638 kIemNativeGstRegUse_ReadOnly);
3639 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
3640 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3641
3642 iemNativeCondStartIfBlock(pReNative, off);
3643 return off;
3644}
3645
3646
3647#define IEM_MC_IF_ECX_IS_NZ() \
3648 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
3649 do {
3650
3651#define IEM_MC_IF_RCX_IS_NZ() \
3652 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
3653 do {
3654
3655/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
3656DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3657{
3658 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3659
3660 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3661 kIemNativeGstRegUse_ReadOnly);
3662 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
3663 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3664
3665 iemNativeCondStartIfBlock(pReNative, off);
3666 return off;
3667}
3668
3669
3670#define IEM_MC_IF_CX_IS_NOT_ONE() \
3671 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
3672 do {
3673
3674/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
3675DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3676{
3677 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3678
3679 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3680 kIemNativeGstRegUse_ReadOnly);
3681#ifdef RT_ARCH_AMD64
3682 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3683#else
3684 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3685 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3686 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3687#endif
3688 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3689
3690 iemNativeCondStartIfBlock(pReNative, off);
3691 return off;
3692}
3693
3694
3695#define IEM_MC_IF_ECX_IS_NOT_ONE() \
3696 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
3697 do {
3698
3699#define IEM_MC_IF_RCX_IS_NOT_ONE() \
3700 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
3701 do {
3702
3703/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
3704DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3705{
3706 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3707
3708 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3709 kIemNativeGstRegUse_ReadOnly);
3710 if (f64Bit)
3711 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3712 else
3713 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3714 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3715
3716 iemNativeCondStartIfBlock(pReNative, off);
3717 return off;
3718}
3719
3720
3721#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3722 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
3723 do {
3724
3725#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3726 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
3727 do {
3728
3729/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
3730 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3731DECL_INLINE_THROW(uint32_t)
3732iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
3733{
3734 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3735 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3736
3737 /* We have to load both RCX and EFLAGS before we can start branching,
3738 otherwise we'll end up in the else-block with an inconsistent
3739 register allocator state.
3740 Doing EFLAGS first as it's more likely to be loaded, right? */
3741 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3742 kIemNativeGstRegUse_ReadOnly);
3743 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3744 kIemNativeGstRegUse_ReadOnly);
3745
3746 /** @todo we could reduce this to a single branch instruction by spending a
3747 * temporary register and some setnz stuff. Not sure if loops are
3748 * worth it. */
3749 /* Check CX. */
3750#ifdef RT_ARCH_AMD64
3751 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3752#else
3753 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3754 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3755 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3756#endif
3757
3758 /* Check the EFlags bit. */
3759 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3760 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3761 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3762 !fCheckIfSet /*fJmpIfSet*/);
3763
3764 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3765 iemNativeRegFreeTmp(pReNative, idxEflReg);
3766
3767 iemNativeCondStartIfBlock(pReNative, off);
3768 return off;
3769}
3770
3771
3772#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3773 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
3774 do {
3775
3776#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3777 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
3778 do {
3779
3780#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3781 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
3782 do {
3783
3784#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3785 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
3786 do {
3787
3788/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3789 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3790 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3791 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3792DECL_INLINE_THROW(uint32_t)
3793iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3794 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
3795{
3796 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
3797 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3798
3799 /* We have to load both RCX and EFLAGS before we can start branching,
3800 otherwise we'll end up in the else-block with an inconsistent
3801 register allocator state.
3802 Doing EFLAGS first as it's more likely to be loaded, right? */
3803 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
3804 kIemNativeGstRegUse_ReadOnly);
3805 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3806 kIemNativeGstRegUse_ReadOnly);
3807
3808 /** @todo we could reduce this to a single branch instruction by spending a
3809 * temporary register and some setnz stuff. Not sure if loops are
3810 * worth it. */
3811 /* Check RCX/ECX. */
3812 if (f64Bit)
3813 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3814 else
3815 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3816
3817 /* Check the EFlags bit. */
3818 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
3819 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
3820 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3821 !fCheckIfSet /*fJmpIfSet*/);
3822
3823 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3824 iemNativeRegFreeTmp(pReNative, idxEflReg);
3825
3826 iemNativeCondStartIfBlock(pReNative, off);
3827 return off;
3828}
3829
3830
3831#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3832 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3833 do {
3834
3835/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3836DECL_INLINE_THROW(uint32_t)
3837iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3838{
3839 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3840
3841 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3842 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3843 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3844 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3845
3846 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3847
3848 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3849
3850 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3851
3852 iemNativeCondStartIfBlock(pReNative, off);
3853 return off;
3854}
3855
3856
3857#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3858 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3859 do {
3860
3861/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3862DECL_INLINE_THROW(uint32_t)
3863iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3864{
3865 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3866 Assert(iGReg < 16);
3867
3868 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3869 kIemNativeGstRegUse_ReadOnly);
3870
3871 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3872
3873 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3874
3875 iemNativeCondStartIfBlock(pReNative, off);
3876 return off;
3877}
3878
3879
3880
3881/*********************************************************************************************************************************
3882* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3883*********************************************************************************************************************************/
3884
3885#define IEM_MC_NOREF(a_Name) \
3886 RT_NOREF_PV(a_Name)
3887
3888#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3889 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3890
3891#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3892 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3893
3894#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3895 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3896
3897#define IEM_MC_LOCAL(a_Type, a_Name) \
3898 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3899
3900#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3901 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3902
3903#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3904 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
3905
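/* Rough usage sketch (a made-up MC block, not lifted from any actual instruction):
 *
 *      IEM_MC_LOCAL(uint32_t, u32Tmp);
 *      IEM_MC_ARG_LOCAL_REF(uint32_t *, pu32Tmp, u32Tmp, 0);
 *      IEM_MC_ARG(uint16_t, u16Src, 1);
 *      IEM_MC_ARG_CONST(uint8_t, cShiftArg, 1, 2);
 *      ...
 *      IEM_MC_FREE_LOCAL(u32Tmp);
 *
 * Each name expands to a uint8_t holding a (packed) index into pReNative->Core.aVars rather
 * than a host register; host registers are only acquired when an emitter actually needs the
 * value (iemNativeVarRegisterAcquire & friends). */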
3906
3907/**
3908 * Sets the host register for @a idxVarRc to @a idxReg.
3909 *
3910 * Any guest register shadowing will be implicitly dropped by this call.
3911 *
3912 * The variable must not have any register associated with it (causes
3913 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3914 * implied.
3915 *
3916 * @returns idxReg
3917 * @param pReNative The recompiler state.
3918 * @param idxVar The variable.
3919 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3920 * @param off For recording in debug info.
3921 * @param fAllocated Set if the register is already allocated, false if not.
3922 *
3923 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
3924 */
3925DECL_INLINE_THROW(uint8_t)
3926iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off, bool fAllocated)
3927{
3928 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3929 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3930 Assert(!pVar->fRegAcquired);
3931 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3932 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
3933 AssertStmt(RT_BOOL(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)) == fAllocated,
3934 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3935
3936 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3937 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3938
3939 iemNativeVarSetKindToStack(pReNative, idxVar);
3940 pVar->idxReg = idxReg;
3941
3942 return idxReg;
3943}
3944
3945
3946/**
3947 * A convenient helper function.
3948 */
3949DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
3950 uint8_t idxReg, uint32_t *poff)
3951{
3952 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff, false /*fAllocated*/);
3953 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
3954 return idxReg;
3955}
3956
3957
3958/**
3959 * This is called by IEM_MC_END() to clean up all variables.
3960 */
3961DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
3962{
3963 uint32_t const bmVars = pReNative->Core.bmVars;
3964 if (bmVars != 0)
3965 iemNativeVarFreeAllSlow(pReNative, bmVars);
3966 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
3967 Assert(pReNative->Core.bmStack == 0);
3968}
3969
3970
3971#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
3972
3973/**
3974 * This is called by IEM_MC_FREE_LOCAL.
3975 */
3976DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3977{
3978 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3979 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
3980 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3981}
3982
3983
3984#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
3985
3986/**
3987 * This is called by IEM_MC_FREE_ARG.
3988 */
3989DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
3990{
3991 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3992 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
3993 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
3994}
3995
3996
3997#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
3998
3999/**
4000 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
4001 */
4002DECL_INLINE_THROW(uint32_t)
4003iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
4004{
4005 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
4006 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
4007 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4008 Assert( pVarDst->cbVar == sizeof(uint16_t)
4009 || pVarDst->cbVar == sizeof(uint32_t));
4010
4011 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
4012 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
4013 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
4014 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
4015 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4016
4017 Assert(pVarDst->cbVar < pVarSrc->cbVar);
4018
4019 /*
4020 * Special case for immediates.
4021 */
4022 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
4023 {
4024 switch (pVarDst->cbVar)
4025 {
4026 case sizeof(uint16_t):
4027 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
4028 break;
4029 case sizeof(uint32_t):
4030 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
4031 break;
4032 default: AssertFailed(); break;
4033 }
4034 }
4035 else
4036 {
4037 /*
4038 * The generic solution for now.
4039 */
4040 /** @todo optimize this by having the python script make sure the source
4041 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
4042 * statement. Then we could just transfer the register assignments. */
4043 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
4044 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
4045 switch (pVarDst->cbVar)
4046 {
4047 case sizeof(uint16_t):
4048 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
4049 break;
4050 case sizeof(uint32_t):
4051 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
4052 break;
4053 default: AssertFailed(); break;
4054 }
4055 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
4056 iemNativeVarRegisterRelease(pReNative, idxVarDst);
4057 }
4058 return off;
4059}
4060
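/* E.g. IEM_MC_ASSIGN_TO_SMALLER(u16Dst, u32Src) above either re-types the destination as
   the truncated constant (immediate source) or emits a plain low 16-bit register copy
   (stack source).  The variable names are illustrative only. */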
4061
4062
4063/*********************************************************************************************************************************
4064* Emitters for IEM_MC_CALL_CIMPL_XXX *
4065*********************************************************************************************************************************/
4066
4067/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
4068DECL_INLINE_THROW(uint32_t)
4069iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
4070 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
4071
4072{
4073 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
4074
4075#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4076 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
4077       when a call clobbers any of the relevant control registers. */
4078# if 1
4079 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
4080 {
4081 /* Likely as long as call+ret are done via cimpl. */
4082 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
4083 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
4084 }
4085 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
4086 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4087 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
4088 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4089 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
4090 else
4091 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4092 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4093 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4094
4095# else
4096 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
4097 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4098 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
4099 pReNative->fSimdRaiseXcptChecksEmitted = 0;
4100 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
4101 || pfnCImpl == (uintptr_t)iemCImpl_callf
4102 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
4103 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
4104 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4105 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4106 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4107# endif
4108
4109# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
4110 /* Mark the host floating point control register as not synced if MXCSR is modified. */
4111 if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_MxCsr))
4112 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
4113# endif
4114#endif
4115
4116 /*
4117 * Do all the call setup and cleanup.
4118 */
4119 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
4120
4121 /*
4122 * Load the two or three hidden arguments.
4123 */
4124#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4125 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4126 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4127 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
4128#else
4129 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4130 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
4131#endif
4132
4133 /*
4134 * Make the call and check the return code.
4135 *
4136     * Shadow PC copies are always flushed here; other stuff depends on the flags.
4137     * Segment and general purpose registers are explicitly flushed via the
4138 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
4139 * macros.
4140 */
4141 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
4142#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4143 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4144#endif
4145 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
4146 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
4147 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
4148 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
4149
4150#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
4151 pReNative->Core.fDebugPcInitialized = false;
4152 Log4(("fDebugPcInitialized=false cimpl off=%#x (v1)\n", off));
4153#endif
4154
4155 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
4156}
4157
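/* In the common (non-Windows-AMD64 VBOXSTRICTRC) configuration, the code emitted by
   iemNativeEmitCallCImplCommon boils down to roughly:
        - iemNativeEmitCallCommon loads the IEM_MC_ARG values into the 3rd and later
          call argument registers (the first two slots being the hidden arguments),
        - pVCpu and cbInstr are loaded into the first two argument registers,
        - pfnCImpl is called,
        - the return status is checked and non-zero statuses are passed up via
          iemNativeEmitCheckCallRetAndPassUp.
   The Windows/AMD64 strict variant additionally passes a stack slot for the VBOXSTRICTRC
   return value as the first argument and reloads it after the call. */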
4158
4159#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
4160 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
4161
4162/** Emits code for IEM_MC_CALL_CIMPL_1. */
4163DECL_INLINE_THROW(uint32_t)
4164iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4165 uintptr_t pfnCImpl, uint8_t idxArg0)
4166{
4167 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4168 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
4169}
4170
4171
4172#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
4173 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
4174
4175/** Emits code for IEM_MC_CALL_CIMPL_2. */
4176DECL_INLINE_THROW(uint32_t)
4177iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4178 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
4179{
4180 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4181 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4182 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
4183}
4184
4185
4186#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
4187 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4188 (uintptr_t)a_pfnCImpl, a0, a1, a2)
4189
4190/** Emits code for IEM_MC_CALL_CIMPL_3. */
4191DECL_INLINE_THROW(uint32_t)
4192iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4193 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4194{
4195 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4196 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4197 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4198 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
4199}
4200
4201
4202#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
4203 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4204 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
4205
4206/** Emits code for IEM_MC_CALL_CIMPL_4. */
4207DECL_INLINE_THROW(uint32_t)
4208iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4209 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4210{
4211 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4212 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4213 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4214 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4215 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
4216}
4217
4218
4219#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
4220 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4221 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
4222
4223/** Emits code for IEM_MC_CALL_CIMPL_5. */
4224DECL_INLINE_THROW(uint32_t)
4225iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4226 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
4227{
4228 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4229 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4230 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4231 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4232 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
4233 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
4234}
4235
4236
4237/** Recompiler debugging: Flush guest register shadow copies. */
4238#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
4239
4240
4241
4242/*********************************************************************************************************************************
4243* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
4244*********************************************************************************************************************************/
4245
4246/**
4247 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
4248 */
4249DECL_INLINE_THROW(uint32_t)
4250iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4251 uintptr_t pfnAImpl, uint8_t cArgs)
4252{
4253 if (idxVarRc != UINT8_MAX)
4254 {
4255 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
4256 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
4257 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
4258 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
4259 }
4260
4261 /*
4262 * Do all the call setup and cleanup.
4263 *
4264 * It is only required to flush pending guest register writes in call volatile registers as
4265 * assembly helpers can't throw and don't access anything living in CPUMCTX, they only
4266     * assembly helpers can't throw and don't access anything living in CPUMCTX; they only
4267     * access their parameters. The flushing of call volatile registers is always done by
4268     * iemNativeEmitCallCommon() regardless of the fFlushPendingWrites parameter.
4269 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
4270
4271 /*
4272 * Make the call and update the return code variable if we've got one.
4273 */
4274 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
4275 if (idxVarRc != UINT8_MAX)
4276 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off, false /*fAllocated*/);
4277
4278 return off;
4279}
4280
4281
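/* Typical use from generated code - a hypothetical three operand assembly helper
   returning a status/flags value in a local:
        IEM_MC_CALL_AIMPL_3(uint32_t, fEFlagsRet, iemAImpl_xxx_u32, pu32Dst, u32Src, fEFlagsIn);
   The macro declares the fEFlagsRet local and routes everything through
   iemNativeEmitCallAImplCommon above, which binds the local to the call return register
   afterwards.  All names in this sketch are illustrative only. */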
4282
4283#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
4284 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
4285
4286#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
4287 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
4288
4289/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
4290DECL_INLINE_THROW(uint32_t)
4291iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
4292{
4293 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
4294}
4295
4296
4297#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
4298 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
4299
4300#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
4301 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
4302
4303/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
4304DECL_INLINE_THROW(uint32_t)
4305iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
4306{
4307 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4308 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
4309}
4310
4311
4312#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
4313 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
4314
4315#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
4316 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
4317
4318/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
4319DECL_INLINE_THROW(uint32_t)
4320iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4321 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
4322{
4323 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4324 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4325 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
4326}
4327
4328
4329#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
4330 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
4331
4332#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
4333 IEM_MC_LOCAL(a_rcType, a_rc); \
4334 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
4335
4336/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
4337DECL_INLINE_THROW(uint32_t)
4338iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4339 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4340{
4341 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4342 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4343 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4344 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
4345}
4346
4347
4348#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
4349 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4350
4351#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
4352 IEM_MC_LOCAL(a_rcType, a_rc); \
4353 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4354
4355/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
4356DECL_INLINE_THROW(uint32_t)
4357iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4358 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4359{
4360 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4361 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4362 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4363 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
4364 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
4365}
4366
4367
4368
4369/*********************************************************************************************************************************
4370* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
4371*********************************************************************************************************************************/
4372
4373#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
4374 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
4375
4376#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4377 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
4378
4379#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4380 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
4381
4382#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4383 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
4384
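/* Note: the threaded a_iGRegEx encoding uses 0..15 for the low byte of the sixteen GPRs
   and 16..19 for AH, CH, DH and BH, which is why the emitters below mask with 15 when
   picking the guest register and switch between the Gpr8 and Gpr8Hi load helpers. */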
4385
4386/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
4387 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
4388DECL_INLINE_THROW(uint32_t)
4389iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
4390{
4391 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4392 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4393 Assert(iGRegEx < 20);
4394
4395 /* Same discussion as in iemNativeEmitFetchGregU16 */
4396 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4397 kIemNativeGstRegUse_ReadOnly);
4398
4399 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4400 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4401
4402 /* The value is zero-extended to the full 64-bit host register width. */
4403 if (iGRegEx < 16)
4404 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4405 else
4406 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4407
4408 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4409 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4410 return off;
4411}
4412
4413
4414#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4415 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
4416
4417#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4418 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
4419
4420#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4421 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
4422
4423/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
4424DECL_INLINE_THROW(uint32_t)
4425iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
4426{
4427 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4428 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4429 Assert(iGRegEx < 20);
4430
4431 /* Same discussion as in iemNativeEmitFetchGregU16 */
4432 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4433 kIemNativeGstRegUse_ReadOnly);
4434
4435 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4436 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4437
4438 if (iGRegEx < 16)
4439 {
4440 switch (cbSignExtended)
4441 {
4442 case sizeof(uint16_t):
4443 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4444 break;
4445 case sizeof(uint32_t):
4446 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4447 break;
4448 case sizeof(uint64_t):
4449 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4450 break;
4451 default: AssertFailed(); break;
4452 }
4453 }
4454 else
4455 {
4456 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4457 switch (cbSignExtended)
4458 {
4459 case sizeof(uint16_t):
4460 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4461 break;
4462 case sizeof(uint32_t):
4463 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4464 break;
4465 case sizeof(uint64_t):
4466 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4467 break;
4468 default: AssertFailed(); break;
4469 }
4470 }
4471
4472 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4473 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4474 return off;
4475}
4476
4477
4478
4479#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
4480 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
4481
4482#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
4483 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4484
4485#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
4486 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4487
4488/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
4489DECL_INLINE_THROW(uint32_t)
4490iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4491{
4492 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4493 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4494 Assert(iGReg < 16);
4495
4496 /*
4497 * We can either just load the low 16-bit of the GPR into a host register
4498 * for the variable, or we can do so via a shadow copy host register. The
4499 * latter will avoid having to reload it if it's being stored later, but
4500 * will waste a host register if it isn't touched again. Since we don't
4501     * know what's going to happen, we choose the latter for now.
4502 */
4503 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4504 kIemNativeGstRegUse_ReadOnly);
4505
4506 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4507 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4508 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4509 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4510
4511 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4512 return off;
4513}
4514
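/* The emitter above relies on iemNativeEmitLoadGprFromGpr16 zero-extending the low 16 bits
   into the full host register, which is what allows the plain U16 fetch and the ZX_U32/U64
   variants to share it.  On most hosts this should be a single instruction (e.g. a movzx on
   AMD64 or an uxth on ARM64), though the exact encoding is up to that helper. */
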
4515#define IEM_MC_FETCH_GREG_I16(a_i16Dst, a_iGReg) \
4516 off = iemNativeEmitFetchGregI16(pReNative, off, a_i16Dst, a_iGReg)
4517
4518/** Emits code for IEM_MC_FETCH_GREG_I16. */
4519DECL_INLINE_THROW(uint32_t)
4520iemNativeEmitFetchGregI16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4521{
4522 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4523 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(int16_t));
4524 Assert(iGReg < 16);
4525
4526 /*
4527 * We can either just load the low 16-bit of the GPR into a host register
4528 * for the variable, or we can do so via a shadow copy host register. The
4529 * latter will avoid having to reload it if it's being stored later, but
4530 * will waste a host register if it isn't touched again. Since we don't
4531 * know what going to happen, we choose the latter for now.
4532     * know what's going to happen, we choose the latter for now.
4533 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4534 kIemNativeGstRegUse_ReadOnly);
4535
4536 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4537 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4538#ifdef RT_ARCH_AMD64
4539 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4540#elif defined(RT_ARCH_ARM64) /* Note! There are no 16-bit registers on ARM, we emulate that through 32-bit registers which requires sign extension. */
4541 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4542#endif
4543 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4544
4545 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4546 return off;
4547}
4548
4549
4550#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
4551 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4552
4553#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
4554 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4555
4556/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
4557DECL_INLINE_THROW(uint32_t)
4558iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
4559{
4560 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4561 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4562 Assert(iGReg < 16);
4563
4564 /*
4565 * We can either just load the low 16-bit of the GPR into a host register
4566 * for the variable, or we can do so via a shadow copy host register. The
4567 * latter will avoid having to reload it if it's being stored later, but
4568 * will waste a host register if it isn't touched again. Since we don't
4569 * know what going to happen, we choose the latter for now.
4570     * know what's going to happen, we choose the latter for now.
4571 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4572 kIemNativeGstRegUse_ReadOnly);
4573
4574 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4575 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4576 if (cbSignExtended == sizeof(uint32_t))
4577 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4578 else
4579 {
4580 Assert(cbSignExtended == sizeof(uint64_t));
4581 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4582 }
4583 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4584
4585 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4586 return off;
4587}
4588
4589
4590#define IEM_MC_FETCH_GREG_I32(a_i32Dst, a_iGReg) \
4591 off = iemNativeEmitFetchGregU32(pReNative, off, a_i32Dst, a_iGReg, sizeof(uint32_t))
4592
4593#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
4594 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
4595
4596#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
4597 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
4598
4599/** Emits code for IEM_MC_FETCH_GREG_I32, IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
4600DECL_INLINE_THROW(uint32_t)
4601iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4602{
4603 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4604 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4605 Assert(iGReg < 16);
4606
4607 /*
4608     * We can either just load the low 32-bit of the GPR into a host register
4609 * for the variable, or we can do so via a shadow copy host register. The
4610 * latter will avoid having to reload it if it's being stored later, but
4611 * will waste a host register if it isn't touched again. Since we don't
4612 * know what going to happen, we choose the latter for now.
4613     * know what's going to happen, we choose the latter for now.
4614 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4615 kIemNativeGstRegUse_ReadOnly);
4616
4617 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4618 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4619 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4620 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4621
4622 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4623 return off;
4624}
4625
4626
4627#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
4628 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
4629
4630/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
4631DECL_INLINE_THROW(uint32_t)
4632iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4633{
4634 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4635 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4636 Assert(iGReg < 16);
4637
4638 /*
4639 * We can either just load the low 32-bit of the GPR into a host register
4640 * for the variable, or we can do so via a shadow copy host register. The
4641 * latter will avoid having to reload it if it's being stored later, but
4642 * will waste a host register if it isn't touched again. Since we don't
4643 * know what going to happen, we choose the latter for now.
4644     * know what's going to happen, we choose the latter for now.
4645 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4646 kIemNativeGstRegUse_ReadOnly);
4647
4648 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4649 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4650 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4651 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4652
4653 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4654 return off;
4655}
4656
4657
4658#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
4659 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4660
4661#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
4662 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4663
4664/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
4665 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
4666DECL_INLINE_THROW(uint32_t)
4667iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4668{
4669 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4670 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4671 Assert(iGReg < 16);
4672
4673 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4674 kIemNativeGstRegUse_ReadOnly);
4675
4676 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4677 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4678 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
4679 /** @todo name the register a shadow one already? */
4680 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4681
4682 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4683 return off;
4684}
4685
4686
4687#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4688#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
4689 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
4690
4691/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
4692DECL_INLINE_THROW(uint32_t)
4693iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
4694{
4695 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4696 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4697 Assert(iGRegLo < 16 && iGRegHi < 16);
4698
4699 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4700 kIemNativeGstRegUse_ReadOnly);
4701 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4702 kIemNativeGstRegUse_ReadOnly);
4703
4704 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4705 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
4706 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
4707 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
4708
4709 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4710 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4711 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4712 return off;
4713}
4714#endif
4715
4716
4717/*********************************************************************************************************************************
4718* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
4719*********************************************************************************************************************************/
4720
4721#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
4722 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
4723
4724/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
4725DECL_INLINE_THROW(uint32_t)
4726iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
4727{
4728 Assert(iGRegEx < 20);
4729 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4730 kIemNativeGstRegUse_ForUpdate);
4731#ifdef RT_ARCH_AMD64
4732 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4733
4734 /* To the lowest byte of the register: mov r8, imm8 */
4735 if (iGRegEx < 16)
4736 {
4737 if (idxGstTmpReg >= 8)
4738 pbCodeBuf[off++] = X86_OP_REX_B;
4739 else if (idxGstTmpReg >= 4)
4740 pbCodeBuf[off++] = X86_OP_REX;
4741 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4742 pbCodeBuf[off++] = u8Value;
4743 }
4744    /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
4745 else if (idxGstTmpReg < 4)
4746 {
4747 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
4748 pbCodeBuf[off++] = u8Value;
4749 }
4750 else
4751 {
4752 /* ror reg64, 8 */
4753 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4754 pbCodeBuf[off++] = 0xc1;
4755 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4756 pbCodeBuf[off++] = 8;
4757
4758 /* mov reg8, imm8 */
4759 if (idxGstTmpReg >= 8)
4760 pbCodeBuf[off++] = X86_OP_REX_B;
4761 else if (idxGstTmpReg >= 4)
4762 pbCodeBuf[off++] = X86_OP_REX;
4763 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4764 pbCodeBuf[off++] = u8Value;
4765
4766 /* rol reg64, 8 */
4767 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4768 pbCodeBuf[off++] = 0xc1;
4769 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4770 pbCodeBuf[off++] = 8;
4771 }
4772
4773#elif defined(RT_ARCH_ARM64)
4774 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4775 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4776 if (iGRegEx < 16)
4777 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4778 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4779 else
4780 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4781 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4782 iemNativeRegFreeTmp(pReNative, idxImmReg);
4783
4784#else
4785# error "Port me!"
4786#endif
4787
4788 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4789
4790#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4791 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4792#endif
4793
4794 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4795 return off;
4796}
4797
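/* Worked AMD64 example for the rotate path above: storing an immediate into the guest BH
   when the RBX shadow happens to live in r9 (no host register other than RAX/RCX/RDX/RBX
   has an addressable high byte) emits roughly:
        ror r9, 8        ; bring bits 15:8 down to bits 7:0
        mov r9b, imm8    ; overwrite them
        rol r9, 8        ; restore the original layout
   The register choice is purely illustrative - the allocator picks the actual one. */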
4798
4799#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4800 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4801
4802/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4803DECL_INLINE_THROW(uint32_t)
4804iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4805{
4806 Assert(iGRegEx < 20);
4807 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4808
4809 /*
4810     * If it's a constant value (unlikely) we treat this as an
4811 * IEM_MC_STORE_GREG_U8_CONST statement.
4812 */
4813 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4814 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4815 { /* likely */ }
4816 else
4817 {
4818 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4819 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4820 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4821 }
4822
4823 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4824 kIemNativeGstRegUse_ForUpdate);
4825 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4826
4827#ifdef RT_ARCH_AMD64
4828 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4829 if (iGRegEx < 16)
4830 {
4831 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4832 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4833 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4834 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4835 pbCodeBuf[off++] = X86_OP_REX;
4836 pbCodeBuf[off++] = 0x8a;
4837 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4838 }
4839    /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
4840 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4841 {
4842 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4843 pbCodeBuf[off++] = 0x8a;
4844 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4845 }
4846 else
4847 {
4848 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4849
4850 /* ror reg64, 8 */
4851 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4852 pbCodeBuf[off++] = 0xc1;
4853 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4854 pbCodeBuf[off++] = 8;
4855
4856 /* mov reg8, reg8(r/m) */
4857 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4858 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4859 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4860 pbCodeBuf[off++] = X86_OP_REX;
4861 pbCodeBuf[off++] = 0x8a;
4862 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4863
4864 /* rol reg64, 8 */
4865 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4866 pbCodeBuf[off++] = 0xc1;
4867 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4868 pbCodeBuf[off++] = 8;
4869 }
4870
4871#elif defined(RT_ARCH_ARM64)
4872 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4873 or
4874 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4875 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4876 if (iGRegEx < 16)
4877 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4878 else
4879 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4880
4881#else
4882# error "Port me!"
4883#endif
4884 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4885
4886 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4887
4888#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4889 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4890#endif
4891 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4892 return off;
4893}
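/*
 * Illustrative sketch of the high-byte store path above (AMD64): when the
 * target is AH/CH/DH/BH but the allocated host registers cannot be encoded
 * as legacy high-byte operands, the emitter produces a rotate/move/rotate
 * sequence.  The exact registers depend on the allocator state, so this is
 * only a mnemonic sketch, not a fixed encoding:
 *
 *      ror     <gst-reg64>, 8          ; bring bits 15:8 down into 7:0
 *      mov     <gst-reg8>, <var-reg8>  ; plain 8-bit register move
 *      rol     <gst-reg64>, 8          ; rotate the byte back up into 15:8
 *
 * On ARM64 the same store is a single BFI into bits 15:8 of the 32-bit view.
 */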
4894
4895
4896
4897#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4898 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4899
4900/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4901DECL_INLINE_THROW(uint32_t)
4902iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4903{
4904 Assert(iGReg < 16);
4905 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4906 kIemNativeGstRegUse_ForUpdate);
4907#ifdef RT_ARCH_AMD64
4908 /* mov reg16, imm16 */
4909 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4910 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4911 if (idxGstTmpReg >= 8)
4912 pbCodeBuf[off++] = X86_OP_REX_B;
4913 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4914 pbCodeBuf[off++] = RT_BYTE1(uValue);
4915 pbCodeBuf[off++] = RT_BYTE2(uValue);
4916
4917#elif defined(RT_ARCH_ARM64)
4918 /* movk xdst, #uValue, lsl #0 */
4919 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4920 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
4921
4922#else
4923# error "Port me!"
4924#endif
4925
4926 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4927
4928#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4929 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4930#endif
4931 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4932 return off;
4933}
4934
4935
4936#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4937 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4938
4939/** Emits code for IEM_MC_STORE_GREG_U16. */
4940DECL_INLINE_THROW(uint32_t)
4941iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4942{
4943 Assert(iGReg < 16);
4944 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4945
4946 /*
4947 * If it's a constant value (unlikely) we treat this as an
4948 * IEM_MC_STORE_GREG_U16_CONST statement.
4949 */
4950 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4951 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4952 { /* likely */ }
4953 else
4954 {
4955 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4956 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4957 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
4958 }
4959
4960 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4961 kIemNativeGstRegUse_ForUpdate);
4962
4963#ifdef RT_ARCH_AMD64
4964 /* mov reg16, reg16 or [mem16] */
4965 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4966 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4967 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
4968 {
4969 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
4970 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
4971 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
4972 pbCodeBuf[off++] = 0x8b;
4973 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
4974 }
4975 else
4976 {
4977 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
4978 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
4979 if (idxGstTmpReg >= 8)
4980 pbCodeBuf[off++] = X86_OP_REX_R;
4981 pbCodeBuf[off++] = 0x8b;
4982 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
4983 }
4984
4985#elif defined(RT_ARCH_ARM64)
4986 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
4987 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
4988 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4989 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
4990 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4991
4992#else
4993# error "Port me!"
4994#endif
4995
4996 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4997
4998#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4999 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5000#endif
5001 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5002 return off;
5003}
5004
5005
5006#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
5007 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
5008
5009/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
5010DECL_INLINE_THROW(uint32_t)
5011iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
5012{
5013 Assert(iGReg < 16);
5014 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5015 kIemNativeGstRegUse_ForFullWrite);
5016 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5017#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5018 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5019#endif
5020 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5021 return off;
5022}
5023
5024
5025#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
5026 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
5027
5028#define IEM_MC_STORE_GREG_I32(a_iGReg, a_i32Value) \
5029 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_i32Value)
5030
5031/** Emits code for IEM_MC_STORE_GREG_U32/IEM_MC_STORE_GREG_I32. */
5032DECL_INLINE_THROW(uint32_t)
5033iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5034{
5035 Assert(iGReg < 16);
5036 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5037
5038 /*
5039 * If it's a constant value (unlikely) we treat this as an
5040 * IEM_MC_STORE_GREG_U32_CONST statement.
5041 */
5042 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5043 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5044 { /* likely */ }
5045 else
5046 {
5047 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5048 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5049 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
5050 }
5051
5052 /*
5053 * For the rest we allocate a guest register for the variable and write
5054 * it to the CPUMCTX structure.
5055 */
5056 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5057#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5058 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5059#else
5060 RT_NOREF(idxVarReg);
5061#endif
5062#ifdef VBOX_STRICT
5063 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
5064#endif
5065 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5066 return off;
5067}
5068
5069
5070#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
5071 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
5072
5073/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
5074DECL_INLINE_THROW(uint32_t)
5075iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
5076{
5077 Assert(iGReg < 16);
5078 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5079 kIemNativeGstRegUse_ForFullWrite);
5080 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5081#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5082 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5083#endif
5084 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5085 return off;
5086}
5087
5088
5089#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
5090 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
5091
5092#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
5093 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
5094
5095/** Emits code for IEM_MC_STORE_GREG_U64/IEM_MC_STORE_GREG_I64. */
5096DECL_INLINE_THROW(uint32_t)
5097iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5098{
5099 Assert(iGReg < 16);
5100 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5101
5102 /*
5103 * If it's a constant value (unlikely) we treat this as an
5104 * IEM_MC_STORE_GREG_U64_CONST statement.
5105 */
5106 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5107 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5108 { /* likely */ }
5109 else
5110 {
5111 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5112 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5113 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
5114 }
5115
5116 /*
5117 * For the rest we allocate a guest register for the variable and write
5118 * it to the CPUMCTX structure.
5119 */
5120 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5121#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5122 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5123#else
5124 RT_NOREF(idxVarReg);
5125#endif
5126 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5127 return off;
5128}
5129
5130
5131#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
5132 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
5133
5134/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
5135DECL_INLINE_THROW(uint32_t)
5136iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
5137{
5138 Assert(iGReg < 16);
5139 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5140 kIemNativeGstRegUse_ForUpdate);
5141 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
5142#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5143 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5144#endif
5145 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5146 return off;
5147}
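/*
 * A minimal sketch of what IEM_MC_CLEAR_HIGH_GREG_U64 boils down to: the
 * 32-bit register-to-self move relies on the host architecture zero
 * extending 32-bit writes, so a single instruction clears bits 63:32.
 * Register names below are placeholders for whatever the allocator picked:
 *
 *      mov     <gst-reg32>, <gst-reg32>    ; AMD64: implicit zero extension
 *      mov     w<n>, w<n>                  ; ARM64: same effect
 */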
5148
5149
5150#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5151#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
5152 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
5153
5154/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
5155DECL_INLINE_THROW(uint32_t)
5156iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
5157{
5158 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5159 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
5160 Assert(iGRegLo < 16 && iGRegHi < 16);
5161
5162 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
5163 kIemNativeGstRegUse_ForFullWrite);
5164 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
5165 kIemNativeGstRegUse_ForFullWrite);
5166
5167 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5168 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
5169 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
5170 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
5171
5172 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
5173 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
5174 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
5175 return off;
5176}
5177#endif
5178
5179
5180/*********************************************************************************************************************************
5181* General purpose register manipulation (add, sub). *
5182*********************************************************************************************************************************/
5183
5184#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8AddendConst) \
5185 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8AddendConst)
5186
5187/** Emits code for IEM_MC_ADD_GREG_U16. */
5188DECL_INLINE_THROW(uint32_t)
5189iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
5190{
5191 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5192 kIemNativeGstRegUse_ForUpdate);
5193
5194#ifdef RT_ARCH_AMD64
5195 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5196 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5197 if (idxGstTmpReg >= 8)
5198 pbCodeBuf[off++] = X86_OP_REX_B;
5199 if (uAddend == 1)
5200 {
5201 pbCodeBuf[off++] = 0xff; /* inc */
5202 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5203 }
5204 else
5205 {
5206 pbCodeBuf[off++] = 0x81;
5207 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5208 pbCodeBuf[off++] = uAddend;
5209 pbCodeBuf[off++] = 0;
5210 }
5211
5212#else
5213 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5214 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5215
5216 /* add tmp, gstgrp, uAddend */
5217 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
5218
5219 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
5220 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5221
5222 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5223#endif
5224
5225 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5226
5227#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5228 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5229#endif
5230
5231 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5232 return off;
5233}
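/*
 * Rough sketch of the two strategies used by the 16-bit adder above, with
 * placeholder register names.  AMD64 can add directly to the 16-bit view
 * (operand-size prefix), leaving bits 63:16 alone; ARM64 has no 16-bit
 * arithmetic, so the sum is formed in a temporary and merged back with BFI:
 *
 *      AMD64:  add     <gst-reg16>, <imm>
 *      ARM64:  add     <tmp32>, <gst-reg32>, #<imm>
 *              bfi     <gst-reg32>, <tmp32>, #0, #16
 */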
5234
5235
5236#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
5237 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5238
5239#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
5240 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5241
5242/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
5243DECL_INLINE_THROW(uint32_t)
5244iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
5245{
5246 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5247 kIemNativeGstRegUse_ForUpdate);
5248
5249#ifdef RT_ARCH_AMD64
5250 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5251 if (f64Bit)
5252 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5253 else if (idxGstTmpReg >= 8)
5254 pbCodeBuf[off++] = X86_OP_REX_B;
5255 if (uAddend == 1)
5256 {
5257 pbCodeBuf[off++] = 0xff; /* inc */
5258 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5259 }
5260 else if (uAddend < 128)
5261 {
5262 pbCodeBuf[off++] = 0x83; /* add */
5263 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5264 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5265 }
5266 else
5267 {
5268 pbCodeBuf[off++] = 0x81; /* add */
5269 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5270 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5271 pbCodeBuf[off++] = 0;
5272 pbCodeBuf[off++] = 0;
5273 pbCodeBuf[off++] = 0;
5274 }
5275
5276#else
5277 /* add gstgrp, gstgrp, uAddend */
5278 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5279 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
5280
5281#endif
5282
5283 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5284
5285#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5286 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5287#endif
5288
5289 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5290 return off;
5291}
5292
5293
5294
5295#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5296 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5297
5298/** Emits code for IEM_MC_SUB_GREG_U16. */
5299DECL_INLINE_THROW(uint32_t)
5300iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
5301{
5302 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5303 kIemNativeGstRegUse_ForUpdate);
5304
5305#ifdef RT_ARCH_AMD64
5306 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5307 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5308 if (idxGstTmpReg >= 8)
5309 pbCodeBuf[off++] = X86_OP_REX_B;
5310 if (uSubtrahend == 1)
5311 {
5312 pbCodeBuf[off++] = 0xff; /* dec */
5313 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5314 }
5315 else
5316 {
5317 pbCodeBuf[off++] = 0x81;
5318 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5319 pbCodeBuf[off++] = uSubtrahend;
5320 pbCodeBuf[off++] = 0;
5321 }
5322
5323#else
5324 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5325 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5326
5327 /* sub tmp, gstgrp, uSubtrahend */
5328 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
5329
5330 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
5331 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5332
5333 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5334#endif
5335
5336 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5337
5338#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5339 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5340#endif
5341
5342 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5343 return off;
5344}
5345
5346
5347#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
5348 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5349
5350#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
5351 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5352
5353/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
5354DECL_INLINE_THROW(uint32_t)
5355iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
5356{
5357 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5358 kIemNativeGstRegUse_ForUpdate);
5359
5360#ifdef RT_ARCH_AMD64
5361 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5362 if (f64Bit)
5363 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5364 else if (idxGstTmpReg >= 8)
5365 pbCodeBuf[off++] = X86_OP_REX_B;
5366 if (uSubtrahend == 1)
5367 {
5368 pbCodeBuf[off++] = 0xff; /* dec */
5369 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5370 }
5371 else if (uSubtrahend < 128)
5372 {
5373 pbCodeBuf[off++] = 0x83; /* sub */
5374 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5375 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5376 }
5377 else
5378 {
5379 pbCodeBuf[off++] = 0x81; /* sub */
5380 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5381 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5382 pbCodeBuf[off++] = 0;
5383 pbCodeBuf[off++] = 0;
5384 pbCodeBuf[off++] = 0;
5385 }
5386
5387#else
5388 /* sub tmp, gstgrp, uSubtrahend */
5389 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5390 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
5391
5392#endif
5393
5394 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5395
5396#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5397 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5398#endif
5399
5400 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5401 return off;
5402}
5403
5404
5405#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
5406 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5407
5408#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
5409 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5410
5411#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
5412 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5413
5414#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
5415 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5416
5417/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
5418DECL_INLINE_THROW(uint32_t)
5419iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5420{
5421#ifdef VBOX_STRICT
5422 switch (cbMask)
5423 {
5424 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5425 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5426 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5427 case sizeof(uint64_t): break;
5428 default: AssertFailedBreak();
5429 }
5430#endif
5431
5432 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5433 kIemNativeGstRegUse_ForUpdate);
5434
5435 switch (cbMask)
5436 {
5437 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5438 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
5439 break;
5440 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
5441 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
5442 break;
5443 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5444 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5445 break;
5446 case sizeof(uint64_t):
5447 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
5448 break;
5449 default: AssertFailedBreak();
5450 }
5451
5452 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5453
5454#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5455 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5456#endif
5457
5458 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5459 return off;
5460}
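/*
 * Worked example for the masking trick above: for the 8- and 16-bit cases
 * the requested mask is OR'ed with an all-ones pattern covering the upper
 * bits, so a full 64-bit AND only touches the low byte/word.  E.g. for
 * IEM_MC_AND_GREG_U16(X86_GREG_xAX, 0x00ff) with RAX=0x123456789abcdef0:
 *
 *      effective mask: 0x00ff | 0xffffffffffff0000 = 0xffffffffffff00ff
 *      result:         0x123456789abcdef0 & mask   = 0x123456789abc00f0
 *
 * The 32-bit case deliberately uses the 32-bit AND so the high half is
 * zeroed, matching x86 semantics for 32-bit destination writes.
 */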
5461
5462
5463#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
5464 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5465
5466#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
5467 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5468
5469#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
5470 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5471
5472#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
5473 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5474
5475/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
5476DECL_INLINE_THROW(uint32_t)
5477iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5478{
5479#ifdef VBOX_STRICT
5480 switch (cbMask)
5481 {
5482 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5483 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5484 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5485 case sizeof(uint64_t): break;
5486 default: AssertFailedBreak();
5487 }
5488#endif
5489
5490 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5491 kIemNativeGstRegUse_ForUpdate);
5492
5493 switch (cbMask)
5494 {
5495 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5496 case sizeof(uint16_t):
5497 case sizeof(uint64_t):
5498 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
5499 break;
5500 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5501 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5502 break;
5503 default: AssertFailedBreak();
5504 }
5505
5506 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5507
5508#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5509 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5510#endif
5511
5512 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5513 return off;
5514}
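/*
 * Sizing note for the OR variant above, mirroring the AND case: an OR can
 * only set bits that are present in the mask, so the 8-, 16- and 64-bit
 * forms share a plain 64-bit OR that leaves the untouched bits alone and
 * needs no widened mask.  Only the 32-bit form must use the 32-bit OR so
 * that bits 63:32 end up zero, e.g. IEM_MC_OR_GREG_U32(X86_GREG_xAX, 0x1)
 * on RAX=0xffffffff00000000 yields RAX=0x0000000000000001.
 */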
5515
5516
5517/*********************************************************************************************************************************
5518* Local/Argument variable manipulation (add, sub, and, or). *
5519*********************************************************************************************************************************/
5520
5521#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
5522 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5523
5524#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
5525 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5526
5527#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
5528 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5529
5530#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
5531 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5532
5533
5534#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
5535 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
5536
5537#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
5538 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
5539
5540#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
5541 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
5542
5543/** Emits code for AND'ing a local and a constant value. */
5544DECL_INLINE_THROW(uint32_t)
5545iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5546{
5547#ifdef VBOX_STRICT
5548 switch (cbMask)
5549 {
5550 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5551 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5552 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5553 case sizeof(uint64_t): break;
5554 default: AssertFailedBreak();
5555 }
5556#endif
5557
5558 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5559 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5560
5561 if (cbMask <= sizeof(uint32_t))
5562 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
5563 else
5564 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
5565
5566 iemNativeVarRegisterRelease(pReNative, idxVar);
5567 return off;
5568}
5569
5570
5571#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
5572 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5573
5574#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
5575 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5576
5577#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
5578 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5579
5580#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
5581 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5582
5583/** Emits code for OR'ing a local and a constant value. */
5584DECL_INLINE_THROW(uint32_t)
5585iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5586{
5587#ifdef VBOX_STRICT
5588 switch (cbMask)
5589 {
5590 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5591 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5592 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5593 case sizeof(uint64_t): break;
5594 default: AssertFailedBreak();
5595 }
5596#endif
5597
5598 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5599 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5600
5601 if (cbMask <= sizeof(uint32_t))
5602 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
5603 else
5604 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
5605
5606 iemNativeVarRegisterRelease(pReNative, idxVar);
5607 return off;
5608}
5609
5610
5611#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
5612 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
5613
5614#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
5615 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
5616
5617#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
5618 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
5619
5620/** Emits code for reversing the byte order in a local value. */
5621DECL_INLINE_THROW(uint32_t)
5622iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
5623{
5624 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5625 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5626
5627 switch (cbLocal)
5628 {
5629 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
5630 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
5631 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
5632 default: AssertFailedBreak();
5633 }
5634
5635 iemNativeVarRegisterRelease(pReNative, idxVar);
5636 return off;
5637}
5638
5639
5640#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
5641 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5642
5643#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
5644 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5645
5646#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
5647 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5648
5649/** Emits code for shifting left a local value. */
5650DECL_INLINE_THROW(uint32_t)
5651iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5652{
5653#ifdef VBOX_STRICT
5654 switch (cbLocal)
5655 {
5656 case sizeof(uint8_t): Assert(cShift < 8); break;
5657 case sizeof(uint16_t): Assert(cShift < 16); break;
5658 case sizeof(uint32_t): Assert(cShift < 32); break;
5659 case sizeof(uint64_t): Assert(cShift < 64); break;
5660 default: AssertFailedBreak();
5661 }
5662#endif
5663
5664 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5665 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5666
5667 if (cbLocal <= sizeof(uint32_t))
5668 {
5669 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
5670 if (cbLocal < sizeof(uint32_t))
5671 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
5672 cbLocal == sizeof(uint16_t)
5673 ? UINT32_C(0xffff)
5674 : UINT32_C(0xff));
5675 }
5676 else
5677 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
5678
5679 iemNativeVarRegisterRelease(pReNative, idxVar);
5680 return off;
5681}
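/*
 * Worked example for the sub-32-bit handling above: the shift is done on the
 * 32-bit host view and the result is then masked back down to the local's
 * width so no stale bits leak above it.  E.g. IEM_MC_SHL_LOCAL_S16(x, 4)
 * with x=0x9000:
 *
 *      32-bit shift:   0x00009000 << 4     = 0x00090000
 *      width mask:     0x00090000 & 0xffff = 0x0000
 */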
5682
5683
5684#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
5685 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5686
5687#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
5688 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5689
5690#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
5691 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5692
5693/** Emits code for arithmetically shifting a local value to the right. */
5694DECL_INLINE_THROW(uint32_t)
5695iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5696{
5697#ifdef VBOX_STRICT
5698 switch (cbLocal)
5699 {
5700 case sizeof(int8_t): Assert(cShift < 8); break;
5701 case sizeof(int16_t): Assert(cShift < 16); break;
5702 case sizeof(int32_t): Assert(cShift < 32); break;
5703 case sizeof(int64_t): Assert(cShift < 64); break;
5704 default: AssertFailedBreak();
5705 }
5706#endif
5707
5708 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5709 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5710
5711 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
5712 if (cbLocal == sizeof(uint8_t))
5713 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
5714 else if (cbLocal == sizeof(uint16_t))
5715 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
5716
5717 if (cbLocal <= sizeof(uint32_t))
5718 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
5719 else
5720 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
5721
5722 iemNativeVarRegisterRelease(pReNative, idxVar);
5723 return off;
5724}
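/*
 * Worked example for the sign extension above: an 8- or 16-bit local must be
 * sign-extended to 32 bits first, otherwise the arithmetic shift would pull
 * in zero bits instead of copies of the local's own sign bit.  E.g.
 * IEM_MC_SAR_LOCAL_S16(x, 2) with x=0x8000 (-32768):
 *
 *      sign-extend:    0x8000     -> 0xffff8000
 *      asr #2:         0xffff8000 -> 0xffffe000   (low 16 bits = 0xe000 = -8192)
 */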
5725
5726
5727#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
5728 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
5729
5730#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
5731 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
5732
5733#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
5734 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
5735
5736/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
5737DECL_INLINE_THROW(uint32_t)
5738iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
5739{
5740 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
5741 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
5742 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5743 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5744
5745 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
5746 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
5747
5748 /* Need to sign extend the value. */
5749 if (cbLocal <= sizeof(uint32_t))
5750 {
5751/** @todo ARM64: In case of boredom, the extended add instruction can do the
5752 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
5753 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5754
5755 switch (cbLocal)
5756 {
5757 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
5758 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
5759 default: AssertFailed();
5760 }
5761
5762 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
5763 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5764 }
5765 else
5766 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
5767
5768 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
5769 iemNativeVarRegisterRelease(pReNative, idxVar);
5770 return off;
5771}
5772
5773
5774
5775/*********************************************************************************************************************************
5776* EFLAGS *
5777*********************************************************************************************************************************/
5778
5779#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
5780# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
5781#else
5782# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
5783 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
5784
5785DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
5786{
5787 if (fEflOutput)
5788 {
5789 PVMCPUCC const pVCpu = pReNative->pVCpu;
5790# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5791 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5792 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5793 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5794# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5795 if (fEflOutput & (a_fEfl)) \
5796 { \
5797 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5798 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5799 else \
5800 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5801 } else do { } while (0)
5802# else
5803 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
5804 IEMLIVENESSBIT const LivenessClobbered =
5805 {
5806 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5807 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_POTENTIAL_CALL].bm64
5808 | pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5809 | pLivenessEntry->aBits[IEMLIVENESS_BIT_CALL].bm64)
5810 };
5811 IEMLIVENESSBIT const LivenessDelayable =
5812 {
5813 pLivenessEntry->aBits[IEMLIVENESS_BIT_POTENTIAL_CALL].bm64
5814 & pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
5815 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
5816 | pLivenessEntry->aBits[IEMLIVENESS_BIT_CALL].bm64)
5817 };
5818# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5819 if (fEflOutput & (a_fEfl)) \
5820 { \
5821 if (LivenessClobbered.a_fLivenessMember) \
5822 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5823 else if (LivenessDelayable.a_fLivenessMember) \
5824 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5825 else \
5826 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5827 } else do { } while (0)
5828# endif
5829 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5830 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5831 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5832 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5833 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5834 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5835 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5836# undef CHECK_FLAG_AND_UPDATE_STATS
5837 }
5838 RT_NOREF(fEflInput);
5839}
5840#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5841
5842#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5843#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5844 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
5845
5846/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5847DECL_INLINE_THROW(uint32_t)
5848iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
5849 uint32_t fEflInput, uint32_t fEflOutput)
5850{
5851 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5852 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5853 RT_NOREF(fEflInput, fEflOutput);
5854
5855#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5856# ifdef VBOX_STRICT
5857 if ( pReNative->idxCurCall != 0
5858 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
5859 {
5860 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5861 uint32_t const fBoth = fEflInput | fEflOutput;
5862# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
5863 AssertMsg( !(fBoth & (a_fElfConst)) \
5864 || (!(fEflInput & (a_fElfConst)) \
5865 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5866 : !(fEflOutput & (a_fElfConst)) \
5867 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5868 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5869 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5870 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5871 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5872 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5873 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5874 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5875 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5876 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5877# undef ASSERT_ONE_EFL
5878 }
5879# endif
5880#endif
5881
5882 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
5883
5884 /** @todo This could be prettier... */
5885 /** @todo Also, the shadowing+liveness handling of EFlags is currently
5886 * problematic, but I'll try to tackle that soon (@bugref{10720}). */
5887 PCIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarEFlags)];
5888 Assert(pVar->enmKind == kIemNativeVarKind_Invalid || pVar->enmKind == kIemNativeVarKind_Stack);
5889 Assert(pVar->idxReg == UINT8_MAX);
5890 if (pVar->uArgNo >= IEMNATIVE_CALL_ARG_GREG_COUNT)
5891 {
5892 /** @todo We could use kIemNativeGstRegUse_ReadOnly here when fEflOutput is
5893 * zero, but since iemNativeVarRegisterSet clears the shadowing,
5894 * that's counterproductive... */
5895 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5896 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
5897 true /** @todo EFlags shadowing+liveness weirdness (@bugref{10720}). */);
5898 iemNativeVarRegisterSet(pReNative, idxVarEFlags, idxGstReg, off, true /*fAllocated*/);
5899 }
5900 else
5901 {
5902 /* Register argument variable: Avoid assertions in generic call code and load it the traditional way. */
5903 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
5904 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_EFlags);
5905 if (idxGstReg != UINT8_MAX)
5906 {
5907 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstReg);
5908 iemNativeRegFreeTmp(pReNative, idxGstReg);
5909 }
5910 else
5911 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxVarReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5912 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5913 }
5914 return off;
5915}
5916
5917
5918
5919/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5920 * start using it with custom native code emission (inlining assembly
5921 * instruction helpers). */
5922#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5923#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5924 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5925 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
5926
5927#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5928#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5929 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5930 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
5931
5932/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
5933DECL_INLINE_THROW(uint32_t)
5934iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
5935 bool fUpdateSkipping)
5936{
5937 RT_NOREF(fEflOutput);
5938 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
5939 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5940
5941#ifdef VBOX_STRICT
5942 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
5943 uint32_t offFixup = off;
5944 off = iemNativeEmitJnzToFixed(pReNative, off, off);
5945 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
5946 iemNativeFixupFixedJump(pReNative, offFixup, off);
5947
5948 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
5949 offFixup = off;
5950 off = iemNativeEmitJzToFixed(pReNative, off, off);
5951 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
5952 iemNativeFixupFixedJump(pReNative, offFixup, off);
5953
5954 /** @todo validate that only bits in the fEflOutput mask changed. */
5955#endif
5956
5957#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
5958 if (fUpdateSkipping)
5959 {
5960 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
5961 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5962 else
5963 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
5964 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
5965 }
5966#else
5967 RT_NOREF_PV(fUpdateSkipping);
5968#endif
5969
5970 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
5971 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
5972 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5973 return off;
5974}
5975
5976
5977typedef enum IEMNATIVEMITEFLOP
5978{
5979 kIemNativeEmitEflOp_Set,
5980 kIemNativeEmitEflOp_Clear,
5981 kIemNativeEmitEflOp_Flip
5982} IEMNATIVEMITEFLOP;
5983
5984#define IEM_MC_SET_EFL_BIT(a_fBit) \
5985 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Set>(pReNative, off, a_fBit)
5986
5987#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
5988 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Clear>(pReNative, off, a_fBit)
5989
5990#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
5991 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Flip>(pReNative, off, a_fBit)
5992
5993/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
5994template<IEMNATIVEMITEFLOP const a_enmOp>
5995DECL_INLINE_THROW(uint32_t) iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflBit)
5996{
5997 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
5998 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
5999 true /*fSkipLivenessAssert*/); /** @todo proper liveness / eflags fix */
6000
6001 /* Using 'if constexpr' forces code elimination in debug builds with VC. */
6002 if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Set)
6003 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
6004 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Clear)
6005 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~fEflBit);
6006 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Flip)
6007 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
6008 else
6009 AssertCompile( a_enmOp == kIemNativeEmitEflOp_Set /* AssertCompile(false) works with VC 2019 but not clang 15. */
6010 || a_enmOp == kIemNativeEmitEflOp_Clear
6011 || a_enmOp == kIemNativeEmitEflOp_Flip);
6012
6013 /** @todo No delayed writeback for EFLAGS right now. */
6014 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
6015
6016 /* Free but don't flush the EFLAGS register. */
6017 iemNativeRegFreeTmp(pReNative, idxEflReg);
6018
6019 return off;
6020}
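/*
 * Usage sketch for the template above (hypothetical caller, the real users
 * live in the instruction bodies): a CMC-style implementation would simply do
 *
 *      IEM_MC_FLIP_EFL_BIT(X86_EFL_CF);
 *
 * which instantiates a_enmOp == kIemNativeEmitEflOp_Flip and ends up emitting
 * a single 32-bit XOR-by-immediate against the cached EFLAGS register before
 * the value is written back to CPUMCTX.
 */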
6021
6022
6023/*********************************************************************************************************************************
6024* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
6025*********************************************************************************************************************************/
6026
6027#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
6028 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
6029
6030#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
6031 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
6032
6033#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
6034 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
6035
6036
6037/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
6038 * IEM_MC_FETCH_SREG_ZX_U64. */
6039DECL_INLINE_THROW(uint32_t)
6040iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
6041{
6042 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6043 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
6044 Assert(iSReg < X86_SREG_COUNT);
6045
6046 /*
6047 * For now, we will not create a shadow copy of a selector. The rationale
6048 * is that since we do not recompile the popping and loading of segment
6049 * registers and that the IEM_MC_FETCH_SREG_U* MCs are only used for
6050 * pushing and moving to registers, there is only a small chance that the
6051 * shadow copy will be accessed again before the register is reloaded. One
6052 * scenario would be nested calls in 16-bit code, but I doubt it's worth
6053 * the extra register pressure atm.
6054 *
6055 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
6056 * and iemNativeVarRegisterAcquire for a load scenario. We only have the
6057 * store scenario covered at present (r160730).
6058 */
6059 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6060 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
6061 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
6062 iemNativeVarRegisterRelease(pReNative, idxDstVar);
6063 return off;
6064}
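/*
 * Usage sketch (hypothetical, just to illustrate the shape of the callers the
 * comment above refers to): a push-CS style body only needs the 16-bit
 * selector value, e.g.
 *
 *      IEM_MC_LOCAL(uint16_t, u16Sel);
 *      IEM_MC_FETCH_SREG_U16(u16Sel, X86_SREG_CS);
 *
 * so the selector is simply loaded from CPUMCTX into the variable's register
 * without establishing a shadow copy.
 */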
6065
6066
6067
6068/*********************************************************************************************************************************
6069* Register references. *
6070*********************************************************************************************************************************/
6071
6072#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
6073 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
6074
6075#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
6076 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
6077
6078/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
6079DECL_INLINE_THROW(uint32_t)
6080iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
6081{
6082 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
6083 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6084 Assert(iGRegEx < 20);
6085
6086 if (iGRegEx < 16)
6087 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6088 else
6089 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
6090
6091 /* If we've delayed writing back the register value, flush it now. */
6092 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6093
6094 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6095 if (!fConst)
6096 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
6097
6098 return off;
6099}
6100
6101#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
6102 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
6103
6104#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
6105 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
6106
6107#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
6108 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
6109
6110#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
6111 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
6112
6113#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
6114 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
6115
6116#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
6117 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
6118
6119#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
6120 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
6121
6122#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
6123 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
6124
6125#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
6126 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
6127
6128#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
6129 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
6130
6131/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
6132DECL_INLINE_THROW(uint32_t)
6133iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
6134{
6135 Assert(iGReg < 16);
6136 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
6137 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6138
6139 /* If we've delayed writing back the register value, flush it now. */
6140 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
6141
6142 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6143 if (!fConst)
6144 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
6145
6146 return off;
6147}
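/*
 * Why the flushing above matters (brief sketch): a non-const reference hands
 * out a pointer straight into CPUMCTX, typically to an assembly/C worker that
 * modifies the guest register behind the recompiler's back.  Any delayed
 * write must therefore hit memory first, and any host-register shadow of the
 * guest register must be dropped so it is re-read afterwards.
 */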
6148
6149
6150#undef IEM_MC_REF_EFLAGS /* should not be used. */
6151#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
6152 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
6153 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
6154
6155/** Handles IEM_MC_REF_EFLAGS. */
6156DECL_INLINE_THROW(uint32_t)
6157iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
6158{
6159 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
6160 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6161
6162#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6163 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
6164
6165 /* Updating the skipping according to the outputs is a little early, but
6166 we don't have any other hooks for references atm. */
6167 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6168 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6169 else if (fEflOutput & X86_EFL_STATUS_BITS)
6170 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
6171 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6172#else
6173 RT_NOREF(fEflInput, fEflOutput);
6174#endif
6175
6176 /* If we've delayed writing back the register value, flush it now. */
6177 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
6178
6179 /* If there is a shadow copy of guest EFLAGS, flush it now. */
6180 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
6181
6182 return off;
6183}
6184
6185
6186/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
6187 * different code from the threaded recompiler, maybe it would be helpful. For now
6188 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
6189#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
6190
6191
6192#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
6193 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
6194
6195#define IEM_MC_REF_XREG_XMM(a_puXmmDst, a_iXReg) \
6196 off = iemNativeEmitRefXregXxx(pReNative, off, a_puXmmDst, a_iXReg, false /*fConst*/)
6197
6198#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
6199 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
6200
6201#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
6202 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
6203
6204#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6205/* Just being paranoid here. */
6206# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
6207AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
6208AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
6209AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
6210AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
6211# endif
6212AssertCompileMemberOffset(X86XMMREG, au64, 0);
6213AssertCompileMemberOffset(X86XMMREG, au32, 0);
6214AssertCompileMemberOffset(X86XMMREG, ar64, 0);
6215AssertCompileMemberOffset(X86XMMREG, ar32, 0);
6216
6217# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
6218 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
6219# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
6220 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
6221# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
6222 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
6223# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
6224 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
6225#endif
6226
6227/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
6228DECL_INLINE_THROW(uint32_t)
6229iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
6230{
6231 Assert(iXReg < 16);
6232 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
6233 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6234
6235 /* If we've delayed writing back the register value, flush it now. */
6236 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
6237
6238#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6239 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6240 if (!fConst)
6241 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
6242#else
6243 RT_NOREF(fConst);
6244#endif
6245
6246 return off;
6247}
6248
6249
6250
6251/*********************************************************************************************************************************
6252* Effective Address Calculation *
6253*********************************************************************************************************************************/
6254#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
6255 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
6256
6257/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
6258 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
6259DECL_INLINE_THROW(uint32_t)
6260iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6261 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
6262{
6263 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6264
6265 /*
6266 * Handle the disp16 form with no registers first.
6267 *
6268 * Convert to an immediate value, as that'll delay the register allocation
6269 * and assignment till the memory access / call / whatever and we can use
6270 * a more appropriate register (or none at all).
6271 */
6272 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
6273 {
6274 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
6275 return off;
6276 }
6277
6278    /* Determine the displacement. */
6279 uint16_t u16EffAddr;
6280 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6281 {
6282 case 0: u16EffAddr = 0; break;
6283 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
6284 case 2: u16EffAddr = u16Disp; break;
6285 default: AssertFailedStmt(u16EffAddr = 0);
6286 }
6287
6288 /* Determine the registers involved. */
6289 uint8_t idxGstRegBase;
6290 uint8_t idxGstRegIndex;
6291 switch (bRm & X86_MODRM_RM_MASK)
6292 {
6293 case 0:
6294 idxGstRegBase = X86_GREG_xBX;
6295 idxGstRegIndex = X86_GREG_xSI;
6296 break;
6297 case 1:
6298 idxGstRegBase = X86_GREG_xBX;
6299 idxGstRegIndex = X86_GREG_xDI;
6300 break;
6301 case 2:
6302 idxGstRegBase = X86_GREG_xBP;
6303 idxGstRegIndex = X86_GREG_xSI;
6304 break;
6305 case 3:
6306 idxGstRegBase = X86_GREG_xBP;
6307 idxGstRegIndex = X86_GREG_xDI;
6308 break;
6309 case 4:
6310 idxGstRegBase = X86_GREG_xSI;
6311 idxGstRegIndex = UINT8_MAX;
6312 break;
6313 case 5:
6314 idxGstRegBase = X86_GREG_xDI;
6315 idxGstRegIndex = UINT8_MAX;
6316 break;
6317 case 6:
6318 idxGstRegBase = X86_GREG_xBP;
6319 idxGstRegIndex = UINT8_MAX;
6320 break;
6321#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
6322 default:
6323#endif
6324 case 7:
6325 idxGstRegBase = X86_GREG_xBX;
6326 idxGstRegIndex = UINT8_MAX;
6327 break;
6328 }
6329
6330 /*
6331 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
6332 */
6333 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6334 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6335 kIemNativeGstRegUse_ReadOnly);
6336 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
6337 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6338 kIemNativeGstRegUse_ReadOnly)
6339 : UINT8_MAX;
6340#ifdef RT_ARCH_AMD64
6341 if (idxRegIndex == UINT8_MAX)
6342 {
6343 if (u16EffAddr == 0)
6344 {
6345 /* movxz ret, base */
6346 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
6347 }
6348 else
6349 {
6350 /* lea ret32, [base64 + disp32] */
6351 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6352 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6353 if (idxRegRet >= 8 || idxRegBase >= 8)
6354 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6355 pbCodeBuf[off++] = 0x8d;
6356 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6357 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
6358 else
6359 {
6360 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
6361 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6362 }
6363 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6364 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6365 pbCodeBuf[off++] = 0;
6366 pbCodeBuf[off++] = 0;
6367 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6368
6369 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6370 }
6371 }
6372 else
6373 {
6374 /* lea ret32, [index64 + base64 (+ disp32)] */
6375 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6376 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6377 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6378 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6379 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6380 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6381 pbCodeBuf[off++] = 0x8d;
6382 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
6383 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6384 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
6385 if (bMod == X86_MOD_MEM4)
6386 {
6387 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6388 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6389 pbCodeBuf[off++] = 0;
6390 pbCodeBuf[off++] = 0;
6391 }
6392 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6393 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6394 }
6395
6396#elif defined(RT_ARCH_ARM64)
6397 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6398 if (u16EffAddr == 0)
6399 {
6400 if (idxRegIndex == UINT8_MAX)
6401 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
6402 else
6403 {
6404 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
6405 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6406 }
6407 }
6408 else
6409 {
6410 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
6411 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
6412 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
6413 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6414 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
6415 else
6416 {
6417 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
6418 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6419 }
6420 if (idxRegIndex != UINT8_MAX)
6421 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
6422 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6423 }
6424
6425#else
6426# error "port me"
6427#endif
6428
6429 if (idxRegIndex != UINT8_MAX)
6430 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6431 iemNativeRegFreeTmp(pReNative, idxRegBase);
6432 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6433 return off;
6434}
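
/*
 * For reference, the value the code emitted above must produce, written as a plain
 * interpreter-style calculation (illustrative only, not built; pGRegs is a made-up
 * array of the 16-bit guest register values indexed by X86_GREG_xxx):
 */
#if 0 /* illustrative example, not built */
static uint16_t exampleCalcEffAddr16(uint8_t bRm, uint16_t u16Disp, uint16_t const *pGRegs)
{
    /* mod=0 && rm=6 is the register-less disp16 form. */
    if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
        return u16Disp;

    uint16_t uEffAddr;
    switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
    {
        case 0:  uEffAddr = 0; break;
        case 1:  uEffAddr = (uint16_t)(int16_t)(int8_t)u16Disp; break; /* disp8, sign extended */
        default: uEffAddr = u16Disp; break;                            /* disp16 */
    }
    switch (bRm & X86_MODRM_RM_MASK)
    {
        case 0:  uEffAddr += pGRegs[X86_GREG_xBX] + pGRegs[X86_GREG_xSI]; break;
        case 1:  uEffAddr += pGRegs[X86_GREG_xBX] + pGRegs[X86_GREG_xDI]; break;
        case 2:  uEffAddr += pGRegs[X86_GREG_xBP] + pGRegs[X86_GREG_xSI]; break;
        case 3:  uEffAddr += pGRegs[X86_GREG_xBP] + pGRegs[X86_GREG_xDI]; break;
        case 4:  uEffAddr += pGRegs[X86_GREG_xSI]; break;
        case 5:  uEffAddr += pGRegs[X86_GREG_xDI]; break;
        case 6:  uEffAddr += pGRegs[X86_GREG_xBP]; break;
        default: uEffAddr += pGRegs[X86_GREG_xBX]; break;
    }
    return uEffAddr; /* 16-bit wrap-around matches the 16-bit address size */
}
#endif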
6435
6436
6437#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
6438 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
6439
6440/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
6441 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
6442DECL_INLINE_THROW(uint32_t)
6443iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6444 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
6445{
6446 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6447
6448 /*
6449 * Handle the disp32 form with no registers first.
6450 *
6451 * Convert to an immediate value, as that'll delay the register allocation
6452 * and assignment till the memory access / call / whatever and we can use
6453 * a more appropriate register (or none at all).
6454 */
6455 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6456 {
6457 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
6458 return off;
6459 }
6460
6461    /* Calculate the fixed displacement (more on this below in the SIB.B=4 and SIB.B=5 handling). */
6462 uint32_t u32EffAddr = 0;
6463 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6464 {
6465 case 0: break;
6466 case 1: u32EffAddr = (int8_t)u32Disp; break;
6467 case 2: u32EffAddr = u32Disp; break;
6468 default: AssertFailed();
6469 }
6470
6471 /* Get the register (or SIB) value. */
6472 uint8_t idxGstRegBase = UINT8_MAX;
6473 uint8_t idxGstRegIndex = UINT8_MAX;
6474 uint8_t cShiftIndex = 0;
6475 switch (bRm & X86_MODRM_RM_MASK)
6476 {
6477 case 0: idxGstRegBase = X86_GREG_xAX; break;
6478 case 1: idxGstRegBase = X86_GREG_xCX; break;
6479 case 2: idxGstRegBase = X86_GREG_xDX; break;
6480 case 3: idxGstRegBase = X86_GREG_xBX; break;
6481 case 4: /* SIB */
6482 {
6483            /* index with scaling. */
6484 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6485 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6486 {
6487 case 0: idxGstRegIndex = X86_GREG_xAX; break;
6488 case 1: idxGstRegIndex = X86_GREG_xCX; break;
6489 case 2: idxGstRegIndex = X86_GREG_xDX; break;
6490 case 3: idxGstRegIndex = X86_GREG_xBX; break;
6491 case 4: cShiftIndex = 0; /*no index*/ break;
6492 case 5: idxGstRegIndex = X86_GREG_xBP; break;
6493 case 6: idxGstRegIndex = X86_GREG_xSI; break;
6494 case 7: idxGstRegIndex = X86_GREG_xDI; break;
6495 }
6496
6497 /* base */
6498 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
6499 {
6500 case 0: idxGstRegBase = X86_GREG_xAX; break;
6501 case 1: idxGstRegBase = X86_GREG_xCX; break;
6502 case 2: idxGstRegBase = X86_GREG_xDX; break;
6503 case 3: idxGstRegBase = X86_GREG_xBX; break;
6504 case 4:
6505 idxGstRegBase = X86_GREG_xSP;
6506 u32EffAddr += uSibAndRspOffset >> 8;
6507 break;
6508 case 5:
6509 if ((bRm & X86_MODRM_MOD_MASK) != 0)
6510 idxGstRegBase = X86_GREG_xBP;
6511 else
6512 {
6513 Assert(u32EffAddr == 0);
6514 u32EffAddr = u32Disp;
6515 }
6516 break;
6517 case 6: idxGstRegBase = X86_GREG_xSI; break;
6518 case 7: idxGstRegBase = X86_GREG_xDI; break;
6519 }
6520 break;
6521 }
6522 case 5: idxGstRegBase = X86_GREG_xBP; break;
6523 case 6: idxGstRegBase = X86_GREG_xSI; break;
6524 case 7: idxGstRegBase = X86_GREG_xDI; break;
6525 }
6526
6527 /*
6528 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6529 * the start of the function.
6530 */
6531 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6532 {
6533 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
6534 return off;
6535 }
6536
6537 /*
6538 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6539 */
6540 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6541 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6542 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6543 kIemNativeGstRegUse_ReadOnly);
6544 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6545 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6546 kIemNativeGstRegUse_ReadOnly);
6547
6548 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6549 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6550 {
6551 idxRegBase = idxRegIndex;
6552 idxRegIndex = UINT8_MAX;
6553 }
6554
6555#ifdef RT_ARCH_AMD64
6556 if (idxRegIndex == UINT8_MAX)
6557 {
6558 if (u32EffAddr == 0)
6559 {
6560 /* mov ret, base */
6561 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6562 }
6563 else
6564 {
6565 /* lea ret32, [base64 + disp32] */
6566 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6567 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6568 if (idxRegRet >= 8 || idxRegBase >= 8)
6569 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6570 pbCodeBuf[off++] = 0x8d;
6571 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6572 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6573 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6574 else
6575 {
6576 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6577 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6578 }
6579 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6580 if (bMod == X86_MOD_MEM4)
6581 {
6582 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6583 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6584 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6585 }
6586 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6587 }
6588 }
6589 else
6590 {
6591 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6592 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6593 if (idxRegBase == UINT8_MAX)
6594 {
6595 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
6596 if (idxRegRet >= 8 || idxRegIndex >= 8)
6597 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6598 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6599 pbCodeBuf[off++] = 0x8d;
6600 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6601 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6602 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6603 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6604 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6605 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6606 }
6607 else
6608 {
6609 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6610 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6611 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6612 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6613 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6614 pbCodeBuf[off++] = 0x8d;
6615 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6616 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6617 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6618 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6619 if (bMod != X86_MOD_MEM0)
6620 {
6621 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6622 if (bMod == X86_MOD_MEM4)
6623 {
6624 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6625 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6626 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6627 }
6628 }
6629 }
6630 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6631 }
6632
6633#elif defined(RT_ARCH_ARM64)
6634 if (u32EffAddr == 0)
6635 {
6636 if (idxRegIndex == UINT8_MAX)
6637 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6638 else if (idxRegBase == UINT8_MAX)
6639 {
6640 if (cShiftIndex == 0)
6641 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
6642 else
6643 {
6644 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6645 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
6646 }
6647 }
6648 else
6649 {
6650 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6651 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6652 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6653 }
6654 }
6655 else
6656 {
6657 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
6658 {
6659 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6660 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
6661 }
6662 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
6663 {
6664 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6665 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6666 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
6667 }
6668 else
6669 {
6670 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
6671 if (idxRegBase != UINT8_MAX)
6672 {
6673 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6674 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6675 }
6676 }
6677 if (idxRegIndex != UINT8_MAX)
6678 {
6679 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6680 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6681 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6682 }
6683 }
6684
6685#else
6686# error "port me"
6687#endif
6688
6689 if (idxRegIndex != UINT8_MAX)
6690 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6691 if (idxRegBase != UINT8_MAX)
6692 iemNativeRegFreeTmp(pReNative, idxRegBase);
6693 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6694 return off;
6695}
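
/*
 * For reference, the value the code emitted above must produce, written as a plain
 * interpreter-style calculation (illustrative only, not built; pGRegs is a made-up
 * array of the 32-bit guest register values indexed by X86_GREG_xxx):
 */
#if 0 /* illustrative example, not built */
static uint32_t exampleCalcEffAddr32(uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint32_t const *pGRegs)
{
    /* mod=0 && rm=5 is the register-less disp32 form. */
    if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
        return u32Disp;

    uint32_t uEffAddr;
    switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
    {
        case 0:  uEffAddr = 0; break;
        case 1:  uEffAddr = (uint32_t)(int32_t)(int8_t)u32Disp; break; /* disp8, sign extended */
        default: uEffAddr = u32Disp; break;                            /* disp32 */
    }

    if ((bRm & X86_MODRM_RM_MASK) != 4)
        uEffAddr += pGRegs[bRm & X86_MODRM_RM_MASK];
    else
    {
        /* SIB byte in bits 0..7 of uSibAndRspOffset, the pop [esp] adjustment in bits 8..15. */
        uint8_t const bSib   = (uint8_t)uSibAndRspOffset;
        uint8_t const cShift = (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
        uint8_t const iIndex = (bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK;
        uint8_t const iBase  = bSib & X86_SIB_BASE_MASK;
        if (iIndex != 4)                                        /* index=4 means no index register */
            uEffAddr += pGRegs[iIndex] << cShift;
        if (iBase != 5 || (bRm & X86_MODRM_MOD_MASK) != 0)
            uEffAddr += pGRegs[iBase];
        else
            uEffAddr += u32Disp;                                /* mod=0 && base=5: disp32, no base register */
        if (iBase == 4)
            uEffAddr += uSibAndRspOffset >> 8;                  /* fixed ESP adjustment for pop [esp] */
    }
    return uEffAddr;
}
#endif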
6696
6697
6698#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6699 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6700 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6701
6702#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6703 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6704 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6705
6706#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6707 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6708 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
6709
6710/**
6711 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
6712 *
6713 * @returns New off.
6714 * @param pReNative         The native recompiler state.
6715 * @param off               The current offset into the instruction buffer.
6716 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
6717 * bit 4 to REX.X. The two bits are part of the
6718 * REG sub-field, which isn't needed in this
6719 * function.
6720 * @param uSibAndRspOffset Two parts:
6721 * - The first 8 bits make up the SIB byte.
6722 * - The next 8 bits are the fixed RSP/ESP offset
6723 * in case of a pop [xSP].
6724 * @param u32Disp The displacement byte/word/dword, if any.
6725 * @param cbInstr The size of the fully decoded instruction. Used
6726 * for RIP relative addressing.
6727 * @param idxVarRet The result variable number.
6728 * @param f64Bit Whether to use a 64-bit or 32-bit address size
6729 * when calculating the address.
6730 *
6731 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
6732 */
6733DECL_INLINE_THROW(uint32_t)
6734iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
6735 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
6736{
6737 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6738
6739 /*
6740 * Special case the rip + disp32 form first.
6741 */
6742 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6743 {
6744 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6745 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
6746 kIemNativeGstRegUse_ReadOnly);
6747 if (f64Bit)
6748 {
6749#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6750 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr + (int64_t)pReNative->Core.offPc;
6751#else
6752 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
6753#endif
6754#ifdef RT_ARCH_AMD64
6755 if ((int32_t)offFinalDisp == offFinalDisp)
6756 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
6757 else
6758 {
6759 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
6760 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
6761 }
6762#else
6763 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6764#endif
6765 }
6766 else
6767 {
6768# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6769 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr + (int32_t)pReNative->Core.offPc;
6770# else
6771 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr;
6772# endif
6773 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6774 }
6775 iemNativeRegFreeTmp(pReNative, idxRegPc);
6776 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6777 return off;
6778 }
6779
6780    /* Calculate the fixed displacement (more on this below in the SIB.B=4 and SIB.B=5 handling). */
6781 int64_t i64EffAddr = 0;
6782 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6783 {
6784 case 0: break;
6785 case 1: i64EffAddr = (int8_t)u32Disp; break;
6786 case 2: i64EffAddr = (int32_t)u32Disp; break;
6787 default: AssertFailed();
6788 }
6789
6790 /* Get the register (or SIB) value. */
6791 uint8_t idxGstRegBase = UINT8_MAX;
6792 uint8_t idxGstRegIndex = UINT8_MAX;
6793 uint8_t cShiftIndex = 0;
6794 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6795 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6796 else /* SIB: */
6797 {
6798        /* index with scaling. */
6799 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6800 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6801 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6802 if (idxGstRegIndex == 4)
6803 {
6804 /* no index */
6805 cShiftIndex = 0;
6806 idxGstRegIndex = UINT8_MAX;
6807 }
6808
6809 /* base */
6810 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6811 if (idxGstRegBase == 4)
6812 {
6813 /* pop [rsp] hack */
6814 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
6815 }
6816 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6817 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6818 {
6819 /* mod=0 and base=5 -> disp32, no base reg. */
6820 Assert(i64EffAddr == 0);
6821 i64EffAddr = (int32_t)u32Disp;
6822 idxGstRegBase = UINT8_MAX;
6823 }
6824 }
6825
6826 /*
6827 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6828 * the start of the function.
6829 */
6830 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6831 {
6832 if (f64Bit)
6833 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6834 else
6835 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6836 return off;
6837 }
6838
6839 /*
6840 * Now emit code that calculates:
6841 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6842 * or if !f64Bit:
6843 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6844 */
6845 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6846 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6847 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6848 kIemNativeGstRegUse_ReadOnly);
6849 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6850 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6851 kIemNativeGstRegUse_ReadOnly);
6852
6853 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6854 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6855 {
6856 idxRegBase = idxRegIndex;
6857 idxRegIndex = UINT8_MAX;
6858 }
6859
6860#ifdef RT_ARCH_AMD64
6861 uint8_t bFinalAdj;
6862 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6863 bFinalAdj = 0; /* likely */
6864 else
6865 {
6866 /* pop [rsp] with a problematic disp32 value. Split out the
6867 RSP offset and add it separately afterwards (bFinalAdj). */
6868 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
6869 Assert(idxGstRegBase == X86_GREG_xSP);
6870 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6871 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6872 Assert(bFinalAdj != 0);
6873 i64EffAddr -= bFinalAdj;
6874 Assert((int32_t)i64EffAddr == i64EffAddr);
6875 }
6876 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6877//pReNative->pInstrBuf[off++] = 0xcc;
6878
6879 if (idxRegIndex == UINT8_MAX)
6880 {
6881 if (u32EffAddr == 0)
6882 {
6883 /* mov ret, base */
6884 if (f64Bit)
6885 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6886 else
6887 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6888 }
6889 else
6890 {
6891 /* lea ret, [base + disp32] */
6892 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6893 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6894 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
6895 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6896 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6897 | (f64Bit ? X86_OP_REX_W : 0);
6898 pbCodeBuf[off++] = 0x8d;
6899 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6900 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6901 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6902 else
6903 {
6904 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6905 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6906 }
6907 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6908 if (bMod == X86_MOD_MEM4)
6909 {
6910 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6911 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6912 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6913 }
6914 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6915 }
6916 }
6917 else
6918 {
6919 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6920 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6921 if (idxRegBase == UINT8_MAX)
6922 {
6923 /* lea ret, [(index64 << cShiftIndex) + disp32] */
6924 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
6925 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6926 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6927 | (f64Bit ? X86_OP_REX_W : 0);
6928 pbCodeBuf[off++] = 0x8d;
6929 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6930 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6931 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6932 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6933 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6934 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6935 }
6936 else
6937 {
6938 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6939 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6940 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6941 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6942 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
6943 | (f64Bit ? X86_OP_REX_W : 0);
6944 pbCodeBuf[off++] = 0x8d;
6945 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6946 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6947 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6948 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6949 if (bMod != X86_MOD_MEM0)
6950 {
6951 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6952 if (bMod == X86_MOD_MEM4)
6953 {
6954 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6955 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6956 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6957 }
6958 }
6959 }
6960 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6961 }
6962
6963 if (!bFinalAdj)
6964 { /* likely */ }
6965 else
6966 {
6967 Assert(f64Bit);
6968 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
6969 }
6970
6971#elif defined(RT_ARCH_ARM64)
6972 if (i64EffAddr == 0)
6973 {
6974 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6975 if (idxRegIndex == UINT8_MAX)
6976 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
6977 else if (idxRegBase != UINT8_MAX)
6978 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6979 f64Bit, false /*fSetFlags*/, cShiftIndex);
6980 else
6981 {
6982 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
6983 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
6984 }
6985 }
6986 else
6987 {
6988 if (f64Bit)
6989 { /* likely */ }
6990 else
6991 i64EffAddr = (int32_t)i64EffAddr;
6992
6993 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
6994 {
6995 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6996 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
6997 }
6998 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
6999 {
7000 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7001 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
7002 }
7003 else
7004 {
7005 if (f64Bit)
7006 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
7007 else
7008 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
7009 if (idxRegBase != UINT8_MAX)
7010 {
7011 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7012 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
7013 }
7014 }
7015 if (idxRegIndex != UINT8_MAX)
7016 {
7017 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7018 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
7019 f64Bit, false /*fSetFlags*/, cShiftIndex);
7020 }
7021 }
7022
7023#else
7024# error "port me"
7025#endif
7026
7027 if (idxRegIndex != UINT8_MAX)
7028 iemNativeRegFreeTmp(pReNative, idxRegIndex);
7029 if (idxRegBase != UINT8_MAX)
7030 iemNativeRegFreeTmp(pReNative, idxRegBase);
7031 iemNativeVarRegisterRelease(pReNative, idxVarRet);
7032 return off;
7033}
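
/*
 * For reference, the two encodings special-cased above, in plain C (illustrative only,
 * not built; uGuestRip is a stand-in for the guest RIP at the start of the instruction):
 */
#if 0 /* illustrative example, not built */
/* mod=0 && rm=5 is RIP relative: the displacement is relative to the next instruction. */
static uint64_t exampleCalcRipRelative(uint64_t uGuestRip, uint32_t u32Disp, uint8_t cbInstr, bool f64Bit)
{
    uint64_t const uEffAddr = uGuestRip + cbInstr + (uint64_t)(int64_t)(int32_t)u32Disp;
    return f64Bit ? uEffAddr : (uint32_t)uEffAddr;              /* a 32-bit address size truncates the result */
}

/* bRmEx extends the ModRM register fields: bit 3 carries REX.B (base), bit 4 carries REX.X (index). */
static uint8_t exampleBaseRegFromBRmEx(uint8_t bRmEx)
{
    return bRmEx & (X86_MODRM_RM_MASK | 0x8);                   /* 0..15 */
}
static uint8_t exampleIndexRegFromSib(uint8_t bRmEx, uint32_t uSibAndRspOffset)
{
    return (uint8_t)(  ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
                     | ((bRmEx & 0x10) >> 1));                  /* 0..15, where 4 means no index */
}
#endif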
7034
7035
7036/*********************************************************************************************************************************
7037* Memory fetches and stores common *
7038*********************************************************************************************************************************/
7039
7040typedef enum IEMNATIVEMITMEMOP
7041{
7042 kIemNativeEmitMemOp_Store = 0,
7043 kIemNativeEmitMemOp_Fetch,
7044 kIemNativeEmitMemOp_Fetch_Zx_U16,
7045 kIemNativeEmitMemOp_Fetch_Zx_U32,
7046 kIemNativeEmitMemOp_Fetch_Zx_U64,
7047 kIemNativeEmitMemOp_Fetch_Sx_U16,
7048 kIemNativeEmitMemOp_Fetch_Sx_U32,
7049 kIemNativeEmitMemOp_Fetch_Sx_U64
7050} IEMNATIVEMITMEMOP;
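
/*
 * What the Zx/Sx fetch variants mean for the value loaded from guest memory, shown for
 * an 8-bit load widened to 64 bits (illustrative only, not built):
 */
#if 0 /* illustrative example, not built */
static uint64_t exampleFetchZxU8ToU64(uint8_t bLoaded) { return bLoaded; }                            /* 0x80 -> 0x0000000000000080 */
static uint64_t exampleFetchSxU8ToU64(uint8_t bLoaded) { return (uint64_t)(int64_t)(int8_t)bLoaded; } /* 0x80 -> 0xffffffffffffff80 */
#endif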
7051
7052/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
7053 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
7054 * (with iSegReg = UINT8_MAX). */
7055DECL_INLINE_THROW(uint32_t)
7056iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
7057 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAlignMaskAndCtl, IEMNATIVEMITMEMOP enmOp,
7058 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
7059{
7060 /*
7061 * Assert sanity.
7062 */
7063 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7064 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7065 Assert( enmOp != kIemNativeEmitMemOp_Store
7066 || pVarValue->enmKind == kIemNativeVarKind_Immediate
7067 || pVarValue->enmKind == kIemNativeVarKind_Stack);
7068 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7069 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
7070 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
7071 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
7072 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7073 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
7074#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7075 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
7076 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
7077#else
7078 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
7079#endif
7080 Assert(!(fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)));
7081 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7082#ifdef VBOX_STRICT
7083 if (iSegReg == UINT8_MAX)
7084 {
7085 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7086 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7087 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7088 switch (cbMem)
7089 {
7090 case 1:
7091 Assert( pfnFunction
7092 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
7093 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7094 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7095 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7096 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7097 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
7098 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
7099 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
7100 : UINT64_C(0xc000b000a0009000) ));
7101 Assert(!fAlignMaskAndCtl);
7102 break;
7103 case 2:
7104 Assert( pfnFunction
7105 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
7106 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7107 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7108 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7109 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
7110 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
7111 : UINT64_C(0xc000b000a0009000) ));
7112 Assert(fAlignMaskAndCtl <= 1);
7113 break;
7114 case 4:
7115 Assert( pfnFunction
7116 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
7117 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7118 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7119 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
7120 : UINT64_C(0xc000b000a0009000) ));
7121 Assert(fAlignMaskAndCtl <= 3);
7122 break;
7123 case 8:
7124 Assert( pfnFunction
7125 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
7126 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
7127 : UINT64_C(0xc000b000a0009000) ));
7128 Assert(fAlignMaskAndCtl <= 7);
7129 break;
7130#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7131 case sizeof(RTUINT128U):
7132 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7133 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
7134 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7135 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
7136 || ( enmOp == kIemNativeEmitMemOp_Store
7137 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7138 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
7139 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7140 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7141 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
7142 : fAlignMaskAndCtl <= 15);
7143 break;
7144 case sizeof(RTUINT256U):
7145 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7146 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
7147 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
7148 || ( enmOp == kIemNativeEmitMemOp_Store
7149 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
7150 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
7151 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx
7152 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx
7153 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
7154 : fAlignMaskAndCtl <= 31);
7155 break;
7156#endif
7157 }
7158 }
7159 else
7160 {
7161 Assert(iSegReg < 6);
7162 switch (cbMem)
7163 {
7164 case 1:
7165 Assert( pfnFunction
7166 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
7167 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
7168 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7169 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7170 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7171 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
7172 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
7173 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
7174 : UINT64_C(0xc000b000a0009000) ));
7175 Assert(!fAlignMaskAndCtl);
7176 break;
7177 case 2:
7178 Assert( pfnFunction
7179 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
7180 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
7181 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7182 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7183 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
7184 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
7185 : UINT64_C(0xc000b000a0009000) ));
7186 Assert(fAlignMaskAndCtl <= 1);
7187 break;
7188 case 4:
7189 Assert( pfnFunction
7190 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
7191 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
7192 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
7193 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
7194 : UINT64_C(0xc000b000a0009000) ));
7195 Assert(fAlignMaskAndCtl <= 3);
7196 break;
7197 case 8:
7198 Assert( pfnFunction
7199 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
7200 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
7201 : UINT64_C(0xc000b000a0009000) ));
7202 Assert(fAlignMaskAndCtl <= 7);
7203 break;
7204#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7205 case sizeof(RTUINT128U):
7206 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7207 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
7208 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7209 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
7210 || ( enmOp == kIemNativeEmitMemOp_Store
7211 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7212 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
7213 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7214 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7215 ? (fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (uint8_t)fAlignMaskAndCtl == 15
7216 : fAlignMaskAndCtl <= 15);
7217 break;
7218 case sizeof(RTUINT256U):
7219 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
7220 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
7221 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
7222 || ( enmOp == kIemNativeEmitMemOp_Store
7223 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
7224 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
7225 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx
7226 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx
7227 ? (fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (uint8_t)fAlignMaskAndCtl == 31
7228 : fAlignMaskAndCtl <= 31);
7229 break;
7230#endif
7231 }
7232 }
7233#endif
7234
7235#ifdef VBOX_STRICT
7236 /*
7237 * Check that the fExec flags we've got make sense.
7238 */
7239 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7240#endif
7241
7242 /*
7243 * To keep things simple we have to commit any pending writes first as we
7244 * may end up making calls.
7245 */
7246 /** @todo we could postpone this till we make the call and reload the
7247 * registers after returning from the call. Not sure if that's sensible or
7248 * not, though. */
7249#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7250 off = iemNativeRegFlushPendingWrites(pReNative, off);
7251#else
7252 /* The program counter is treated differently for now. */
7253 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
7254#endif
7255
7256#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7257 /*
7258 * Move/spill/flush stuff out of call-volatile registers.
7259 * This is the easy way out. We could contain this to the tlb-miss branch
7260 * by saving and restoring active stuff here.
7261 */
7262 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7263#endif
7264
7265 /*
7266 * Define labels and allocate the result register (trying for the return
7267 * register if we can).
7268 */
7269 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7270#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7271 uint8_t idxRegValueFetch = UINT8_MAX;
7272
7273 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7274 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7275 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
7276 else
7277 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7278 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7279 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7280 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7281#else
7282 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7283 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7284 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7285 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7286#endif
7287 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
7288
7289#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7290 uint8_t idxRegValueStore = UINT8_MAX;
7291
7292 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7293 idxRegValueStore = !TlbState.fSkip
7294 && enmOp == kIemNativeEmitMemOp_Store
7295 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7296 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7297 : UINT8_MAX;
7298 else
7299 idxRegValueStore = !TlbState.fSkip
7300 && enmOp == kIemNativeEmitMemOp_Store
7301 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7302 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7303 : UINT8_MAX;
7304
7305#else
7306 uint8_t const idxRegValueStore = !TlbState.fSkip
7307 && enmOp == kIemNativeEmitMemOp_Store
7308 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7309 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7310 : UINT8_MAX;
7311#endif
7312 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7313 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7314 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7315 : UINT32_MAX;
7316
7317 /*
7318 * Jump to the TLB lookup code.
7319 */
7320 if (!TlbState.fSkip)
7321 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7322
7323 /*
7324 * TlbMiss:
7325 *
7326 * Call helper to do the fetching.
7327 * We flush all guest register shadow copies here.
7328 */
7329 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
7330
7331#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7332 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7333#else
7334 RT_NOREF(idxInstr);
7335#endif
7336
7337#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7338 if (pReNative->Core.offPc)
7339 {
7340 /*
7341 * Update the program counter but restore it at the end of the TlbMiss branch.
7342         * This should allow delaying more program counter updates for the TlbLookup and hit paths,
7343         * which are hopefully much more frequent, reducing the number of memory accesses.
7344 */
7345 /* Allocate a temporary PC register. */
7346/** @todo r=bird: This would technically need to be done up front as it's a register allocation. */
7347 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7348 kIemNativeGstRegUse_ForUpdate);
7349
7350 /* Perform the addition and store the result. */
7351 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7352 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7353# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
7354 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
7355# endif
7356
7357 /* Free and flush the PC register. */
7358 iemNativeRegFreeTmp(pReNative, idxPcReg);
7359 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7360 }
7361#endif
7362
7363#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7364 /* Save variables in volatile registers. */
7365 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7366 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
7367 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
7368 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7369#endif
7370
7371 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
7372 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
7373#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7374 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
7375 {
7376 /*
7377 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
7378 *
7379         * @note A host register was assigned to the variable for the TlbLookup case above and it must not
7380         * be freed, or the value loaded into the register will not be synced back further down the road
7381         * because the variable no longer knows it had a register assigned.
7382 *
7383 * @note For loads it is not required to sync what is in the assigned register with the stack slot
7384 * as it will be overwritten anyway.
7385 */
7386 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7387 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
7388 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
7389 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7390 }
7391 else
7392#endif
7393 if (enmOp == kIemNativeEmitMemOp_Store)
7394 {
7395 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7396 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
7397#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7398 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7399#else
7400 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
7401 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7402#endif
7403 }
7404
7405 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
7406 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
7407#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7408 fVolGregMask);
7409#else
7410 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
7411#endif
7412
7413 if (iSegReg != UINT8_MAX)
7414 {
7415 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
7416 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
7417 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
7418 }
7419
7420 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7421 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7422
7423 /* Done setting up parameters, make the call. */
7424 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7425
7426 /*
7427 * Put the result in the right register if this is a fetch.
7428 */
7429 if (enmOp != kIemNativeEmitMemOp_Store)
7430 {
7431#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7432 if ( cbMem == sizeof(RTUINT128U)
7433 || cbMem == sizeof(RTUINT256U))
7434 {
7435 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
7436
7437 /* Sync the value on the stack with the host register assigned to the variable. */
7438 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
7439 }
7440 else
7441#endif
7442 {
7443 Assert(idxRegValueFetch == pVarValue->idxReg);
7444 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
7445 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
7446 }
7447 }
7448
7449#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7450 /* Restore variables and guest shadow registers to volatile registers. */
7451 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7452 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7453#endif
7454
7455#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7456 if (pReNative->Core.offPc)
7457 {
7458 /*
7459 * Time to restore the program counter to its original value.
7460 */
7461 /* Allocate a temporary PC register. */
7462 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7463 kIemNativeGstRegUse_ForUpdate);
7464
7465 /* Restore the original value. */
7466 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7467 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7468
7469 /* Free and flush the PC register. */
7470 iemNativeRegFreeTmp(pReNative, idxPcReg);
7471 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7472 }
7473#endif
7474
7475#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7476 if (!TlbState.fSkip)
7477 {
7478 /* end of TlbMiss - Jump to the done label. */
7479 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7480 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7481
7482 /*
7483 * TlbLookup:
7484 */
7485 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl,
7486 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
7487 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
7488
7489 /*
7490 * Emit code to do the actual storing / fetching.
7491 */
7492 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7493# ifdef IEM_WITH_TLB_STATISTICS
7494 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7495 enmOp == kIemNativeEmitMemOp_Store
7496 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch)
7497 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore));
7498# endif
7499 switch (enmOp)
7500 {
7501 case kIemNativeEmitMemOp_Store:
7502 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
7503 {
7504 switch (cbMem)
7505 {
7506 case 1:
7507 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7508 break;
7509 case 2:
7510 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7511 break;
7512 case 4:
7513 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7514 break;
7515 case 8:
7516 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7517 break;
7518#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7519 case sizeof(RTUINT128U):
7520 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7521 break;
7522 case sizeof(RTUINT256U):
7523 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7524 break;
7525#endif
7526 default:
7527 AssertFailed();
7528 }
7529 }
7530 else
7531 {
7532 switch (cbMem)
7533 {
7534 case 1:
7535 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
7536 idxRegMemResult, TlbState.idxReg1);
7537 break;
7538 case 2:
7539 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7540 idxRegMemResult, TlbState.idxReg1);
7541 break;
7542 case 4:
7543 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7544 idxRegMemResult, TlbState.idxReg1);
7545 break;
7546 case 8:
7547 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
7548 idxRegMemResult, TlbState.idxReg1);
7549 break;
7550 default:
7551 AssertFailed();
7552 }
7553 }
7554 break;
7555
7556 case kIemNativeEmitMemOp_Fetch:
7557 case kIemNativeEmitMemOp_Fetch_Zx_U16:
7558 case kIemNativeEmitMemOp_Fetch_Zx_U32:
7559 case kIemNativeEmitMemOp_Fetch_Zx_U64:
7560 switch (cbMem)
7561 {
7562 case 1:
7563 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7564 break;
7565 case 2:
7566 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7567 break;
7568 case 4:
7569 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7570 break;
7571 case 8:
7572 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7573 break;
7574#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7575 case sizeof(RTUINT128U):
7576 /*
7577 * No need to sync back the register with the stack, this is done by the generic variable handling
7578 * code if there is a register assigned to a variable and the stack must be accessed.
7579 */
7580 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7581 break;
7582 case sizeof(RTUINT256U):
7583 /*
7584 * No need to sync back the register with the stack, this is done by the generic variable handling
7585 * code if there is a register assigned to a variable and the stack must be accessed.
7586 */
7587 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7588 break;
7589#endif
7590 default:
7591 AssertFailed();
7592 }
7593 break;
7594
7595 case kIemNativeEmitMemOp_Fetch_Sx_U16:
7596 Assert(cbMem == 1);
7597 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7598 break;
7599
7600 case kIemNativeEmitMemOp_Fetch_Sx_U32:
7601 Assert(cbMem == 1 || cbMem == 2);
7602 if (cbMem == 1)
7603 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7604 else
7605 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7606 break;
7607
7608 case kIemNativeEmitMemOp_Fetch_Sx_U64:
7609 switch (cbMem)
7610 {
7611 case 1:
7612 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7613 break;
7614 case 2:
7615 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7616 break;
7617 case 4:
7618 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7619 break;
7620 default:
7621 AssertFailed();
7622 }
7623 break;
7624
7625 default:
7626 AssertFailed();
7627 }
7628
7629 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7630
7631 /*
7632 * TlbDone:
7633 */
7634 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7635
7636 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7637
7638# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7639 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7640 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7641# endif
7642 }
7643#else
7644 RT_NOREF(fAlignMaskAndCtl, idxLabelTlbMiss);
7645#endif
7646
7647 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
7648 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7649 return off;
7650}
7651
7652
7653
7654/*********************************************************************************************************************************
7655* Memory fetches (IEM_MEM_FETCH_XXX). *
7656*********************************************************************************************************************************/
7657
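/*
 * Each IEM_MC_FETCH_MEM_* variant below maps onto iemNativeEmitMemFetchStoreDataCommon,
 * passing the access size, the alignment mask and control flags, the fetch/extension
 * operation, and the helper to call on the TLB-miss path.  Byte accesses pass 0 for the
 * alignment mask since a single byte can never be misaligned.
 */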
7658/* 8-bit segmented: */
7659#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
7660 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
7661 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7662 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7663
7664#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7665 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7666 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7667 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7668
7669#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7670 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7671 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7672 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7673
7674#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7675 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7676 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7677 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7678
7679#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7680 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7681 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7682 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7683
7684#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7685 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7686 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7687 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7688
7689#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7690 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7691 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7692 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7693
7694/* 16-bit segmented: */
7695#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7696 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7697 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7698 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7699
7700#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7701 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
7702 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7703 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7704
7705#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7706 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7707 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7708 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7709
7710#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7711 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7712 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7713 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7714
7715#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7716 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7717 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7718 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7719
7720#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7721 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7722 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7723 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7724
7725
7726/* 32-bit segmented: */
7727#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7728 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7729 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7730 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7731
7732#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7733 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
7734 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7735 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7736
7737#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7738 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7739 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7740 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7741
7742#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7743 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7744 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7745 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7746
7747#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
7748 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
7749 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7750 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7751
7752#define IEM_MC_FETCH_MEM_I16_DISP(a_i16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7753 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
7754 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7755 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7756
7757#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
7758 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
7759 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7760 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7761
7762#define IEM_MC_FETCH_MEM_I32_DISP(a_i32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7763 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
7764 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7765 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7766
7767#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
7768 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
7769 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7770 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7771
7772AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
7773#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
7774 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
7775 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7776 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7777
7778
7779/* 64-bit segmented: */
7780#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7781 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
7782 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7783 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7784
7785AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
7786#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
7787 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
7788 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7789 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7790
7791
7792/* 8-bit flat: */
7793#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
7794 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
7795 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, \
7796 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7797
7798#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
7799 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7800 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
7801 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7802
7803#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
7804 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7805 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7806 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7807
7808#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
7809 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7810 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7811 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7812
7813#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
7814 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7815 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
7816 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7817
7818#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
7819 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7820 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7821 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7822
7823#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7824 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7825 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7826 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7827
7828
7829/* 16-bit flat: */
7830#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7831 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7832 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7833 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7834
7835#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7836 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
7837 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
7838 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7839
7840#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
7841 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7842 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
7843 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7844
7845#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
7846 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7847 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7848 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7849
7850#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
7851 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7852 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7853 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7854
7855#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
7856 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7857 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7858 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7859
7860/* 32-bit flat: */
7861#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7862 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7863 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7864 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7865
7866#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
7867 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
7868 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7869 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7870
7871#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
7872 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7873 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
7874 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7875
7876#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
7877 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7878 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
7879 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7880
7881#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
7882 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7883 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7884 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7885
7886#define IEM_MC_FETCH_MEM_FLAT_I16_DISP(a_i16Dst, a_GCPtrMem, a_offDisp) \
7887 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
7888 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
7889 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7890
7891#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
7892 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7893 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7894 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7895
7896#define IEM_MC_FETCH_MEM_FLAT_I32_DISP(a_i32Dst, a_GCPtrMem, a_offDisp) \
7897 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
7898 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
7899 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7900
7901#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
7902 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
7903 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7904 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7905
7906#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
7907 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
7908 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
7909 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7910
7911
7912/* 64-bit flat: */
7913#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7914 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
7915 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
7916 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7917
7918#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
7919 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
7920 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
7921 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7922
7923#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7924/* 128-bit segmented: */
7925#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
7926 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7927 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7928 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
7929
7930#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
7931 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7932 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7933 kIemNativeEmitMemOp_Fetch, \
7934 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7935
7936AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7937#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
7938 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, sizeof(X86XMMREG), \
7939 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7940 kIemNativeEmitMemOp_Fetch, \
7941 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
7942
7943#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7944 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7945 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7946 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7947
7948#define IEM_MC_FETCH_MEM_XMM_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
7949 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
7950 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7951 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
7952
7953
7954/* 128-bit flat: */
7955#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
7956 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7957 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7958 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
7959
7960#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
7961 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7962 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7963 kIemNativeEmitMemOp_Fetch, \
7964 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7965
7966#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
7967 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, sizeof(X86XMMREG), \
7968 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
7969 kIemNativeEmitMemOp_Fetch, \
7970 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
7971
7972#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
7973 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
7974 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
7975 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7976
7977#define IEM_MC_FETCH_MEM_FLAT_XMM_NO_AC(a_uXmmDst, a_GCPtrMem) \
7978 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
7979 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
7980 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
7981
7982/* 256-bit segmented: */
7983#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
7984 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7985 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7986 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7987
7988#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7989 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
7990 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
7991 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
7992
7993#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
7994 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
7995 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
7996 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
7997
7998#define IEM_MC_FETCH_MEM_YMM_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
7999 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
8000 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
8001 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8002
8003
8004/* 256-bit flat: */
8005#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
8006 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
8007 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
8008 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8009
8010#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
8011 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
8012 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
8013 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8014
8015#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
8016 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
8017 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Fetch, \
8018 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
8019
8020#define IEM_MC_FETCH_MEM_FLAT_YMM_NO_AC(a_uYmmDst, a_GCPtrMem) \
8021 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uYmmDst, UINT8_MAX, a_GCPtrMem, \
8022 sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
8023 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8024
8025#endif
8026
8027
8028/*********************************************************************************************************************************
8029* Memory stores (IEM_MEM_STORE_XXX). *
8030*********************************************************************************************************************************/
8031
8032#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
8033 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
8034 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
8035 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8036
8037#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
8038 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
8039 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
8040 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8041
8042#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
8043 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
8044 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
8045 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8046
8047#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
8048 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
8049 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
8050 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8051
8052
8053#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
8054 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
8055 sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, \
8056 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8057
8058#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
8059 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
8060 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
8061 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8062
8063#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
8064 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
8065 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
8066 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8067
8068#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
8069 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
8070 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
8071 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8072
8073
8074#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
8075 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8076 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8077
8078#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
8079 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8080 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8081
8082#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
8083 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8084 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8085
8086#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
8087 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8088 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8089
8090
8091#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
8092 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
8093 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8094
8095#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
8096 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
8097 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8098
8099#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
8100 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
8101 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8102
8103#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
8104 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
8105 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8106
8107/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
8108 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
8109DECL_INLINE_THROW(uint32_t)
8110iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
8111 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
8112{
8113 /*
8114 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
8115 * to do the grunt work.
8116 */
8117 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
8118 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
8119 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
8120 pfnFunction, idxInstr);
8121 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
8122 return off;
8123}
8124
8125
8126#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8127# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
8128 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8129 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8130 kIemNativeEmitMemOp_Store, \
8131 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
8132
8133# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
8134 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
8135 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
8136 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
8137
8138# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
8139 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
8140 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
8141 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
8142
8143# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
8144 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, sizeof(RTUINT256U), \
8145 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
8146 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8147
8148
8149# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
8150 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
8151 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8152 kIemNativeEmitMemOp_Store, \
8153 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
8154
8155# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
8156 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
8157 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
8158 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
8159
8160# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
8161 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
8162 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
8163 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
8164
8165# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
8166 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT256U), \
8167 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
8168 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8169#endif
8170
8171
8172
8173/*********************************************************************************************************************************
8174* Stack Accesses. *
8175*********************************************************************************************************************************/
8176/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
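/* Example: IEM_MC_FLAT64_PUSH_U64 passes RT_MAKE_U32_FROM_U8(64, 64, 0, 0) - byte 0 is the
   operand width in bits, byte 1 the flat stack width (0 = segmented stack), and byte 2 the
   segment-register flag used by the _SREG push variants. */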
8177#define IEM_MC_PUSH_U16(a_u16Value) \
8178 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
8179 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
8180#define IEM_MC_PUSH_U32(a_u32Value) \
8181 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
8182 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
8183#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
8184 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
8185 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
8186#define IEM_MC_PUSH_U64(a_u64Value) \
8187 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
8188 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
8189
8190#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
8191 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
8192 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8193#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
8194 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
8195 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
8196#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
8197 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
8198 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
8199
8200#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
8201 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
8202 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8203#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
8204 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
8205 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
8206
8207
8208/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
8209DECL_INLINE_THROW(uint32_t)
8210iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
8211 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
8212{
8213 /*
8214 * Assert sanity.
8215 */
8216 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
8217 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
8218#ifdef VBOX_STRICT
8219 if (RT_BYTE2(cBitsVarAndFlat) != 0)
8220 {
8221 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8222 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8223 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8224 Assert( pfnFunction
8225 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8226 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
8227 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
8228 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8229 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
8230 : UINT64_C(0xc000b000a0009000) ));
8231 }
8232 else
8233 Assert( pfnFunction
8234 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
8235 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
8236 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
8237 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
8238 : UINT64_C(0xc000b000a0009000) ));
8239#endif
8240
8241#ifdef VBOX_STRICT
8242 /*
8243 * Check that the fExec flags we've got make sense.
8244 */
8245 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8246#endif
8247
8248 /*
8249 * To keep things simple we have to commit any pending writes first as we
8250 * may end up making calls.
8251 */
8252 /** @todo we could postpone this till we make the call and reload the
8253 * registers after returning from the call. Not sure if that's sensible or
8254 * not, though. */
8255 off = iemNativeRegFlushPendingWrites(pReNative, off);
8256
8257 /*
8258 * First we calculate the new RSP and the effective stack pointer value.
8259 * For 64-bit mode and flat 32-bit these two are the same.
8260     * (Code structure is very similar to that of POP)
8261 */
8262 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
8263 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
8264 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
8265 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
8266 ? cbMem : sizeof(uint16_t);
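    /* On Intel CPUs a wide segment-register push outside 16-bit code only writes the low
       word, hence the smaller access size; in 16-bit code the full operand width is
       written (with EFLAGS bits ending up in the high word on a real-mode PUSH FS, see
       the TLB-hit path below). */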
8267 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
8268 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8269 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8270 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8271 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8272 if (cBitsFlat != 0)
8273 {
8274 Assert(idxRegEffSp == idxRegRsp);
8275 Assert(cBitsFlat == 32 || cBitsFlat == 64);
8276 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8277 if (cBitsFlat == 64)
8278 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
8279 else
8280 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
8281 }
8282 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8283 {
8284 Assert(idxRegEffSp != idxRegRsp);
8285 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8286 kIemNativeGstRegUse_ReadOnly);
8287#ifdef RT_ARCH_AMD64
8288 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8289#else
8290 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8291#endif
8292 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8293 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8294 offFixupJumpToUseOtherBitSp = off;
8295 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8296 {
8297 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8298 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8299 }
8300 else
8301 {
8302 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8303 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8304 }
8305 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8306 }
8307 /* SpUpdateEnd: */
8308 uint32_t const offLabelSpUpdateEnd = off;
8309
8310 /*
8311 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
8312 * we're skipping lookup).
8313 */
8314 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8315 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
8316 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8317 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8318 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8319 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8320 : UINT32_MAX;
8321 uint8_t const idxRegValue = !TlbState.fSkip
8322 && pVarValue->enmKind != kIemNativeVarKind_Immediate
8323 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
8324 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
8325 : UINT8_MAX;
8326 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
8327
8328
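    /* When TlbState.fSkip is set no inline lookup is emitted and everything goes through
       the TlbMiss helper call below. */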
8329 if (!TlbState.fSkip)
8330 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8331 else
8332 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8333
8334 /*
8335 * Use16BitSp:
8336 */
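    /* Only emitted for segmented (non-flat) stacks: the conditional jump recorded in
       offFixupJumpToUseOtherBitSp above lands here, and after updating RSP and the
       effective stack pointer we jump back to SpUpdateEnd. */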
8337 if (cBitsFlat == 0)
8338 {
8339#ifdef RT_ARCH_AMD64
8340 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8341#else
8342 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8343#endif
8344 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8345 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8346 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8347 else
8348 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8349 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8350 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8351 }
8352
8353 /*
8354 * TlbMiss:
8355 *
8356 * Call helper to do the pushing.
8357 */
8358 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8359
8360#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8361 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8362#else
8363 RT_NOREF(idxInstr);
8364#endif
8365
8366 /* Save variables in volatile registers. */
8367 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8368 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8369 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
8370 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
8371 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8372
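    /* Load the helper arguments; the ordering below ensures that moving the effective
       stack pointer into ARG1 and the value into ARG2 never overwrites a source that is
       still needed (swapping via ARG0 when both would collide). */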
8373 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
8374 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
8375 {
8376 /* Swap them using ARG0 as temp register: */
8377 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
8378 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
8379 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
8380 }
8381 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
8382 {
8383 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
8384 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
8385 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8386
8387 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
8388 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8389 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8390 }
8391 else
8392 {
8393 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
8394 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8395
8396 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
8397 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
8398 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
8399 }
8400
8401 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8402 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8403
8404 /* Done setting up parameters, make the call. */
8405 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8406
8407 /* Restore variables and guest shadow registers to volatile registers. */
8408 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8409 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8410
8411#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8412 if (!TlbState.fSkip)
8413 {
8414 /* end of TlbMiss - Jump to the done label. */
8415 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8416 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8417
8418 /*
8419 * TlbLookup:
8420 */
8421 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
8422 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8423
8424 /*
8425 * Emit code to do the actual storing / fetching.
8426 */
8427 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
8428# ifdef IEM_WITH_TLB_STATISTICS
8429 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8430 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8431# endif
8432 if (idxRegValue != UINT8_MAX)
8433 {
8434 switch (cbMemAccess)
8435 {
8436 case 2:
8437 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8438 break;
8439 case 4:
8440 if (!fIsIntelSeg)
8441 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8442 else
8443 {
8444                        /* intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
8445                           PUSH FS in real mode, so we have to try to emulate that here.
8446 We borrow the now unused idxReg1 from the TLB lookup code here. */
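                        /* The 32-bit value stored is thus (EFLAGS & 0xffff0000 & ~RAZ) | selector. */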
8447 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
8448 kIemNativeGstReg_EFlags);
8449 if (idxRegEfl != UINT8_MAX)
8450 {
8451#ifdef RT_ARCH_AMD64
8452 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
8453 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8454 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8455#else
8456 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
8457 off, TlbState.idxReg1, idxRegEfl,
8458 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8459#endif
8460 iemNativeRegFreeTmp(pReNative, idxRegEfl);
8461 }
8462 else
8463 {
8464 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
8465 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
8466 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8467 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8468 }
8469 /* ASSUMES the upper half of idxRegValue is ZERO. */
8470 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
8471 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
8472 }
8473 break;
8474 case 8:
8475 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8476 break;
8477 default:
8478 AssertFailed();
8479 }
8480 }
8481 else
8482 {
8483 switch (cbMemAccess)
8484 {
8485 case 2:
8486 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
8487 idxRegMemResult, TlbState.idxReg1);
8488 break;
8489 case 4:
8490 Assert(!fIsSegReg);
8491 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
8492 idxRegMemResult, TlbState.idxReg1);
8493 break;
8494 case 8:
8495 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
8496 break;
8497 default:
8498 AssertFailed();
8499 }
8500 }
8501
8502 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8503 TlbState.freeRegsAndReleaseVars(pReNative);
8504
8505 /*
8506 * TlbDone:
8507 *
8508 * Commit the new RSP value.
8509 */
8510 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8511 }
8512#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8513
8514#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8515 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
8516#endif
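    /* (With IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK the modified RSP shadow is left
       dirty and written back later by the common register flushing code.) */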
8517 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8518 if (idxRegEffSp != idxRegRsp)
8519 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8520
8521    /* The value variable is implicitly flushed. */
8522 if (idxRegValue != UINT8_MAX)
8523 iemNativeVarRegisterRelease(pReNative, idxVarValue);
8524 iemNativeVarFreeLocal(pReNative, idxVarValue);
8525
8526 return off;
8527}
8528
8529
8530
8531/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
8532#define IEM_MC_POP_GREG_U16(a_iGReg) \
8533 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
8534 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
8535#define IEM_MC_POP_GREG_U32(a_iGReg) \
8536 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
8537 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
8538#define IEM_MC_POP_GREG_U64(a_iGReg) \
8539 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
8540 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
8541
8542#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
8543 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
8544 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8545#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
8546 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
8547 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
8548
8549#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
8550 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
8551 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8552#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
8553 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
8554 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
8555
8556
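/** Loads idxRegEffSp with the current 16-bit SP and post-increments the SP field of
 *  idxRegRsp by cbMem (modulo 64K), leaving RSP bits 63:16 untouched.  The temporary
 *  register is only needed on arm64. */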
8557DECL_FORCE_INLINE_THROW(uint32_t)
8558iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
8559 uint8_t idxRegTmp)
8560{
8561 /* Use16BitSp: */
8562#ifdef RT_ARCH_AMD64
8563 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8564 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
8565 RT_NOREF(idxRegTmp);
8566#else
8567 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
8568 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
8569 /* add tmp, regrsp, #cbMem */
8570 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
8571 /* and tmp, tmp, #0xffff */
8572 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
8573 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
8574    /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
8575 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
8576#endif
8577 return off;
8578}
8579
8580
8581DECL_FORCE_INLINE(uint32_t)
8582iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
8583{
8584 /* Use32BitSp: */
8585 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8586 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
8587 return off;
8588}
8589
8590
8591/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
8592DECL_INLINE_THROW(uint32_t)
8593iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
8594 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
8595{
8596 /*
8597 * Assert sanity.
8598 */
8599 Assert(idxGReg < 16);
8600#ifdef VBOX_STRICT
8601 if (RT_BYTE2(cBitsVarAndFlat) != 0)
8602 {
8603 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8604 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8605 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8606 Assert( pfnFunction
8607 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8608 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
8609 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8610 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
8611 : UINT64_C(0xc000b000a0009000) ));
8612 }
8613 else
8614 Assert( pfnFunction
8615 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
8616 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
8617 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
8618 : UINT64_C(0xc000b000a0009000) ));
8619#endif
8620
8621#ifdef VBOX_STRICT
8622 /*
8623 * Check that the fExec flags we've got make sense.
8624 */
8625 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8626#endif
8627
8628 /*
8629 * To keep things simple we have to commit any pending writes first as we
8630 * may end up making calls.
8631 */
8632 off = iemNativeRegFlushPendingWrites(pReNative, off);
8633
8634 /*
8635 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
8636 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
8637 * directly as the effective stack pointer.
8638 * (Code structure is very similar to that of PUSH)
8639 */
8640 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
8641 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
8642 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8643 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8644 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8645 /** @todo can do a better job picking the register here. For cbMem >= 4 this
8646 * will be the resulting register value. */
8647 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
8648
8649 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8650 if (cBitsFlat != 0)
8651 {
8652 Assert(idxRegEffSp == idxRegRsp);
8653 Assert(cBitsFlat == 32 || cBitsFlat == 64);
8654 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8655 }
8656 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8657 {
8658 Assert(idxRegEffSp != idxRegRsp);
8659 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8660 kIemNativeGstRegUse_ReadOnly);
8661#ifdef RT_ARCH_AMD64
8662 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8663#else
8664 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8665#endif
8666 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8667 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8668 offFixupJumpToUseOtherBitSp = off;
8669 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8670 {
8671/** @todo can skip idxRegRsp updating when popping ESP. */
8672 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8673 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8674 }
8675 else
8676 {
8677 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8678 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8679 }
8680 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8681 }
8682 /* SpUpdateEnd: */
8683 uint32_t const offLabelSpUpdateEnd = off;
8684
8685 /*
8686 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
8687 * we're skipping lookup).
8688 */
8689 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8690 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
8691 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8692 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8693 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8694 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8695 : UINT32_MAX;
8696
8697 if (!TlbState.fSkip)
8698 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8699 else
8700 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8701
8702 /*
8703 * Use16BitSp / Use32BitSp (whichever was not handled inline above):
8704 */
8705 if (cBitsFlat == 0)
8706 {
8707#ifdef RT_ARCH_AMD64
8708 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8709#else
8710 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8711#endif
8712 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8713 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8714 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8715 else
8716 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8717 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8718 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8719 }
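 /* Whichever SP width was handled out of line above jumps back to SpUpdateEnd,
    so both branches of the SS.ATTR.D test continue with the same TLB lookup /
    TLB miss preparation below. */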
8720
8721 /*
8722 * TlbMiss:
8723 *
8724 * Call helper to do the popping.
8725 */
8726 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8727
8728#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8729 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8730#else
8731 RT_NOREF(idxInstr);
8732#endif
8733
8734 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8735 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8736 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
8737 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8738
8739
8740 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
8741 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8742 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8743
8744 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8745 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8746
8747 /* Done setting up parameters, make the call. */
8748 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
8749
8750 /* Move the return register content to idxRegMemResult. */
8751 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8752 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8753
8754 /* Restore variables and guest shadow registers to volatile registers. */
8755 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8756 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8757
8758#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8759 if (!TlbState.fSkip)
8760 {
8761 /* end of TlbMiss - Jump to the done label. */
8762 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8763 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8764
8765 /*
8766 * TlbLookup:
8767 */
8768 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
8769 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8770
8771 /*
8772 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult itself).
8773 */
8774 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8775# ifdef IEM_WITH_TLB_STATISTICS
8776 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8777 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8778# endif
8779 switch (cbMem)
8780 {
8781 case 2:
8782 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8783 break;
8784 case 4:
8785 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8786 break;
8787 case 8:
8788 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8789 break;
8790 default:
8791 AssertFailed();
8792 }
8793
8794 TlbState.freeRegsAndReleaseVars(pReNative);
8795
8796 /*
8797 * TlbDone:
8798 *
8799 * Set the new RSP value (FLAT accesses needs to calculate it first) and
8800 * commit the popped register value.
8801 */
8802 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8803 }
8804#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8805
8806 if (idxGReg != X86_GREG_xSP)
8807 {
8808 /* Set the register. */
8809 if (cbMem >= sizeof(uint32_t))
8810 {
8811#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8812 AssertMsg( pReNative->idxCurCall == 0
8813 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
8814 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName,
8815 iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
8816#endif
8817 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
8818#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8819 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
8820#endif
8821#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8822 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
8823 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8824#endif
8825 }
8826 else
8827 {
8828 Assert(cbMem == sizeof(uint16_t));
8829 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
8830 kIemNativeGstRegUse_ForUpdate);
8831 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
8832#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8833 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8834#endif
8835 iemNativeRegFreeTmp(pReNative, idxRegDst);
8836 }
8837
8838 /* Complete RSP calculation for FLAT mode. */
8839 if (idxRegEffSp == idxRegRsp)
8840 {
8841 if (cBitsFlat == 64)
8842 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8843 else
8844 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8845 }
8846 }
8847 else
8848 {
8849 /* We're popping RSP, ESP or SP. Only this needs a bit of extra work, of course. */
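 /* For 64-bit and 32-bit operand sizes the popped value simply becomes the new
    RSP/ESP, so no separate stack pointer increment is emitted; only the 16-bit
    case needs the deferred FLAT increment followed by merging the popped word
    into the low 16 bits of RSP. */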
8850 if (cbMem == sizeof(uint64_t))
8851 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
8852 else if (cbMem == sizeof(uint32_t))
8853 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
8854 else
8855 {
8856 if (idxRegEffSp == idxRegRsp)
8857 {
8858 if (cBitsFlat == 64)
8859 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8860 else
8861 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8862 }
8863 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
8864 }
8865 }
8866
8867#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8868 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
8869#endif
8870
8871 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8872 if (idxRegEffSp != idxRegRsp)
8873 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8874 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8875
8876 return off;
8877}
8878
8879
8880
8881/*********************************************************************************************************************************
8882* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
8883*********************************************************************************************************************************/
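/* All IEM_MC_MEM_[FLAT_]MAP_* variants below funnel into iemNativeEmitMemMapCommon:
   the suffix selects the access mode (ATOMIC/RW/WO/RO) and the matching helper,
   fAlignMaskAndCtl is the natural alignment mask (size - 1, zero for bytes), and
   the FLAT variants pass UINT8_MAX as the segment register to indicate a flat
   address. */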
8884
8885#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8886 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8887 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
8888 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
8889
8890#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8891 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8892 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
8893 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
8894
8895#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8896 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8897 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
8898 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
8899
8900#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8901 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
8902 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
8903 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
8904
8905
8906#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8907 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8908 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8909 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
8910
8911#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8912 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8913 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8914 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
8915
8916#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8917 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8918 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8919 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8920
8921#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8922 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
8923 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8924 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
8925
8926#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8927 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
8928 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
8929 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8930
8931
8932#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8933 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8934 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8935 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
8936
8937#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8938 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8939 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8940 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
8941
8942#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8943 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8944 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8945 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8946
8947#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8948 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
8949 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8950 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
8951
8952#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8953 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
8954 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
8955 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8956
8957
8958#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8959 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8960 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8961 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
8962
8963#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8964 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8965 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8966 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
8967#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8968 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8969 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8970 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8971
8972#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8973 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
8974 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8975 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
8976
8977#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8978 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
8979 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8980 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
8981
8982
8983#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8984 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8985 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
8986 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
8987
8988#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8989 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
8990 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
8991 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
8992
8993
8994#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8995 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
8996 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
8997 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
8998
8999#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9000 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9001 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9002 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
9003
9004#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9005 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9006 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9007 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
9008
9009#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9010 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
9011 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9012 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
9013
9014
9015
9016#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9017 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9018 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, \
9019 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
9020
9021#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9022 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9023 IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, \
9024 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
9025
9026#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9027 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9028 IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, \
9029 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
9030
9031#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9032 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
9033 IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, \
9034 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
9035
9036
9037#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9038 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9039 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9040 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
9041
9042#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9043 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9044 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9045 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
9046
9047#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9048 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9049 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9050 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9051
9052#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9053 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
9054 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9055 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
9056
9057#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
9058 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
9059 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, \
9060 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9061
9062
9063#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9064 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9065 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9066 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
9067
9068#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9069 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9070 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9071 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
9072
9073#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9074 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9075 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9076 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9077
9078#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9079 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
9080 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9081 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
9082
9083#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
9084 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
9085 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, \
9086 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9087
9088
9089#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9090 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9091 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9092 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
9093
9094#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9095 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9096 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9097 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
9098
9099#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9100 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9101 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9102 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9103
9104#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9105 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
9106 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9107 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
9108
9109#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
9110 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
9111 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9112 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9113
9114
9115#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
9116 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
9117 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, \
9118 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
9119
9120#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
9121 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
9122 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, /** @todo check BCD align */ \
9123 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
9124
9125
9126#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9127 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9128 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9129 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
9130
9131#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9132 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9133 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9134 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
9135
9136#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9137 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9138 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9139 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
9140
9141#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9142 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
9143 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, \
9144 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
9145
9146
9147DECL_INLINE_THROW(uint32_t)
9148iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
9149 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint32_t fAlignMaskAndCtl,
9150 uintptr_t pfnFunction, uint8_t idxInstr)
9151{
9152 /*
9153 * Assert sanity.
9154 */
9155 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
9156 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
9157 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
9158 && pVarMem->cbVar == sizeof(void *),
9159 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9160
9161 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9162 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9163 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
9164 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
9165 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9166
9167 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
9168 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
9169 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
9170 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
9171 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9172
9173 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
9174
9175 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
9176
9177#ifdef VBOX_STRICT
9178# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
9179 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
9180 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
9181 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
9182 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
9183# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
9184 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
9185 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
9186 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
9187
9188 if (iSegReg == UINT8_MAX)
9189 {
9190 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
9191 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
9192 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
9193 switch (cbMem)
9194 {
9195 case 1:
9196 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8));
9197 Assert(!fAlignMaskAndCtl);
9198 break;
9199 case 2:
9200 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16));
9201 Assert(fAlignMaskAndCtl < 2);
9202 break;
9203 case 4:
9204 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32));
9205 Assert(fAlignMaskAndCtl < 4);
9206 break;
9207 case 8:
9208 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64));
9209 Assert(fAlignMaskAndCtl < 8);
9210 break;
9211 case 10:
9212 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
9213 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
9214 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9215 Assert(fAlignMaskAndCtl < 8);
9216 break;
9217 case 16:
9218 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128));
9219 Assert(fAlignMaskAndCtl < 16);
9220 break;
9221# if 0
9222 case 32:
9223 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256));
9224 Assert(fAlignMaskAndCtl < 32);
9225 break;
9226 case 64:
9227 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512));
9228 Assert(fAlignMaskAndCtl < 64);
9229 break;
9230# endif
9231 default: AssertFailed(); break;
9232 }
9233 }
9234 else
9235 {
9236 Assert(iSegReg < 6);
9237 switch (cbMem)
9238 {
9239 case 1:
9240 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8));
9241 Assert(!fAlignMaskAndCtl);
9242 break;
9243 case 2:
9244 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16));
9245 Assert(fAlignMaskAndCtl < 2);
9246 break;
9247 case 4:
9248 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32));
9249 Assert(fAlignMaskAndCtl < 4);
9250 break;
9251 case 8:
9252 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64));
9253 Assert(fAlignMaskAndCtl < 8);
9254 break;
9255 case 10:
9256 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
9257 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
9258 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9259 Assert(fAlignMaskAndCtl < 8);
9260 break;
9261 case 16:
9262 Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128));
9263 Assert(fAlignMaskAndCtl < 16);
9264 break;
9265# if 0
9266 case 32:
9267 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256));
9268 Assert(fAlignMaskAndCtl < 32);
9269 break;
9270 case 64:
9271 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512));
9272 Assert(fAlignMaskAndCtl < 64);
9273 break;
9274# endif
9275 default: AssertFailed(); break;
9276 }
9277 }
9278# undef IEM_MAP_HLP_FN
9279# undef IEM_MAP_HLP_FN_NO_AT
9280#endif
9281
9282#ifdef VBOX_STRICT
9283 /*
9284 * Check that the fExec flags we've got make sense.
9285 */
9286 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
9287#endif
9288
9289 /*
9290 * To keep things simple we have to commit any pending writes first as we
9291 * may end up making calls.
9292 */
9293 off = iemNativeRegFlushPendingWrites(pReNative, off);
9294
9295#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9296 /*
9297 * Move/spill/flush stuff out of call-volatile registers.
9298 * This is the easy way out. We could contain this to the tlb-miss branch
9299 * by saving and restoring active stuff here.
9300 */
9301 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
9302 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
9303#endif
9304
9305 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
9306 while the tlb-miss codepath will temporarily put it on the stack.
9307 Set the type to stack here so we don't need to do it twice below. */
9308 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
9309 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
9310 /** @todo use a tmp register from TlbState, since they'll be free after tlb
9311 * lookup is done. */
9312
9313 /*
9314 * Define labels and allocate the result register (trying for the return
9315 * register if we can).
9316 */
9317 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
9318 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
9319 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
9320 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
9321 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
9322 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
9323 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
9324 : UINT32_MAX;
9325
9326 /*
9327 * Jump to the TLB lookup code.
9328 */
9329 if (!TlbState.fSkip)
9330 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
9331
9332 /*
9333 * TlbMiss:
9334 *
9335 * Call helper to do the fetching.
9336 * We flush all guest register shadow copies here.
9337 */
9338 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
9339
9340#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
9341 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9342#else
9343 RT_NOREF(idxInstr);
9344#endif
9345
9346#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9347 /* Save variables in volatile registers. */
9348 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
9349 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
9350#endif
9351
9352 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
9353 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
9354#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9355 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
9356#else
9357 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9358#endif
9359
9360 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
9361 if (iSegReg != UINT8_MAX)
9362 {
9363 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
9364 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
9365 }
9366
9367 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
9368 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
9369 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
9370
9371 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9372 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9373
9374 /* Done setting up parameters, make the call. */
9375 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
9376
9377 /*
9378 * Put the output in the right registers.
9379 */
9380 Assert(idxRegMemResult == pVarMem->idxReg);
9381 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
9382 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
9383
9384#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9385 /* Restore variables and guest shadow registers to volatile registers. */
9386 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
9387 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
9388#endif
9389
9390 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
9391 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
9392
9393#ifdef IEMNATIVE_WITH_TLB_LOOKUP
9394 if (!TlbState.fSkip)
9395 {
9396 /* end of TlbMiss - Jump to the done label. */
9397 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
9398 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
9399
9400 /*
9401 * TlbLookup:
9402 */
9403 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMaskAndCtl, fAccess,
9404 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
9405# ifdef IEM_WITH_TLB_STATISTICS
9406 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
9407 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
9408# endif
9409
9410 /* [idxVarUnmapInfo] = 0; */
9411 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
9412
9413 /*
9414 * TlbDone:
9415 */
9416 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
9417
9418 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
9419
9420# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9421 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
9422 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9423# endif
9424 }
9425#else
9426 RT_NOREF(fAccess, fAlignMaskAndCtl, idxLabelTlbMiss);
9427#endif
9428
9429 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9430 iemNativeVarRegisterRelease(pReNative, idxVarMem);
9431
9432 return off;
9433}
9434
9435
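/* The commit-and-unmap emitters below pair with the IEM_MC_MEM_[FLAT_]MAP_* emitters
   above: they consume the bUnmapInfo value produced by the mapping MC, and the
   access type must match the one used for the mapping (the strict-build asserts
   on pfnFunction below check this). */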
9436#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
9437 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
9438 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
9439
9440#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
9441 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
9442 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
9443
9444#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
9445 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
9446 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
9447
9448#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
9449 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
9450 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
9451
9452DECL_INLINE_THROW(uint32_t)
9453iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
9454 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
9455{
9456 /*
9457 * Assert sanity.
9458 */
9459 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9460#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
9461 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9462#endif
9463 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
9464 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
9465 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
9466#ifdef VBOX_STRICT
9467 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
9468 {
9469 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
9470 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
9471 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
9472 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
9473 case IEM_ACCESS_TYPE_WRITE:
9474 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
9475 case IEM_ACCESS_TYPE_READ:
9476 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
9477 default: AssertFailed();
9478 }
9479#else
9480 RT_NOREF(fAccess);
9481#endif
9482
9483 /*
9484 * To keep things simple we have to commit any pending writes first as we
9485 * may end up making calls (there shouldn't be any at this point, so this
9486 * is just for consistency).
9487 */
9488 /** @todo we could postpone this till we make the call and reload the
9489 * registers after returning from the call. Not sure if that's sensible or
9490 * not, though. */
9491 off = iemNativeRegFlushPendingWrites(pReNative, off);
9492
9493 /*
9494 * Move/spill/flush stuff out of call-volatile registers.
9495 *
9496 * We exclude any register holding the bUnmapInfo variable, as we'll be
9497 * checking it after returning from the call and will free it afterwards.
9498 */
9499 /** @todo save+restore active registers and maybe guest shadows in miss
9500 * scenario. */
9501 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
9502 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
9503
9504 /*
9505 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
9506 * to call the unmap helper function.
9507 *
9508 * The likelihood of it being zero is higher than for the TLB hit when doing
9509 * the mapping, as a TLB miss for a well-aligned and unproblematic memory
9510 * access should also end up with a mapping that won't need special unmapping.
9511 */
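 /* For reference: the TLB-hit path emitted by iemNativeEmitMemMapCommon above
    stores zero into bUnmapInfo, so direct mappings take the jz below and skip
    the helper call entirely; only mappings set up by the TLB-miss helper leave
    a non-zero value here. */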
9512 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
9513 * should speed up things for the pure interpreter as well when TLBs
9514 * are enabled. */
9515#ifdef RT_ARCH_AMD64
9516 if (pVarUnmapInfo->idxReg == UINT8_MAX)
9517 {
9518 /* test byte [rbp - xxx], 0ffh */
9519 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9520 pbCodeBuf[off++] = 0xf6;
9521 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
9522 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9523 pbCodeBuf[off++] = 0xff;
9524 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9525 }
9526 else
9527#endif
9528 {
9529 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
9530 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
9531 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
9532 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9533 }
9534 uint32_t const offJmpFixup = off;
9535 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
9536
9537 /*
9538 * Call the unmap helper function.
9539 */
9540#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
9541 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9542#else
9543 RT_NOREF(idxInstr);
9544#endif
9545
9546 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
9547 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
9548 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9549
9550 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9551 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9552
9553 /* Done setting up parameters, make the call. */
9554 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
9555
9556 /* The bUnmapInfo variable is implicitly freed by these MCs. */
9557 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
9558
9559 /*
9560 * Done, just fixup the jump for the non-call case.
9561 */
9562 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
9563
9564 return off;
9565}
9566
9567
9568
9569/*********************************************************************************************************************************
9570* State and Exceptions *
9571*********************************************************************************************************************************/
9572
9573#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9574#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9575
9576#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9577#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9578#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9579
9580#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9581#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9582#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9583
9584
9585DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
9586{
9587#ifndef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
9588 RT_NOREF(pReNative, fForChange);
9589#else
9590 if ( !(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED)
9591 && fForChange)
9592 {
9593# ifdef RT_ARCH_AMD64
9594
9595 /* Need to save the host MXCSR the first time, and clear the exception flags. */
9596 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9597 {
9598 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9599
9600 /* stmxcsr */
9601 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9602 pbCodeBuf[off++] = X86_OP_REX_B;
9603 pbCodeBuf[off++] = 0x0f;
9604 pbCodeBuf[off++] = 0xae;
9605 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 3, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9606 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9607 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9608 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9609 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9610 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9611
9612 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9613 }
9614
9615 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
9616 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
9617
9618 /*
9619 * Mask all exceptions, clear the exception status and load the result into MXCSR,
9620 * taking a detour through memory here because ldmxcsr/stmxcsr don't support
9621 * a register source/target (sigh).
9622 */
9623 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr);
9624 off = iemNativeEmitOrGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, X86_MXCSR_XCPT_MASK);
9625 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, ~X86_MXCSR_XCPT_FLAGS);
9626 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9627
9628 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9629
9630 /* ldmxcsr */
9631 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9632 pbCodeBuf[off++] = X86_OP_REX_B;
9633 pbCodeBuf[off++] = 0x0f;
9634 pbCodeBuf[off++] = 0xae;
9635 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 2, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9636 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9637 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9638 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9639 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9640 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9641
9642 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9643 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9644
9645# elif defined(RT_ARCH_ARM64)
9646 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off, false /*fPreferVolatile*/);
9647
9648 /* Need to save the host floating point control register the first time, clear FPSR. */
9649 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9650 {
9651 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9652 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(ARMV8_A64_REG_XZR, ARMV8_AARCH64_SYSREG_FPSR);
9653 pu32CodeBuf[off++] = Armv8A64MkInstrMrs(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9654 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9655 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9656 }
9657
9658 /*
9659 * Translate MXCSR to FPCR.
9660 *
9661 * Unfortunately we can't emulate the exact behavior of MXCSR as we can't take
9662 * FEAT_AFP on arm64 for granted (my M2 MacBook doesn't have it). So we can't map
9663 * MXCSR.DAZ to FPCR.FIZ and MXCSR.FZ to FPCR.FZ with FPCR.AH being set.
9664 * We can only use FPCR.FZ, which will flush input _and_ output denormals to zero.
9665 */
9666 /** @todo Check the host supported flags (needs additional work to get the host features from CPUM)
9667 * and implement alternate handling if FEAT_AFP is present. */
9668 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
9669
9670 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
9671
9672 /* First make sure that there is nothing set for the upper 16-bits (X86_MXCSR_MM, which we don't emulate right now). */
9673 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegTmp, idxRegMxCsr);
9674
9675 /* If either MXCSR.FZ or MXCSR.DAZ is set FPCR.FZ will be set. */
9676 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(IEMNATIVE_REG_FIXED_TMP0, idxRegTmp, X86_MXCSR_DAZ_BIT, 1);
9677 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegTmp, X86_MXCSR_FZ_BIT);
9678 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(idxRegTmp, idxRegTmp, IEMNATIVE_REG_FIXED_TMP0);
9679 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegTmp, idxRegTmp, ARMV8_FPCR_FZ_BIT);
9680
9681 /*
9682 * Init the rounding mode, the layout differs between MXCSR.RM[14:13] and FPCR.RMode[23:22]:
9683 *
9684 * Value MXCSR FPCR
9685 * 0 RN RN
9686 * 1 R- R+
9687 * 2 R+ R-
9688 * 3 RZ RZ
9689 *
9690 * Conversion can be achieved by switching bit positions
9691 */
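 /* For instance, MXCSR.RC = 01b (round down) has to become the FPCR encoding 10b
    and vice versa, while 00b (nearest) and 11b (toward zero) are identical in
    both, so exchanging the two rounding mode bits is all that is needed - which
    is what the two extract+insert pairs below do. */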
9692 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT);
9693 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 14, 1);
9694 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT + 1);
9695 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 13, 1);
9696
9697 /* Write the value to FPCR. */
9698 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9699
9700 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9701 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9702 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9703# else
9704# error "Port me"
9705# endif
9706 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
9707 }
9708#endif
9709 return off;
9710}
9711
9712
9713
9714/*********************************************************************************************************************************
9715* Emitters for FPU related operations. *
9716*********************************************************************************************************************************/
9717
9718#define IEM_MC_FETCH_FCW(a_u16Fcw) \
9719 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
9720
9721/** Emits code for IEM_MC_FETCH_FCW. */
9722DECL_INLINE_THROW(uint32_t)
9723iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9724{
9725 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9726 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9727
9728 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9729
9730 /* Allocate a temporary FCW register. */
9731 /** @todo eliminate extra register */
9732 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
9733 kIemNativeGstRegUse_ReadOnly);
9734
9735 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
9736
9737 /* Free but don't flush the FCW register. */
9738 iemNativeRegFreeTmp(pReNative, idxFcwReg);
9739 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9740
9741 return off;
9742}
9743
9744
9745#define IEM_MC_FETCH_FSW(a_u16Fsw) \
9746 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
9747
9748/** Emits code for IEM_MC_FETCH_FSW. */
9749DECL_INLINE_THROW(uint32_t)
9750iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9751{
9752 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9753 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9754
9755 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
9756 /* Allocate a temporary FSW register. */
9757 /** @todo eliminate extra register */
9758 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
9759 kIemNativeGstRegUse_ReadOnly);
9760
9761 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
9762
9763 /* Free but don't flush the FSW register. */
9764 iemNativeRegFreeTmp(pReNative, idxFswReg);
9765 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9766
9767 return off;
9768}
9769
9770
9771
9772#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9773
9774
9775/*********************************************************************************************************************************
9776* Emitters for SSE/AVX specific operations. *
9777*********************************************************************************************************************************/
9778
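/*
 * Note on the general shape of the emitters in this section: they typically
 * allocate a host SIMD register shadowing the guest XMM/YMM register (with
 * the load size and access mode the operation needs), acquire the variable's
 * host register, emit the actual data movement and finally free both again
 * without flushing anything back to CPUMCTX right away.
 */
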
9779#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
9780 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
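/* Illustrative sketch only (the operand expressions are assumptions and not
 * taken from the instruction tables): inside a recompiled MC block the macro
 * is used like any other IEM_MC statement, e.g. roughly
 *      IEM_MC_COPY_XREG_U128(IEM_GET_MODRM_REG(pVCpu, bRm), IEM_GET_MODRM_RM(pVCpu, bRm));
 * and expands to a call to the emitter right below. */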
9781
9782/** Emits code for IEM_MC_COPY_XREG_U128. */
9783DECL_INLINE_THROW(uint32_t)
9784iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
9785{
9786    /* This is a nop if the source and destination registers are the same. */
9787 if (iXRegDst != iXRegSrc)
9788 {
9789 /* Allocate destination and source register. */
9790 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
9791 kIemNativeGstSimdRegLdStSz_Low128,
9792 kIemNativeGstRegUse_ForFullWrite);
9793 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
9794 kIemNativeGstSimdRegLdStSz_Low128,
9795 kIemNativeGstRegUse_ReadOnly);
9796
9797 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9798
9799 /* Free but don't flush the source and destination register. */
9800 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9801 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9802 }
9803
9804 return off;
9805}
9806
9807
9808#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
9809 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
9810
9811/** Emits code for IEM_MC_FETCH_XREG_U128. */
9812DECL_INLINE_THROW(uint32_t)
9813iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
9814{
9815 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9816 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9817
9818 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9819 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9820
9821 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9822
9823 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9824
9825 /* Free but don't flush the source register. */
9826 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9827 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9828
9829 return off;
9830}
9831
9832
9833#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
9834 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
9835
9836#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
9837 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
9838
9839/** Emits code for IEM_MC_FETCH_XREG_U64/IEM_MC_FETCH_XREG_R64. */
9840DECL_INLINE_THROW(uint32_t)
9841iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
9842{
9843 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9844 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9845
9846 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9847 kIemNativeGstSimdRegLdStSz_Low128,
9848 kIemNativeGstRegUse_ReadOnly);
9849
9850 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9851 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9852
9853 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9854
9855 /* Free but don't flush the source register. */
9856 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9857 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9858
9859 return off;
9860}
9861
9862
9863#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
9864 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
9865
9866#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
9867 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
9868
9869/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
9870DECL_INLINE_THROW(uint32_t)
9871iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
9872{
9873 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9874 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9875
9876 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9877 kIemNativeGstSimdRegLdStSz_Low128,
9878 kIemNativeGstRegUse_ReadOnly);
9879
9880 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9881 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9882
9883 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9884
9885 /* Free but don't flush the source register. */
9886 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9887 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9888
9889 return off;
9890}
9891
9892
9893#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
9894 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
9895
9896/** Emits code for IEM_MC_FETCH_XREG_U16. */
9897DECL_INLINE_THROW(uint32_t)
9898iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
9899{
9900 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9901 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9902
9903 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9904 kIemNativeGstSimdRegLdStSz_Low128,
9905 kIemNativeGstRegUse_ReadOnly);
9906
9907 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9908 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9909
9910 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
9911
9912 /* Free but don't flush the source register. */
9913 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9914 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9915
9916 return off;
9917}
9918
9919
9920#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
9921 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
9922
9923/** Emits code for IEM_MC_FETCH_XREG_U8. */
9924DECL_INLINE_THROW(uint32_t)
9925iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
9926{
9927 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9928 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
9929
9930 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9931 kIemNativeGstSimdRegLdStSz_Low128,
9932 kIemNativeGstRegUse_ReadOnly);
9933
9934 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9935 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9936
9937 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
9938
9939 /* Free but don't flush the source register. */
9940 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9941 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9942
9943 return off;
9944}
9945
9946
9947#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
9948 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
9949
9950AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
9951#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
9952 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
9953
9954
9955/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
9956DECL_INLINE_THROW(uint32_t)
9957iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9958{
9959 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9960 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9961
9962 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9963 kIemNativeGstSimdRegLdStSz_Low128,
9964 kIemNativeGstRegUse_ForFullWrite);
9965 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9966
9967 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9968
9969 /* Free but don't flush the source register. */
9970 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9971 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9972
9973 return off;
9974}
9975
9976
9977#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
9978 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
9979
9980#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
9981 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
9982
9983#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
9984 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
9985
9986#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
9987 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
9988
9989#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
9990 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
9991
9992#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
9993 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
9994
9995/** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8, as well as IEM_MC_STORE_XREG_R32/IEM_MC_STORE_XREG_R64. */
9996DECL_INLINE_THROW(uint32_t)
9997iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
9998 uint8_t cbLocal, uint8_t iElem)
9999{
10000 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10001 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
10002
10003#ifdef VBOX_STRICT
10004 switch (cbLocal)
10005 {
10006 case sizeof(uint64_t): Assert(iElem < 2); break;
10007 case sizeof(uint32_t): Assert(iElem < 4); break;
10008 case sizeof(uint16_t): Assert(iElem < 8); break;
10009 case sizeof(uint8_t): Assert(iElem < 16); break;
10010 default: AssertFailed();
10011 }
10012#endif
10013
10014 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10015 kIemNativeGstSimdRegLdStSz_Low128,
10016 kIemNativeGstRegUse_ForUpdate);
10017 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
10018
10019 switch (cbLocal)
10020 {
10021 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10022 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10023 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10024 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10025 default: AssertFailed();
10026 }
10027
10028 /* Free but don't flush the source register. */
10029 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10030 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10031
10032 return off;
10033}
10034
10035
10036#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
10037 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
10038
10039/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
10040DECL_INLINE_THROW(uint32_t)
10041iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10042{
10043 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10044 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10045
10046 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10047 kIemNativeGstSimdRegLdStSz_Low128,
10048 kIemNativeGstRegUse_ForUpdate);
10049 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
10050
10051    /* Zero the vector register first, then store the 64-bit value to the low 64 bits. */
10052 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10053 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10054
10055 /* Free but don't flush the source register. */
10056 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10057 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10058
10059 return off;
10060}
10061
10062
10063#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
10064 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
10065
10066/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
10067DECL_INLINE_THROW(uint32_t)
10068iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10069{
10070 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10071 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10072
10073 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10074 kIemNativeGstSimdRegLdStSz_Low128,
10075 kIemNativeGstRegUse_ForUpdate);
10076 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
10077
10078 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
10079 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10080 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10081
10082 /* Free but don't flush the source register. */
10083 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10084 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10085
10086 return off;
10087}
10088
10089
10090#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
10091 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
10092
10093/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
10094DECL_INLINE_THROW(uint32_t)
10095iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
10096 uint8_t idxSrcVar, uint8_t iDwSrc)
10097{
10098 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10099 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10100
10101 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10102 kIemNativeGstSimdRegLdStSz_Low128,
10103 kIemNativeGstRegUse_ForUpdate);
10104 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10105
10106 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
10107 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
10108
10109 /* Free but don't flush the destination register. */
10110 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10111 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10112
10113 return off;
10114}
10115
10116
10117#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10118 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10119
10120/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
10121DECL_INLINE_THROW(uint32_t)
10122iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10123{
10124 /*
10125 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10126 * if iYRegDst gets allocated first for the full write it won't load the
10127 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
10128 * duplicated from the already allocated host register for iYRegDst containing
10129     * garbage. This will be caught by the guest register value checking in debug
10130 * builds.
10131 */
10132 if (iYRegDst != iYRegSrc)
10133 {
10134 /* Allocate destination and source register. */
10135 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10136 kIemNativeGstSimdRegLdStSz_256,
10137 kIemNativeGstRegUse_ForFullWrite);
10138 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10139 kIemNativeGstSimdRegLdStSz_Low128,
10140 kIemNativeGstRegUse_ReadOnly);
10141
10142 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10143 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10144
10145 /* Free but don't flush the source and destination register. */
10146 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10147 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10148 }
10149 else
10150 {
10151        /* This effectively only clears the upper 128 bits of the register. */
10152 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10153 kIemNativeGstSimdRegLdStSz_High128,
10154 kIemNativeGstRegUse_ForFullWrite);
10155
10156 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10157
10158 /* Free but don't flush the destination register. */
10159 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10160 }
10161
10162 return off;
10163}
10164
10165
10166#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10167 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10168
10169/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
10170DECL_INLINE_THROW(uint32_t)
10171iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10172{
10173 /*
10174 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10175 * if iYRegDst gets allocated first for the full write it won't load the
10176 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
10177 * duplicated from the already allocated host register for iYRegDst containing
10178     * garbage. This will be caught by the guest register value checking in debug
10179     * builds. The iYRegSrc == iYRegDst case would effectively only clear the upper
10180     * 256 bits of a ZMM register, which we don't support yet, so this is just a nop.
10181 */
10182 if (iYRegDst != iYRegSrc)
10183 {
10184 /* Allocate destination and source register. */
10185 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10186 kIemNativeGstSimdRegLdStSz_256,
10187 kIemNativeGstRegUse_ReadOnly);
10188 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10189 kIemNativeGstSimdRegLdStSz_256,
10190 kIemNativeGstRegUse_ForFullWrite);
10191
10192 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10193
10194 /* Free but don't flush the source and destination register. */
10195 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10196 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10197 }
10198
10199 return off;
10200}
10201
10202
10203#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
10204 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
10205
10206/** Emits code for IEM_MC_FETCH_YREG_U128. */
10207DECL_INLINE_THROW(uint32_t)
10208iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
10209{
10210 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10211 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
10212
10213 Assert(iDQWord <= 1);
10214 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10215 iDQWord == 1
10216 ? kIemNativeGstSimdRegLdStSz_High128
10217 : kIemNativeGstSimdRegLdStSz_Low128,
10218 kIemNativeGstRegUse_ReadOnly);
10219
10220 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10221 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10222
10223 if (iDQWord == 1)
10224 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10225 else
10226 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10227
10228 /* Free but don't flush the source register. */
10229 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10230 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10231
10232 return off;
10233}
10234
10235
10236#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
10237 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
10238
10239/** Emits code for IEM_MC_FETCH_YREG_U64. */
10240DECL_INLINE_THROW(uint32_t)
10241iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
10242{
10243 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10244 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10245
10246 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10247 iQWord >= 2
10248 ? kIemNativeGstSimdRegLdStSz_High128
10249 : kIemNativeGstSimdRegLdStSz_Low128,
10250 kIemNativeGstRegUse_ReadOnly);
10251
10252 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10253 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10254
10255 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
10256
10257 /* Free but don't flush the source register. */
10258 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10259 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10260
10261 return off;
10262}
10263
10264
10265#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
10266 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
10267
10268/** Emits code for IEM_MC_FETCH_YREG_U32. */
10269DECL_INLINE_THROW(uint32_t)
10270iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
10271{
10272 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10273 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10274
10275 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10276 iDWord >= 4
10277 ? kIemNativeGstSimdRegLdStSz_High128
10278 : kIemNativeGstSimdRegLdStSz_Low128,
10279 kIemNativeGstRegUse_ReadOnly);
10280
10281 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10282 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10283
10284 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
10285
10286 /* Free but don't flush the source register. */
10287 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10288 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10289
10290 return off;
10291}
10292
10293
10294#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
10295 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
10296
10297/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
10298DECL_INLINE_THROW(uint32_t)
10299iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10300{
10301 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10302 kIemNativeGstSimdRegLdStSz_High128,
10303 kIemNativeGstRegUse_ForFullWrite);
10304
10305 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10306
10307 /* Free but don't flush the register. */
10308 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10309
10310 return off;
10311}
10312
10313
10314#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
10315 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
10316
10317/** Emits code for IEM_MC_STORE_YREG_U128. */
10318DECL_INLINE_THROW(uint32_t)
10319iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
10320{
10321 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10322 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10323
10324 Assert(iDQword <= 1);
10325 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10326 iDQword == 0
10327 ? kIemNativeGstSimdRegLdStSz_Low128
10328 : kIemNativeGstSimdRegLdStSz_High128,
10329 kIemNativeGstRegUse_ForFullWrite);
10330
10331 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10332
10333 if (iDQword == 0)
10334 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10335 else
10336 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
10337
10338 /* Free but don't flush the source register. */
10339 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10340 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10341
10342 return off;
10343}
10344
10345
10346#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10347 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10348
10349/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
10350DECL_INLINE_THROW(uint32_t)
10351iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10352{
10353 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10354 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10355
10356 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10357 kIemNativeGstSimdRegLdStSz_256,
10358 kIemNativeGstRegUse_ForFullWrite);
10359
10360 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10361
10362 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10363 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10364
10365 /* Free but don't flush the source register. */
10366 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10367 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10368
10369 return off;
10370}
10371
10372
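/*
 * The IEM_MC_BROADCAST_XREG_*_ZX_VLMAX emitters below all follow the same
 * pattern: broadcast the GPR value into the low 128 bits of the destination
 * and then zero the upper 128 bits (the VLMAX zero extension). The YREG
 * variants further down broadcast across the full 256 bits instead.
 */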
10373#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
10374 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
10375
10376/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
10377DECL_INLINE_THROW(uint32_t)
10378iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10379{
10380 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10381 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10382
10383 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10384 kIemNativeGstSimdRegLdStSz_256,
10385 kIemNativeGstRegUse_ForFullWrite);
10386
10387 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10388
10389 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10390 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10391
10392 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10393 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10394
10395 return off;
10396}
10397
10398
10399#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
10400 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
10401
10402/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
10403DECL_INLINE_THROW(uint32_t)
10404iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10405{
10406 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10407 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10408
10409 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10410 kIemNativeGstSimdRegLdStSz_256,
10411 kIemNativeGstRegUse_ForFullWrite);
10412
10413 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10414
10415 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10416 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10417
10418 /* Free but don't flush the source register. */
10419 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10420 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10421
10422 return off;
10423}
10424
10425
10426#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
10427 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
10428
10429/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
10430DECL_INLINE_THROW(uint32_t)
10431iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10432{
10433 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10434 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10435
10436 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10437 kIemNativeGstSimdRegLdStSz_256,
10438 kIemNativeGstRegUse_ForFullWrite);
10439
10440 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10441
10442 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10443 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10444
10445 /* Free but don't flush the source register. */
10446 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10447 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10448
10449 return off;
10450}
10451
10452
10453#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
10454 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
10455
10456/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
10457DECL_INLINE_THROW(uint32_t)
10458iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10459{
10460 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10461 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10462
10463 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10464 kIemNativeGstSimdRegLdStSz_256,
10465 kIemNativeGstRegUse_ForFullWrite);
10466
10467 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10468
10469 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10470 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10471
10472 /* Free but don't flush the source register. */
10473 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10474 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10475
10476 return off;
10477}
10478
10479
10480#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
10481 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
10482
10483/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
10484DECL_INLINE_THROW(uint32_t)
10485iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10486{
10487 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10488 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10489
10490 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10491 kIemNativeGstSimdRegLdStSz_256,
10492 kIemNativeGstRegUse_ForFullWrite);
10493
10494 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10495
10496 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10497
10498 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10499 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10500
10501 return off;
10502}
10503
10504
10505#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
10506 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
10507
10508/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
10509DECL_INLINE_THROW(uint32_t)
10510iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10511{
10512 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10513 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10514
10515 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10516 kIemNativeGstSimdRegLdStSz_256,
10517 kIemNativeGstRegUse_ForFullWrite);
10518
10519 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10520
10521 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10522
10523 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10524 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10525
10526 return off;
10527}
10528
10529
10530#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10531 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10532
10533/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
10534DECL_INLINE_THROW(uint32_t)
10535iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10536{
10537 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10538 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10539
10540 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10541 kIemNativeGstSimdRegLdStSz_256,
10542 kIemNativeGstRegUse_ForFullWrite);
10543
10544 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10545
10546 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10547
10548 /* Free but don't flush the source register. */
10549 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10550 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10551
10552 return off;
10553}
10554
10555
10556#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10557 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10558
10559/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
10560DECL_INLINE_THROW(uint32_t)
10561iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10562{
10563 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10564 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10565
10566 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10567 kIemNativeGstSimdRegLdStSz_256,
10568 kIemNativeGstRegUse_ForFullWrite);
10569
10570 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10571
10572 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10573
10574 /* Free but don't flush the source register. */
10575 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10576 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10577
10578 return off;
10579}
10580
10581
10582#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10583 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10584
10585/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
10586DECL_INLINE_THROW(uint32_t)
10587iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10588{
10589 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10590 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10591
10592 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10593 kIemNativeGstSimdRegLdStSz_256,
10594 kIemNativeGstRegUse_ForFullWrite);
10595
10596 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10597
10598 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
10599
10600 /* Free but don't flush the source register. */
10601 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10602 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10603
10604 return off;
10605}
10606
10607
10608#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10609 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10610
10611/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
10612DECL_INLINE_THROW(uint32_t)
10613iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10614{
10615 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10616 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10617
10618 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10619 kIemNativeGstSimdRegLdStSz_256,
10620 kIemNativeGstRegUse_ForFullWrite);
10621
10622 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10623
10624 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10625 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
10626
10627 /* Free but don't flush the source register. */
10628 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10629 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10630
10631 return off;
10632}
10633
10634
10635#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10636 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10637
10638/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
10639DECL_INLINE_THROW(uint32_t)
10640iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10641{
10642 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10643 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10644
10645 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10646 kIemNativeGstSimdRegLdStSz_256,
10647 kIemNativeGstRegUse_ForFullWrite);
10648
10649 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10650
10651 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10652 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10653
10654 /* Free but don't flush the source register. */
10655 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10656 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10657
10658 return off;
10659}
10660
10661
10662#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
10663 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
10664
10665/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
10666DECL_INLINE_THROW(uint32_t)
10667iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
10668{
10669 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10670 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10671
10672 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10673 kIemNativeGstSimdRegLdStSz_256,
10674 kIemNativeGstRegUse_ForFullWrite);
10675 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10676 kIemNativeGstSimdRegLdStSz_Low128,
10677 kIemNativeGstRegUse_ReadOnly);
10678 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10679
10680 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10681 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10682 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10683
10684 /* Free but don't flush the source and destination registers. */
10685 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10686 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10687 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10688
10689 return off;
10690}
10691
10692
10693#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
10694 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
10695
10696/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
10697DECL_INLINE_THROW(uint32_t)
10698iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
10699{
10700 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10701 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10702
10703 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10704 kIemNativeGstSimdRegLdStSz_256,
10705 kIemNativeGstRegUse_ForFullWrite);
10706 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10707 kIemNativeGstSimdRegLdStSz_Low128,
10708 kIemNativeGstRegUse_ReadOnly);
10709 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10710
10711 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10712 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
10713 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10714
10715 /* Free but don't flush the source and destination registers. */
10716 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10717 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10718 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10719
10720 return off;
10721}
10722
10723
10724#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
10725 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
10726
10727
10728/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
10729DECL_INLINE_THROW(uint32_t)
10730iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
10731{
10732 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10733 kIemNativeGstSimdRegLdStSz_Low128,
10734 kIemNativeGstRegUse_ForUpdate);
10735
10736 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
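    /* Example: a mask of 0x6 (RT_BIT(1) | RT_BIT(2)) zeroes dwords 1 and 2 and
       leaves dwords 0 and 3 untouched. */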
10737 if (bImm8Mask & RT_BIT(0))
10738 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
10739 if (bImm8Mask & RT_BIT(1))
10740 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
10741 if (bImm8Mask & RT_BIT(2))
10742 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
10743 if (bImm8Mask & RT_BIT(3))
10744 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
10745
10746 /* Free but don't flush the destination register. */
10747 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10748
10749 return off;
10750}
10751
10752
10753#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
10754 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
10755
10756#define IEM_MC_FETCH_YREG_YMM(a_uYmmDst, a_iYRegSrc) \
10757 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_uYmmDst, a_iYRegSrc)
10758
10759/** Emits code for IEM_MC_FETCH_YREG_U256/IEM_MC_FETCH_YREG_YMM. */
10760DECL_INLINE_THROW(uint32_t)
10761iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
10762{
10763 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10764 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
10765
10766 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10767 kIemNativeGstSimdRegLdStSz_256,
10768 kIemNativeGstRegUse_ReadOnly);
10769 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10770
10771 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
10772
10773 /* Free but don't flush the source register. */
10774 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10775 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10776
10777 return off;
10778}
10779
10780
10781#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
10782 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
10783
10784#define IEM_MC_STORE_YREG_YMM_ZX_VLMAX(a_iYRegDst, a_uYmmSrc) \
10785 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_uYmmSrc)
10786
10787/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX/IEM_MC_STORE_YREG_YMM_ZX_VLMAX. */
10788DECL_INLINE_THROW(uint32_t)
10789iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
10790{
10791 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10792 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10793
10794 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10795 kIemNativeGstSimdRegLdStSz_256,
10796 kIemNativeGstRegUse_ForFullWrite);
10797    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10798
10799 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
10800
10801 /* Free but don't flush the source register. */
10802 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10803 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10804
10805 return off;
10806}
10807
10808
10809#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
10810 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
10811
10812
10813/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
10814DECL_INLINE_THROW(uint32_t)
10815iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
10816 uint8_t idxSrcVar, uint8_t iDwSrc)
10817{
10818 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10819 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10820
10821 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10822 iDwDst < 4
10823 ? kIemNativeGstSimdRegLdStSz_Low128
10824 : kIemNativeGstSimdRegLdStSz_High128,
10825 kIemNativeGstRegUse_ForUpdate);
10826    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10827 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10828
10829 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
10830 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
10831
10832 /* Free but don't flush the source register. */
10833 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10834 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10835 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10836
10837 return off;
10838}
10839
10840
10841#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
10842 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
10843
10844
10845/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
10846DECL_INLINE_THROW(uint32_t)
10847iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
10848 uint8_t idxSrcVar, uint8_t iQwSrc)
10849{
10850 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10851 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10852
10853 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10854 iQwDst < 2
10855 ? kIemNativeGstSimdRegLdStSz_Low128
10856 : kIemNativeGstSimdRegLdStSz_High128,
10857 kIemNativeGstRegUse_ForUpdate);
10858    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10859 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10860
10861 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
10862 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
10863
10864 /* Free but don't flush the source register. */
10865 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10866 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10867 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10868
10869 return off;
10870}
10871
10872
10873#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
10874 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
10875
10876
10877/** Emits code for IEM_MC_STORE_YREG_U64. */
10878DECL_INLINE_THROW(uint32_t)
10879iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
10880{
10881 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10882 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10883
10884 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10885 iQwDst < 2
10886 ? kIemNativeGstSimdRegLdStSz_Low128
10887 : kIemNativeGstSimdRegLdStSz_High128,
10888 kIemNativeGstRegUse_ForUpdate);
10889
10890 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10891
10892 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
10893
10894 /* Free but don't flush the source register. */
10895 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10896 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10897
10898 return off;
10899}
10900
10901
10902#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
10903 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
10904
10905/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
10906DECL_INLINE_THROW(uint32_t)
10907iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10908{
10909 RT_NOREF(pReNative, iYReg);
10910 /** @todo Needs to be implemented when support for AVX-512 is added. */
10911 return off;
10912}
10913
10914
10915
10916/*********************************************************************************************************************************
10917* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
10918*********************************************************************************************************************************/
10919
10920/**
10921 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
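 *
 * The generated code loads the guest MXCSR - with the exception flags masked
 * out - as the hidden first argument, makes the call, ORs the returned MXCSR
 * back into the guest value (the flags are sticky), and finally checks whether
 * the helper raised any exception that is not masked, exiting to the
 * RaiseSseAvxFpRelated label if so.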
10922 */
10923DECL_INLINE_THROW(uint32_t)
10924iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs, uint8_t idxInstr)
10925{
10926    /* Grab the MXCSR register; it must not be call volatile or we would end up freeing it when setting up the call below. */
10927 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
10928 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
10929 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
10930
10931#if 0 /* This is not required right now as the called helper will set up the SSE/AVX state if it is an assembly one. */
10932 /*
10933 * Need to do the FPU preparation.
10934 */
10935 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
10936#endif
10937
10938 /*
10939 * Do all the call setup and cleanup.
10940 */
10941 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS,
10942 false /*fFlushPendingWrites*/);
10943
10944 /*
10945 * Load the MXCSR register into the first argument and mask out the current exception flags.
10946 */
10947 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
10948 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
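    /* Clearing the flags in the copy handed to the helper means any exception
       flag set in the returned value was raised by the helper itself; the
       returned flags are OR'ed back into the guest MXCSR below, so the sticky
       flag semantics are preserved. */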
10949
10950 /*
10951 * Make the call.
10952 */
10953 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
10954
10955 /*
10956     * The updated MXCSR is in the return register; update the exception status flags.
10957 *
10958 * The return register is marked allocated as a temporary because it is required for the
10959 * exception generation check below.
10960 */
10961 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG)));
10962 uint8_t const idxRegTmp = iemNativeRegMarkAllocated(pReNative, IEMNATIVE_CALL_RET_GREG, kIemNativeWhat_Tmp);
10963 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegTmp);
10964
10965#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
10966    /* Write back the MXCSR register value (there is no delayed writeback for such registers at the moment). */
10967 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
10968#endif
10969
10970 /*
10971 * Make sure we don't have any outstanding guest register writes as we may
10972     * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
10973 */
10974 off = iemNativeRegFlushPendingWrites(pReNative, off);
10975
10976#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10977 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10978#else
10979 RT_NOREF(idxInstr);
10980#endif
10981
10982    /** @todo r=aeichner ANDN from BMI1 would save us a temporary and an additional instruction here, but I don't
10983     *        want to assume the existence of this instruction at the moment. */
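    /* What the sequence below computes, as a plain C sketch:
     *      if (  uMxCsrRet
     *          & ~((uMxCsrRet & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT)
     *          & X86_MXCSR_XCPT_FLAGS)
     *          -> exit via kIemNativeLabelType_RaiseSseAvxFpRelated;
     * i.e. take the exit if the helper set any exception flag whose
     * corresponding mask bit is clear. */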
10984 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
10985
10986 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp2, idxRegTmp);
10987 /* tmp &= X86_MXCSR_XCPT_MASK */
10988 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
10989 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
10990 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
10991 /* tmp = ~tmp */
10992 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
10993 /* tmp &= mxcsr */
10994 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegTmp2);
10995 off = iemNativeEmitTestAnyBitsInGprAndTbExitIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
10996 kIemNativeLabelType_RaiseSseAvxFpRelated);
10997
10998 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
10999 iemNativeRegFreeTmp(pReNative, idxRegTmp);
11000 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
11001
11002 return off;
11003}
11004
11005
11006#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
11007 off = iemNativeEmitCallSseAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
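/* Note: a0 and a1 must be argument variables with indices 0 and 1; the hidden
   MXCSR argument (IEM_SSE_AIMPL_HIDDEN_ARGS) is placed in front of them by the
   common worker, which is what the asserts in the emitter below reflect. */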
11008
11009/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
11010DECL_INLINE_THROW(uint32_t)
11011iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
11012{
11013 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11014 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11015 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
11016}
11017
11018
11019#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
11020 off = iemNativeEmitCallSseAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
11021
11022/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
11023DECL_INLINE_THROW(uint32_t)
11024iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
11025 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
11026{
11027 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11028 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11029 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11030 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
11031}
11032
11033
11034/*********************************************************************************************************************************
11035* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
11036*********************************************************************************************************************************/
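/* Note: these AVX wrappers reuse iemNativeEmitCallSseAvxAImplCommon, which passes IEM_SSE_AIMPL_HIDDEN_ARGS
   to the call setup, so IEM_AVX_AIMPL_HIDDEN_ARGS is presumably expected to have the same value. */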
11037
11038#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
11039 off = iemNativeEmitCallAvxAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
11040
11041/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
11042DECL_INLINE_THROW(uint32_t)
11043iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
11044{
11045 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11046 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11047 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
11048}
11049
11050
11051#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
11052 off = iemNativeEmitCallAvxAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
11053
11054/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
11055DECL_INLINE_THROW(uint32_t)
11056iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
11057 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
11058{
11059 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11060 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11061 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11062 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
11063}
11064
11065
11066#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
11067
11068
11069/*********************************************************************************************************************************
11070* Include instruction emitters. *
11071*********************************************************************************************************************************/
11072#include "target-x86/IEMAllN8veEmit-x86.h"
11073