VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@ 106431

Last change on this file since 106431 was 106431, checked in by vboxsync, 6 weeks ago

VMM/IEM: Reduced the arguments for iemNativeEmitStackPushRip and eliminated some unused 'push sreg' code from a copy&paste. bugref:10720

1/* $Id: IEMAllN8veRecompFuncs.h 106431 2024-10-17 11:29:08Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62#include "target-x86/IEMAllN8veEmit-x86.h"
63
64
65/*
66 * Narrow down configs here to avoid wasting time on unused configs.
67 * Note! Same checks in IEMAllThrdRecompiler.cpp.
68 */
69
70#ifndef IEM_WITH_CODE_TLB
71# error The code TLB must be enabled for the recompiler.
72#endif
73
74#ifndef IEM_WITH_DATA_TLB
75# error The data TLB must be enabled for the recompiler.
76#endif
77
78#ifndef IEM_WITH_SETJMP
79# error The setjmp approach must be enabled for the recompiler.
80#endif
81
82#if defined(IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS) && !defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
83# error "IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS requires IEMNATIVE_WITH_SIMD_REG_ALLOCATOR"
84#endif
85
86
87/*********************************************************************************************************************************
88* Code emitters for flushing pending guest register writes and sanity checks *
89*********************************************************************************************************************************/
90
91#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
92
93# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
94/**
95 * Updates IEMCPU::uPcUpdatingDebug.
96 */
97DECL_INLINE_THROW(uint32_t) iemNativeEmitPcDebugAdd(PIEMRECOMPILERSTATE pReNative, uint32_t off, int64_t offDisp, uint8_t cBits)
98{
99# ifdef RT_ARCH_AMD64
100 if (pReNative->Core.fDebugPcInitialized && cBits >= 32)
101 {
102 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
103 if ((int32_t)offDisp == offDisp || cBits != 64)
104 {
105 /* add [q]word [pVCpu->iem.s.uPcUpdatingDebug], imm32/imm8 */
106 if (cBits == 64)
107 pCodeBuf[off++] = X86_OP_REX_W;
108 pCodeBuf[off++] = (int8_t)offDisp == offDisp ? 0x83 : 0x81;
109 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
110 if ((int8_t)offDisp == offDisp)
111 pCodeBuf[off++] = (int8_t)offDisp;
112 else
113 {
114 *(int32_t *)&pCodeBuf[off] = (int32_t)offDisp;
115 off += sizeof(int32_t);
116 }
117 }
118 else
119 {
120 /* mov tmp0, imm64 */
121 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, offDisp);
122
123 /* add [pVCpu->iem.s.uPcUpdatingDebug], tmp0 */
124 if (cBits == 64)
125 pCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 >= 8 ? X86_OP_REX_R : 0);
126 else if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
127 pCodeBuf[off++] = X86_OP_REX_R;
128 pCodeBuf[off++] = 0x01;
129 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0 & 7,
130 RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
131 }
132 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
133 return off;
134 }
135# endif
136
137 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
138 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 12);
139
140 if (pReNative->Core.fDebugPcInitialized)
141 {
142 Log4(("uPcUpdatingDebug+=%ld cBits=%d off=%#x\n", offDisp, cBits, off));
143 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
144 }
145 else
146 {
147 Log4(("uPcUpdatingDebug=rip+%ld cBits=%d off=%#x\n", offDisp, cBits, off));
148 pReNative->Core.fDebugPcInitialized = true;
149 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
150 }
151
152 if (cBits == 64)
153 off = iemNativeEmitAddGprImmEx(pCodeBuf, off, idxTmpReg, offDisp, IEMNATIVE_REG_FIXED_TMP0);
154 else
155 {
156 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxTmpReg, (int32_t)offDisp, IEMNATIVE_REG_FIXED_TMP0);
157 if (cBits == 16)
158 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, UINT16_MAX);
159 }
160
161 off = iemNativeEmitStoreGprToVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug),
162 IEMNATIVE_REG_FIXED_TMP0);
163
164 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
165 iemNativeRegFreeTmp(pReNative, idxTmpReg);
166 return off;
167}
168
169
170# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
171DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
172{
173 /* Compare the shadow with the context value, they should match. */
174 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
175 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
176 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
177 return off;
178}
179# endif
180
181#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
182
183/**
184 * Flushes delayed write of a specific guest register.
185 *
186 * This must be called prior to calling CImpl functions and any helpers that use
187 * the guest state (like raising exceptions) and such.
188 *
189 * This optimization has not yet been implemented. The first target would be
190 * RIP updates, since these are the most common ones.
191 */
192DECL_INLINE_THROW(uint32_t)
193iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
194{
195#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
196 /* If for whatever reason it is possible to reference the PC register at some point, we need to do the writeback here first. */
197#endif
198
199#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
200#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
201 if ( enmClass == kIemNativeGstRegRef_EFlags
202 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
203 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
204#else
205 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
206#endif
207
208 if ( enmClass == kIemNativeGstRegRef_Gpr
209 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
210 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
211#endif
212
213#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
214 if ( enmClass == kIemNativeGstRegRef_XReg
215 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
216 {
217 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
218 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
219 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
220
221 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
222 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
223 }
224#endif
225 RT_NOREF(pReNative, enmClass, idxReg);
226 return off;
227}
228
229
230
231/*********************************************************************************************************************************
232* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
233*********************************************************************************************************************************/
234
235#undef IEM_MC_BEGIN /* unused */
236#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
237 { \
238 Assert(pReNative->Core.bmVars == 0); \
239 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
240 Assert(pReNative->Core.bmStack == 0); \
241 pReNative->fMc = (a_fMcFlags); \
242 pReNative->fCImpl = (a_fCImplFlags); \
243 pReNative->cArgsX = (a_cArgsIncludingHidden)
244
245/** We have to get to the end in recompilation mode, as otherwise we won't
246 * generate code for all the IEM_MC_IF_XXX branches. */
247#define IEM_MC_END() \
248 iemNativeVarFreeAll(pReNative); \
249 } return off
250
251
252
253/*********************************************************************************************************************************
254* Liveness Stubs *
255*********************************************************************************************************************************/
256
257#define IEM_MC_LIVENESS_GREG_INPUT(a_iGReg) ((void)0)
258#define IEM_MC_LIVENESS_GREG_CLOBBER(a_iGReg) ((void)0)
259#define IEM_MC_LIVENESS_GREG_MODIFY(a_iGReg) ((void)0)
260
261#define IEM_MC_LIVENESS_MREG_INPUT(a_iMReg) ((void)0)
262#define IEM_MC_LIVENESS_MREG_CLOBBER(a_iMReg) ((void)0)
263#define IEM_MC_LIVENESS_MREG_MODIFY(a_iMReg) ((void)0)
264
265#define IEM_MC_LIVENESS_XREG_INPUT(a_iXReg) ((void)0)
266#define IEM_MC_LIVENESS_XREG_CLOBBER(a_iXReg) ((void)0)
267#define IEM_MC_LIVENESS_XREG_MODIFY(a_iXReg) ((void)0)
268
269#define IEM_MC_LIVENESS_MXCSR_INPUT() ((void)0)
270#define IEM_MC_LIVENESS_MXCSR_CLOBBER() ((void)0)
271#define IEM_MC_LIVENESS_MXCSR_MODIFY() ((void)0)
272
273
274/*********************************************************************************************************************************
275* Native Emitter Support. *
276*********************************************************************************************************************************/
277
278#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
279
280#define IEM_MC_NATIVE_ELSE() } else {
281
282#define IEM_MC_NATIVE_ENDIF() } ((void)0)
283
284
285#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
286 off = a_fnEmitter(pReNative, off)
287
288#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
289 off = a_fnEmitter(pReNative, off, (a0))
290
291#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
292 off = a_fnEmitter(pReNative, off, (a0), (a1))
293
294#define IEM_MC_NATIVE_EMIT_2_EX(a_fnEmitter, a0, a1) \
295 off = a_fnEmitter(pReNative, off, pCallEntry->idxInstr, (a0), (a1))
296
297#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
298 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
299
300#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
301 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
302
303#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
304 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
305
306#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
307 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
308
309#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
310 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
311
312#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
313 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
314
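/*
 * Illustrative sketch, not part of the original file: rough shape of how an MC
 * block is expected to use the IEM_MC_NATIVE_IF/EMIT/ENDIF hooks defined above.
 * The emitter name iemNativeEmit_ExampleOp and the idxVar* arguments are made
 * up for the example.
 */
#if 0
    IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64)
        IEM_MC_NATIVE_EMIT_2(iemNativeEmit_ExampleOp, idxVarDst, idxVarSrc); /* hypothetical native emitter */
    IEM_MC_NATIVE_ELSE()
        /* ... regular IEM_MC_* statements as the fallback for other hosts ... */
    IEM_MC_NATIVE_ENDIF();
#endif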
315
316#ifndef RT_ARCH_AMD64
317# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
318#else
319/** @note This is a naive approach that ASSUMES that the register isn't
320 * allocated, so it only works safely for the first allocation(s) in
321 * a MC block. */
322# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
323 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
324
325DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg,
326 uint32_t off, bool fAllocated);
327
328DECL_INLINE_THROW(uint32_t)
329iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
330{
331 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
332 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
333 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
334
335# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
336 /* Must flush the register if it holds pending writes. */
337 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
338 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
339 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
340# endif
341
342 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off, false /*fAllocated*/);
343 return off;
344}
345
346#endif /* RT_ARCH_AMD64 */
347
348
349
350/*********************************************************************************************************************************
351* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
352*********************************************************************************************************************************/
353
354#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
355 pReNative->fMc = 0; \
356 pReNative->fCImpl = (a_fFlags); \
357 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, \
358 a_cbInstr) /** @todo not used ... */
359
360
361#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
362 pReNative->fMc = 0; \
363 pReNative->fCImpl = (a_fFlags); \
364 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
365
366DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
367 uint8_t idxInstr, uint64_t a_fGstShwFlush,
368 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
369{
370 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
371}
372
373
374#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
375 pReNative->fMc = 0; \
376 pReNative->fCImpl = (a_fFlags); \
377 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
378 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
379
380DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
381 uint8_t idxInstr, uint64_t a_fGstShwFlush,
382 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
383{
384 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
385}
386
387
388#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
389 pReNative->fMc = 0; \
390 pReNative->fCImpl = (a_fFlags); \
391 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
392 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
393
394DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
395 uint8_t idxInstr, uint64_t a_fGstShwFlush,
396 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
397 uint64_t uArg2)
398{
399 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
400}
401
402
403
404/*********************************************************************************************************************************
405* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
406*********************************************************************************************************************************/
407
408/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
409 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
410DECL_INLINE_THROW(uint32_t)
411iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
412{
413 /*
414 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
415 * return with special status code and make the execution loop deal with
416 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
417 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
418 * could continue w/o interruption, it probably will drop into the
419 * debugger, so not worth the effort of trying to service it here and we
420 * just lump it in with the handling of the others.
421 *
422 * To simplify the code and the register state management even more (wrt
423 * immediate in AND operation), we always update the flags and skip the
424 * extra check and associated conditional jump.
425 */
426 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
427 <= UINT32_MAX);
428#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
429 AssertMsg( pReNative->idxCurCall == 0
430 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
431 IEMLIVENESSBIT_IDX_EFL_OTHER)),
432 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
433 IEMLIVENESSBIT_IDX_EFL_OTHER)));
434#endif
435
436 /*
437 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label
438 * any pending register writes must be flushed.
439 */
440 off = iemNativeRegFlushPendingWrites(pReNative, off);
441
442 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
443 RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER),
444 RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER));
445 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_ReturnWithFlags>(pReNative, off, idxEflReg,
446 X86_EFL_TF
447 | CPUMCTX_DBG_HIT_DRX_MASK
448 | CPUMCTX_DBG_DBGF_MASK);
449 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
450 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
451
452 /* Free but don't flush the EFLAGS register. */
453 iemNativeRegFreeTmp(pReNative, idxEflReg);
454
455 return off;
456}
457
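/*
 * Illustrative sketch, not part of the original file: the check emitted above,
 * written as plain C over a guest EFLAGS value (which in VBox also carries the
 * CPUMCTX_INHIBIT_xxx and CPUMCTX_DBG_xxx status bits).  Simplified model only.
 */
#if 0
static bool sketchFinishNeedsReturnWithFlags(uint32_t *pfEFlags)
{
    if (*pfEFlags & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
        return true;  /* exit the TB via the ReturnWithFlags path and let the execution loop deal with it */
    *pfEFlags &= ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW); /* otherwise just clear RF + interrupt shadow */
    return false;     /* keep executing natively */
}
#endif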
458
459/** Helper for iemNativeEmitFinishInstructionWithStatus. */
460DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
461{
462 unsigned const offOpcodes = pCallEntry->offOpcode;
463 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
464 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
465 {
466 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
467 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
468 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
469 }
470 AssertFailedReturn(NIL_RTGCPHYS);
471}
472
473
474/** The VINF_SUCCESS dummy. */
475template<int const a_rcNormal, bool const a_fIsJump>
476DECL_FORCE_INLINE_THROW(uint32_t)
477iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
478 int32_t const offJump)
479{
480 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
481 if (a_rcNormal != VINF_SUCCESS)
482 {
483#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
484 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
485#else
486 RT_NOREF_PV(pCallEntry);
487#endif
488
489 /* As this code returns from the TB any pending register writes must be flushed. */
490 off = iemNativeRegFlushPendingWrites(pReNative, off);
491
492 /*
493 * If we're in a conditional, mark the current branch as exiting so we
494 * can disregard its state when we hit the IEM_MC_ENDIF.
495 */
496 iemNativeMarkCurCondBranchAsExiting(pReNative);
497
498 /*
499 * Use the lookup table for getting to the next TB quickly.
500 * Note! In this code path there can only be one entry at present.
501 */
502 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
503 PCIEMTB const pTbOrg = pReNative->pTbOrg;
504 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
505 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
506
507#if 0
508 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
509 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
510 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
511 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
512 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
513
514 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreak);
515
516#else
517 /* Load the index as argument #1 for the helper call at the given label. */
518 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
519
520 /*
521 * Figure out the physical address of the current instruction and see
522 * whether the next instruction we're about to execute is in the same
523 * page so we can optimistically skip TLB loading.
524 *
525 * - This is safe for all cases in FLAT mode.
526 * - In segmented modes it is complicated, given that a negative
527 * jump may underflow EIP and a forward jump may overflow or run into
528 * CS.LIM, triggering a #GP. The only thing we can get away with
529 * now at compile time is forward jumps w/o CS.LIM checks, since the
530 * lack of CS.LIM checks means we're good for the entire physical page
531 * we're executing on and another 15 bytes before we run into CS.LIM.
532 */
533 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
534# if 0 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. See also iemNativeHlpReturnBreakViaLookup. */
535 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS)
536# endif
537 )
538 {
539 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
540 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
541 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
542 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
543
544 {
545 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
546 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
547
548 /* Load the key lookup flags into the 2nd argument for the helper call.
549 - This is safe wrt CS limit checking since we're only here for FLAT modes.
550 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
551 interrupt shadow.
552 - The NMI inhibiting is more questionable, though... */
553 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
554 * Should we copy it into fExec to simplify this? OTOH, it's just a
555 * couple of extra instructions if EFLAGS are already in a register. */
556 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
557 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
558
559 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
560 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookup>(pReNative, off);
561 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithIrq>(pReNative, off);
562 }
563 }
564 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
565 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithTlb>(pReNative, off);
566 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq>(pReNative, off);
567#endif
568 }
569 return off;
570}
571
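/*
 * Illustrative sketch, not part of the original file: the same-page test used
 * above, spelled out as a plain helper.  GCPhysPcCurrent is the physical
 * address of the current instruction, cbOpcode its length, and GCPhysPcNext
 * the physical address execution continues at.
 */
#if 0
static bool sketchNextPcIsInSamePage(RTGCPHYS GCPhysPcCurrent, RTGCPHYS GCPhysPcNext, uint8_t cbOpcode)
{
    return (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)   /* same guest page */
        && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= cbOpcode;     /* opcode doesn't cross into the next page */
}
#endif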
572
573#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
574 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
575 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
576
577#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
578 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
579 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
580 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
581
582/** Same as iemRegAddToRip64AndFinishingNoFlags. */
583DECL_INLINE_THROW(uint32_t)
584iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
585{
586#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
587# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
588 if (!pReNative->Core.offPc)
589 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
590# endif
591
592 /* Allocate a temporary PC register. */
593 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
594
595 /* Perform the addition and store the result. */
596 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
597 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
598
599 /* Free but don't flush the PC register. */
600 iemNativeRegFreeTmp(pReNative, idxPcReg);
601#endif
602
603#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
604 pReNative->Core.offPc += cbInstr;
605 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
606# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
607 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 64);
608 off = iemNativeEmitPcDebugCheck(pReNative, off);
609# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
610 off = iemNativePcAdjustCheck(pReNative, off);
611# endif
612 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
613#endif
614
615 return off;
616}
617
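/*
 * Illustrative sketch, not part of the original file: what delayed PC updating
 * amounts to.  Rather than emitting a store to cpum.GstCtx.rip for every
 * instruction, the recompiler accumulates the instruction lengths in
 * Core.offPc and emits a single add+store when something forces a flush
 * (CImpl call, potential exception, TB exit, ...).  Names below are made up
 * for the model.
 */
#if 0
static void sketchAdvancePcDelayed(uint64_t *puGstRip, uint64_t *poffPcPending, uint8_t cbInstr, bool fFlushNow)
{
    *poffPcPending += cbInstr;            /* IEM_MC_ADVANCE_RIP_AND_FINISH: pure bookkeeping, no code emitted */
    if (fFlushNow)
    {
        *puGstRip     += *poffPcPending;  /* one add+store instead of one per instruction */
        *poffPcPending = 0;
    }
}
#endif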
618
619#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
620 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
621 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
622
623#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
624 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
625 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
626 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
627
628/** Same as iemRegAddToEip32AndFinishingNoFlags. */
629DECL_INLINE_THROW(uint32_t)
630iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
631{
632#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
633# ifdef IEMNATIVE_REG_FIXED_PC_DBG
634 if (!pReNative->Core.offPc)
635 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
636# endif
637
638 /* Allocate a temporary PC register. */
639 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
640
641 /* Perform the addition and store the result. */
642 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
643 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
644
645 /* Free but don't flush the PC register. */
646 iemNativeRegFreeTmp(pReNative, idxPcReg);
647#endif
648
649#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
650 pReNative->Core.offPc += cbInstr;
651 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
652# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
653 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 32);
654 off = iemNativeEmitPcDebugCheck(pReNative, off);
655# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
656 off = iemNativePcAdjustCheck(pReNative, off);
657# endif
658 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
659#endif
660
661 return off;
662}
663
664
665#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
666 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
667 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
668
669#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
670 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
671 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
672 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
673
674/** Same as iemRegAddToIp16AndFinishingNoFlags. */
675DECL_INLINE_THROW(uint32_t)
676iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
677{
678#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
679# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
680 if (!pReNative->Core.offPc)
681 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
682# endif
683
684 /* Allocate a temporary PC register. */
685 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
686
687 /* Perform the addition and store the result. */
688 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
689 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
690 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
691
692 /* Free but don't flush the PC register. */
693 iemNativeRegFreeTmp(pReNative, idxPcReg);
694#endif
695
696#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
697 pReNative->Core.offPc += cbInstr;
698 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
699# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
700 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 16);
701 off = iemNativeEmitPcDebugCheck(pReNative, off);
702# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
703 off = iemNativePcAdjustCheck(pReNative, off);
704# endif
705 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
706#endif
707
708 return off;
709}
710
711
712/*********************************************************************************************************************************
713* Common code for changing PC/RIP/EIP/IP. *
714*********************************************************************************************************************************/
715
716/**
717 * Emits code to check if the content of @a idxAddrReg is a canonical address,
718 * raising a \#GP(0) if it isn't.
719 *
720 * @returns New code buffer offset, UINT32_MAX on failure.
721 * @param pReNative The native recompile state.
722 * @param off The code buffer offset.
723 * @param idxAddrReg The host register with the address to check.
724 * @param idxInstr The current instruction.
725 */
726DECL_FORCE_INLINE_THROW(uint32_t)
727iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
728{
729 /*
730 * Make sure we don't have any outstanding guest register writes as we may
731 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
732 */
733 off = iemNativeRegFlushPendingWrites(pReNative, off);
734
735#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
736 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
737#else
738 RT_NOREF(idxInstr);
739#endif
740
741#ifdef RT_ARCH_AMD64
742 /*
743 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
744 * return raisexcpt();
745 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
746 */
747 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
748
749 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
750 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
751 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
752 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
753 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
754
755 iemNativeRegFreeTmp(pReNative, iTmpReg);
756
757#elif defined(RT_ARCH_ARM64)
758 /*
759 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
760 * return raisexcpt();
761 * ----
762 * mov x1, 0x800000000000
763 * add x1, x0, x1
764 * cmp xzr, x1, lsr 48
765 * b.ne .Lraisexcpt
766 */
767 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
768
769 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
770 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
771 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
772 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
773
774 iemNativeRegFreeTmp(pReNative, iTmpReg);
775
776#else
777# error "Port me"
778#endif
779 return off;
780}
781
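/*
 * Illustrative sketch, not part of the original file: the AMD64 sequence above
 * implements the canonical-address test without needing a 64-bit immediate.
 * The helper below shows that trick next to the straightforward range form;
 * the two agree for every 64-bit address.
 */
#if 0
static bool sketchIsCanonicalAddr(uint64_t uAddr)
{
    /* Emitter variant: bias the high 32 bits by 0x8000 and check bits 31:16 of the sum. */
    bool const fTrick = ((((uint32_t)(uAddr >> 32)) + UINT32_C(0x8000)) >> 16) == 0;
    /* Textbook variant: the address must lie in the low or high canonical half. */
    bool const fRange = uAddr <= UINT64_C(0x00007fffffffffff) || uAddr >= UINT64_C(0xffff800000000000);
    return fTrick && fRange; /* identical results, so the conjunction equals either one */
}
#endif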
782
783/**
784 * Emits code to check if the content of @a idxAddrReg is a canonical address,
785 * raising a \#GP(0) if it isn't.
786 *
787 * Caller makes sure everything is flushed, except maybe PC.
788 *
789 * @returns New code buffer offset, UINT32_MAX on failure.
790 * @param pReNative The native recompile state.
791 * @param off The code buffer offset.
792 * @param idxAddrReg The host register with the address to check.
793 * @param offDisp The relative displacement that has already been
794 * added to idxAddrReg and must be subtracted if
795 * raising a \#GP(0).
796 * @param idxInstr The current instruction.
797 */
798DECL_FORCE_INLINE_THROW(uint32_t)
799iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
800 uint8_t idxAddrReg, int64_t offDisp, uint8_t idxInstr)
801{
802#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
803 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
804#endif
805
806#ifdef RT_ARCH_AMD64
807 /*
808 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
809 * return raisexcpt();
810 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
811 */
812 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
813
814 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
815 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
816 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
817 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
818
819#elif defined(RT_ARCH_ARM64)
820 /*
821 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
822 * return raisexcpt();
823 * ----
824 * mov x1, 0x800000000000
825 * add x1, x0, x1
826 * cmp xzr, x1, lsr 48
827 * b.ne .Lraisexcpt
828 */
829 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
830
831 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
832 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
833 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
834#else
835# error "Port me"
836#endif
837
838 /* Jump to the #GP code (hoping static prediction considers forward branches as not-taken). */
839 uint32_t const offFixup1 = off;
840 off = iemNativeEmitJnzToFixed(pReNative, off, off /*8-bit jump suffices*/);
841
842 /* jump .Lnoexcept; Skip the #GP code. */
843 uint32_t const offFixup2 = off;
844 off = iemNativeEmitJmpToFixed(pReNative, off, off /*8-bit jump suffices*/);
845
846 /* .Lraisexcpt: */
847 iemNativeFixupFixedJump(pReNative, offFixup1, off);
848#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
849 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
850#else
851 RT_NOREF(idxInstr);
852#endif
853
854 /* Undo the PC adjustment and store the old PC value. */
855 off = iemNativeEmitSubGprImm(pReNative, off, idxAddrReg, offDisp, iTmpReg);
856 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxAddrReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
857
858 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
859
860 /* .Lnoexcept: */
861 iemNativeFixupFixedJump(pReNative, offFixup2, off);
862
863 iemNativeRegFreeTmp(pReNative, iTmpReg);
864 return off;
865}
866
867
868/**
869 * Emits code to check if the content of @a idxAddrReg is a canonical address,
870 * raising a \#GP(0) if it isn't.
871 *
872 * Caller makes sure everything is flushed, except maybe PC.
873 *
874 * @returns New code buffer offset, UINT32_MAX on failure.
875 * @param pReNative The native recompile state.
876 * @param off The code buffer offset.
877 * @param idxAddrReg The host register with the address to check.
878 * @param idxOldPcReg Register holding the old PC that offPc is relative
879 * to if available, otherwise UINT8_MAX.
880 * @param idxInstr The current instruction.
881 */
882DECL_FORCE_INLINE_THROW(uint32_t)
883iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
884 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
885{
886#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
887 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
888#endif
889
890#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
891# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
892 if (!pReNative->Core.offPc)
893# endif
894 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
895#else
896 RT_NOREF(idxInstr);
897#endif
898
899#ifdef RT_ARCH_AMD64
900 /*
901 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
902 * return raisexcpt();
903 * ---- this variant avoids loading a 64-bit immediate, but is an instruction longer.
904 */
905 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
906
907 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
908 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
909 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
910 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
911
912#elif defined(RT_ARCH_ARM64)
913 /*
914 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
915 * return raisexcpt();
916 * ----
917 * mov x1, 0x800000000000
918 * add x1, x0, x1
919 * cmp xzr, x1, lsr 48
920 * b.ne .Lraisexcpt
921 */
922 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
923
924 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
925 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
926 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
927#else
928# error "Port me"
929#endif
930
931#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
932 if (pReNative->Core.offPc)
933 {
934 /** @todo On x86, it is said that conditional jumps forward are statically
935 * predicted as not taken, so this isn't a very good construct.
936 * Investigate whether it makes sense to invert it and add another
937 * jump. Also, find out wtf the static predictor does here on arm! */
938 uint32_t const offFixup = off;
939 off = iemNativeEmitJzToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
940
941 /* .Lraisexcpt: */
942# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
943 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
944# endif
945 /* We need to update cpum.GstCtx.rip. */
946 if (idxOldPcReg == UINT8_MAX)
947 {
948 idxOldPcReg = iTmpReg;
949 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
950 }
951 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
952 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
953
954 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
955 iemNativeFixupFixedJump(pReNative, offFixup, off);
956 }
957 else
958#endif
959 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
960
961 iemNativeRegFreeTmp(pReNative, iTmpReg);
962
963 return off;
964}
965
966
967/**
968 * Emits code to check that the content of @a idxAddrReg is within the limit
969 * of CS, raising a \#GP(0) if it isn't.
970 *
971 * @returns New code buffer offset; throws VBox status code on error.
972 * @param pReNative The native recompile state.
973 * @param off The code buffer offset.
974 * @param idxAddrReg The host register (32-bit) with the address to
975 * check.
976 * @param idxInstr The current instruction.
977 */
978DECL_FORCE_INLINE_THROW(uint32_t)
979iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
980 uint8_t idxAddrReg, uint8_t idxInstr)
981{
982 /*
983 * Make sure we don't have any outstanding guest register writes as we may
984 * raise a #GP(0) and all guest registers must be up to date in CPUMCTX.
985 */
986 off = iemNativeRegFlushPendingWrites(pReNative, off);
987
988#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
989 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
990#else
991 RT_NOREF(idxInstr);
992#endif
993
994 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
995 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
996 kIemNativeGstRegUse_ReadOnly);
997
998 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
999 off = iemNativeEmitTbExitJa<kIemNativeLabelType_RaiseGp0>(pReNative, off);
1000
1001 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
1002 return off;
1003}
1004
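/*
 * Illustrative sketch, not part of the original file: the emitted CS limit
 * check in plain C.  uEip is the 32-bit address being checked and cbCsLimit
 * the CS segment limit; both are stand-ins for the registers used above.
 */
#if 0
static bool sketchEipExceedsCsLimit(uint32_t uEip, uint32_t cbCsLimit)
{
    return uEip > cbCsLimit; /* matches the cmp + 'ja' above: unsigned compare, above the limit => raise #GP(0) */
}
#endif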
1005
1006
1007
1008/**
1009 * Emits code to check that the content of @a idxAddrReg is within the limit
1010 * of CS, raising a \#GP(0) if it isn't.
1011 *
1012 * Caller makes sure everything is flushed, except maybe PC.
1013 *
1014 * @returns New code buffer offset; throws VBox status code on error.
1015 * @param pReNative The native recompile state.
1016 * @param off The code buffer offset.
1017 * @param idxAddrReg The host register (32-bit) with the address to
1018 * check.
1019 * @param idxOldPcReg Register holding the old PC that offPc is relative
1020 * to if available, otherwise UINT8_MAX.
1021 * @param idxInstr The current instruction.
1022 */
1023DECL_FORCE_INLINE_THROW(uint32_t)
1024iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1025 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
1026{
1027#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1028 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
1029#endif
1030
1031#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1032# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1033 if (!pReNative->Core.offPc)
1034# endif
1035 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1036#else
1037 RT_NOREF(idxInstr);
1038#endif
1039
1040 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
1041 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
1042 kIemNativeGstRegUse_ReadOnly);
1043
1044 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
1045#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1046 if (pReNative->Core.offPc)
1047 {
1048 uint32_t const offFixup = off;
1049 off = iemNativeEmitJbeToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
1050
1051 /* Raising a GP(0), but first we need to update cpum.GstCtx.rip. */
1052 if (idxOldPcReg == UINT8_MAX)
1053 {
1054 idxOldPcReg = idxAddrReg;
1055 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1056 }
1057 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
1058 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1059# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1060 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1061# endif
1062 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
1063 iemNativeFixupFixedJump(pReNative, offFixup, off);
1064 }
1065 else
1066#endif
1067 off = iemNativeEmitTbExitJa<kIemNativeLabelType_RaiseGp0>(pReNative, off);
1068
1069 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
1070 return off;
1071}
1072
1073
1074/*********************************************************************************************************************************
1075* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
1076*********************************************************************************************************************************/
1077
1078#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1079 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1080 (a_enmEffOpSize), pCallEntry->idxInstr); \
1081 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1082
1083#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1084 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1085 (a_enmEffOpSize), pCallEntry->idxInstr); \
1086 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1087 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1088
1089#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
1090 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1091 IEMMODE_16BIT, pCallEntry->idxInstr); \
1092 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1093
1094#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1095 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1096 IEMMODE_16BIT, pCallEntry->idxInstr); \
1097 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1098 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1099
1100#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
1101 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1102 IEMMODE_64BIT, pCallEntry->idxInstr); \
1103 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1104
1105#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1106 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1107 IEMMODE_64BIT, pCallEntry->idxInstr); \
1108 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1109 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1110
1111
1112#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1113 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1114 (a_enmEffOpSize), pCallEntry->idxInstr); \
1115 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1116
1117#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1118 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1119 (a_enmEffOpSize), pCallEntry->idxInstr); \
1120 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1121 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1122
1123#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG(a_i16, a_cbInstr, a_rcNormal) \
1124 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1125 IEMMODE_16BIT, pCallEntry->idxInstr); \
1126 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1127
1128#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1129 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1130 IEMMODE_16BIT, pCallEntry->idxInstr); \
1131 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1132 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1133
1134#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG(a_i32, a_cbInstr, a_rcNormal) \
1135 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1136 IEMMODE_64BIT, pCallEntry->idxInstr); \
1137 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1138
1139#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1140 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1141 IEMMODE_64BIT, pCallEntry->idxInstr); \
1142 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1143 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1144
1145/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
1146 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
1147 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
1148template<bool const a_fWithinPage>
1149DECL_INLINE_THROW(uint32_t)
1150iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1151 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1152{
1153 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
1154#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1155 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1156 if (a_fWithinPage && enmEffOpSize == IEMMODE_64BIT)
1157 {
1158 /* No #GP checking required, just update offPc and get on with it. */
1159 pReNative->Core.offPc += (int64_t)offDisp + cbInstr;
1160# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1161 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1162# endif
1163 }
1164 else
1165#endif
1166 {
1167 /* Flush all but PC iff we're doing a 64-bit update here and this isn't within a page. */
1168 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT && !a_fWithinPage))
1169 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1170
1171 /* Allocate a temporary PC register. */
1172 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1173 kIemNativeGstRegUse_ForUpdate);
1174
1175 /* Perform the addition. */
1176 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr + pReNative->Core.offPc);
1177
1178 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
1179 {
1180 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't.
1181 We can skip this if the target is within the same page. */
1182 if (!a_fWithinPage)
1183 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(pReNative, off, idxPcReg,
1184 (int64_t)offDisp + cbInstr, idxInstr);
1185 }
1186 else
1187 {
1188 /* Just truncate the result to 16-bit IP. */
1189 Assert(enmEffOpSize == IEMMODE_16BIT);
1190 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1191 }
1192
1193#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1194# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1195 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1196 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1197# endif
1198 /* Since we've already got the new PC value in idxPcReg, we can just as
1199 well write it out and reset offPc to zero. Otherwise, we'd need to use
1200 a copy of the shadow PC, which will cost another move instruction here. */
1201# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS)
1202 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
1203 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
1204 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
1205 Log4(("iemNativeEmitRip64RelativeJumpAndFinishingNoFlags: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u cCondDepth=%d\n",
1206 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped, pReNative->cCondDepth));
1207 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
1208# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1209 iemNativeDbgInfoAddNativeOffset(pReNative, off);
1210 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
1211# endif
1212# endif
1213 pReNative->Core.offPc = 0;
1214#endif
1215
1216 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1217
1218 /* Free but don't flush the PC register. */
1219 iemNativeRegFreeTmp(pReNative, idxPcReg);
1220 }
1221 return off;
1222}
1223
1224
1225#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1226 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1227 (a_enmEffOpSize), pCallEntry->idxInstr); \
1228 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1229
1230#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1231 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1232 (a_enmEffOpSize), pCallEntry->idxInstr); \
1233 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1234 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1235
1236#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
1237 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1238 IEMMODE_16BIT, pCallEntry->idxInstr); \
1239 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1240
1241#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1242 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1243 IEMMODE_16BIT, pCallEntry->idxInstr); \
1244 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1245 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1246
1247#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
1248 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1249 IEMMODE_32BIT, pCallEntry->idxInstr); \
1250 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1251
1252#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1253 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1254 IEMMODE_32BIT, pCallEntry->idxInstr); \
1255 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1256 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1257
1258
1259#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1260 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1261 (a_enmEffOpSize), pCallEntry->idxInstr); \
1262 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1263
1264#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1265 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1266 (a_enmEffOpSize), pCallEntry->idxInstr); \
1267 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1268 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1269
1270#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT(a_i16, a_cbInstr, a_rcNormal) \
1271 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1272 IEMMODE_16BIT, pCallEntry->idxInstr); \
1273 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1274
1275#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1276 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1277 IEMMODE_16BIT, pCallEntry->idxInstr); \
1278 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1279 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1280
1281#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT(a_i32, a_cbInstr, a_rcNormal) \
1282 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1283 IEMMODE_32BIT, pCallEntry->idxInstr); \
1284 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1285
1286#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1287 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1288 IEMMODE_32BIT, pCallEntry->idxInstr); \
1289 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1290 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1291
1292/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
1293 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
1294 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
1295template<bool const a_fFlat>
1296DECL_INLINE_THROW(uint32_t)
1297iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1298 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1299{
1300 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
1301#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1302 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1303#endif
1304
1305 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1306 if (!a_fFlat || enmEffOpSize == IEMMODE_16BIT)
1307 {
1308 off = iemNativeRegFlushPendingWrites(pReNative, off);
1309#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1310 Assert(pReNative->Core.offPc == 0);
1311#endif
1312 }
1313
1314 /* Allocate a temporary PC register. */
1315 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1316
1317 /* Perform the addition. */
1318 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr + (int32_t)pReNative->Core.offPc);
1323
1324 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
1325 if (enmEffOpSize == IEMMODE_16BIT)
1326 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1327
1328 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
1329 if (!a_fFlat)
1330 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1331
1332 /* Commit it. */
1333#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1334 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, enmEffOpSize == IEMMODE_32BIT ? 32 : 16);
1335 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1336#endif
1337
1338 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1339#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1340 pReNative->Core.offPc = 0;
1341#endif
1342
1343 /* Free but don't flush the PC register. */
1344 iemNativeRegFreeTmp(pReNative, idxPcReg);
1345
1346 return off;
1347}
1348
1349
1350#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
1351 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1352 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1353
1354#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
1355 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1356 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1357 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1358
1359#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
1360 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1361 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1362
1363#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1364 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1365 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1366 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1367
1368#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
1369 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1370 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1371
1372#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1373 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1374 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1375 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1376
1377/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
1378DECL_INLINE_THROW(uint32_t)
1379iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1380 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
1381{
1382 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1383 off = iemNativeRegFlushPendingWrites(pReNative, off);
1384
1385#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1386 Assert(pReNative->Core.offPc == 0);
1387 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1388#endif
1389
1390 /* Allocate a temporary PC register. */
1391 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1392
1393 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
1394 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
1395 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1396 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1397#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1398 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, 16);
1399 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1400#endif
1401 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1402
1403 /* Free but don't flush the PC register. */
1404 iemNativeRegFreeTmp(pReNative, idxPcReg);
1405
1406 return off;
1407}
1408
1409
1410
1411/*********************************************************************************************************************************
1412* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
1413*********************************************************************************************************************************/
1414
1415/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
1416#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
1417 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1418
1419/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
1420#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
1421 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1422
1423/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
1424#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
1425 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1426
1427/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
1428 * clears flags. */
1429#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
1430 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
1431 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1432
1433/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
1434 * clears flags. */
1435#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
1436 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
1437 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1438
1439/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
1440 * clears flags. */
1441#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
1442 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
1443 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1444
1445#undef IEM_MC_SET_RIP_U16_AND_FINISH
1446
1447
1448/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
1449#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
1450 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1451
1452/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
1453#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
1454 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1455
1456/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
1457 * clears flags. */
1458#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
1459 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
1460 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1461
1462/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
1463 * and clears flags. */
1464#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
1465 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
1466 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1467
1468#undef IEM_MC_SET_RIP_U32_AND_FINISH
1469
1470
1471/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
1472#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
1473 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1474
1475/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
1476 * and clears flags. */
1477#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
1478 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
1479 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1480
1481#undef IEM_MC_SET_RIP_U64_AND_FINISH
1482
1483
1484/** Same as iemRegRipJumpU16AndFinishNoFlags,
1485 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
1486DECL_INLINE_THROW(uint32_t)
1487iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
1488 uint8_t idxInstr, uint8_t cbVar)
1489{
1490 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1491 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1492
1493 /* If we can't rule out a #GP(0) below, flush all dirty registers except for
1494 PC, which will be handled specially by the two workers below if they raise a #GP(0). */
1495 bool const fMayRaiseGp0 = (f64Bit && cbVar > sizeof(uint32_t)) || (!f64Bit && !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1496 uint8_t const idxOldPcReg = fMayRaiseGp0
1497 ? iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc)
1498 : UINT8_MAX;
1499 if (fMayRaiseGp0)
1500 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1501
1502 /* Get a register with the new PC loaded from idxVarPc.
1503 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1504 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
1505
1506 /* Check that the target is within CS.LIM / is canonical (may #GP(0) + exit TB). */
1507 if (fMayRaiseGp0)
1508 {
1509 if (f64Bit)
1510 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1511 else
1512 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1513 }
1514
1515 /* Store the result. */
1516 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1517
1518#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1519 pReNative->Core.offPc = 0;
1520 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1521# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1522 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1523 pReNative->Core.fDebugPcInitialized = true;
1524 Log4(("uPcUpdatingDebug=rip off=%#x\n", off));
1525# endif
1526#endif
1527
1528 if (idxOldPcReg != UINT8_MAX)
1529 iemNativeRegFreeTmp(pReNative, idxOldPcReg);
1530 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1531 /** @todo implicitly free the variable? */
1532
1533 return off;
1534}
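
/* Note: fMayRaiseGp0 above is set exactly when the new PC needs validating: a
   64-bit target wider than 32 bits must be canonical, and a 16/32-bit target
   outside flat mode must be below CS.LIM.  Only then is the old PC picked up
   (if it is already shadowed in a host register) and are the pending writes
   flushed, so the check helpers can present a consistent guest state if they
   exit the TB with #GP(0). */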
1535
1536
1537
1538/*********************************************************************************************************************************
1539* Emitters for changing PC/RIP/EIP/IP with an indirect call (IEM_MC_IND_CALL_UXX_AND_FINISH) (requires stack emitters). *
1540*********************************************************************************************************************************/
1541
1542/** @todo These helpers naturally belong to the stack push API, but they are already needed up here (we could of course move
1543 * them below the stack emitters, but then they would no longer be close to the rest of the PC/RIP handling...). */
1544DECL_FORCE_INLINE_THROW(uint32_t)
1545iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1546{
1547 /* Use16BitSp: */
1548#ifdef RT_ARCH_AMD64
1549 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1550 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1551#else
1552 /* sub regeff, regrsp, #cbMem */
1553 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
1554 /* and regeff, regeff, #0xffff */
1555 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1556 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
1557 /* bfi regrsp, regeff, #0, #16 - moves bits 15:0 from idxRegEffSp into idxRegRsp bits 15:0, keeping the rest of RSP as is. */
1558 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
1559#endif
1560 return off;
1561}
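
/* Worked example for the 16-bit SP case above: pushing 2 bytes with
   RSP=0x0000000100000001 gives an effective store address of SS:0xFFFF and
   leaves RSP at 0x000000010000FFFF - only bits 15:0 wrap, bits 63:16 are
   preserved.  Hence the 16-bit SUB on AMD64 and the mask-with-0xffff plus BFI
   merge back into RSP on ARM64. */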
1562
1563
1564DECL_FORCE_INLINE(uint32_t)
1565iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1566{
1567 /* Use32BitSp: */
1568 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
1569 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1570 return off;
1571}
1572
1573
1574template<uint8_t const a_cBitsVar, uint8_t const a_cBitsFlat>
1575DECL_INLINE_THROW(uint32_t)
1576iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
1577 uintptr_t pfnFunction, uint8_t idxInstr)
1578{
1579 /*
1580 * Assert sanity.
1581 */
1582#ifdef VBOX_STRICT
1583 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
1584 {
1585 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
1586 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
1587 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
1588 Assert( pfnFunction
1589 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 32) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1590 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 32) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
1591 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 64) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1592 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 64) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
1593 : UINT64_C(0xc000b000a0009000) ));
1594 }
1595 else
1596 Assert( pfnFunction
1597 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
1598 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
1599 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
1600 : UINT64_C(0xc000b000a0009000) ));
1601#endif
1602
1603#ifdef VBOX_STRICT
1604 /*
1605 * Check that the fExec flags we've got make sense.
1606 */
1607 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1608#endif
1609
1610 /*
1611 * To keep things simple we have to commit any pending writes first as we
1612 * may end up making calls.
1613 */
1614 /** @todo we could postpone this till we make the call and reload the
1615 * registers after returning from the call. Not sure if that's sensible or
1616 * not, though. */
1617 off = iemNativeRegFlushPendingWrites(pReNative, off);
1618
1619 /*
1620 * First we calculate the new RSP and the effective stack pointer value.
1621 * For 64-bit mode and flat 32-bit these two are the same.
1622 * (Code structure is very similar to that of PUSH)
1623 */
1624 RT_CONSTEXPR
1625 uint8_t const cbMem = a_cBitsVar / 8;
1626 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1627 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1628 uint8_t const idxRegEffSp = a_cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1629 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1630 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
1631 {
1632 Assert(idxRegEffSp == idxRegRsp);
1633 Assert(a_cBitsFlat == 32 || a_cBitsFlat == 64);
1634 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1635 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
1636 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1637 else
1638 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1639 }
1640 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1641 {
1642 Assert(idxRegEffSp != idxRegRsp);
1643 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1644 kIemNativeGstRegUse_ReadOnly);
1645#ifdef RT_ARCH_AMD64
1646 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1647#else
1648 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1649#endif
1650 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1651 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1652 offFixupJumpToUseOtherBitSp = off;
1653 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1654 {
1655 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1656 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1657 }
1658 else
1659 {
1660 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1661 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1662 }
1663 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1664 }
1665 /* SpUpdateEnd: */
1666 uint32_t const offLabelSpUpdateEnd = off;
1667
1668 /*
1669 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1670 * we're skipping lookup).
1671 */
1672 uint8_t const iSegReg = a_cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1673 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
1674 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1675 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1676 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1677 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1678 : UINT32_MAX;
1679 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1680
1681
1682 if (!TlbState.fSkip)
1683 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1684 else
1685 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1686
1687 /*
1688 * Use16BitSp:
1689 */
1690 if RT_CONSTEXPR_IF(a_cBitsFlat == 0)
1691 {
1692#ifdef RT_ARCH_AMD64
1693 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1694#else
1695 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1696#endif
1697 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1698 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1699 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1700 else
1701 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1702 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1703 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1704 }
1705
1706 /*
1707 * TlbMiss:
1708 *
1709 * Call helper to do the pushing.
1710 */
1711 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1712
1713#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1714 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1715#else
1716 RT_NOREF(idxInstr);
1717#endif
1718
1719 /* Save variables in volatile registers. */
1720 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1721 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1722 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1723 | (RT_BIT_32(idxRegPc));
1724 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1725
1726 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1727 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1728 {
1729 /* Swap them using ARG0 as temp register: */
1730 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1731 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1732 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1733 }
1734 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1735 {
1736 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1737 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1738
1739 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1740 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1741 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1742 }
1743 else
1744 {
1745 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1746 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1747
1748 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1749 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1750 }
1751
1752#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
1753 /* Do delayed EFLAGS calculations. */
1754 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
1755 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
1756#endif
1757
1758 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1759 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1760
1761 /* Done setting up parameters, make the call. */
1762 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
1763
1764 /* Restore variables and guest shadow registers to volatile registers. */
1765 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1766 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1767
1768#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1769 if (!TlbState.fSkip)
1770 {
1771 /* end of TlbMiss - Jump to the done label. */
1772 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1773 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1774
1775 /*
1776 * TlbLookup:
1777 */
1778 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1,
1779 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
1780
1781 /*
1782 * Emit code to do the actual storing / fetching.
1783 */
1784 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1785# ifdef IEM_WITH_TLB_STATISTICS
1786 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1787 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1788# endif
1789 AssertCompile(cbMem == 2 || cbMem == 4 || cbMem == 8);
1790 if RT_CONSTEXPR_IF(cbMem == 2)
1791 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1792 else if RT_CONSTEXPR_IF(cbMem == 4)
1793 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1794 else
1795 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1796
1797 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1798 TlbState.freeRegsAndReleaseVars(pReNative);
1799
1800 /*
1801 * TlbDone:
1802 *
1803 * Commit the new RSP value.
1804 */
1805 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1806 }
1807#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1808
1809#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1810 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
1811#endif
1812 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1813 if (idxRegEffSp != idxRegRsp)
1814 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1815
1816 return off;
1817}
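
/* Summary: a_cBitsVar is the width of the value being pushed (16/32/64) and
   a_cBitsFlat is 0 for segmented stacks or 32/64 for flat ones.  The emitter
   first computes the new RSP and the effective stack pointer (branching on
   SS.ATTR.D for segmented stacks), then tries the TLB fast path which stores
   idxRegPc directly via idxRegMemResult, falling back to the TlbMiss path
   which calls pfnFunction with pVCpu, the effective stack pointer and the
   value.  The updated RSP is committed at the end unless delayed register
   writeback keeps it as a pending write. */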
1818
1819
1820/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1821#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1822 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1823
1824/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1825 * clears flags. */
1826#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1827 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1828 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1829
1830/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1831#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1832 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1833
1834/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1835 * clears flags. */
1836#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1837 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1838 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1839
1840#undef IEM_MC_IND_CALL_U16_AND_FINISH
1841
1842
1843/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1844#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1845 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1846
1847/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1848 * clears flags. */
1849#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1850 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1851 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1852
1853#undef IEM_MC_IND_CALL_U32_AND_FINISH
1854
1855
1856/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1857 * an extra parameter, for use in 64-bit code. */
1858#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1859 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1860
1861
1862/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1863 * an extra parameter, for use in 64-bit code and we need to check and clear
1864 * flags. */
1865#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1866 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1867 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1868
1869#undef IEM_MC_IND_CALL_U64_AND_FINISH
1870
1871/** Common worker for the IEM_MC_IND_CALL_UXX_AND_FINISH variants above
1872 * (indirect call: push the return address and set the new IP/EIP/RIP). */
1873DECL_INLINE_THROW(uint32_t)
1874iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1875 uint8_t idxInstr, uint8_t cbVar)
1876{
1877 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1878 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1879
1880 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1881 off = iemNativeRegFlushPendingWrites(pReNative, off);
1882
1883#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1884 Assert(pReNative->Core.offPc == 0);
1885 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1886#endif
1887
1888 /* Get a register with the new PC loaded from idxVarPc.
1889 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1890 uint8_t const idxNewPcReg = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1891
1892 /* Check limit (may #GP(0) + exit TB). */
1893 if (!f64Bit)
1894/** @todo we can skip this test in FLAT 32-bit mode. */
1895 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1896 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1897 else if (cbVar > sizeof(uint32_t))
1898 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1899
1900#if 1
1901 /* Allocate a temporary PC register, we don't want it shadowed. */
1902 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1903 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1904#else
1905 /* Allocate a temporary PC register. */
1906 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1907 true /*fNoVolatileRegs*/);
1908#endif
1909
1910 /* Perform the addition and push the variable to the guest stack. */
1911 /** @todo Flat variants for PC32 variants. */
1912 switch (cbVar)
1913 {
1914 case sizeof(uint16_t):
1915 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1916 /* Truncate the result to 16-bit IP. */
1917 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1918 off = iemNativeEmitStackPushRip<16, 0>(pReNative, off, idxPcReg, (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1919 break;
1920 case sizeof(uint32_t):
1921 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1922 /** @todo In FLAT mode we can use the flat variant. */
1923 off = iemNativeEmitStackPushRip<32, 0>(pReNative, off, idxPcReg, (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1924 break;
1925 case sizeof(uint64_t):
1926 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1927 off = iemNativeEmitStackPushRip<64, 64>(pReNative, off, idxPcReg, (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1928 break;
1929 default:
1930 AssertFailed();
1931 }
1932
1933 /* RSP got changed, so do this again. */
1934 off = iemNativeRegFlushPendingWrites(pReNative, off);
1935
1936 /* Store the result. */
1937 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1938#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1939 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1940 pReNative->Core.fDebugPcInitialized = true;
1941 Log4(("uPcUpdatingDebug=rip/indirect-call off=%#x\n", off));
1942#endif
1943
1944#if 1
1945 /* Need to transfer the shadow information to the new RIP register. */
1946 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxNewPcReg, kIemNativeGstReg_Pc, off);
1947#else
1948 /* Sync the new PC. */
1949 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxNewPcReg);
1950#endif
1951 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1952 iemNativeRegFreeTmp(pReNative, idxPcReg);
1953 /** @todo implicitly free the variable? */
1954
1955 return off;
1956}
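
/* Note: the ordering above matters for the indirect call: the target in
   idxNewPcReg is validated first (CS.LIM check for 16/32-bit, canonical check
   for a 64-bit target), then the return address (old PC + cbInstr, truncated
   to 16 bits in the 16-bit case) is pushed, and only after the push has
   succeeded is the new PC committed and the PC shadow transferred to
   idxNewPcReg. */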
1957
1958
1959/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1960 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1961#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1962 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1963
1964/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1965 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
1966 * flags. */
1967#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
1968 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
1969 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1970
1971/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1972 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1973#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
1974 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1975
1976/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1977 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1978 * flags. */
1979#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
1980 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
1981 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1982
1983/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1984 * an extra parameter, for use in 64-bit code. */
1985#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
1986 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1987
1988/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1989 * an extra parameter, for use in 64-bit code and we need to check and
1990 * clear flags. */
1991#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
1992 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
1993 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1994
1995#undef IEM_MC_REL_CALL_S16_AND_FINISH
1996
1997/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1998 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1999DECL_INLINE_THROW(uint32_t)
2000iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
2001 uint8_t idxInstr)
2002{
2003 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2004 off = iemNativeRegFlushPendingWrites(pReNative, off);
2005
2006#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2007 Assert(pReNative->Core.offPc == 0);
2008 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2009#endif
2010
2011 /* Allocate a temporary PC register. */
2012 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2013 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
2014 uint8_t const idxPcRegNew = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
2015
2016 /* Calculate the new RIP. */
2017 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2018 /* Truncate the result to 16-bit IP. */
2019 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
2020 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2021 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2022
2023 /* Truncate the result to 16-bit IP. */
2024 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
2025
2026 /* Check limit (may #GP(0) + exit TB). */
2027 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2028
2029 /* Perform the addition and push the variable to the guest stack. */
2030 off = iemNativeEmitStackPushRip<16, 0>(pReNative, off, idxPcRegOld, (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
2031
2032 /* RSP got changed, so flush again. */
2033 off = iemNativeRegFlushPendingWrites(pReNative, off);
2034
2035 /* Store the result. */
2036 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2037#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2038 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2039 pReNative->Core.fDebugPcInitialized = true;
2040 Log4(("uPcUpdatingDebug=rip/rel-call-16 off=%#x offDisp=%d\n", off, offDisp));
2041#endif
2042
2043 /* Need to transfer the shadow information to the new RIP register. */
2044 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2045 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2046 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2047
2048 return off;
2049}
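
/* Note: both values above are truncated to a 16-bit IP: the return address
   (old IP + cbInstr) before it is pushed, and the target (return address +
   offDisp) before the CS.LIM check, matching the 16-bit wrap-around semantics
   of a near call with a 16-bit operand size. */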
2050
2051
2052/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2053 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
2054#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
2055 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
2056
2057/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2058 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
2059 * flags. */
2060#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
2061 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
2062 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2063
2064#undef IEM_MC_REL_CALL_S32_AND_FINISH
2065
2066/** Same as iemRegEip32RelativeCallS32AndFinishNoFlags. */
2068DECL_INLINE_THROW(uint32_t)
2069iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
2070 uint8_t idxInstr)
2071{
2072 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2073 off = iemNativeRegFlushPendingWrites(pReNative, off);
2074
2075#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2076 Assert(pReNative->Core.offPc == 0);
2077 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2078#endif
2079
2080 /* Allocate a temporary PC register. */
2081 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2082 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2083 uint8_t const idxPcRegNew = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
2084
2085 /* Update the EIP to get the return address. */
2086 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2087
2088 /* Load the address, add the displacement and check it against CS.LIM, raising #GP(0) + exit TB if it's beyond the limit. */
2089 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2090 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2091 /** @todo we can skip this test in FLAT 32-bit mode. */
2092 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2093
2094 /* Push the return address onto the guest stack. */
2095 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
2096 off = iemNativeEmitStackPushRip<32, 0>(pReNative, off, idxPcRegOld, (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
2097
2098 /* RSP got changed, so do this again. */
2099 off = iemNativeRegFlushPendingWrites(pReNative, off);
2100
2101 /* Store the result. */
2102 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2103#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2104 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2105 pReNative->Core.fDebugPcInitialized = true;
2106 Log4(("uPcUpdatingDebug=eip/rel-call-32 off=%#x offDisp=%d\n", off, offDisp));
2107#endif
2108
2109 /* Need to transfer the shadow information to the new RIP register. */
2110 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2111 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2112 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2113
2114 return off;
2115}
2116
2117
2118/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2119 * an extra parameter, for use in 64-bit code. */
2120#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
2121 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
2122
2123/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2124 * an extra parameter, for use in 64-bit code and we need to check and clear
2125 * flags. */
2126#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
2127 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
2128 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2129
2130#undef IEM_MC_REL_CALL_S64_AND_FINISH
2131
2132/** Same as iemRegRip64RelativeCallS64AndFinishNoFlags. */
2134DECL_INLINE_THROW(uint32_t)
2135iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
2136 uint8_t idxInstr)
2137{
2138 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2139 off = iemNativeRegFlushPendingWrites(pReNative, off);
2140
2141#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2142 Assert(pReNative->Core.offPc == 0);
2143 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2144#endif
2145
2146 /* Allocate a temporary PC register. */
2147 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2148 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2149 uint8_t const idxPcRegNew = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
2150
2151 /* Update the RIP to get the return address. */
2152 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
2153
2154 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2155 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
2156 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
2157 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2158
2159 /* Push the return address onto the guest stack. */
2160 off = iemNativeEmitStackPushRip<64, 64>(pReNative, off, idxPcRegOld, (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
2161
2162 /* RSP got changed, so do this again. */
2163 off = iemNativeRegFlushPendingWrites(pReNative, off);
2164
2165 /* Store the result. */
2166 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2167#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2168 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2169 pReNative->Core.fDebugPcInitialized = true;
2170 Log4(("uPcUpdatingDebug=rip/rel-call-64 off=%#x offDisp=%ld\n", off, offDisp));
2171#endif
2172
2173 /* Need to transfer the shadow information to the new RIP register. */
2174 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2175 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2176 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2177
2178 return off;
2179}
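
/* Note: the 64-bit relative call needs neither truncation nor a CS.LIM check:
   the return address is RIP + cbInstr, pushed with the flat 64-bit stack
   helper, and the target (return address + offDisp) only has to pass the
   canonical-address check before it is committed as the new RIP. */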
2180
2181
2182/*********************************************************************************************************************************
2183* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters). *
2184*********************************************************************************************************************************/
2185
2186DECL_FORCE_INLINE_THROW(uint32_t)
2187iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2188 uint16_t cbPopAdd, uint8_t idxRegTmp)
2189{
2190 /* Use16BitSp: */
2191#ifdef RT_ARCH_AMD64
2192 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2193 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
2194 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
2195 RT_NOREF(idxRegTmp);
2196
2197#elif defined(RT_ARCH_ARM64)
2198 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
2199 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
2200 /* add tmp, regrsp, #cbMem */
2201 uint16_t const cbCombined = cbMem + cbPopAdd;
2202 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
2203 if (cbCombined >= RT_BIT_32(12))
2204 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
2205 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2206 /* and tmp, tmp, #0xffff */
2207 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2208 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
2209 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
2210 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
2211
2212#else
2213# error "Port me"
2214#endif
2215 return off;
2216}
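
/* Worked example for the 16-bit SP retn case above: 'retn 8' with a 16-bit
   operand size has cbMem=2 and cbPopAdd=8, so with SP=0xFFFC the return
   address is read from SS:0xFFFC and the new SP becomes (0xFFFC + 10) & 0xffff
   = 0x0006; as with the push helpers only bits 15:0 of RSP are replaced.  The
   extra shifted ADD handles combined immediates of 4096 or more, which a
   single 12-bit ARM64 ADD immediate cannot encode. */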
2217
2218
2219DECL_FORCE_INLINE_THROW(uint32_t)
2220iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2221 uint16_t cbPopAdd)
2222{
2223 /* Use32BitSp: */
2224 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2225 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
2226 return off;
2227}
2228
2229
2230/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
2231#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_cbPopArgs, a_cbInstr) \
2232 off = iemNativeEmitRetn<IEMMODE_16BIT, false>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr)
2233
2234/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
2235#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2236 Assert((a_enmEffOpSize) == IEMMODE_32BIT || (a_enmEffOpSize) == IEMMODE_16BIT); \
2237 off = (a_enmEffOpSize) == IEMMODE_32BIT \
2238 ? iemNativeEmitRetn<IEMMODE_32BIT, false>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr) \
2239 : iemNativeEmitRetn<IEMMODE_16BIT, false>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr)
2240
2241/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
2242#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2243 Assert((a_enmEffOpSize) == IEMMODE_64BIT || (a_enmEffOpSize) == IEMMODE_16BIT); \
2244 off = (a_enmEffOpSize) == IEMMODE_64BIT \
2245 ? iemNativeEmitRetn<IEMMODE_64BIT, true>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr) \
2246 : iemNativeEmitRetn<IEMMODE_16BIT, true>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr)
2247
2248/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
2249 * clears flags. */
2250#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbPopArgs, a_cbInstr) \
2251 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_cbPopArgs, a_cbInstr); \
2252 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2253
2254/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
2255 * clears flags. */
2256#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2257 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_cbPopArgs, a_cbInstr, a_enmEffOpSize); \
2258 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2259
2260/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
2261 * clears flags. */
2262#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2263 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_cbPopArgs, a_cbInstr, a_enmEffOpSize); \
2264 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2265
2266/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
2267template<IEMMODE const a_enmEffOpSize, bool const a_f64Bit>
2268DECL_INLINE_THROW(uint32_t)
2269iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPopArgs, uint8_t idxInstr)
2270{
2271 RT_NOREF(cbInstr);
2272 AssertCompile(a_enmEffOpSize == IEMMODE_64BIT || a_enmEffOpSize == IEMMODE_32BIT || a_enmEffOpSize == IEMMODE_16BIT);
2273
2274#ifdef VBOX_STRICT
2275 /*
2276 * Check that the fExec flags we've got make sense.
2277 */
2278 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
2279#endif
2280
2281 /*
2282 * To keep things simple we have to commit any pending writes first as we
2283 * may end up making calls.
2284 */
2285 off = iemNativeRegFlushPendingWrites(pReNative, off);
2286
2287 /*
2288 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
2289 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
2290 * directly as the effective stack pointer.
2291 *
2292 * (Code structure is very similar to that of PUSH)
2293 *
2294 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
2295 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
2296 * aren't commonly used (or useful) and thus not in need of optimizing.
2297 *
2298 * Note! For non-flat modes the guest RSP is not allocated for update but
2299 * rather for calculation as the shadowed register would remain modified
2300 * even if the return address throws a #GP(0) due to being outside the
2301 * CS limit causing a wrong stack pointer value in the guest (see the
2302 * near return testcase in bs3-cpu-basic-2). If no exception is thrown
2303 * the shadowing is transferred to the new register returned by
2304 * iemNativeRegAllocTmpForGuestReg() at the end.
2305 */
2306 RT_CONSTEXPR
2307 uint8_t const cbMem = a_enmEffOpSize == IEMMODE_64BIT
2308 ? sizeof(uint64_t)
2309 : a_enmEffOpSize == IEMMODE_32BIT
2310 ? sizeof(uint32_t)
2311 : sizeof(uint16_t);
2312/** @todo the basic flatness could be detected by the threaded compiler step
2313 * like for the other macros... worth it? */
2314 bool const fFlat = a_enmEffOpSize == IEMMODE_64BIT
2315 || (a_enmEffOpSize == IEMMODE_32BIT /* see note */ && IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
2316 uintptr_t const pfnFunction = a_enmEffOpSize == IEMMODE_64BIT
2317 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
2318 : fFlat
2319 ? (uintptr_t)iemNativeHlpStackFlatFetchU32
2320 : a_enmEffOpSize == IEMMODE_32BIT
2321 ? (uintptr_t)iemNativeHlpStackFetchU32
2322 : (uintptr_t)iemNativeHlpStackFetchU16;
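    /*
     * Helper selection above, summarized (this just mirrors the nested
     * conditionals and implies no new behaviour):
     *      64-bit opsize            -> iemNativeHlpStackFlatFetchU64
     *      32-bit opsize, FLAT      -> iemNativeHlpStackFlatFetchU32
     *      32-bit opsize, segmented -> iemNativeHlpStackFetchU32
     *      16-bit opsize            -> iemNativeHlpStackFetchU16
     */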
2323 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
2324 fFlat ? kIemNativeGstRegUse_ForUpdate
2325 : kIemNativeGstRegUse_Calculation,
2326 true /*fNoVolatileRegs*/);
2327 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
2328 /** @todo can do a better job picking the register here. For cbMem >= 4 this
2329 * will be the resulting register value. */
2330 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
2331
2332 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
2333 if (fFlat)
2334 Assert(idxRegEffSp == idxRegRsp);
2335 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
2336 {
2337 Assert(idxRegEffSp != idxRegRsp);
2338 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
2339 kIemNativeGstRegUse_ReadOnly);
2340#ifdef RT_ARCH_AMD64
2341 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2342#else
2343 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2344#endif
2345 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
2346 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
2347 offFixupJumpToUseOtherBitSp = off;
2348 if RT_CONSTEXPR_IF(a_enmEffOpSize == IEMMODE_32BIT)
2349 {
2350 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
2351 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs);
2352 }
2353 else
2354 {
2355 Assert(a_enmEffOpSize == IEMMODE_16BIT);
2356 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
2357 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs,
2358 idxRegMemResult);
2359 }
2360 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2361 }
2362 /* SpUpdateEnd: */
2363 uint32_t const offLabelSpUpdateEnd = off;
2364
2365 /*
2366 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
2367 * we're skipping lookup).
2368 */
2369 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
2370 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
2371 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
2372 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
2373 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
2374 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
2375 : UINT32_MAX;
2376
2377 if (!TlbState.fSkip)
2378 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
2379 else
2380 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
2381
2382 /*
2383     * UseOtherBitSp:
2384 */
2385 if (!fFlat)
2386 {
2387#ifdef RT_ARCH_AMD64
2388 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2389#else
2390 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2391#endif
2392 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
2393 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
2394 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs,
2395 idxRegMemResult);
2396 else
2397 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs);
2398 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
2399 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2400 }
2401
2402 /*
2403 * TlbMiss:
2404 *
2405     * Call helper to do the fetching (stack pop).
2406 */
2407 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
2408
2409#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2410 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2411#else
2412 RT_NOREF(idxInstr);
2413#endif
2414
2415 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
2416 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
2417 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
2418 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
2419
2420
2421 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
2422 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
2423 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
2424
2425#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2426 /* Do delayed EFLAGS calculations. */
2427 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
2428#endif
2429
2430 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
2431 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2432
2433 /* Done setting up parameters, make the call. */
2434 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
2435
2436 /* Move the return register content to idxRegMemResult. */
2437 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
2438 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
2439
2440 /* Restore variables and guest shadow registers to volatile registers. */
2441 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
2442 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
2443
2444#ifdef IEMNATIVE_WITH_TLB_LOOKUP
2445 if (!TlbState.fSkip)
2446 {
2447 /* end of TlbMiss - Jump to the done label. */
2448 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
2449 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
2450
2451 /*
2452 * TlbLookup:
2453 */
2454 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
2455 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
2456
2457 /*
2458         * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
2459 */
2460 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2461# ifdef IEM_WITH_TLB_STATISTICS
2462 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
2463 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
2464# endif
2465 switch (cbMem)
2466 {
2467 case 2:
2468 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2469 break;
2470 case 4:
2471 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2472 break;
2473 case 8:
2474 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2475 break;
2476 default:
2477 AssertFailed();
2478 }
2479
2480 TlbState.freeRegsAndReleaseVars(pReNative);
2481
2482 /*
2483 * TlbDone:
2484 *
2485         * Set the new RSP value (FLAT accesses need to calculate it first) and
2486         * commit the popped return address.
2487 */
2488 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
2489 }
2490#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
2491
2492 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
2493 if RT_CONSTEXPR_IF(!a_f64Bit)
2494/** @todo we can skip this test in FLAT 32-bit mode. */
2495 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2496 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2497 else if RT_CONSTEXPR_IF(a_enmEffOpSize == IEMMODE_64BIT)
2498 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2499
2500 /* Complete RSP calculation for FLAT mode. */
2501 if (idxRegEffSp == idxRegRsp)
2502 {
2503 if RT_CONSTEXPR_IF(a_enmEffOpSize == IEMMODE_64BIT)
2504 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPopArgs);
2505 else
2506 {
2507 Assert(a_enmEffOpSize == IEMMODE_32BIT);
2508 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPopArgs);
2509 }
2510 }
2511
2512 /* Commit the result and clear any current guest shadows for RIP. */
2513 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
2514 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2515 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
2516#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2517 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2518 pReNative->Core.fDebugPcInitialized = true;
2519 Log4(("uPcUpdatingDebug=rip/ret off=%#x\n", off));
2520#endif
2521
2522 /* Need to transfer the shadowing information to the host register containing the updated value now. */
2523 if (!fFlat)
2524 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
2525
2526 iemNativeRegFreeTmp(pReNative, idxRegRsp);
2527 if (idxRegEffSp != idxRegRsp)
2528 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
2529 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
2530 return off;
2531}
2532
2533
2534/*********************************************************************************************************************************
2535* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
2536*********************************************************************************************************************************/
2537
2538#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
2539 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2540
2541/**
2542 * Emits code to check if a \#NM exception should be raised.
2543 *
2544 * @returns New code buffer offset, UINT32_MAX on failure.
2545 * @param pReNative The native recompile state.
2546 * @param off The code buffer offset.
2547 * @param idxInstr The current instruction.
2548 */
2549DECL_INLINE_THROW(uint32_t)
2550iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2551{
2552#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2553 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
2554
2555 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
2556 {
2557#endif
2558 /*
2559 * Make sure we don't have any outstanding guest register writes as we may
2560         * raise an #NM and all guest registers must be up to date in CPUMCTX.
2561 */
2562 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2563 off = iemNativeRegFlushPendingWrites(pReNative, off);
2564
2565#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2566 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2567#else
2568 RT_NOREF(idxInstr);
2569#endif
2570
2571 /* Allocate a temporary CR0 register. */
2572 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2573 kIemNativeGstRegUse_ReadOnly);
2574
2575 /*
2576         * if ((cr0 & (X86_CR0_EM | X86_CR0_TS)) != 0)
2577 * return raisexcpt();
2578 */
2579 /* Test and jump. */
2580 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseNm>(pReNative, off, idxCr0Reg,
2581 X86_CR0_EM | X86_CR0_TS);
2582
2583 /* Free but don't flush the CR0 register. */
2584 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2585
2586#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2587 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
2588 }
2589 else
2590 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
2591#endif
2592
2593 return off;
2594}
2595
2596
2597#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
2598 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2599
2600/**
2601 * Emits code to check if a \#NM exception should be raised for WAIT/FWAIT.
2602 *
2603 * @returns New code buffer offset, UINT32_MAX on failure.
2604 * @param pReNative The native recompile state.
2605 * @param off The code buffer offset.
2606 * @param idxInstr The current instruction.
2607 */
2608DECL_INLINE_THROW(uint32_t)
2609iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2610{
2611#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2612 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
2613
2614 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
2615 {
2616#endif
2617 /*
2618 * Make sure we don't have any outstanding guest register writes as we may
2619         * raise an #NM and all guest registers must be up to date in CPUMCTX.
2620 */
2621 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2622 off = iemNativeRegFlushPendingWrites(pReNative, off);
2623
2624#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2625 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2626#else
2627 RT_NOREF(idxInstr);
2628#endif
2629
2630 /* Allocate a temporary CR0 register. */
2631 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2632 kIemNativeGstRegUse_Calculation);
2633
2634 /*
2635         * if ((cr0 & (X86_CR0_MP | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))
2636 * return raisexcpt();
2637 */
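        /*
         * Worked example (bit values from x86.h, shown only to illustrate the test):
         *      X86_CR0_MP = RT_BIT_32(1) = 0x02, X86_CR0_TS = RT_BIT_32(3) = 0x08.
         * cr0 & 0x0a equals 0x0a only when both MP and TS are set, which is the one
         * combination where WAIT/FWAIT must raise #NM; any other combination falls
         * through without exiting.
         */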
2638 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2639 /* Test and jump. */
2640 off = iemNativeEmitTbExitIfGpr32EqualsImm<kIemNativeLabelType_RaiseNm>(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2641
2642 /* Free the CR0 register. */
2643 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2644
2645#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2646 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2647 }
2648 else
2649 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2650#endif
2651
2652 return off;
2653}
2654
2655
2656#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2657 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2658
2659/**
2660 * Emits code to check if a \#MF exception should be raised.
2661 *
2662 * @returns New code buffer offset, UINT32_MAX on failure.
2663 * @param pReNative The native recompile state.
2664 * @param off The code buffer offset.
2665 * @param idxInstr The current instruction.
2666 */
2667DECL_INLINE_THROW(uint32_t)
2668iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2669{
2670 /*
2671 * Make sure we don't have any outstanding guest register writes as we may
2672     * raise an #MF and all guest registers must be up to date in CPUMCTX.
2673 */
2674 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2675 off = iemNativeRegFlushPendingWrites(pReNative, off);
2676
2677#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2678 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2679#else
2680 RT_NOREF(idxInstr);
2681#endif
2682
2683 /* Allocate a temporary FSW register. */
2684 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
2685 kIemNativeGstRegUse_ReadOnly);
2686
2687 /*
2688     * if ((FSW & X86_FSW_ES) != 0)
2689 * return raisexcpt();
2690 */
2691 /* Test and jump. */
2692 off = iemNativeEmitTbExitIfBitSetInGpr<kIemNativeLabelType_RaiseMf>(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT);
2693
2694 /* Free but don't flush the FSW register. */
2695 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2696
2697 return off;
2698}
2699
2700
2701#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2702 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2703
2704/**
2705 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2706 *
2707 * @returns New code buffer offset, UINT32_MAX on failure.
2708 * @param pReNative The native recompile state.
2709 * @param off The code buffer offset.
2710 * @param idxInstr The current instruction.
2711 */
2712DECL_INLINE_THROW(uint32_t)
2713iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2714{
2715#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2716 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2717
2718 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2719 {
2720#endif
2721 /*
2722 * Make sure we don't have any outstanding guest register writes as we may
2723         * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2724 */
2725 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2726 off = iemNativeRegFlushPendingWrites(pReNative, off);
2727
2728#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2729 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2730#else
2731 RT_NOREF(idxInstr);
2732#endif
2733
2734         /* Allocate temporary CR0 and CR4 registers. */
2735 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2736 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2737 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2738
2739 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2740#ifdef RT_ARCH_AMD64
2741 /*
2742 * We do a modified test here:
2743         *     if (!((((cr4 & X86_CR4_OSFXSR) | cr0) & (X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR)) ^ X86_CR4_OSFXSR)) { likely }
2744 * else { goto RaiseSseRelated; }
2745 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
2746         * all targets except the 386, which doesn't support SSE, so this should
2747         * be a safe assumption.
2748 */
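        /*
         * Worked example of the sequence below, using typical (purely illustrative)
         * protected mode values: cr0 = 0x80050033 (EM=0, TS=0) and cr4 with
         * OSFXSR (bit 9, 0x200) set:
         *      tmp  = 0x200 & cr4              -> 0x00000200
         *      tmp |= cr0                      -> 0x80050233
         *      tmp &= EM|TS|OSFXSR (0x20c)     -> 0x00000200
         *      tmp ^= OSFXSR (0x200)           -> 0           => likely path, no exit
         * With CR0.TS set (cr0 |= 0x8) the result is 0x8 != 0 and we exit to
         * RaiseSseRelated; with CR4.OSFXSR clear it is 0x200 != 0.
         */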
2749 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2750 1+6+3+3+7+7+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2751 //pCodeBuf[off++] = 0xcc;
2752 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2753 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2754 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2755 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2756 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2757 off = iemNativeEmitTbExitJccEx<kIemNativeLabelType_RaiseSseRelated>(pReNative, pCodeBuf, off, kIemNativeInstrCond_ne);
2758
2759#elif defined(RT_ARCH_ARM64)
2760 /*
2761 * We do a modified test here:
2762 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2763 * else { goto RaiseSseRelated; }
2764 */
2765 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2766 1+5 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2767 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2768 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2769 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2770 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2771 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2772 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2773 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2774 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2775 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_RaiseSseRelated>(pReNative, pCodeBuf, off,
2776 idxTmpReg, false /*f64Bit*/);
2777
2778#else
2779# error "Port me!"
2780#endif
2781
2782 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2783 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2784 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2785 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2786
2787#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2788 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2789 }
2790 else
2791 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2792#endif
2793
2794 return off;
2795}
2796
2797
2798#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2799 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2800
2801/**
2802 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2803 *
2804 * @returns New code buffer offset, UINT32_MAX on failure.
2805 * @param pReNative The native recompile state.
2806 * @param off The code buffer offset.
2807 * @param idxInstr The current instruction.
2808 */
2809DECL_INLINE_THROW(uint32_t)
2810iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2811{
2812#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2813 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2814
2815 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2816 {
2817#endif
2818 /*
2819 * Make sure we don't have any outstanding guest register writes as we may
2820         * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2821 */
2822 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2823 off = iemNativeRegFlushPendingWrites(pReNative, off);
2824
2825#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2826 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2827#else
2828 RT_NOREF(idxInstr);
2829#endif
2830
2831         /* Allocate temporary CR0, CR4 and XCR0 registers. */
2832 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2833 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2834 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2835 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2836
2837 /*
2838 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2839 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2840 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2841 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2842 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2843 * { likely }
2844 * else { goto RaiseAvxRelated; }
2845 */
2846#ifdef RT_ARCH_AMD64
2847 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2848                 | (((cr4 >> X86_CR4_OSXSAVE_BIT) & 1) << 1)
2849 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2850 ^ 0x1a) ) { likely }
2851 else { goto RaiseAvxRelated; } */
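        /*
         * Worked example of the 0x1a constant (illustrative values only): the two
         * rotate-through-carry steps leave
         *      tmp = (TS << 0) | (OSXSAVE << 1) | (SSE << 3) | (YMM << 4)
         * so the happy case TS=0, OSXSAVE=1, SSE=1, YMM=1 gives
         *      0 | 0x02 | 0x08 | 0x10 = 0x1a,
         * which the XOR with ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2 = 0x1a turns into
         * zero; any other combination leaves a non-zero bit and we exit to
         * RaiseAvxRelated.
         */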
2852 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2853 1+6+3+5+3+5+3+7+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2854 //pCodeBuf[off++] = 0xcc;
2855 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2856 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2857 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2858 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2859 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2860 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2861 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2862 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2863 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2864 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2865 off = iemNativeEmitTbExitJccEx<kIemNativeLabelType_RaiseAvxRelated>(pReNative, pCodeBuf, off, kIemNativeInstrCond_ne);
2866
2867#elif defined(RT_ARCH_ARM64)
2868         /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSXSAVE_BIT) & 1)) ^ 7) << 1)
2869 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
2870 else { goto RaiseAvxRelated; } */
2871 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2872 1+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2873 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2874 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2875 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2876 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2877 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2878 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2879 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2880 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2881 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2882 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2883 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2884 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_RaiseAvxRelated>(pReNative, pCodeBuf, off,
2885 idxTmpReg, false /*f64Bit*/);
2886
2887#else
2888# error "Port me!"
2889#endif
2890
2891 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2892 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2893 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2894 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2895#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2896 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2897 }
2898 else
2899 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2900#endif
2901
2902 return off;
2903}
2904
2905
2906#define IEM_MC_RAISE_DIVIDE_ERROR_IF_LOCAL_IS_ZERO(a_uVar) \
2907 off = iemNativeEmitRaiseDivideErrorIfLocalIsZero(pReNative, off, a_uVar, pCallEntry->idxInstr)
2908
2909/**
2910 * Emits code to raise a \#DE if a local variable is zero.
2911 *
2912 * @returns New code buffer offset, UINT32_MAX on failure.
2913 * @param pReNative The native recompile state.
2914 * @param off The code buffer offset.
2915 * @param   idxVar          The variable to check. This must be 32-bit.
2916 * @param idxInstr The current instruction.
2917 */
2918DECL_INLINE_THROW(uint32_t)
2919iemNativeEmitRaiseDivideErrorIfLocalIsZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxInstr)
2920{
2921 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2922 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, sizeof(uint32_t));
2923
2924     /* Make sure we don't have any outstanding guest register writes as we may raise a #DE. */
2925 off = iemNativeRegFlushPendingWrites(pReNative, off);
2926
2927 /* Set the instruction number if we're counting. */
2928#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2929 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2930#else
2931 RT_NOREF(idxInstr);
2932#endif
2933
2934 /* Do the job we're here for. */
2935 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off);
2936 off = iemNativeEmitTbExitIfGprIsZero<kIemNativeLabelType_RaiseDe>(pReNative, off, idxVarReg, false /*f64Bit*/);
2937 iemNativeVarRegisterRelease(pReNative, idxVar);
2938
2939 return off;
2940}
2941
2942
2943#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2944 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2945
2946/**
2947 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2948 *
2949 * @returns New code buffer offset, UINT32_MAX on failure.
2950 * @param pReNative The native recompile state.
2951 * @param off The code buffer offset.
2952 * @param idxInstr The current instruction.
2953 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2954 * @param cbAlign The alignment in bytes to check against.
2955 */
2956DECL_INLINE_THROW(uint32_t)
2957iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
2958 uint8_t idxVarEffAddr, uint8_t cbAlign)
2959{
2960 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2961 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2962
2963 /*
2964 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2965 */
2966 off = iemNativeRegFlushPendingWrites(pReNative, off);
2967
2968#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2969 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2970#else
2971 RT_NOREF(idxInstr);
2972#endif
2973
2974 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2975 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseGp0>(pReNative, off, idxVarReg, cbAlign - 1);
2976 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2977
2978 return off;
2979}
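/*
 * Illustration (not an additional emitter): for a 16 byte alignment check,
 * IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(GCPtrEff, 16) - GCPtrEff being an
 * illustrative variable name - boils down to testing (GCPtrEff & 0x000f) and
 * exiting to RaiseGp0 when any of those bits are set; cbAlign must therefore
 * be a power of two for the cbAlign - 1 mask to work.
 */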
2980
2981
2982/*********************************************************************************************************************************
2983* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2984*********************************************************************************************************************************/
2985
2986/**
2987 * Pushes an IEM_MC_IF_XXX onto the condition stack.
2988 *
2989  * @returns Pointer to the condition stack entry on success; throws
2990  *          VERR_IEM_COND_TOO_DEEPLY_NESTED if the nesting limit is exceeded.
2991 */
2992DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
2993{
2994 uint32_t const idxStack = pReNative->cCondDepth;
2995 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
2996
2997 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
2998 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
2999
3000 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
3001 pEntry->fInElse = false;
3002 pEntry->fIfExitTb = false;
3003 pEntry->fElseExitTb = false;
3004 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
3005 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
3006
3007 return pEntry;
3008}
3009
3010
3011/**
3012 * Start of the if-block, snapshotting the register and variable state.
3013 */
3014DECL_INLINE_THROW(void)
3015iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
3016{
3017 Assert(offIfBlock != UINT32_MAX);
3018 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3019 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3020 Assert(!pEntry->fInElse);
3021
3022     /* Define the start of the IF block if requested or for disassembly purposes. */
3023 if (idxLabelIf != UINT32_MAX)
3024 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
3025#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3026 else
3027 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
3028#else
3029 RT_NOREF(offIfBlock);
3030#endif
3031
3032 /* Copy the initial state so we can restore it in the 'else' block. */
3033 pEntry->InitialState = pReNative->Core;
3034}
3035
3036
3037#define IEM_MC_ELSE() } while (0); \
3038 off = iemNativeEmitElse(pReNative, off); \
3039 do {
3040
3041/** Emits code related to IEM_MC_ELSE. */
3042DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3043{
3044 /* Check sanity and get the conditional stack entry. */
3045 Assert(off != UINT32_MAX);
3046 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3047 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3048 Assert(!pEntry->fInElse);
3049
3050     /* We can skip the dirty register flushing and the jump to the endif label if
3051        the branch already jumped to a TB exit. */
3052 if (!pEntry->fIfExitTb)
3053 {
3054#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) && 0
3055 /* Writeback any dirty shadow registers. */
3056 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
3057 * in one of the branches and leave guest registers already dirty before the start of the if
3058 * block alone. */
3059 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3060#endif
3061
3062 /* Jump to the endif. */
3063 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
3064 }
3065# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3066 else
3067 Assert(pReNative->Core.offPc == 0);
3068# endif
3069
3070 /* Define the else label and enter the else part of the condition. */
3071 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3072 pEntry->fInElse = true;
3073
3074 /* Snapshot the core state so we can do a merge at the endif and restore
3075 the snapshot we took at the start of the if-block. */
3076 pEntry->IfFinalState = pReNative->Core;
3077 pReNative->Core = pEntry->InitialState;
3078
3079 return off;
3080}
3081
3082
3083#define IEM_MC_ENDIF() } while (0); \
3084 off = iemNativeEmitEndIf(pReNative, off)
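/*
 * How the three condition macros fit together (hand-expanded for illustration;
 * the emitter names are the ones defined in this file):
 *
 *      IEM_MC_IF_XXX(...)  =>  off = iemNativeEmitIfXxx(pReNative, off, ...); do {
 *      IEM_MC_ELSE()       =>  } while (0); off = iemNativeEmitElse(pReNative, off); do {
 *      IEM_MC_ENDIF()      =>  } while (0); off = iemNativeEmitEndIf(pReNative, off)
 *
 * The do/while(0) pairs only provide scoping; the real if/else bookkeeping is
 * the condition stack entry pushed by iemNativeCondPushIf() and popped again by
 * iemNativeEmitEndIf().
 */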
3085
3086/** Emits code related to IEM_MC_ENDIF. */
3087DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3088{
3089 /* Check sanity and get the conditional stack entry. */
3090 Assert(off != UINT32_MAX);
3091 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3092 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3093
3094#if defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) && 0
3095 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3096#endif
3097
3098 /*
3099 * If either of the branches exited the TB, we can take the state from the
3100 * other branch and skip all the merging headache.
3101 */
3102 bool fDefinedLabels = false;
3103 if (pEntry->fElseExitTb || pEntry->fIfExitTb)
3104 {
3105#ifdef VBOX_STRICT
3106 Assert(pReNative->cCondDepth == 1); /* Assuming this only happens in simple conditional structures. */
3107         Assert(pEntry->fElseExitTb != pEntry->fIfExitTb); /* Assuming we don't have any code where both branches exit. */
3108 PCIEMNATIVECORESTATE const pExitCoreState = pEntry->fIfExitTb && pEntry->fInElse
3109 ? &pEntry->IfFinalState : &pReNative->Core;
3110# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3111 Assert(pExitCoreState->bmGstRegShadowDirty == 0);
3112# endif
3113# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3114 Assert(pExitCoreState->offPc == 0);
3115# endif
3116 RT_NOREF(pExitCoreState);
3117#endif
3118
3119 if (!pEntry->fIfExitTb)
3120 {
3121 Assert(pEntry->fInElse);
3122 pReNative->Core = pEntry->IfFinalState;
3123 }
3124 }
3125 else
3126 {
3127 /*
3128         * Now we have to find common ground with the core state at the end of the
3129         * if-block. Use the smallest common denominator and just drop anything
3130 * that isn't the same in both states.
3131 */
3132 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
3133 * which is why we're doing this at the end of the else-block.
3134         *        But we'd need more info about the future for that to be worth the effort. */
3135 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
3136#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3137 AssertMsgStmt(pReNative->Core.offPc == pOther->offPc,
3138 ("Core.offPc=%#RX64 pOther->offPc=%#RX64\n", pReNative->Core.offPc, pOther->offPc),
3139 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3140#endif
3141
3142 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
3143 {
3144#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3145 /*
3146             * If the branches differ in dirty shadow registers, we will flush the
3147             * registers that are only dirty in the current branch here and emit code
3148             * further down to flush those that are only dirty in the other branch.
3149 */
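            /*
             * Worked example (purely illustrative register choices): if this branch
             * has RAX and RCX dirty while the other branch has RCX and RDX dirty,
             * then fGstRegDirtyDiff = {RAX,RDX}, fGstRegDirtyHead = {RAX} (flushed
             * right below) and fGstRegDirtyTail = {RDX} (flushed by the tail code
             * emitted further down for the other branch).
             */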
3150 uint64_t const fGstRegDirtyOther = pOther->bmGstRegShadowDirty;
3151 uint64_t const fGstRegDirtyThis = pReNative->Core.bmGstRegShadowDirty;
3152 uint64_t const fGstRegDirtyDiff = fGstRegDirtyOther ^ fGstRegDirtyThis;
3153 uint64_t const fGstRegDirtyHead = fGstRegDirtyThis & fGstRegDirtyDiff;
3154 uint64_t fGstRegDirtyTail = fGstRegDirtyOther & fGstRegDirtyDiff;
3155 if (!fGstRegDirtyDiff)
3156 { /* likely */ }
3157 else
3158 {
3159 //uint64_t const fGstRegDirtyHead = pReNative->Core.bmGstRegShadowDirty & fGstRegDirtyDiff;
3160 if (fGstRegDirtyHead)
3161 {
3162 Log12(("iemNativeEmitEndIf: flushing dirty guest registers in current branch: %RX64\n", fGstRegDirtyHead));
3163 off = iemNativeRegFlushDirtyGuest(pReNative, off, fGstRegDirtyHead);
3164 }
3165 }
3166#endif
3167
3168 /*
3169 * Shadowed guest registers.
3170 *
3171 * We drop any shadows where the two states disagree about where
3172             * things are kept. We may end up flushing more dirty registers
3173             * here, if the two branches keep things in different registers.
3174 */
3175 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
3176 if (fGstRegs)
3177 {
3178 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
3179 do
3180 {
3181 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3182 fGstRegs &= ~RT_BIT_64(idxGstReg);
3183
3184 uint8_t const idxCurHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3185 uint8_t const idxOtherHstReg = pOther->aidxGstRegShadows[idxGstReg];
3186 if ( idxCurHstReg != idxOtherHstReg
3187 || !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg)))
3188 {
3189#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3190 Log12(("iemNativeEmitEndIf: dropping gst %s (%d) from hst %s (other %d/%#RX64)\n",
3191 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3192 idxOtherHstReg, pOther->bmGstRegShadows));
3193#else
3194 Log12(("iemNativeEmitEndIf: dropping %s gst %s (%d) from hst %s (other %d/%#RX64/%s)\n",
3195 pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "_dirty_" : "clean",
3196 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3197 idxOtherHstReg, pOther->bmGstRegShadows,
3198 pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "dirty" : "clean"));
3199 if (pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3200 fGstRegDirtyTail |= RT_BIT_64(idxGstReg);
3201 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3202 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3203#endif
3204 iemNativeRegClearGstRegShadowingOne(pReNative, idxCurHstReg, (IEMNATIVEGSTREG)idxGstReg, off);
3205 }
3206 } while (fGstRegs);
3207 }
3208 else
3209 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
3210
3211#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3212 /*
3213 * Generate jumpy code for flushing dirty registers from the other
3214 * branch that aren't dirty in the current one.
3215 */
3216 if (!fGstRegDirtyTail)
3217 { /* likely */ }
3218 else
3219 {
3220 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEndIfOtherBranchDirty);
3221 Log12(("iemNativeEmitEndIf: Dirty register only in the other branch: %#RX64 - BAD!\n", fGstRegDirtyTail));
3222
3223 /* First the current branch has to jump over the dirty flushing from the other branch. */
3224 uint32_t const offFixup1 = off;
3225 off = iemNativeEmitJmpToFixed(pReNative, off, off + 10);
3226
3227 /* Put the endif and maybe else label here so the other branch ends up here. */
3228 if (!pEntry->fInElse)
3229 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3230 else
3231 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3232 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3233 fDefinedLabels = true;
3234
3235 /* Flush the dirty guest registers from the other branch. */
3236 while (fGstRegDirtyTail)
3237 {
3238 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegDirtyTail) - 1;
3239 fGstRegDirtyTail &= ~RT_BIT_64(idxGstReg);
3240 Log12(("iemNativeEmitEndIf: tail flushing %s (%d) from other branch %d (cur %d/%#RX64)\n",
3241 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, pOther->aidxGstRegShadows[idxGstReg],
3242 pReNative->Core.aidxGstRegShadows[idxGstReg], pReNative->Core.bmGstRegShadows));
3243
3244 off = iemNativeRegFlushPendingWriteEx(pReNative, off, (PIEMNATIVECORESTATE)pOther, (IEMNATIVEGSTREG)idxGstReg);
3245
3246 /* Mismatching shadowing should've been dropped in the previous step already. */
3247 Assert( !(pReNative->Core.bmGstRegShadows & RT_BIT_64(idxGstReg))
3248 || pReNative->Core.aidxGstRegShadows[idxGstReg] == pOther->aidxGstRegShadows[idxGstReg]);
3249 }
3250
3251 /* Here is the actual endif label, fixup the above jump to land here. */
3252 iemNativeFixupFixedJump(pReNative, offFixup1, off);
3253 }
3254#endif
3255
3256 /*
3257 * Check variables next. For now we must require them to be identical
3258 * or stuff we can recreate. (No code is emitted here.)
3259 */
3260 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
3261#ifdef VBOX_STRICT
3262 uint32_t const offAssert = off;
3263#endif
3264 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
3265 if (fVars)
3266 {
3267 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
3268 do
3269 {
3270 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
3271 fVars &= ~RT_BIT_32(idxVar);
3272
3273 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
3274 {
3275 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
3276 continue;
3277 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3278 {
3279 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3280 if (idxHstReg != UINT8_MAX)
3281 {
3282 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3283 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3284 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
3285 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3286 }
3287 continue;
3288 }
3289 }
3290 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
3291 continue;
3292
3293 /* Irreconcilable, so drop it. */
3294 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3295 if (idxHstReg != UINT8_MAX)
3296 {
3297 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3298 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3299 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
3300 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3301 }
3302 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3303 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
3304 } while (fVars);
3305 }
3306 Assert(off == offAssert);
3307
3308 /*
3309             * Finally, check that the host register allocations match.
3310 */
3311 AssertMsgStmt((pReNative->Core.bmHstRegs & (pReNative->Core.bmHstRegs ^ pOther->bmHstRegs)) == 0,
3312 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
3313 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
3314 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3315 }
3316 }
3317
3318 /*
3319 * Define the endif label and maybe the else one if we're still in the 'if' part.
3320 */
3321 if (!fDefinedLabels)
3322 {
3323 if (!pEntry->fInElse)
3324 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3325 else
3326 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3327 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3328 }
3329
3330     /* Pop the conditional stack. */
3331 pReNative->cCondDepth -= 1;
3332
3333 return off;
3334}
3335
3336
3337/**
3338 * Helper function to convert X86_EFL_xxx masks to liveness masks.
3339 *
3340  * The compiler should be able to figure this out at compile time, so we sprinkle
3341  * constexpr wherever possible here to nudge it along.
3342 */
3343template<uint32_t const a_fEfl>
3344RT_CONSTEXPR uint64_t iemNativeEflagsToLivenessMask(void)
3345{
3346 return (a_fEfl & ~X86_EFL_STATUS_BITS ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER) : 0)
3347 | (a_fEfl & X86_EFL_CF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_CF) : 0)
3348 | (a_fEfl & X86_EFL_PF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_PF) : 0)
3349 | (a_fEfl & X86_EFL_AF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_AF) : 0)
3350 | (a_fEfl & X86_EFL_ZF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_ZF) : 0)
3351 | (a_fEfl & X86_EFL_SF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_SF) : 0)
3352 | (a_fEfl & X86_EFL_OF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OF) : 0);
3353}
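/*
 * For instance (values follow directly from the mapping above):
 *      iemNativeEflagsToLivenessMask<X86_EFL_CF | X86_EFL_ZF>()
 * yields
 *      RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_CF) | RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_ZF),
 * while any non-status bit (e.g. X86_EFL_DF) only contributes
 *      RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER).
 */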
3354
3355
3356/**
3357 * Helper function to convert a single X86_EFL_xxxx value to bit number.
3358 *
3359  * The compiler should be able to figure this out at compile time, so we sprinkle
3360  * constexpr wherever possible here to nudge it along.
3361 */
3362template<uint32_t const a_fEfl>
3363RT_CONSTEXPR unsigned iemNativeEflagsToSingleBitNo(void)
3364{
3365 AssertCompile( a_fEfl == X86_EFL_CF
3366 || a_fEfl == X86_EFL_PF
3367 || a_fEfl == X86_EFL_AF
3368 || a_fEfl == X86_EFL_ZF
3369 || a_fEfl == X86_EFL_SF
3370 || a_fEfl == X86_EFL_OF
3371 || a_fEfl == X86_EFL_DF);
3372 return a_fEfl == X86_EFL_CF ? X86_EFL_CF_BIT
3373 : a_fEfl == X86_EFL_PF ? X86_EFL_PF_BIT
3374 : a_fEfl == X86_EFL_AF ? X86_EFL_AF_BIT
3375 : a_fEfl == X86_EFL_ZF ? X86_EFL_ZF_BIT
3376 : a_fEfl == X86_EFL_SF ? X86_EFL_SF_BIT
3377 : a_fEfl == X86_EFL_OF ? X86_EFL_OF_BIT
3378 : X86_EFL_DF_BIT;
3379}
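/*
 * For instance, iemNativeEflagsToSingleBitNo<X86_EFL_ZF>() evaluates to
 * X86_EFL_ZF_BIT (6); passing a mask with more than one bit set trips the
 * AssertCompile above.
 */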
3380
3381
3382#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
3383 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits), iemNativeEflagsToLivenessMask<a_fBits>()); \
3384 do {
3385
3386/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
3387DECL_INLINE_THROW(uint32_t)
3388iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl, uint64_t fLivenessEflBits)
3389{
3390 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fBitsInEfl);
3391 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3392 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3393
3394 /* Get the eflags. */
3395 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3396
3397 /* Test and jump. */
3398 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3399
3400 /* Free but don't flush the EFlags register. */
3401 iemNativeRegFreeTmp(pReNative, idxEflReg);
3402
3403 /* Make a copy of the core state now as we start the if-block. */
3404 iemNativeCondStartIfBlock(pReNative, off);
3405
3406 return off;
3407}
3408
3409
3410#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
3411 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits), iemNativeEflagsToLivenessMask<a_fBits>()); \
3412 do {
3413
3414/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
3415DECL_INLINE_THROW(uint32_t)
3416iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl, uint64_t fLivenessEflBits)
3417{
3418 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fBitsInEfl);
3419 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3420 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3421
3422 /* Get the eflags. */
3423 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3424
3425 /* Test and jump. */
3426 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3427
3428 /* Free but don't flush the EFlags register. */
3429 iemNativeRegFreeTmp(pReNative, idxEflReg);
3430
3431 /* Make a copy of the core state now as we start the if-block. */
3432 iemNativeCondStartIfBlock(pReNative, off);
3433
3434 return off;
3435}
3436
3437
3438#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
3439 off = iemNativeEmitIfEflagsBitSet(pReNative, off, iemNativeEflagsToSingleBitNo<a_fBit>(), \
3440 iemNativeEflagsToLivenessMask<a_fBit>()); \
3441 do {
3442
3443/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
3444DECL_INLINE_THROW(uint32_t)
3445iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, unsigned iBitNo, uint64_t fLivenessEflBit)
3446{
3447 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3448 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3449 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3450
3451 /* Get the eflags. */
3452 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBit);
3453
3454 /* Test and jump. */
3455 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3456
3457 /* Free but don't flush the EFlags register. */
3458 iemNativeRegFreeTmp(pReNative, idxEflReg);
3459
3460 /* Make a copy of the core state now as we start the if-block. */
3461 iemNativeCondStartIfBlock(pReNative, off);
3462
3463 return off;
3464}
3465
3466
3467#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
3468 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, iemNativeEflagsToSingleBitNo<a_fBit>(), \
3469 iemNativeEflagsToLivenessMask<a_fBit>()); \
3470 do {
3471
3472/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
3473DECL_INLINE_THROW(uint32_t)
3474iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, unsigned iBitNo, uint64_t fLivenessEflBit)
3475{
3476 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3477 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3478 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3479
3480 /* Get the eflags. */
3481 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBit);
3482
3483 /* Test and jump. */
3484 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3485
3486 /* Free but don't flush the EFlags register. */
3487 iemNativeRegFreeTmp(pReNative, idxEflReg);
3488
3489 /* Make a copy of the core state now as we start the if-block. */
3490 iemNativeCondStartIfBlock(pReNative, off);
3491
3492 return off;
3493}
3494
3495
3496#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
3497 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, false /*fInverted*/, \
3498 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3499 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3500 iemNativeEflagsToLivenessMask<a_fBit1 | a_fBit2>()); \
3501 do {
3502
3503#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
3504 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, true /*fInverted*/, \
3505 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3506 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3507 iemNativeEflagsToLivenessMask<a_fBit1 | a_fBit2>()); \
3508 do {
3509
3510/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
3511DECL_INLINE_THROW(uint32_t)
3512iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3513 bool fInverted, unsigned iBitNo1, unsigned iBitNo2, uint64_t fLivenessEflBits)
3514{
3515 Assert(iBitNo1 != iBitNo2);
3516 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3517 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3518 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3519
3520 /* Get the eflags. */
3521 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3522
3523#ifdef RT_ARCH_AMD64
3524 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, RT_BIT_64(iBitNo1));
3525
3526 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3527 if (iBitNo1 > iBitNo2)
3528 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3529 else
3530 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3531 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3532
3533#elif defined(RT_ARCH_ARM64)
3534 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3535 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3536
3537 /* and tmpreg, eflreg, #1<<iBitNo1 */
3538 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3539
3540     /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3541 if (iBitNo1 > iBitNo2)
3542 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3543 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3544 else
3545 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3546 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3547
3548 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3549
3550#else
3551# error "Port me"
3552#endif
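    /*
     * Worked example of the value computed above (illustrative bit choices only):
     * with iBitNo1 = X86_EFL_SF_BIT (7), iBitNo2 = X86_EFL_OF_BIT (11) and an
     * EFLAGS value where SF=1 and OF=0:
     *      tmp   = efl & RT_BIT(7)     -> 0x080
     *      tmp <<= 11 - 7              -> 0x800
     *      tmp  ^= efl                 -> bit 11 = SF ^ OF = 1
     * so testing bit iBitNo2 tells whether the two flags differ, which fInverted
     * then maps onto the if/else branches below.
     */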
3553
3554 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3555 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3556 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3557
3558 /* Free but don't flush the EFlags and tmp registers. */
3559 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3560 iemNativeRegFreeTmp(pReNative, idxEflReg);
3561
3562 /* Make a copy of the core state now as we start the if-block. */
3563 iemNativeCondStartIfBlock(pReNative, off);
3564
3565 return off;
3566}
3567
3568
3569#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
3570 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, false /*fInverted*/, \
3571 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3572 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3573 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3574 iemNativeEflagsToLivenessMask<a_fBit | a_fBit1 | a_fBit2>()); \
3575 do {
3576
3577#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
3578 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, true /*fInverted*/, \
3579 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3580 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3581 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3582 iemNativeEflagsToLivenessMask<a_fBit | a_fBit1 | a_fBit2>()); \
3583 do {
3584
3585/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
3586 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
3587DECL_INLINE_THROW(uint32_t)
3588iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fInverted,
3589 unsigned iBitNo, unsigned iBitNo1, unsigned iBitNo2, uint64_t fLivenessEflBits)
3590{
3591 Assert(iBitNo1 != iBitNo);
3592 Assert(iBitNo2 != iBitNo);
3593 Assert(iBitNo2 != iBitNo1);
3594 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo) | RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3595 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo) | RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3596 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3597
3598 /* We need an if-block label for the non-inverted variant. */
3599 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
3600 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
3601
3602 /* Get the eflags. */
3603 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3604
3605#ifdef RT_ARCH_AMD64
3606 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, RT_BIT_64(iBitNo1)); /* This must come before we jump anywhere! */
3607#elif defined(RT_ARCH_ARM64)
3608 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3609#endif
3610
3611 /* Check for the lone bit first. */
3612 if (!fInverted)
3613 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3614 else
3615 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
3616
3617 /* Then extract and compare the other two bits. */
3618#ifdef RT_ARCH_AMD64
3619 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3620 if (iBitNo1 > iBitNo2)
3621 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3622 else
3623 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3624 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3625
3626#elif defined(RT_ARCH_ARM64)
3627 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3628
3629 /* and tmpreg, eflreg, #1<<iBitNo1 */
3630 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3631
3632 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3633 if (iBitNo1 > iBitNo2)
3634 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3635 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3636 else
3637 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3638 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3639
3640 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3641
3642#else
3643# error "Port me"
3644#endif
3645
3646 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3647 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3648 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3649
3650 /* Free but don't flush the EFlags and tmp registers. */
3651 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3652 iemNativeRegFreeTmp(pReNative, idxEflReg);
3653
3654 /* Make a copy of the core state now as we start the if-block. */
3655 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
3656
3657 return off;
3658}
3659
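/* In plain C, the conditions tested by the two macros above boil down to the
   following (illustrative sketch; fEfl stands for the guest EFLAGS value):

       IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2):
           !(fEfl & a_fBit) && RT_BOOL(fEfl & a_fBit1) == RT_BOOL(fEfl & a_fBit2)

       IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2):
            (fEfl & a_fBit) || RT_BOOL(fEfl & a_fBit1) != RT_BOOL(fEfl & a_fBit2)

   The inverted variant is the one that needs the extra if-block label, since
   its first test jumps straight into the if-block when a_fBit is set. */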
3660
3661#define IEM_MC_IF_CX_IS_NZ() \
3662 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
3663 do {
3664
3665/** Emits code for IEM_MC_IF_CX_IS_NZ. */
3666DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3667{
3668 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3669
3670 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3671 kIemNativeGstRegUse_ReadOnly);
3672 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
3673 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3674
3675 iemNativeCondStartIfBlock(pReNative, off);
3676 return off;
3677}
3678
3679
3680#define IEM_MC_IF_ECX_IS_NZ() \
3681 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
3682 do {
3683
3684#define IEM_MC_IF_RCX_IS_NZ() \
3685 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
3686 do {
3687
3688/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
3689DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3690{
3691 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3692
3693 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3694 kIemNativeGstRegUse_ReadOnly);
3695 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
3696 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3697
3698 iemNativeCondStartIfBlock(pReNative, off);
3699 return off;
3700}
3701
3702
3703#define IEM_MC_IF_CX_IS_NOT_ONE() \
3704 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
3705 do {
3706
3707/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
3708DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3709{
3710 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3711
3712 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3713 kIemNativeGstRegUse_ReadOnly);
3714#ifdef RT_ARCH_AMD64
3715 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3716#else
3717 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3718 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3719 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3720#endif
3721 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3722
3723 iemNativeCondStartIfBlock(pReNative, off);
3724 return off;
3725}
3726
3727
3728#define IEM_MC_IF_ECX_IS_NOT_ONE() \
3729 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
3730 do {
3731
3732#define IEM_MC_IF_RCX_IS_NOT_ONE() \
3733 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
3734 do {
3735
3736/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
3737DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3738{
3739 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3740
3741 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3742 kIemNativeGstRegUse_ReadOnly);
3743 if (f64Bit)
3744 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3745 else
3746 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3747 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3748
3749 iemNativeCondStartIfBlock(pReNative, off);
3750 return off;
3751}
3752
3753
3754#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3755 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, \
3756 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3757 iemNativeEflagsToLivenessMask<a_fBit>()); \
3758 do {
3759
3760#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3761 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, \
3762 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3763 iemNativeEflagsToLivenessMask<a_fBit>()); \
3764 do {
3765
3766/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
3767 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3768DECL_INLINE_THROW(uint32_t)
3769iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3770 bool fCheckIfSet, unsigned iBitNo, uint64_t fLivenessEflBit)
3771{
3772 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3773 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3774 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3775
3776 /* We have to load both RCX and EFLAGS before we can start branching;
3777 otherwise we'll end up in the else-block with an inconsistent
3778 register allocator state.
3779 We do EFLAGS first as it's more likely to already be loaded. */
3780 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBit);
3781 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3782 kIemNativeGstRegUse_ReadOnly);
3783
3784 /** @todo we could reduce this to a single branch instruction by spending a
3785 * temporary register and some setnz stuff. Not sure if loops are
3786 * worth it. */
3787 /* Check CX. */
3788#ifdef RT_ARCH_AMD64
3789 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3790#else
3791 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3792 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3793 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3794#endif
3795
3796 /* Check the EFlags bit. */
3797 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3798 !fCheckIfSet /*fJmpIfSet*/);
3799
3800 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3801 iemNativeRegFreeTmp(pReNative, idxEflReg);
3802
3803 iemNativeCondStartIfBlock(pReNative, off);
3804 return off;
3805}
3806
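/* In plain C the condition amounts to the following (illustrative sketch;
   this is the shape used by LOOPE/LOOPNE style microcode, with fEfl standing
   for the guest EFLAGS value):

       IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit):
           pVCpu->cpum.GstCtx.cx != 1 && (fEfl & a_fBit)

       IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit):
           pVCpu->cpum.GstCtx.cx != 1 && !(fEfl & a_fBit)
*/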
3807
3808#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3809 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, false /*f64Bit*/, \
3810 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3811 iemNativeEflagsToLivenessMask<a_fBit>()); \
3812 do {
3813
3814#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3815 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, false /*f64Bit*/, \
3816 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3817 iemNativeEflagsToLivenessMask<a_fBit>()); \
3818 do {
3819
3820#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3821 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, true /*f64Bit*/, \
3822 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3823 iemNativeEflagsToLivenessMask<a_fBit>()); \
3824 do {
3825
3826#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3827 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, true /*f64Bit*/, \
3828 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3829 iemNativeEflagsToLivenessMask<a_fBit>()); \
3830 do {
3831
3832/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3833 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3834 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3835 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3836DECL_INLINE_THROW(uint32_t)
3837iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fCheckIfSet, bool f64Bit,
3838 unsigned iBitNo, uint64_t fLivenessEFlBit)
3839
3840{
3841 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3842 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3843 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3844
3845 /* We have to load both RCX and EFLAGS before we can start branching;
3846 otherwise we'll end up in the else-block with an inconsistent
3847 register allocator state.
3848 We do EFLAGS first as it's more likely to already be loaded. */
3849 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEFlBit);
3850 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3851 kIemNativeGstRegUse_ReadOnly);
3852
3853 /** @todo we could reduce this to a single branch instruction by spending a
3854 * temporary register and some setnz stuff. Not sure if loops are
3855 * worth it. */
3856 /* Check RCX/ECX. */
3857 if (f64Bit)
3858 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3859 else
3860 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3861
3862 /* Check the EFlags bit. */
3863 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3864 !fCheckIfSet /*fJmpIfSet*/);
3865
3866 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3867 iemNativeRegFreeTmp(pReNative, idxEflReg);
3868
3869 iemNativeCondStartIfBlock(pReNative, off);
3870 return off;
3871}
3872
3873
3874#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3875 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3876 do {
3877
3878/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3879DECL_INLINE_THROW(uint32_t)
3880iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3881{
3882 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3883
3884 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3885 PIEMNATIVEVAR const pVarLocal = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3886 AssertStmt(pVarLocal->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3887 AssertStmt(pVarLocal->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3888
3889 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3890
3891 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3892
3893 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3894
3895 iemNativeCondStartIfBlock(pReNative, off);
3896 return off;
3897}
3898
3899
3900#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3901 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3902 do {
3903
3904/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3905DECL_INLINE_THROW(uint32_t)
3906iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3907{
3908 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3909 Assert(iGReg < 16);
3910
3911 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3912 kIemNativeGstRegUse_ReadOnly);
3913
3914 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3915
3916 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3917
3918 iemNativeCondStartIfBlock(pReNative, off);
3919 return off;
3920}
3921
3922
3923
3924/*********************************************************************************************************************************
3925* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3926*********************************************************************************************************************************/
3927
3928#define IEM_MC_NOREF(a_Name) \
3929 RT_NOREF_PV(a_Name)
3930
3931#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3932 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3933
3934#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3935 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3936
3937#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3938 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3939
3940#define IEM_MC_LOCAL(a_Type, a_Name) \
3941 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3942
3943#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3944 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3945
3946#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3947 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
3948
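/* A minimal sketch of how these macros appear in a microcode block
   (illustrative only; the variable names are made up for the example):

       IEM_MC_ARG(uint16_t *,      pu16Dst, 0);
       IEM_MC_ARG(uint16_t,        u16Src,  1);
       IEM_MC_LOCAL(uint16_t,      u16Tmp);
       IEM_MC_LOCAL_CONST(uint8_t, bImm, 0x12);

   In the native recompiler each of these simply expands to a uint8_t variable
   index returned by the corresponding iemNativeArgAlloc/iemNativeVarAlloc
   call above. */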
3949
3950/**
3951 * Sets the host register for @a idxVarRc to @a idxReg.
3952 *
3953 * Any guest register shadowing will be implicitly dropped by this call.
3954 *
3955 * The variable must not have any register associated with it (causes
3956 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3957 * implied.
3958 *
3959 * @returns idxReg
3960 * @param pReNative The recompiler state.
3961 * @param idxVar The variable.
3962 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3963 * @param off For recording in debug info.
3964 * @param fAllocated Set if the register is already allocated, false if not.
3965 *
3966 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
3967 */
3968DECL_INLINE_THROW(uint8_t)
3969iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off, bool fAllocated)
3970{
3971 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3972 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3973 Assert(!pVar->fRegAcquired);
3974 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3975 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
3976 AssertStmt(RT_BOOL(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)) == fAllocated,
3977 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3978
3979 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3980 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3981
3982 iemNativeVarSetKindToStack(pReNative, idxVar);
3983 pVar->idxReg = idxReg;
3984
3985 return idxReg;
3986}
3987
3988
3989/**
3990 * Convenience wrapper around iemNativeVarRegisterSet that also marks the variable's host register as acquired.
3991 */
3992DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
3993 uint8_t idxReg, uint32_t *poff)
3994{
3995 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff, false /*fAllocated*/);
3996 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
3997 return idxReg;
3998}
3999
4000
4001/**
4002 * This is called by IEM_MC_END() to clean up all variables.
4003 */
4004DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
4005{
4006 uint32_t const bmVars = pReNative->Core.bmVars;
4007 if (bmVars != 0)
4008 iemNativeVarFreeAllSlow(pReNative, bmVars);
4009 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
4010 Assert(pReNative->Core.bmStack == 0);
4011}
4012
4013
4014#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
4015
4016/**
4017 * This is called by IEM_MC_FREE_LOCAL.
4018 */
4019DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4020{
4021 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4022 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
4023 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
4024}
4025
4026
4027#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
4028
4029/**
4030 * This is called by IEM_MC_FREE_ARG.
4031 */
4032DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4033{
4034 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4035 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
4036 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
4037}
4038
4039
4040#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
4041
4042/**
4043 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
4044 */
4045DECL_INLINE_THROW(uint32_t)
4046iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
4047{
4048 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
4049 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
4050 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4051 Assert( pVarDst->cbVar == sizeof(uint16_t)
4052 || pVarDst->cbVar == sizeof(uint32_t));
4053
4054 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
4055 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
4056 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
4057 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
4058 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4059
4060 Assert(pVarDst->cbVar < pVarSrc->cbVar);
4061
4062 /*
4063 * Special case for immediates.
4064 */
4065 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
4066 {
4067 switch (pVarDst->cbVar)
4068 {
4069 case sizeof(uint16_t):
4070 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
4071 break;
4072 case sizeof(uint32_t):
4073 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
4074 break;
4075 default: AssertFailed(); break;
4076 }
4077 }
4078 else
4079 {
4080 /*
4081 * The generic solution for now.
4082 */
4083 /** @todo optimize this by having the python script make sure the source
4084 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
4085 * statement. Then we could just transfer the register assignments. */
4086 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
4087 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
4088 switch (pVarDst->cbVar)
4089 {
4090 case sizeof(uint16_t):
4091 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
4092 break;
4093 case sizeof(uint32_t):
4094 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
4095 break;
4096 default: AssertFailed(); break;
4097 }
4098 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
4099 iemNativeVarRegisterRelease(pReNative, idxVarDst);
4100 }
4101 return off;
4102}
4103
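/* Illustrative use from a microcode block (hypothetical variable names): the
   destination must be freshly declared (kind Invalid) and strictly smaller
   than the source, e.g.

       IEM_MC_LOCAL(uint32_t, u32Value);
       ... code that initializes u32Value ...
       IEM_MC_LOCAL(uint16_t, u16Value);
       IEM_MC_ASSIGN_TO_SMALLER(u16Value, u32Value);

   If the source is an immediate the destination just becomes a truncated
   constant; otherwise the 16/32-bit register copy above is emitted. */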
4104
4105
4106/*********************************************************************************************************************************
4107* Emitters for IEM_MC_CALL_CIMPL_XXX *
4108*********************************************************************************************************************************/
4109
4110/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
4111DECL_INLINE_THROW(uint32_t)
4112iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
4113 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
4114
4115{
4116 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
4117 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
4118
4119#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4120 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
4121 when a call clobbers any of the relevant control registers. */
4122# if 1
4123 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
4124 {
4125 /* Likely as long as call+ret are done via cimpl. */
4126 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
4127 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
4128 }
4129 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
4130 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4131 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
4132 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4133 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
4134 else
4135 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4136 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4137 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4138
4139# else
4140 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
4141 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4142 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
4143 pReNative->fSimdRaiseXcptChecksEmitted = 0;
4144 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
4145 || pfnCImpl == (uintptr_t)iemCImpl_callf
4146 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
4147 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
4148 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4149 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4150 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4151# endif
4152
4153# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
4154 /* Mark the host floating point control register as not synced if MXCSR is modified. */
4155 if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_MxCsr))
4156 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
4157# endif
4158#endif
4159
4160 /*
4161 * Do all the call setup and cleanup.
4162 */
4163 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
4164
4165 /*
4166 * Load the two or three hidden arguments.
4167 */
4168#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4169 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4170 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4171 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
4172#else
4173 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4174 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
4175#endif
4176
4177 /*
4178 * Make the call and check the return code.
4179 *
4180 * Shadow PC copies are always flushed here; other state depends on the flags.
4181 * Segment and general purpose registers are explicitly flushed via the
4182 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
4183 * macros.
4184 */
4185 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
4186#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4187 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4188#endif
4189 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
4190 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
4191 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
4192 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
4193
4194#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
4195 pReNative->Core.fDebugPcInitialized = false;
4196 Log4(("fDebugPcInitialized=false cimpl off=%#x (v1)\n", off));
4197#endif
4198
4199 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
4200}
4201
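/* For orientation: the hidden arguments loaded above correspond to the fixed
   leading parameters every C-implementation helper takes, roughly like this
   (a simplified sketch; it ignores the Windows/AMD64 VBOXSTRICTRC-by-reference
   special case and the helper name is made up):

       VBOXSTRICTRC iemCImpl_Example(PVMCPUCC pVCpu, uint8_t cbInstr, uint64_t uArg0);

   So in the common case IEMNATIVE_CALL_ARG0_GREG carries pVCpu,
   IEMNATIVE_CALL_ARG1_GREG carries cbInstr, and the explicit IEM_MC_ARG
   values follow from argument position IEM_CIMPL_HIDDEN_ARGS onwards. */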
4202
4203#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
4204 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
4205
4206/** Emits code for IEM_MC_CALL_CIMPL_1. */
4207DECL_INLINE_THROW(uint32_t)
4208iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4209 uintptr_t pfnCImpl, uint8_t idxArg0)
4210{
4211 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4212 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
4213}
4214
4215
4216#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
4217 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
4218
4219/** Emits code for IEM_MC_CALL_CIMPL_2. */
4220DECL_INLINE_THROW(uint32_t)
4221iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4222 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
4223{
4224 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4225 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4226 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
4227}
4228
4229
4230#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
4231 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4232 (uintptr_t)a_pfnCImpl, a0, a1, a2)
4233
4234/** Emits code for IEM_MC_CALL_CIMPL_3. */
4235DECL_INLINE_THROW(uint32_t)
4236iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4237 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4238{
4239 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4240 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4241 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4242 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
4243}
4244
4245
4246#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
4247 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4248 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
4249
4250/** Emits code for IEM_MC_CALL_CIMPL_4. */
4251DECL_INLINE_THROW(uint32_t)
4252iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4253 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4254{
4255 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4256 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4257 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4258 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4259 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
4260}
4261
4262
4263#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
4264 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4265 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
4266
4267/** Emits code for IEM_MC_CALL_CIMPL_5. */
4268DECL_INLINE_THROW(uint32_t)
4269iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4270 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
4271{
4272 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4273 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4274 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4275 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4276 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
4277 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
4278}
4279
4280
4281/** Recompiler debugging: Flush guest register shadow copies. */
4282#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
4283
4284
4285
4286/*********************************************************************************************************************************
4287* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
4288*********************************************************************************************************************************/
4289
4290/**
4291 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
4292 */
4293DECL_INLINE_THROW(uint32_t)
4294iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4295 uintptr_t pfnAImpl, uint8_t cArgs)
4296{
4297 if (idxVarRc != UINT8_MAX)
4298 {
4299 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
4300 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
4301 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
4302 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
4303 }
4304
4305 /*
4306 * Do all the call setup and cleanup.
4307 *
4308 * It is only required to flush pending guest register writes in call volatile registers, as
4309 * assembly helpers can't throw and don't access anything living in CPUMCTX; they only
4310 * access their parameters. The flushing of call volatile registers is always done in iemNativeEmitCallCommon()
4311 * no matter the fFlushPendingWrites parameter.
4312 */
4313 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
4314
4315 /*
4316 * Make the call and update the return code variable if we've got one.
4317 */
4318 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnAImpl);
4319 if (idxVarRc != UINT8_MAX)
4320 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off, false /*fAllocated*/);
4321
4322 return off;
4323}
4324
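/* Illustrative sketch of the microcode side of such a call (hypothetical
   helper and variable names; real assembly helpers and their exact parameter
   lists live elsewhere):

       IEM_MC_ARG(uint64_t *, pu64Dst, 0);
       IEM_MC_ARG(uint64_t,   u64Src,  1);
       IEM_MC_CALL_VOID_AIMPL_2(iemAImpl_myOp_u64, pu64Dst, u64Src);

   There are no hidden arguments here (cHiddenArgs is zero), so argument 0
   goes straight into IEMNATIVE_CALL_ARG0_GREG. */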
4325
4326
4327#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
4328 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
4329
4330#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
4331 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
4332
4333/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
4334DECL_INLINE_THROW(uint32_t)
4335iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
4336{
4337 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
4338}
4339
4340
4341#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
4342 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
4343
4344#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
4345 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
4346
4347/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
4348DECL_INLINE_THROW(uint32_t)
4349iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
4350{
4351 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4352 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
4353}
4354
4355
4356#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
4357 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
4358
4359#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
4360 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
4361
4362/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
4363DECL_INLINE_THROW(uint32_t)
4364iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4365 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
4366{
4367 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4368 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4369 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
4370}
4371
4372
4373#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
4374 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
4375
4376#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
4377 IEM_MC_LOCAL(a_rcType, a_rc); \
4378 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
4379
4380/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
4381DECL_INLINE_THROW(uint32_t)
4382iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4383 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4384{
4385 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4386 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4387 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4388 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
4389}
4390
4391
4392#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
4393 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4394
4395#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
4396 IEM_MC_LOCAL(a_rcType, a_rc); \
4397 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4398
4399/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
4400DECL_INLINE_THROW(uint32_t)
4401iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4402 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4403{
4404 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4405 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4406 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4407 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
4408 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
4409}
4410
4411
4412
4413/*********************************************************************************************************************************
4414* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
4415*********************************************************************************************************************************/
4416
4417#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
4418 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
4419
4420#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4421 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
4422
4423#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4424 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
4425
4426#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4427 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
4428
4429
4430/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
4431 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
4432DECL_INLINE_THROW(uint32_t)
4433iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
4434{
4435 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4436 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4437 Assert(iGRegEx < 20);
4438
4439 /* Same discussion as in iemNativeEmitFetchGregU16 */
4440 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4441 kIemNativeGstRegUse_ReadOnly);
4442
4443 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4444 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4445
4446 /* The value is zero-extended to the full 64-bit host register width. */
4447 if (iGRegEx < 16)
4448 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4449 else
4450 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4451
4452 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4453 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4454 return off;
4455}
4456
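/* Worked example of the extended register index used here: iGRegEx 0..15
   selects the low byte of the corresponding 64-bit register, while 16..19
   selects the legacy high-byte registers AH, CH, DH and BH. E.g. iGRegEx == 17
   masks down to guest register xCX (17 & 15 == 1) and takes the
   iemNativeEmitLoadGprFromGpr8Hi path above to fetch CH. */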
4457
4458#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4459 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
4460
4461#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4462 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
4463
4464#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4465 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
4466
4467/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
4468DECL_INLINE_THROW(uint32_t)
4469iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
4470{
4471 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4472 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4473 Assert(iGRegEx < 20);
4474
4475 /* Same discussion as in iemNativeEmitFetchGregU16 */
4476 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4477 kIemNativeGstRegUse_ReadOnly);
4478
4479 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4480 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4481
4482 if (iGRegEx < 16)
4483 {
4484 switch (cbSignExtended)
4485 {
4486 case sizeof(uint16_t):
4487 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4488 break;
4489 case sizeof(uint32_t):
4490 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4491 break;
4492 case sizeof(uint64_t):
4493 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4494 break;
4495 default: AssertFailed(); break;
4496 }
4497 }
4498 else
4499 {
4500 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4501 switch (cbSignExtended)
4502 {
4503 case sizeof(uint16_t):
4504 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4505 break;
4506 case sizeof(uint32_t):
4507 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4508 break;
4509 case sizeof(uint64_t):
4510 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4511 break;
4512 default: AssertFailed(); break;
4513 }
4514 }
4515
4516 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4517 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4518 return off;
4519}
4520
4521
4522
4523#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
4524 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
4525
4526#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
4527 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4528
4529#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
4530 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4531
4532/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
4533DECL_INLINE_THROW(uint32_t)
4534iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4535{
4536 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4537 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4538 Assert(iGReg < 16);
4539
4540 /*
4541 * We can either just load the low 16-bit of the GPR into a host register
4542 * for the variable, or we can do so via a shadow copy host register. The
4543 * latter will avoid having to reload it if it's being stored later, but
4544 * will waste a host register if it isn't touched again. Since we don't
4545 * know what's going to happen, we choose the latter for now.
4546 */
4547 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4548 kIemNativeGstRegUse_ReadOnly);
4549
4550 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4551 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4552 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4553 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4554
4555 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4556 return off;
4557}
4558
4559#define IEM_MC_FETCH_GREG_I16(a_i16Dst, a_iGReg) \
4560 off = iemNativeEmitFetchGregI16(pReNative, off, a_i16Dst, a_iGReg)
4561
4562/** Emits code for IEM_MC_FETCH_GREG_I16. */
4563DECL_INLINE_THROW(uint32_t)
4564iemNativeEmitFetchGregI16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4565{
4566 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4567 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(int16_t));
4568 Assert(iGReg < 16);
4569
4570 /*
4571 * We can either just load the low 16-bit of the GPR into a host register
4572 * for the variable, or we can do so via a shadow copy host register. The
4573 * latter will avoid having to reload it if it's being stored later, but
4574 * will waste a host register if it isn't touched again. Since we don't
4575 * know what's going to happen, we choose the latter for now.
4576 */
4577 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4578 kIemNativeGstRegUse_ReadOnly);
4579
4580 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4581 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4582#ifdef RT_ARCH_AMD64
4583 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4584#elif defined(RT_ARCH_ARM64) /* Note! There are no 16-bit registers on ARM, we emulate that through 32-bit registers which requires sign extension. */
4585 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4586#endif
4587 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4588
4589 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4590 return off;
4591}
4592
4593
4594#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
4595 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4596
4597#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
4598 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4599
4600/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
4601DECL_INLINE_THROW(uint32_t)
4602iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
4603{
4604 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4605 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4606 Assert(iGReg < 16);
4607
4608 /*
4609 * We can either just load the low 16-bit of the GPR into a host register
4610 * for the variable, or we can do so via a shadow copy host register. The
4611 * latter will avoid having to reload it if it's being stored later, but
4612 * will waste a host register if it isn't touched again. Since we don't
4613 * know what's going to happen, we choose the latter for now.
4614 */
4615 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4616 kIemNativeGstRegUse_ReadOnly);
4617
4618 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4619 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4620 if (cbSignExtended == sizeof(uint32_t))
4621 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4622 else
4623 {
4624 Assert(cbSignExtended == sizeof(uint64_t));
4625 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4626 }
4627 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4628
4629 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4630 return off;
4631}
4632
4633
4634#define IEM_MC_FETCH_GREG_I32(a_i32Dst, a_iGReg) \
4635 off = iemNativeEmitFetchGregU32(pReNative, off, a_i32Dst, a_iGReg, sizeof(uint32_t))
4636
4637#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
4638 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
4639
4640#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
4641 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
4642
4643/** Emits code for IEM_MC_FETCH_GREG_I32, IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
4644DECL_INLINE_THROW(uint32_t)
4645iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4646{
4647 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4648 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4649 Assert(iGReg < 16);
4650
4651 /*
4652 * We can either just load the low 32-bit of the GPR into a host register
4653 * for the variable, or we can do so via a shadow copy host register. The
4654 * latter will avoid having to reload it if it's being stored later, but
4655 * will waste a host register if it isn't touched again. Since we don't
4656 * know what's going to happen, we choose the latter for now.
4657 */
4658 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4659 kIemNativeGstRegUse_ReadOnly);
4660
4661 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4662 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4663 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4664 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4665
4666 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4667 return off;
4668}
4669
4670
4671#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
4672 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
4673
4674/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
4675DECL_INLINE_THROW(uint32_t)
4676iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4677{
4678 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4679 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4680 Assert(iGReg < 16);
4681
4682 /*
4683 * We can either just load the low 32-bit of the GPR into a host register
4684 * for the variable, or we can do so via a shadow copy host register. The
4685 * latter will avoid having to reload it if it's being stored later, but
4686 * will waste a host register if it isn't touched again. Since we don't
4687 * know what's going to happen, we choose the latter for now.
4688 */
4689 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4690 kIemNativeGstRegUse_ReadOnly);
4691
4692 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4693 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4694 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4695 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4696
4697 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4698 return off;
4699}
4700
4701
4702#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
4703 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4704
4705#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
4706 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4707
4708/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
4709 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
4710DECL_INLINE_THROW(uint32_t)
4711iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4712{
4713 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4714 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4715 Assert(iGReg < 16);
4716
4717 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4718 kIemNativeGstRegUse_ReadOnly);
4719
4720 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4721 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4722 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
4723 /** @todo name the register a shadow one already? */
4724 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4725
4726 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4727 return off;
4728}
4729
4730
4731#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4732#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
4733 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
4734
4735/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
4736DECL_INLINE_THROW(uint32_t)
4737iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
4738{
4739 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4740 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4741 Assert(iGRegLo < 16 && iGRegHi < 16);
4742
4743 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4744 kIemNativeGstRegUse_ReadOnly);
4745 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4746 kIemNativeGstRegUse_ReadOnly);
4747
4748 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4749 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
4750 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
4751 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
4752
4753 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4754 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4755 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4756 return off;
4757}
4758#endif
4759
4760
4761/*********************************************************************************************************************************
4762* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
4763*********************************************************************************************************************************/
4764
4765#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
4766 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
4767
4768/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
4769DECL_INLINE_THROW(uint32_t)
4770iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
4771{
4772 Assert(iGRegEx < 20);
4773 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4774 kIemNativeGstRegUse_ForUpdate);
4775#ifdef RT_ARCH_AMD64
4776 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4777
4778 /* To the lowest byte of the register: mov r8, imm8 */
4779 if (iGRegEx < 16)
4780 {
4781 if (idxGstTmpReg >= 8)
4782 pbCodeBuf[off++] = X86_OP_REX_B;
4783 else if (idxGstTmpReg >= 4)
4784 pbCodeBuf[off++] = X86_OP_REX;
4785 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4786 pbCodeBuf[off++] = u8Value;
4787 }
4788 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
4789 else if (idxGstTmpReg < 4)
4790 {
4791 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
4792 pbCodeBuf[off++] = u8Value;
4793 }
4794 else
4795 {
4796 /* ror reg64, 8 */
4797 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4798 pbCodeBuf[off++] = 0xc1;
4799 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4800 pbCodeBuf[off++] = 8;
4801
4802 /* mov reg8, imm8 */
4803 if (idxGstTmpReg >= 8)
4804 pbCodeBuf[off++] = X86_OP_REX_B;
4805 else if (idxGstTmpReg >= 4)
4806 pbCodeBuf[off++] = X86_OP_REX;
4807 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4808 pbCodeBuf[off++] = u8Value;
4809
4810 /* rol reg64, 8 */
4811 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4812 pbCodeBuf[off++] = 0xc1;
4813 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4814 pbCodeBuf[off++] = 8;
4815 }
4816
4817#elif defined(RT_ARCH_ARM64)
4818 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4819 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4820 if (iGRegEx < 16)
4821 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4822 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4823 else
4824 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4825 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4826 iemNativeRegFreeTmp(pReNative, idxImmReg);
4827
4828#else
4829# error "Port me!"
4830#endif
4831
4832 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4833
4834#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4835 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4836#endif
4837
4838 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4839 return off;
4840}
4841
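/* The AMD64 fallback above spelled out for a concrete case (illustrative;
   assume the guest register shadow lives in host register r10 and we store
   0x12 into a high-byte register):

       ror r10, 8       ; bring bits 15:8 down to 7:0
       mov r10b, 0x12   ; overwrite the low byte
       rol r10, 8       ; rotate back into place

   The rotation dance is needed because AH/CH/DH/BH can only be encoded for
   the first four registers and never together with a REX prefix. */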
4842
4843#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4844 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4845
4846/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4847DECL_INLINE_THROW(uint32_t)
4848iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4849{
4850 Assert(iGRegEx < 20);
4851 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4852
4853 /*
4854 * If it's a constant value (unlikely) we treat this as an
4855 * IEM_MC_STORE_GREG_U8_CONST statement.
4856 */
4857 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4858 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4859 { /* likely */ }
4860 else
4861 {
4862 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4863 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4864 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4865 }
4866
4867 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4868 kIemNativeGstRegUse_ForUpdate);
4869 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxValueVar, &off);
4870
4871#ifdef RT_ARCH_AMD64
4872 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4873 if (iGRegEx < 16)
4874 {
4875 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4876 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4877 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4878 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4879 pbCodeBuf[off++] = X86_OP_REX;
4880 pbCodeBuf[off++] = 0x8a;
4881 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4882 }
4883 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
4884 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4885 {
4886 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4887 pbCodeBuf[off++] = 0x8a;
4888 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4889 }
4890 else
4891 {
4892 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4893
4894 /* ror reg64, 8 */
4895 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4896 pbCodeBuf[off++] = 0xc1;
4897 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4898 pbCodeBuf[off++] = 8;
4899
4900 /* mov reg8, reg8(r/m) */
4901 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4902 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4903 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4904 pbCodeBuf[off++] = X86_OP_REX;
4905 pbCodeBuf[off++] = 0x8a;
4906 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4907
4908 /* rol reg64, 8 */
4909 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4910 pbCodeBuf[off++] = 0xc1;
4911 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4912 pbCodeBuf[off++] = 8;
4913 }
4914
4915#elif defined(RT_ARCH_ARM64)
4916 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4917 or
4918 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4919 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4920 if (iGRegEx < 16)
4921 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4922 else
4923 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4924
4925#else
4926# error "Port me!"
4927#endif
4928 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4929
4930 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4931
4932#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4933 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4934#endif
4935 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4936 return off;
4937}
4938
4939
4940
4941#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4942 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4943
4944/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4945DECL_INLINE_THROW(uint32_t)
4946iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4947{
4948 Assert(iGReg < 16);
4949 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4950 kIemNativeGstRegUse_ForUpdate);
4951#ifdef RT_ARCH_AMD64
4952 /* mov reg16, imm16 */
4953 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4954 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4955 if (idxGstTmpReg >= 8)
4956 pbCodeBuf[off++] = X86_OP_REX_B;
4957 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4958 pbCodeBuf[off++] = RT_BYTE1(uValue);
4959 pbCodeBuf[off++] = RT_BYTE2(uValue);
4960
4961#elif defined(RT_ARCH_ARM64)
4962 /* movk xdst, #uValue, lsl #0 */
4963 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4964 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
4965
4966#else
4967# error "Port me!"
4968#endif
4969
4970 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4971
4972#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4973 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4974#endif
4975 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4976 return off;
4977}
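
/*
 * Editor's note: illustrative sketch (hypothetical helper, not used by the recompiler)
 * of the 16-bit store semantics implemented above: both the AMD64 'mov r16, imm16' and
 * the ARM64 'movk' write only bits 15:0 and leave bits 63:16 of the guest GPR untouched.
 */
DECLINLINE(uint64_t) iemIllustrateStoreGReg16(uint64_t uGstGReg, uint16_t u16Value)
{
    return (uGstGReg & ~UINT64_C(0xffff)) | u16Value;
}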
4978
4979
4980#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4981 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4982
4983/** Emits code for IEM_MC_STORE_GREG_U16. */
4984DECL_INLINE_THROW(uint32_t)
4985iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4986{
4987 Assert(iGReg < 16);
4988 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4989
4990 /*
4991 * If it's a constant value (unlikely) we treat this as an
4992 * IEM_MC_STORE_GREG_U16_CONST statement.
4993 */
4994 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4995 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4996 { /* likely */ }
4997 else
4998 {
4999 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5000 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5001 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
5002 }
5003
5004 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5005 kIemNativeGstRegUse_ForUpdate);
5006
5007#ifdef RT_ARCH_AMD64
5008 /* mov reg16, reg16 or [mem16] */
5009 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
5010 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5011 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5012 {
5013 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
5014 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
5015 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
5016 pbCodeBuf[off++] = 0x8b;
5017 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
5018 }
5019 else
5020 {
5021 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
5022 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
5023 if (idxGstTmpReg >= 8)
5024 pbCodeBuf[off++] = X86_OP_REX_R;
5025 pbCodeBuf[off++] = 0x8b;
5026 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
5027 }
5028
5029#elif defined(RT_ARCH_ARM64)
5030 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
5031 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxValueVar, &off);
5032 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5033 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
5034 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5035
5036#else
5037# error "Port me!"
5038#endif
5039
5040 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5041
5042#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5043 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5044#endif
5045 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5046 return off;
5047}
5048
5049
5050#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
5051 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
5052
5053/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
5054DECL_INLINE_THROW(uint32_t)
5055iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
5056{
5057 Assert(iGReg < 16);
5058 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5059 kIemNativeGstRegUse_ForFullWrite);
5060 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5061#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5062 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5063#endif
5064 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5065 return off;
5066}
5067
5068
5069#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
5070 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
5071
5072#define IEM_MC_STORE_GREG_I32(a_iGReg, a_i32Value) \
5073 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_i32Value)
5074
5075/** Emits code for IEM_MC_STORE_GREG_U32/IEM_MC_STORE_GREG_I32. */
5076DECL_INLINE_THROW(uint32_t)
5077iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5078{
5079 Assert(iGReg < 16);
5080 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5081
5082 /*
5083 * If it's a constant value (unlikely) we treat this as an
5084 * IEM_MC_STORE_GREG_U32_CONST statement.
5085 */
5086 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5087 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5088 { /* likely */ }
5089 else
5090 {
5091 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5092 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5093 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
5094 }
5095
5096 /*
5097 * For the rest we allocate a guest register for the variable and write
5098 * it to the CPUMCTX structure.
5099 */
5100 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5101#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5102 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5103#else
5104 RT_NOREF(idxVarReg);
5105#endif
5106#ifdef VBOX_STRICT
5107 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
5108#endif
5109 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5110 return off;
5111}
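
/*
 * Editor's note: a short sketch (hypothetical helper) of the guest semantics behind the
 * IEM_MC_STORE_GREG_U32 path above: a 32-bit GPR write zero-extends into the full 64-bit
 * register, which is why a plain 64-bit store of the (already zero-extended) variable
 * register suffices and why the VBOX_STRICT check verifies that bits 63:32 are clear.
 */
DECLINLINE(uint64_t) iemIllustrateStoreGReg32(uint32_t u32Value)
{
    return (uint64_t)u32Value; /* implicit zero-extension of bits 63:32 */
}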
5112
5113
5114#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
5115 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
5116
5117/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
5118DECL_INLINE_THROW(uint32_t)
5119iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
5120{
5121 Assert(iGReg < 16);
5122 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5123 kIemNativeGstRegUse_ForFullWrite);
5124 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5125#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5126 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5127#endif
5128 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5129 return off;
5130}
5131
5132
5133#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
5134 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
5135
5136#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
5137 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
5138
5139/** Emits code for IEM_MC_STORE_GREG_U64/IEM_MC_STORE_GREG_I64. */
5140DECL_INLINE_THROW(uint32_t)
5141iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5142{
5143 Assert(iGReg < 16);
5144 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5145
5146 /*
5147 * If it's a constant value (unlikely) we treat this as an
5148 * IEM_MC_STORE_GREG_U64_CONST statement.
5149 */
5150 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5151 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5152 { /* likely */ }
5153 else
5154 {
5155 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5156 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5157 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
5158 }
5159
5160 /*
5161 * For the rest we allocate a guest register for the variable and write
5162 * it to the CPUMCTX structure.
5163 */
5164 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5165#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5166 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5167#else
5168 RT_NOREF(idxVarReg);
5169#endif
5170 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5171 return off;
5172}
5173
5174
5175#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
5176 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
5177
5178/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
5179DECL_INLINE_THROW(uint32_t)
5180iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
5181{
5182 Assert(iGReg < 16);
5183 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5184 kIemNativeGstRegUse_ForUpdate);
5185 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
5186#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5187 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5188#endif
5189 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5190 return off;
5191}
5192
5193
5194#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5195#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
5196 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
5197
5198/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
5199DECL_INLINE_THROW(uint32_t)
5200iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
5201{
5202 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5203 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
5204 Assert(iGRegLo < 16 && iGRegHi < 16);
5205
5206 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
5207 kIemNativeGstRegUse_ForFullWrite);
5208 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
5209 kIemNativeGstRegUse_ForFullWrite);
5210
5211 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5212 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
5213 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
5214 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
5215
5216 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
5217 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
5218 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
5219 return off;
5220}
5221#endif
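
/*
 * Editor's note: illustrative sketch (hypothetical helper) of what the pair store above
 * does at the guest level: the 128-bit variable is split into its low and high 64-bit
 * halves, which land in the two destination GPRs.
 */
DECLINLINE(void) iemIllustrateStoreGRegPairU64(PCRTUINT128U puSrc, uint64_t *puGRegLo, uint64_t *puGRegHi)
{
    *puGRegLo = puSrc->s.Lo;
    *puGRegHi = puSrc->s.Hi;
}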
5222
5223
5224/*********************************************************************************************************************************
5225* General purpose register manipulation (add, sub). *
5226*********************************************************************************************************************************/
5227
5228#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
5229 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
5230
5231/** Emits code for IEM_MC_ADD_GREG_U16. */
5232DECL_INLINE_THROW(uint32_t)
5233iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
5234{
5235 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5236 kIemNativeGstRegUse_ForUpdate);
5237
5238#ifdef RT_ARCH_AMD64
5239 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5240 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5241 if (idxGstTmpReg >= 8)
5242 pbCodeBuf[off++] = X86_OP_REX_B;
5243 if (uAddend == 1)
5244 {
5245 pbCodeBuf[off++] = 0xff; /* inc */
5246 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5247 }
5248 else
5249 {
5250 pbCodeBuf[off++] = 0x81;
5251 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5252 pbCodeBuf[off++] = uAddend;
5253 pbCodeBuf[off++] = 0;
5254 }
5255
5256#else
5257 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5258 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5259
5260 /* add tmp, gstgrp, uAddend */
5261 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
5262
5263 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
5264 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5265
5266 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5267#endif
5268
5269 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5270
5271#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5272 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5273#endif
5274
5275 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5276 return off;
5277}
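
/*
 * Editor's note: sketch (hypothetical helper) of the 16-bit add semantics emitted above:
 * the addition wraps within 16 bits and bits 63:16 of the guest register are preserved,
 * which is what the ARM64 add-into-temporary followed by bfi implements.
 */
DECLINLINE(uint64_t) iemIllustrateAddGReg16(uint64_t uGstGReg, uint8_t uAddend)
{
    return (uGstGReg & ~UINT64_C(0xffff)) | (uint16_t)((uint16_t)uGstGReg + uAddend);
}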
5278
5279
5280#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
5281 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5282
5283#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
5284 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5285
5286/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
5287DECL_INLINE_THROW(uint32_t)
5288iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
5289{
5290 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5291 kIemNativeGstRegUse_ForUpdate);
5292
5293#ifdef RT_ARCH_AMD64
5294 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5295 if (f64Bit)
5296 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5297 else if (idxGstTmpReg >= 8)
5298 pbCodeBuf[off++] = X86_OP_REX_B;
5299 if (uAddend == 1)
5300 {
5301 pbCodeBuf[off++] = 0xff; /* inc */
5302 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5303 }
5304 else if (uAddend < 128)
5305 {
5306 pbCodeBuf[off++] = 0x83; /* add */
5307 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5308 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5309 }
5310 else
5311 {
5312 pbCodeBuf[off++] = 0x81; /* add */
5313 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5314 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5315 pbCodeBuf[off++] = 0;
5316 pbCodeBuf[off++] = 0;
5317 pbCodeBuf[off++] = 0;
5318 }
5319
5320#else
5321 /* add gstgrp, gstgrp, uAddend */
5322 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5323 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
5324
5325#endif
5326
5327 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5328
5329#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5330 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5331#endif
5332
5333 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5334 return off;
5335}
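
/*
 * Editor's note: sketch (hypothetical helper) of the 32-/64-bit add semantics emitted
 * above.  In the 32-bit case the result is zero-extended into bits 63:32, matching both
 * the AMD64 32-bit 'add' and the ARM64 Wd-form add.  The AMD64 path also uses the
 * sign-extended imm8 encoding (opcode 0x83) only while uAddend < 128, since a larger
 * unsigned addend would be sign-extended to the wrong value.
 */
DECLINLINE(uint64_t) iemIllustrateAddGReg3264(uint64_t uGstGReg, uint8_t uAddend, bool f64Bit)
{
    if (f64Bit)
        return uGstGReg + uAddend;
    return (uint32_t)((uint32_t)uGstGReg + uAddend); /* 32-bit result, bits 63:32 become zero */
}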
5336
5337
5338
5339#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5340 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5341
5342/** Emits code for IEM_MC_SUB_GREG_U16. */
5343DECL_INLINE_THROW(uint32_t)
5344iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
5345{
5346 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5347 kIemNativeGstRegUse_ForUpdate);
5348
5349#ifdef RT_ARCH_AMD64
5350 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5351 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5352 if (idxGstTmpReg >= 8)
5353 pbCodeBuf[off++] = X86_OP_REX_B;
5354 if (uSubtrahend == 1)
5355 {
5356 pbCodeBuf[off++] = 0xff; /* dec */
5357 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5358 }
5359 else
5360 {
5361 pbCodeBuf[off++] = 0x81;
5362 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5363 pbCodeBuf[off++] = uSubtrahend;
5364 pbCodeBuf[off++] = 0;
5365 }
5366
5367#else
5368 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5369 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5370
5371 /* sub tmp, gstgrp, uSubtrahend */
5372 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
5373
5374 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
5375 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5376
5377 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5378#endif
5379
5380 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5381
5382#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5383 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5384#endif
5385
5386 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5387 return off;
5388}
5389
5390
5391#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
5392 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5393
5394#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
5395 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5396
5397/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
5398DECL_INLINE_THROW(uint32_t)
5399iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
5400{
5401 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5402 kIemNativeGstRegUse_ForUpdate);
5403
5404#ifdef RT_ARCH_AMD64
5405 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5406 if (f64Bit)
5407 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5408 else if (idxGstTmpReg >= 8)
5409 pbCodeBuf[off++] = X86_OP_REX_B;
5410 if (uSubtrahend == 1)
5411 {
5412 pbCodeBuf[off++] = 0xff; /* dec */
5413 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5414 }
5415 else if (uSubtrahend < 128)
5416 {
5417 pbCodeBuf[off++] = 0x83; /* sub */
5418 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5419 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5420 }
5421 else
5422 {
5423 pbCodeBuf[off++] = 0x81; /* sub */
5424 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5425 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5426 pbCodeBuf[off++] = 0;
5427 pbCodeBuf[off++] = 0;
5428 pbCodeBuf[off++] = 0;
5429 }
5430
5431#else
5432 /* sub gstgrp, gstgrp, uSubtrahend */
5433 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5434 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
5435
5436#endif
5437
5438 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5439
5440#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5441 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5442#endif
5443
5444 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5445 return off;
5446}
5447
5448
5449#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
5450 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5451
5452#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
5453 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5454
5455#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
5456 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5457
5458#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
5459 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5460
5461/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
5462DECL_INLINE_THROW(uint32_t)
5463iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5464{
5465#ifdef VBOX_STRICT
5466 switch (cbMask)
5467 {
5468 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5469 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5470 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5471 case sizeof(uint64_t): break;
5472 default: AssertFailedBreak();
5473 }
5474#endif
5475
5476 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5477 kIemNativeGstRegUse_ForUpdate);
5478
5479 switch (cbMask)
5480 {
5481 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5482 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
5483 break;
5484 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
5485 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
5486 break;
5487 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5488 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5489 break;
5490 case sizeof(uint64_t):
5491 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
5492 break;
5493 default: AssertFailedBreak();
5494 }
5495
5496 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5497
5498#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5499 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5500#endif
5501
5502 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5503 return off;
5504}
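
/*
 * Editor's note: sketch (hypothetical helper) of the masking trick used above for the
 * sub-32-bit cases: since an 8- or 16-bit AND must leave the upper bits of the guest
 * register untouched, the immediate is widened with all-ones above the operand size
 * before the full 64-bit AND; the 32-bit case instead relies on the 32-bit AND zeroing
 * bits 63:32.
 */
DECLINLINE(uint64_t) iemIllustrateAndGReg16(uint64_t uGstGReg, uint16_t u16Mask)
{
    return uGstGReg & ((uint64_t)u16Mask | UINT64_C(0xffffffffffff0000));
}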
5505
5506
5507#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
5508 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5509
5510#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
5511 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5512
5513#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
5514 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5515
5516#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
5517 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5518
5519/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
5520DECL_INLINE_THROW(uint32_t)
5521iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5522{
5523#ifdef VBOX_STRICT
5524 switch (cbMask)
5525 {
5526 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5527 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5528 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5529 case sizeof(uint64_t): break;
5530 default: AssertFailedBreak();
5531 }
5532#endif
5533
5534 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5535 kIemNativeGstRegUse_ForUpdate);
5536
5537 switch (cbMask)
5538 {
5539 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5540 case sizeof(uint16_t):
5541 case sizeof(uint64_t):
5542 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
5543 break;
5544 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5545 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5546 break;
5547 default: AssertFailedBreak();
5548 }
5549
5550 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5551
5552#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5553 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5554#endif
5555
5556 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5557 return off;
5558}
5559
5560
5561/*********************************************************************************************************************************
5562* Local/Argument variable manipulation (add, sub, and, or). *
5563*********************************************************************************************************************************/
5564
5565#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
5566 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5567
5568#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
5569 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5570
5571#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
5572 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5573
5574#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
5575 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5576
5577
5578#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
5579 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
5580
5581#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
5582 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
5583
5584#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
5585 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
5586
5587/** Emits code for AND'ing a local and a constant value. */
5588DECL_INLINE_THROW(uint32_t)
5589iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5590{
5591#ifdef VBOX_STRICT
5592 switch (cbMask)
5593 {
5594 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5595 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5596 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5597 case sizeof(uint64_t): break;
5598 default: AssertFailedBreak();
5599 }
5600#endif
5601
5602 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5603 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5604
5605 if (cbMask <= sizeof(uint32_t))
5606 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
5607 else
5608 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
5609
5610 iemNativeVarRegisterRelease(pReNative, idxVar);
5611 return off;
5612}
5613
5614
5615#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
5616 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5617
5618#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
5619 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5620
5621#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
5622 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5623
5624#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
5625 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5626
5627/** Emits code for OR'ing a local and a constant value. */
5628DECL_INLINE_THROW(uint32_t)
5629iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5630{
5631#ifdef VBOX_STRICT
5632 switch (cbMask)
5633 {
5634 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5635 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5636 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5637 case sizeof(uint64_t): break;
5638 default: AssertFailedBreak();
5639 }
5640#endif
5641
5642 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5643 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5644
5645 if (cbMask <= sizeof(uint32_t))
5646 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
5647 else
5648 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
5649
5650 iemNativeVarRegisterRelease(pReNative, idxVar);
5651 return off;
5652}
5653
5654
5655#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
5656 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
5657
5658#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
5659 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
5660
5661#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
5662 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
5663
5664/** Emits code for reversing the byte order in a local value. */
5665DECL_INLINE_THROW(uint32_t)
5666iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
5667{
5668 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5669 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5670
5671 switch (cbLocal)
5672 {
5673 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
5674 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
5675 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
5676 default: AssertFailedBreak();
5677 }
5678
5679 iemNativeVarRegisterRelease(pReNative, idxVar);
5680 return off;
5681}
5682
5683
5684#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
5685 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5686
5687#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
5688 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5689
5690#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
5691 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5692
5693/** Emits code for shifting left a local value. */
5694DECL_INLINE_THROW(uint32_t)
5695iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5696{
5697#ifdef VBOX_STRICT
5698 switch (cbLocal)
5699 {
5700 case sizeof(uint8_t): Assert(cShift < 8); break;
5701 case sizeof(uint16_t): Assert(cShift < 16); break;
5702 case sizeof(uint32_t): Assert(cShift < 32); break;
5703 case sizeof(uint64_t): Assert(cShift < 64); break;
5704 default: AssertFailedBreak();
5705 }
5706#endif
5707
5708 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5709 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5710
5711 if (cbLocal <= sizeof(uint32_t))
5712 {
5713 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
5714 if (cbLocal < sizeof(uint32_t))
5715 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
5716 cbLocal == sizeof(uint16_t)
5717 ? UINT32_C(0xffff)
5718 : UINT32_C(0xff));
5719 }
5720 else
5721 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
5722
5723 iemNativeVarRegisterRelease(pReNative, idxVar);
5724 return off;
5725}
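
/*
 * Editor's note: sketch (hypothetical helper) of the sub-32-bit shift handling above:
 * after shifting the 32-bit host register, the result is masked back down to the local
 * variable's width so that bits shifted beyond an 8- or 16-bit local do not survive.
 */
DECLINLINE(uint32_t) iemIllustrateShlLocal16(uint16_t u16Local, uint8_t cShift)
{
    return ((uint32_t)u16Local << cShift) & UINT32_C(0xffff); /* shift, then re-truncate to 16 bits */
}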
5726
5727
5728#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
5729 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5730
5731#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
5732 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5733
5734#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
5735 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5736
5737/** Emits code for arithmetically shifting right a local value. */
5738DECL_INLINE_THROW(uint32_t)
5739iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5740{
5741#ifdef VBOX_STRICT
5742 switch (cbLocal)
5743 {
5744 case sizeof(int8_t): Assert(cShift < 8); break;
5745 case sizeof(int16_t): Assert(cShift < 16); break;
5746 case sizeof(int32_t): Assert(cShift < 32); break;
5747 case sizeof(int64_t): Assert(cShift < 64); break;
5748 default: AssertFailedBreak();
5749 }
5750#endif
5751
5752 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5753 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5754
5755 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
5756 if (cbLocal == sizeof(uint8_t))
5757 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
5758 else if (cbLocal == sizeof(uint16_t))
5759 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
5760
5761 if (cbLocal <= sizeof(uint32_t))
5762 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
5763 else
5764 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
5765
5766 iemNativeVarRegisterRelease(pReNative, idxVar);
5767 return off;
5768}
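
/*
 * Editor's note: sketch (hypothetical helper) of the arithmetic right shift above for a
 * 16-bit local: the value is sign-extended first so the bits shifted in from the top
 * carry the correct sign, then shifted arithmetically.
 */
DECLINLINE(int32_t) iemIllustrateSarLocal16(uint16_t u16Local, uint8_t cShift)
{
    return (int32_t)(int16_t)u16Local >> cShift; /* sign-extend to 32 bits, then shift */
}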
5769
5770
5771#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
5772 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
5773
5774#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
5775 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
5776
5777#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
5778 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
5779
5780/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
5781DECL_INLINE_THROW(uint32_t)
5782iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
5783{
5784 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
5785 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
5786 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5787 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5788
5789 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5790 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquireInited(pReNative, idxVarEffAddr, &off);
5791
5792 /* Need to sign extend the value. */
5793 if (cbLocal <= sizeof(uint32_t))
5794 {
5795/** @todo ARM64: In case of boredom, the extended add instruction can do the
5796 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
5797 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5798
5799 switch (cbLocal)
5800 {
5801 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
5802 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
5803 default: AssertFailed();
5804 }
5805
5806 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
5807 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5808 }
5809 else
5810 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
5811
5812 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
5813 iemNativeVarRegisterRelease(pReNative, idxVar);
5814 return off;
5815}
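
/*
 * Editor's note: sketch (hypothetical helper, shown for the S32 variant) of the effective
 * address adjustment handled above: the 16-/32-bit displacement is sign-extended to 64
 * bits in a temporary before being added to the effective address; 64-bit displacements
 * are added directly.
 */
DECLINLINE(uint64_t) iemIllustrateAddLocalS32ToEffAddr(uint64_t uEffAddr, int32_t i32Disp)
{
    return uEffAddr + (uint64_t)(int64_t)i32Disp; /* sign-extend, then 64-bit add */
}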
5816
5817
5818
5819/*********************************************************************************************************************************
5820* EFLAGS *
5821*********************************************************************************************************************************/
5822
5823#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
5824# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
5825#else
5826# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
5827 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
5828
5829DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
5830{
5831 if (fEflOutput)
5832 {
5833 PVMCPUCC const pVCpu = pReNative->pVCpu;
5834# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5835 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5836 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5837 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5838# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5839 if (fEflOutput & (a_fEfl)) \
5840 { \
5841 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5842 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5843 else \
5844 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5845 } else do { } while (0)
5846# else
5847 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
5848 IEMLIVENESSBIT const LivenessClobbered = { IEMLIVENESS_STATE_GET_WILL_BE_CLOBBERED_SET(pLivenessEntry) };
5849 IEMLIVENESSBIT const LivenessDelayable = { IEMLIVENESS_STATE_GET_CAN_BE_POSTPONED_SET(pLivenessEntry) };
5850# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5851 if (fEflOutput & (a_fEfl)) \
5852 { \
5853 if (LivenessClobbered.a_fLivenessMember) \
5854 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5855 else if (LivenessDelayable.a_fLivenessMember) \
5856 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5857 else \
5858 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5859 } else do { } while (0)
5860# endif
5861 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5862 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5863 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5864 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5865 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5866 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5867 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5868# undef CHECK_FLAG_AND_UPDATE_STATS
5869 }
5870 RT_NOREF(fEflInput);
5871}
5872#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5873
5874#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5875#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5876 off = iemNativeEmitFetchEFlags<a_fEflInput, iemNativeEflagsToLivenessMask<a_fEflInput>(),\
5877 a_fEflOutput, iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags)
5878
5879/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5880template<uint32_t const a_fEflInput, uint64_t const a_fLivenessEflInput,
5881 uint32_t const a_fEflOutput, uint64_t const a_fLivenessEflOutput>
5882DECL_INLINE_THROW(uint32_t)
5883iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
5884{
5885 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5886 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5887 /** @todo fix NOT AssertCompile(a_fEflInput != 0 || a_fEflOutput != 0); */
5888
5889#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5890# ifdef VBOX_STRICT
5891 if ( pReNative->idxCurCall != 0
5892 && (a_fEflInput != 0 || a_fEflOutput != 0) /* for NOT these are both zero for now. */)
5893 {
5894 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5895 RT_CONSTEXPR uint32_t const fBoth = a_fEflInput | a_fEflOutput;
5896# define ASSERT_ONE_EFL(a_fEflConst, a_idxField) \
5897 AssertMsg( !(fBoth & (a_fEflConst)) \
5898 || (!(a_fEflInput & (a_fEflConst)) \
5899 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5900 : !(a_fEflOutput & (a_fEflConst)) \
5901 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5902 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5903 ("%s - %u\n", #a_fEflConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5904 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5905 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5906 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5907 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5908 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5909 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5910 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5911# undef ASSERT_ONE_EFL
5912 }
5913# endif
5914#endif
5915
5916 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, a_fEflInput);
5917 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, a_fEflInput);
5918
5919 /** @todo This could be prettier...*/
5920 /** @todo Also, the shadowing+liveness handling of EFlags is currently
5921 * problematic, but I'll try to tackle that soon (@bugref{10720}). */
5922 PCIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarEFlags)];
5923 Assert(pVar->enmKind == kIemNativeVarKind_Invalid || pVar->enmKind == kIemNativeVarKind_Stack);
5924 Assert(pVar->idxReg == UINT8_MAX);
5925 if (pVar->uArgNo >= IEMNATIVE_CALL_ARG_GREG_COUNT)
5926 {
5927 /** @todo We could use kIemNativeGstRegUse_ReadOnly here when fOutput is
5928 * zero, but since iemNativeVarRegisterSet clears the shadowing,
5929 * that's counterproductive... */
5930 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
5931 a_fLivenessEflInput, a_fLivenessEflOutput);
5932 iemNativeVarRegisterSet(pReNative, idxVarEFlags, idxGstReg, off, true /*fAllocated*/);
5933 }
5934 else
5935 {
5936 /* Register argument variable: Avoid assertions in generic call code and load it the traditional way. */
5937 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off);
5938 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent(pReNative, &off,
5939 a_fLivenessEflInput, a_fLivenessEflOutput);
5940 if (idxGstReg != UINT8_MAX)
5941 {
5942 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstReg);
5943 iemNativeRegFreeTmp(pReNative, idxGstReg);
5944 }
5945 else
5946 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxVarReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5947 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5948 }
5949 return off;
5950}
5951
5952
5953
5954/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5955 * start using it with custom native code emission (inlining assembly
5956 * instruction helpers). */
5957#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5958#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5959 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5960 off = iemNativeEmitCommitEFlags<true /*a_fUpdateSkippingAndPostponing*/, a_fEflOutput, \
5961 iemNativeEflagsToLivenessMask<a_fEflInput>(), \
5962 iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags, a_fEflInput)
5963
5964#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5965#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5966 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5967 off = iemNativeEmitCommitEFlags<false /*a_fUpdateSkippingAndPostponing*/, a_fEflOutput, \
5968 iemNativeEflagsToLivenessMask<a_fEflInput>(), \
5969 iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags, a_fEflInput)
5970
5971/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
5972template<bool const a_fUpdateSkippingAndPostponing, uint32_t const a_fEflOutput,
5973 uint64_t const a_fLivenessEflInputBits, uint64_t const a_fLivenessEflOutputBits>
5974DECL_INLINE_THROW(uint32_t)
5975iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflInput)
5976{
5977 uint8_t const idxReg = iemNativeVarRegisterAcquireInited(pReNative, idxVarEFlags, &off);
5978 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5979
5980#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5981# ifdef VBOX_STRICT
5982 if ( pReNative->idxCurCall != 0
5983 && (a_fLivenessEflInputBits != 0 || a_fLivenessEflOutputBits != 0) /* for NOT these are both zero for now. */)
5984 {
5985 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5986# define ASSERT_ONE_EFL(a_idxField) \
5987 if RT_CONSTEXPR_IF(((a_fLivenessEflInputBits | a_fLivenessEflOutputBits) & RT_BIT_64(a_idxField)) != 0) \
5988 AssertMsg(!(a_fLivenessEflInputBits & RT_BIT_64(a_idxField)) \
5989 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5990 : !(a_fLivenessEflOutputBits & RT_BIT_64(a_idxField)) \
5991 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5992 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)), \
5993 ("%s - %u\n", #a_idxField, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5994 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
5995 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
5996 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
5997 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
5998 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
5999 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
6000 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
6001# undef ASSERT_ONE_EFL
6002 }
6003# endif
6004#endif
6005
6006#ifdef VBOX_STRICT
6007 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
6008 uint32_t offFixup = off;
6009 off = iemNativeEmitJnzToFixed(pReNative, off, off);
6010 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
6011 iemNativeFixupFixedJump(pReNative, offFixup, off);
6012
6013 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
6014 offFixup = off;
6015 off = iemNativeEmitJzToFixed(pReNative, off, off);
6016 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
6017 iemNativeFixupFixedJump(pReNative, offFixup, off);
6018
6019 /** @todo validate that only bits in the a_fEflOutput mask changed. */
6020#endif
6021
6022#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6023 if RT_CONSTEXPR_IF(a_fUpdateSkippingAndPostponing)
6024 {
6025 Assert(!(pReNative->fSkippingEFlags & fEflInput)); RT_NOREF(fEflInput);
6026 if (pReNative->fSkippingEFlags)
6027 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitCommitEFlags)\n",
6028 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~(a_fEflOutput & X86_EFL_STATUS_BITS) ));
6029 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6030 pReNative->fSkippingEFlags = 0;
6031 else
6032 pReNative->fSkippingEFlags &= ~(a_fEflOutput & X86_EFL_STATUS_BITS);
6033# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6034 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6035 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6036 else
6037 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(a_fEflOutput & X86_EFL_STATUS_BITS),
6038 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6039# endif
6040 IEMNATIVE_CLEAR_POSTPONED_EFLAGS(pReNative, a_fEflOutput);
6041 }
6042#endif
6043
6044 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
6045 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
6046 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
6047 return off;
6048}
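
/*
 * Editor's note: the two VBOX_STRICT checks above enforce the canonical EFLAGS layout
 * before committing; the hypothetical helper below spells out the same invariant: the
 * reserved-always-one bit (X86_EFL_RA1_MASK) must be set and the reserved-as-zero bits
 * within the hardware mask must be clear.
 */
DECLINLINE(bool) iemIllustrateEflagsIsCanonical(uint32_t fEfl)
{
    return (fEfl & X86_EFL_RA1_MASK) != 0                               /* jnz path: must-be-one bit set */
        && (fEfl & (X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32)) == 0; /* jz path: must-be-zero bits clear */
}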
6049
6050
6051typedef enum IEMNATIVEMITEFLOP
6052{
6053 kIemNativeEmitEflOp_Set,
6054 kIemNativeEmitEflOp_Clear,
6055 kIemNativeEmitEflOp_Flip
6056} IEMNATIVEMITEFLOP;
6057
6058#define IEM_MC_SET_EFL_BIT(a_fBit) \
6059 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Set, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6060
6061#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
6062 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Clear, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6063
6064#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
6065 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Flip, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6066
6067/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
6068template<IEMNATIVEMITEFLOP const a_enmOp, uint32_t const a_fEflBit, uint64_t const a_fLivenessEflBit>
6069DECL_INLINE_THROW(uint32_t) iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6070{
6071 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
6072 a_enmOp == kIemNativeEmitEflOp_Flip
6073 ? a_fLivenessEflBit : 0,
6074 a_fLivenessEflBit);
6075
6076 /* Using 'if constexpr' forces code elimination in debug builds with VC. */
6077 if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Set)
6078 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, a_fEflBit);
6079 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Clear)
6080 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~a_fEflBit);
6081 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Flip)
6082 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, a_fEflBit);
6083 else
6084 AssertCompile( a_enmOp == kIemNativeEmitEflOp_Set /* AssertCompile(false) works with VC 2019 but not clang 15. */
6085 || a_enmOp == kIemNativeEmitEflOp_Clear
6086 || a_enmOp == kIemNativeEmitEflOp_Flip);
6087
6088 /** @todo No delayed writeback for EFLAGS right now. */
6089 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
6090
6091 /* Free but don't flush the EFLAGS register. */
6092 iemNativeRegFreeTmp(pReNative, idxEflReg);
6093
6094#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6095 /* Clear the bit in the skipped mask if we're clobbering and it's a status bit. */
6096 if RT_CONSTEXPR_IF( (a_enmOp == kIemNativeEmitEflOp_Set || a_enmOp == kIemNativeEmitEflOp_Clear)
6097 && (a_fEflBit & X86_EFL_STATUS_BITS))
6098 {
6099 if (pReNative->fSkippingEFlags)
6100 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitModifyEFlagsBit)\n",
6101 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~a_fEflBit ));
6102 pReNative->fSkippingEFlags &= ~a_fEflBit;
6103# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6104 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~a_fEflBit, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6105# endif
6106 }
6107#endif
6108
6109 return off;
6110}
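
/*
 * Editor's note: sketch (hypothetical helper) of the three EFLAGS bit operations handled
 * above, matching the emitted or/and/xor: Set ORs the bit in, Clear ANDs it out, and
 * Flip XORs it.
 */
DECLINLINE(uint32_t) iemIllustrateModifyEflagsBit(uint32_t fEfl, uint32_t fBit, IEMNATIVEMITEFLOP enmOp)
{
    switch (enmOp)
    {
        case kIemNativeEmitEflOp_Set:   return fEfl | fBit;
        case kIemNativeEmitEflOp_Clear: return fEfl & ~fBit;
        default:                        return fEfl ^ fBit; /* kIemNativeEmitEflOp_Flip */
    }
}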
6111
6112
6113/*********************************************************************************************************************************
6114* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
6115*********************************************************************************************************************************/
6116
6117#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
6118 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
6119
6120#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
6121 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
6122
6123#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
6124 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
6125
6126
6127/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
6128 * IEM_MC_FETCH_SREG_ZX_U64. */
6129DECL_INLINE_THROW(uint32_t)
6130iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
6131{
6132 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6133 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
6134 Assert(iSReg < X86_SREG_COUNT);
6135
6136 /*
6137     * For now, we will not create a shadow copy of a selector.  The rationale
6138     * is that since we do not recompile the popping and loading of segment
6139     * registers, and the IEM_MC_FETCH_SREG_U* MCs are only used for
6140     * pushing and moving to registers, there is only a small chance that the
6141     * shadow copy will be accessed again before the register is reloaded.  One
6142     * scenario would be nested calls in 16-bit code, but I doubt it's worth
6143 * the extra register pressure atm.
6144 *
6145 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
6146     * and iemNativeVarRegisterAcquire for a load scenario. We only have the
6147     * store scenario covered at present (r160730).
6148 */
6149 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6150 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
6151 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
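        /* Note: the 16-bit load above zero-extends the selector in the host register, which is why the
           ZX_U32 and ZX_U64 variants can share this code without any additional work. */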
6152 iemNativeVarRegisterRelease(pReNative, idxDstVar);
6153 return off;
6154}
6155
6156
6157
6158/*********************************************************************************************************************************
6159* Register references. *
6160*********************************************************************************************************************************/
6161
6162#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
6163 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
6164
6165#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
6166 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
6167
6168/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
6169DECL_INLINE_THROW(uint32_t)
6170iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
6171{
6172 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
6173 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6174 Assert(iGRegEx < 20);
6175
6176 if (iGRegEx < 16)
6177 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6178 else
6179 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
6180
6181 /* If we've delayed writing back the register value, flush it now. */
6182 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6183
6184 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6185 if (!fConst)
6186 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
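        /* (A writable reference lets the C helper modify the value directly in CPUMCTX, so any host
            register still shadowing it would go stale; hence the flush above.) */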
6187
6188 return off;
6189}
6190
6191#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
6192 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
6193
6194#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
6195 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
6196
6197#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
6198 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
6199
6200#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
6201 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
6202
6203#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
6204 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
6205
6206#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
6207 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
6208
6209#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
6210 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
6211
6212#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
6213 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
6214
6215#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
6216 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
6217
6218#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
6219 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
6220
6221/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
6222DECL_INLINE_THROW(uint32_t)
6223iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
6224{
6225 Assert(iGReg < 16);
6226 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
6227 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6228
6229 /* If we've delayed writing back the register value, flush it now. */
6230 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
6231
6232 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6233 if (!fConst)
6234 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
6235
6236 return off;
6237}
6238
6239
6240#undef IEM_MC_REF_EFLAGS /* should not be used. */
6241#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
6242 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
6243 off = iemNativeEmitRefEFlags<a_fEflOutput>(pReNative, off, a_pEFlags, a_fEflInput)
6244
6245/** Handles IEM_MC_REF_EFLAGS. */
6246template<uint32_t const a_fEflOutput>
6247DECL_INLINE_THROW(uint32_t)
6248iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput)
6249{
6250 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
6251 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6252
6253#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6254 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fEflInput);
6255 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
6256 if (pReNative->fSkippingEFlags)
6257 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitRefEFlags)\n",
6258 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~a_fEflOutput ));
6259 pReNative->fSkippingEFlags &= ~a_fEflOutput;
6260# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6261
6262 /* Updating the skipping according to the outputs is a little early, but
6263 we don't have any other hooks for references atm. */
6264 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6265 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6266 else if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) != 0)
6267 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(a_fEflOutput & X86_EFL_STATUS_BITS),
6268 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6269# endif
6270
6271 /* This ASSUMES that EFLAGS references are not taken before use. */
6272 IEMNATIVE_CLEAR_POSTPONED_EFLAGS(pReNative, a_fEflOutput);
6273
6274#endif
6275 RT_NOREF(fEflInput);
6276
6277 /* If we've delayed writing back the register value, flush it now. */
6278 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
6279
6280 /* If there is a shadow copy of guest EFLAGS, flush it now. */
6281 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
6282
6283 return off;
6284}
6285
6286
6287/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
6288 * different code from the threaded recompiler, maybe it would be helpful. For now
6289 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
6290#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
6291
6292
6293#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
6294 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
6295
6296#define IEM_MC_REF_XREG_XMM(a_puXmmDst, a_iXReg) \
6297 off = iemNativeEmitRefXregXxx(pReNative, off, a_puXmmDst, a_iXReg, false /*fConst*/)
6298
6299#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
6300 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
6301
6302#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
6303 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
6304
6305#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6306/* Just being paranoid here. */
6307# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
6308AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
6309AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
6310AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
6311AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
6312# endif
6313AssertCompileMemberOffset(X86XMMREG, au64, 0);
6314AssertCompileMemberOffset(X86XMMREG, au32, 0);
6315AssertCompileMemberOffset(X86XMMREG, ar64, 0);
6316AssertCompileMemberOffset(X86XMMREG, ar32, 0);
6317
6318# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
6319 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
6320# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
6321 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
6322# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
6323 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
6324# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
6325 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
6326#endif
6327
6328/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
6329DECL_INLINE_THROW(uint32_t)
6330iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
6331{
6332 Assert(iXReg < 16);
6333 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
6334 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6335
6336 /* If we've delayed writing back the register value, flush it now. */
6337 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
6338
6339#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6340 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6341 if (!fConst)
6342 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
6343#else
6344 RT_NOREF(fConst);
6345#endif
6346
6347 return off;
6348}
6349
6350
6351
6352/*********************************************************************************************************************************
6353* Effective Address Calculation *
6354*********************************************************************************************************************************/
6355#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
6356 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
6357
6358/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
6359 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
6360DECL_INLINE_THROW(uint32_t)
6361iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6362 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
6363{
6364 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6365
6366 /*
6367 * Handle the disp16 form with no registers first.
6368 *
6369 * Convert to an immediate value, as that'll delay the register allocation
6370 * and assignment till the memory access / call / whatever and we can use
6371 * a more appropriate register (or none at all).
6372 */
6373 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
6374 {
6375 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
6376 return off;
6377 }
6378
6379    /* Determine the displacement. */
6380 uint16_t u16EffAddr;
6381 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6382 {
6383 case 0: u16EffAddr = 0; break;
6384 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
6385 case 2: u16EffAddr = u16Disp; break;
6386 default: AssertFailedStmt(u16EffAddr = 0);
6387 }
6388
6389 /* Determine the registers involved. */
6390 uint8_t idxGstRegBase;
6391 uint8_t idxGstRegIndex;
6392 switch (bRm & X86_MODRM_RM_MASK)
6393 {
6394 case 0:
6395 idxGstRegBase = X86_GREG_xBX;
6396 idxGstRegIndex = X86_GREG_xSI;
6397 break;
6398 case 1:
6399 idxGstRegBase = X86_GREG_xBX;
6400 idxGstRegIndex = X86_GREG_xDI;
6401 break;
6402 case 2:
6403 idxGstRegBase = X86_GREG_xBP;
6404 idxGstRegIndex = X86_GREG_xSI;
6405 break;
6406 case 3:
6407 idxGstRegBase = X86_GREG_xBP;
6408 idxGstRegIndex = X86_GREG_xDI;
6409 break;
6410 case 4:
6411 idxGstRegBase = X86_GREG_xSI;
6412 idxGstRegIndex = UINT8_MAX;
6413 break;
6414 case 5:
6415 idxGstRegBase = X86_GREG_xDI;
6416 idxGstRegIndex = UINT8_MAX;
6417 break;
6418 case 6:
6419 idxGstRegBase = X86_GREG_xBP;
6420 idxGstRegIndex = UINT8_MAX;
6421 break;
6422#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
6423 default:
6424#endif
6425 case 7:
6426 idxGstRegBase = X86_GREG_xBX;
6427 idxGstRegIndex = UINT8_MAX;
6428 break;
6429 }
6430
6431 /*
6432 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
6433 */
6434 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6435 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6436 kIemNativeGstRegUse_ReadOnly);
6437 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
6438 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6439 kIemNativeGstRegUse_ReadOnly)
6440 : UINT8_MAX;
6441#ifdef RT_ARCH_AMD64
6442 if (idxRegIndex == UINT8_MAX)
6443 {
6444 if (u16EffAddr == 0)
6445 {
6446                /* movzx ret, base */
6447 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
6448 }
6449 else
6450 {
6451 /* lea ret32, [base64 + disp32] */
6452 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6453 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6454 if (idxRegRet >= 8 || idxRegBase >= 8)
6455 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6456 pbCodeBuf[off++] = 0x8d;
6457 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6458 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
6459 else
6460 {
6461 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
6462 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6463 }
6464 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6465 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6466 pbCodeBuf[off++] = 0;
6467 pbCodeBuf[off++] = 0;
6468 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6469
6470 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6471 }
6472 }
6473 else
6474 {
6475 /* lea ret32, [index64 + base64 (+ disp32)] */
6476 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6477 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6478 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6479 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6480 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6481 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6482 pbCodeBuf[off++] = 0x8d;
6483 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
6484 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6485 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
6486 if (bMod == X86_MOD_MEM4)
6487 {
6488 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6489 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6490 pbCodeBuf[off++] = 0;
6491 pbCodeBuf[off++] = 0;
6492 }
6493 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6494 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6495 }
6496
6497#elif defined(RT_ARCH_ARM64)
6498 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6499 if (u16EffAddr == 0)
6500 {
6501 if (idxRegIndex == UINT8_MAX)
6502 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
6503 else
6504 {
6505 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
6506 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6507 }
6508 }
6509 else
6510 {
6511 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
6512 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
6513 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
6514 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6515 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
6516 else
6517 {
6518 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
6519 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6520 }
6521 if (idxRegIndex != UINT8_MAX)
6522 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
6523 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6524 }
6525
6526#else
6527# error "port me"
6528#endif
6529
6530 if (idxRegIndex != UINT8_MAX)
6531 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6532 iemNativeRegFreeTmp(pReNative, idxRegBase);
6533 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6534 return off;
6535}
6536
6537
6538#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
6539 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
6540
6541/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
6542 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
6543DECL_INLINE_THROW(uint32_t)
6544iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6545 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
6546{
6547 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6548
6549 /*
6550 * Handle the disp32 form with no registers first.
6551 *
6552 * Convert to an immediate value, as that'll delay the register allocation
6553 * and assignment till the memory access / call / whatever and we can use
6554 * a more appropriate register (or none at all).
6555 */
6556 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6557 {
6558 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
6559 return off;
6560 }
6561
6562    /* Calculate the fixed displacement (more on SIB.B=4 and SIB.B=5 further down). */
6563 uint32_t u32EffAddr = 0;
6564 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6565 {
6566 case 0: break;
6567 case 1: u32EffAddr = (int8_t)u32Disp; break;
6568 case 2: u32EffAddr = u32Disp; break;
6569 default: AssertFailed();
6570 }
6571
6572 /* Get the register (or SIB) value. */
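        /* For illustration: bRm=0x44 (mod=1, r/m=4=SIB) with SIB=0x98 (scale=2, index=3, base=0)
           decodes to eAX + eBX*4 + disp8, the disp8 having already been folded into u32EffAddr above. */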
6573 uint8_t idxGstRegBase = UINT8_MAX;
6574 uint8_t idxGstRegIndex = UINT8_MAX;
6575 uint8_t cShiftIndex = 0;
6576 switch (bRm & X86_MODRM_RM_MASK)
6577 {
6578 case 0: idxGstRegBase = X86_GREG_xAX; break;
6579 case 1: idxGstRegBase = X86_GREG_xCX; break;
6580 case 2: idxGstRegBase = X86_GREG_xDX; break;
6581 case 3: idxGstRegBase = X86_GREG_xBX; break;
6582 case 4: /* SIB */
6583 {
6584            /* index w/ scaling. */
6585 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6586 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6587 {
6588 case 0: idxGstRegIndex = X86_GREG_xAX; break;
6589 case 1: idxGstRegIndex = X86_GREG_xCX; break;
6590 case 2: idxGstRegIndex = X86_GREG_xDX; break;
6591 case 3: idxGstRegIndex = X86_GREG_xBX; break;
6592 case 4: cShiftIndex = 0; /*no index*/ break;
6593 case 5: idxGstRegIndex = X86_GREG_xBP; break;
6594 case 6: idxGstRegIndex = X86_GREG_xSI; break;
6595 case 7: idxGstRegIndex = X86_GREG_xDI; break;
6596 }
6597
6598 /* base */
6599 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
6600 {
6601 case 0: idxGstRegBase = X86_GREG_xAX; break;
6602 case 1: idxGstRegBase = X86_GREG_xCX; break;
6603 case 2: idxGstRegBase = X86_GREG_xDX; break;
6604 case 3: idxGstRegBase = X86_GREG_xBX; break;
6605 case 4:
6606 idxGstRegBase = X86_GREG_xSP;
6607 u32EffAddr += uSibAndRspOffset >> 8;
6608 break;
6609 case 5:
6610 if ((bRm & X86_MODRM_MOD_MASK) != 0)
6611 idxGstRegBase = X86_GREG_xBP;
6612 else
6613 {
6614 Assert(u32EffAddr == 0);
6615 u32EffAddr = u32Disp;
6616 }
6617 break;
6618 case 6: idxGstRegBase = X86_GREG_xSI; break;
6619 case 7: idxGstRegBase = X86_GREG_xDI; break;
6620 }
6621 break;
6622 }
6623 case 5: idxGstRegBase = X86_GREG_xBP; break;
6624 case 6: idxGstRegBase = X86_GREG_xSI; break;
6625 case 7: idxGstRegBase = X86_GREG_xDI; break;
6626 }
6627
6628 /*
6629 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6630 * the start of the function.
6631 */
6632 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6633 {
6634 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
6635 return off;
6636 }
6637
6638 /*
6639 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6640 */
6641 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6642 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6643 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6644 kIemNativeGstRegUse_ReadOnly);
6645 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6646 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6647 kIemNativeGstRegUse_ReadOnly);
6648
6649 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6650 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6651 {
6652 idxRegBase = idxRegIndex;
6653 idxRegIndex = UINT8_MAX;
6654 }
6655
6656#ifdef RT_ARCH_AMD64
6657 if (idxRegIndex == UINT8_MAX)
6658 {
6659 if (u32EffAddr == 0)
6660 {
6661 /* mov ret, base */
6662 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6663 }
6664 else
6665 {
6666 /* lea ret32, [base64 + disp32] */
6667 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6668 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6669 if (idxRegRet >= 8 || idxRegBase >= 8)
6670 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6671 pbCodeBuf[off++] = 0x8d;
6672 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6673 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6674 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6675 else
6676 {
6677 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6678 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6679 }
6680 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6681 if (bMod == X86_MOD_MEM4)
6682 {
6683 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6684 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6685 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6686 }
6687 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6688 }
6689 }
6690 else
6691 {
6692 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6693 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6694 if (idxRegBase == UINT8_MAX)
6695 {
6696 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
6697 if (idxRegRet >= 8 || idxRegIndex >= 8)
6698 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6699 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6700 pbCodeBuf[off++] = 0x8d;
6701 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6702 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6703 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6704 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6705 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6706 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6707 }
6708 else
6709 {
6710 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6711 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6712 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6713 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6714 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6715 pbCodeBuf[off++] = 0x8d;
6716 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6717 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6718 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6719 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6720 if (bMod != X86_MOD_MEM0)
6721 {
6722 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6723 if (bMod == X86_MOD_MEM4)
6724 {
6725 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6726 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6727 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6728 }
6729 }
6730 }
6731 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6732 }
6733
6734#elif defined(RT_ARCH_ARM64)
6735 if (u32EffAddr == 0)
6736 {
6737 if (idxRegIndex == UINT8_MAX)
6738 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6739 else if (idxRegBase == UINT8_MAX)
6740 {
6741 if (cShiftIndex == 0)
6742 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
6743 else
6744 {
6745 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6746 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
6747 }
6748 }
6749 else
6750 {
6751 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6752 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6753 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6754 }
6755 }
6756 else
6757 {
6758 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
6759 {
6760 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6761 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
6762 }
6763 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
6764 {
6765 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6766 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6767 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
6768 }
6769 else
6770 {
6771 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
6772 if (idxRegBase != UINT8_MAX)
6773 {
6774 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6775 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6776 }
6777 }
6778 if (idxRegIndex != UINT8_MAX)
6779 {
6780 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6781 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6782 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6783 }
6784 }
6785
6786#else
6787# error "port me"
6788#endif
6789
6790 if (idxRegIndex != UINT8_MAX)
6791 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6792 if (idxRegBase != UINT8_MAX)
6793 iemNativeRegFreeTmp(pReNative, idxRegBase);
6794 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6795 return off;
6796}
6797
6798
6799#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6800 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6801 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6802
6803#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6804 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6805 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6806
6807#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6808 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6809 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
6810
6811/**
6812 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
6813 *
6814 * @returns New off.
6815 * @param   pReNative           The native recompile state.
6816 * @param   off                 The code buffer offset.
6817 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
6818 * bit 4 to REX.X. The two bits are part of the
6819 * REG sub-field, which isn't needed in this
6820 * function.
6821 * @param uSibAndRspOffset Two parts:
6822 * - The first 8 bits make up the SIB byte.
6823 * - The next 8 bits are the fixed RSP/ESP offset
6824 * in case of a pop [xSP].
6825 * @param u32Disp The displacement byte/word/dword, if any.
6826 * @param cbInstr The size of the fully decoded instruction. Used
6827 * for RIP relative addressing.
6828 * @param idxVarRet The result variable number.
6829 * @param f64Bit Whether to use a 64-bit or 32-bit address size
6830 * when calculating the address.
6831 *
6832 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
6833 */
6834DECL_INLINE_THROW(uint32_t)
6835iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
6836 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
6837{
6838 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6839
6840 /*
6841 * Special case the rip + disp32 form first.
6842 */
6843 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6844 {
6845 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6846 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
6847 kIemNativeGstRegUse_ReadOnly);
6848 if (f64Bit)
6849 {
6850#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6851 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr + (int64_t)pReNative->Core.offPc;
6852#else
6853 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
6854#endif
6855#ifdef RT_ARCH_AMD64
6856 if ((int32_t)offFinalDisp == offFinalDisp)
6857 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
6858 else
6859 {
6860 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
6861 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
6862 }
6863#else
6864 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6865#endif
6866 }
6867 else
6868 {
6869# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6870 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr + (int32_t)pReNative->Core.offPc;
6871# else
6872 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr;
6873# endif
6874 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6875 }
6876 iemNativeRegFreeTmp(pReNative, idxRegPc);
6877 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6878 return off;
6879 }
6880
6881    /* Calculate the fixed displacement (more on SIB.B=4 and SIB.B=5 further down). */
6882 int64_t i64EffAddr = 0;
6883 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6884 {
6885 case 0: break;
6886 case 1: i64EffAddr = (int8_t)u32Disp; break;
6887 case 2: i64EffAddr = (int32_t)u32Disp; break;
6888 default: AssertFailed();
6889 }
6890
6891 /* Get the register (or SIB) value. */
6892 uint8_t idxGstRegBase = UINT8_MAX;
6893 uint8_t idxGstRegIndex = UINT8_MAX;
6894 uint8_t cShiftIndex = 0;
6895 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6896 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6897 else /* SIB: */
6898 {
6899        /* index w/ scaling. */
6900 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6901 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6902 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6903 if (idxGstRegIndex == 4)
6904 {
6905 /* no index */
6906 cShiftIndex = 0;
6907 idxGstRegIndex = UINT8_MAX;
6908 }
6909
6910 /* base */
6911 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6912 if (idxGstRegBase == 4)
6913 {
6914 /* pop [rsp] hack */
6915 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
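                /* (E.g. a 64-bit 'pop [rsp+disp8]' evaluates its memory operand with RSP already
                    incremented by the operand size, so the decoder passes that +8 adjustment here.) */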
6916 }
6917 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6918 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6919 {
6920 /* mod=0 and base=5 -> disp32, no base reg. */
6921 Assert(i64EffAddr == 0);
6922 i64EffAddr = (int32_t)u32Disp;
6923 idxGstRegBase = UINT8_MAX;
6924 }
6925 }
6926
6927 /*
6928 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6929 * the start of the function.
6930 */
6931 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6932 {
6933 if (f64Bit)
6934 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6935 else
6936 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6937 return off;
6938 }
6939
6940 /*
6941 * Now emit code that calculates:
6942 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6943 * or if !f64Bit:
6944 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6945 */
6946 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6947 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6948 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6949 kIemNativeGstRegUse_ReadOnly);
6950 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6951 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6952 kIemNativeGstRegUse_ReadOnly);
6953
6954 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6955 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6956 {
6957 idxRegBase = idxRegIndex;
6958 idxRegIndex = UINT8_MAX;
6959 }
6960
6961#ifdef RT_ARCH_AMD64
6962 uint8_t bFinalAdj;
6963 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6964 bFinalAdj = 0; /* likely */
6965 else
6966 {
6967 /* pop [rsp] with a problematic disp32 value. Split out the
6968 RSP offset and add it separately afterwards (bFinalAdj). */
6969 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
6970 Assert(idxGstRegBase == X86_GREG_xSP);
6971 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6972 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6973 Assert(bFinalAdj != 0);
6974 i64EffAddr -= bFinalAdj;
6975 Assert((int32_t)i64EffAddr == i64EffAddr);
6976 }
6977 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6978//pReNative->pInstrBuf[off++] = 0xcc;
6979
6980 if (idxRegIndex == UINT8_MAX)
6981 {
6982 if (u32EffAddr == 0)
6983 {
6984 /* mov ret, base */
6985 if (f64Bit)
6986 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6987 else
6988 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6989 }
6990 else
6991 {
6992 /* lea ret, [base + disp32] */
6993 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6994 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6995 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
6996 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6997 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6998 | (f64Bit ? X86_OP_REX_W : 0);
6999 pbCodeBuf[off++] = 0x8d;
7000 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
7001 if (idxRegBase != X86_GREG_x12 /*SIB*/)
7002 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
7003 else
7004 {
7005 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
7006 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
7007 }
7008 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7009 if (bMod == X86_MOD_MEM4)
7010 {
7011 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7012 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7013 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7014 }
7015 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7016 }
7017 }
7018 else
7019 {
7020 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
7021 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
7022 if (idxRegBase == UINT8_MAX)
7023 {
7024 /* lea ret, [(index64 << cShiftIndex) + disp32] */
7025 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
7026 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7027 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
7028 | (f64Bit ? X86_OP_REX_W : 0);
7029 pbCodeBuf[off++] = 0x8d;
7030 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
7031 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
7032 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7033 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7034 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7035 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7036 }
7037 else
7038 {
7039 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
7040 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
7041 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7042 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
7043 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
7044 | (f64Bit ? X86_OP_REX_W : 0);
7045 pbCodeBuf[off++] = 0x8d;
7046 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
7047 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
7048 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
7049 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
7050 if (bMod != X86_MOD_MEM0)
7051 {
7052 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7053 if (bMod == X86_MOD_MEM4)
7054 {
7055 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7056 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7057 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7058 }
7059 }
7060 }
7061 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7062 }
7063
7064 if (!bFinalAdj)
7065 { /* likely */ }
7066 else
7067 {
7068 Assert(f64Bit);
7069 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
7070 }
7071
7072#elif defined(RT_ARCH_ARM64)
7073 if (i64EffAddr == 0)
7074 {
7075 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7076 if (idxRegIndex == UINT8_MAX)
7077 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
7078 else if (idxRegBase != UINT8_MAX)
7079 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
7080 f64Bit, false /*fSetFlags*/, cShiftIndex);
7081 else
7082 {
7083 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
7084 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
7085 }
7086 }
7087 else
7088 {
7089 if (f64Bit)
7090 { /* likely */ }
7091 else
7092 i64EffAddr = (int32_t)i64EffAddr;
7093
7094 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
7095 {
7096 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7097 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
7098 }
7099 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
7100 {
7101 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7102 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
7103 }
7104 else
7105 {
7106 if (f64Bit)
7107 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
7108 else
7109 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
7110 if (idxRegBase != UINT8_MAX)
7111 {
7112 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7113 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
7114 }
7115 }
7116 if (idxRegIndex != UINT8_MAX)
7117 {
7118 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7119 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
7120 f64Bit, false /*fSetFlags*/, cShiftIndex);
7121 }
7122 }
7123
7124#else
7125# error "port me"
7126#endif
7127
7128 if (idxRegIndex != UINT8_MAX)
7129 iemNativeRegFreeTmp(pReNative, idxRegIndex);
7130 if (idxRegBase != UINT8_MAX)
7131 iemNativeRegFreeTmp(pReNative, idxRegBase);
7132 iemNativeVarRegisterRelease(pReNative, idxVarRet);
7133 return off;
7134}
7135
7136
7137/*********************************************************************************************************************************
7138* Memory fetches and stores common *
7139*********************************************************************************************************************************/
7140
7141typedef enum IEMNATIVEMITMEMOP
7142{
7143 kIemNativeEmitMemOp_Store = 0,
7144 kIemNativeEmitMemOp_Fetch,
7145 kIemNativeEmitMemOp_Fetch_Zx_U16,
7146 kIemNativeEmitMemOp_Fetch_Zx_U32,
7147 kIemNativeEmitMemOp_Fetch_Zx_U64,
7148 kIemNativeEmitMemOp_Fetch_Sx_U16,
7149 kIemNativeEmitMemOp_Fetch_Sx_U32,
7150 kIemNativeEmitMemOp_Fetch_Sx_U64
7151} IEMNATIVEMITMEMOP;
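    /* In the names above, Zx means zero extending the fetched value to the wider destination and Sx
       sign extending it; plain Fetch and Store operate at the natural a_cbMem width. */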
7152
7153/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
7154 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
7155 * (with iSegReg = UINT8_MAX). */
7156template<uint8_t const a_cbMem, uint32_t const a_fAlignMaskAndCtl, IEMNATIVEMITMEMOP const a_enmOp, bool a_fFlat = false>
7157DECL_INLINE_THROW(uint32_t)
7158iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
7159 uint8_t idxVarGCPtrMem, uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
7160{
7161 /*
7162 * Assert sanity.
7163 */
7164 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7165 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7166 Assert( a_enmOp != kIemNativeEmitMemOp_Store
7167 || pVarValue->enmKind == kIemNativeVarKind_Immediate
7168 || pVarValue->enmKind == kIemNativeVarKind_Stack);
7169 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7170 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
7171 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
7172 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
7173 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7174 Assert(!a_fFlat ? iSegReg < 6 : iSegReg == UINT8_MAX);
7175#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7176 AssertCompile( a_cbMem == 1 || a_cbMem == 2 || a_cbMem == 4 || a_cbMem == 8
7177 || a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U));
7178#else
7179 AssertCompile(a_cbMem == 1 || a_cbMem == 2 || a_cbMem == 4 || a_cbMem == 8);
7180#endif
7181 AssertCompile(!(a_fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)));
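        /* The low 8 bits of a_fAlignMaskAndCtl hold the alignment mask (e.g. 15 for 16 byte alignment),
           while the IEM_MEMMAP_F_ALIGN_GP/SSE bits carry additional alignment check control, see the
           strict checks below. */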
7182 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7183#ifdef VBOX_STRICT
7184 if (iSegReg == UINT8_MAX)
7185 {
7186 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7187 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7188 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7189 switch (a_cbMem)
7190 {
7191 case 1:
7192 Assert( pfnFunction
7193 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
7194 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7195 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7196 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7197 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7198 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
7199 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
7200 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
7201 : UINT64_C(0xc000b000a0009000) ));
7202 Assert(!a_fAlignMaskAndCtl);
7203 break;
7204 case 2:
7205 Assert( pfnFunction
7206 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
7207 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7208 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7209 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7210 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
7211 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
7212 : UINT64_C(0xc000b000a0009000) ));
7213 Assert(a_fAlignMaskAndCtl <= 1);
7214 break;
7215 case 4:
7216 Assert( pfnFunction
7217 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
7218 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7219 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7220 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
7221 : UINT64_C(0xc000b000a0009000) ));
7222 Assert(a_fAlignMaskAndCtl <= 3);
7223 break;
7224 case 8:
7225 Assert( pfnFunction
7226 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
7227 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
7228 : UINT64_C(0xc000b000a0009000) ));
7229 Assert(a_fAlignMaskAndCtl <= 7);
7230 break;
7231#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7232 case sizeof(RTUINT128U):
7233 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7234 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
7235 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7236 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
7237 || ( a_enmOp == kIemNativeEmitMemOp_Store
7238 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7239 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
7240 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7241 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7242 ? (a_fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (a_fAlignMaskAndCtl & 0xff) == 15
7243 : a_fAlignMaskAndCtl <= 15U);
7244 break;
7245 case sizeof(RTUINT256U):
7246 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7247 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
7248 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
7249 || ( a_enmOp == kIemNativeEmitMemOp_Store
7250 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
7251 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
7252 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx
7253 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx
7254 ? (a_fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (a_fAlignMaskAndCtl & 0xff) == 31
7255 : a_fAlignMaskAndCtl <= 31);
7256 break;
7257#endif
7258 }
7259 }
7260 else
7261 {
7262 Assert(iSegReg < 6);
7263 switch (a_cbMem)
7264 {
7265 case 1:
7266 Assert( pfnFunction
7267 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
7268 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
7269 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7270 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7271 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7272 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
7273 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
7274 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
7275 : UINT64_C(0xc000b000a0009000) ));
7276 Assert(!a_fAlignMaskAndCtl);
7277 break;
7278 case 2:
7279 Assert( pfnFunction
7280 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
7281 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
7282 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7283 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7284 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
7285 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
7286 : UINT64_C(0xc000b000a0009000) ));
7287 Assert(a_fAlignMaskAndCtl <= 1);
7288 break;
7289 case 4:
7290 Assert( pfnFunction
7291 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
7292 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
7293 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
7294 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
7295 : UINT64_C(0xc000b000a0009000) ));
7296 Assert(a_fAlignMaskAndCtl <= 3);
7297 break;
7298 case 8:
7299 Assert( pfnFunction
7300 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
7301 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
7302 : UINT64_C(0xc000b000a0009000) ));
7303 Assert(a_fAlignMaskAndCtl <= 7);
7304 break;
7305#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7306 case sizeof(RTUINT128U):
7307 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7308 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
7309 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7310 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
7311 || ( a_enmOp == kIemNativeEmitMemOp_Store
7312 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7313 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
7314 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7315 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7316 ? (a_fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (a_fAlignMaskAndCtl & 0xff) == 15
7317 : a_fAlignMaskAndCtl <= 15);
7318 break;
7319 case sizeof(RTUINT256U):
7320 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7321 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
7322 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
7323 || ( a_enmOp == kIemNativeEmitMemOp_Store
7324 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
7325 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
7326 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx
7327 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx
7328 ? (a_fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (a_fAlignMaskAndCtl & 0xff) == 31
7329 : a_fAlignMaskAndCtl <= 31);
7330 break;
7331#endif
7332 }
7333 }
7334#endif
7335
7336#ifdef VBOX_STRICT
7337 /*
7338 * Check that the fExec flags we've got make sense.
7339 */
7340 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7341#endif
7342
7343 /*
7344 * To keep things simple we have to commit any pending writes first as we
7345 * may end up making calls.
7346 */
7347 /** @todo we could postpone this till we make the call and reload the
7348 * registers after returning from the call. Not sure if that's sensible or
7349 * not, though. */
7350#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7351 off = iemNativeRegFlushPendingWrites(pReNative, off);
7352#else
7353 /* The program counter is treated differently for now. */
7354 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
7355#endif
7356
7357#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7358 /*
7359 * Move/spill/flush stuff out of call-volatile registers.
7360 * This is the easy way out. We could contain this to the tlb-miss branch
7361 * by saving and restoring active stuff here.
7362 */
7363 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7364#endif
7365
7366 /*
7367 * Define labels and allocate the result register (trying for the return
7368 * register if we can).
7369 */
7370 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7371#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7372 uint8_t idxRegValueFetch;
7373 if RT_CONSTEXPR_IF(a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U))
7374 idxRegValueFetch = a_enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7375 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
7376 else
7377 idxRegValueFetch = a_enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7378 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7379 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7380 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7381#else
7382 uint8_t const idxRegValueFetch = a_enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7383 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7384 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7385 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7386#endif
7387 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, a_cbMem, offDisp);
7388
7389#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7390 uint8_t idxRegValueStore = UINT8_MAX;
7391
7392 if RT_CONSTEXPR_IF(a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U))
7393 idxRegValueStore = a_enmOp == kIemNativeEmitMemOp_Store
7394 && !TlbState.fSkip
7395 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7396 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7397 : UINT8_MAX;
7398 else
7399 idxRegValueStore = a_enmOp == kIemNativeEmitMemOp_Store
7400 && !TlbState.fSkip
7401 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7402 ? iemNativeVarRegisterAcquireInited(pReNative, idxVarValue, &off)
7403 : UINT8_MAX;
7404
7405#else
7406 uint8_t const idxRegValueStore = a_enmOp == kIemNativeEmitMemOp_Store
7407 && !TlbState.fSkip
7408 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7409 ? iemNativeVarRegisterAcquireInited(pReNative, idxVarValue, &off)
7410 : UINT8_MAX;
7411#endif
7412 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7413 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7414 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7415 : UINT32_MAX;
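        /* Note: the TlbLookup label is created without a position (UINT32_MAX); the actual lookup code
           is emitted out of line later and is only reached via the jump just below. */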
7416
7417 /*
7418 * Jump to the TLB lookup code.
7419 */
7420 if (!TlbState.fSkip)
7421 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7422
7423 /*
7424 * TlbMiss:
7425 *
7426 * Call helper to do the fetching.
7427 * We flush all guest register shadow copies here.
7428 */
7429 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
7430
7431#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7432 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7433#else
7434 RT_NOREF(idxInstr);
7435#endif
7436
7437#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7438 if (pReNative->Core.offPc)
7439 {
7440 /*
7441 * Update the program counter but restore it at the end of the TlbMiss branch.
7442 * This should allow delaying more program counter updates for the TlbLookup and hit paths
7443         * which are hopefully much more frequent, reducing the number of memory accesses.
7444 */
7445 /* Allocate a temporary PC register. */
7446/** @todo r=bird: This would technically need to be done up front as it's a register allocation. */
7447 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7448 kIemNativeGstRegUse_ForUpdate);
7449
7450 /* Perform the addition and store the result. */
7451 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7452 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7453# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
7454 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
7455# endif
7456
7457 /* Free and flush the PC register. */
7458 iemNativeRegFreeTmp(pReNative, idxPcReg);
7459 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7460 }
7461#endif
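    /*
     * Sketch of the combined effect of the block above and the matching restore block after the
     * helper call below, assuming offPc is the amount RIP is lagging behind in the delayed-PC scheme:
     * @code
     *      pVCpu->cpum.GstCtx.rip += offPc;    // make RIP architecturally correct for the helper
     *      <call pfnFunction>
     *      pVCpu->cpum.GstCtx.rip -= offPc;    // back to the delayed value for the TlbLookup/hit paths
     * @endcode
     */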
7462
7463#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7464 /* Save variables in volatile registers. */
7465 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7466 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
7467 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
7468 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7469#endif
7470
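    /*
     * Helper call argument layout set up below (ARG0 = pVCpu is loaded just before the call):
     *      ARG1        = GCPtrMem (+ offDisp)
     *      ARG2        = iSegReg for the segmented (non-flat) variants
     *      ARG2 / ARG3 = the value for stores (ARG2 when flat, ARG3 when segmented); for 128/256-bit
     *                    SIMD operands the address of the variable's stack slot is passed instead,
     *                    for fetches as well as stores.
     */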
7471 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
7472 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
7473#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7474 if RT_CONSTEXPR_IF(a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U))
7475 {
7476 /*
7477 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
7478 *
7479 * Note! There was a host register assigned to the variable for the TlbLookup case above
7480 * which must not be freed, or the value loaded into the register will not be synced with the
7481 * stack slot further down the road because the variable doesn't know it had a register assigned.
7482 *
7483 * Note! For loads it is not required to sync what is in the assigned register with the stack slot
7484 * as it will be overwritten anyway.
7485 */
7486 uint8_t const idxRegArgValue = a_fFlat ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7487 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
7488 a_enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
7489 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7490 }
7491 else
7492#endif
7493 if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitMemOp_Store)
7494 {
7495 uint8_t const idxRegArgValue = a_fFlat ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7496 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
7497#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7498 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7499#else
7500 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
7501 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7502#endif
7503 }
7504
7505 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
7506 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
7507#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7508 fVolGregMask);
7509#else
7510 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
7511#endif
7512
7513 if RT_CONSTEXPR_IF(!a_fFlat)
7514 {
7515 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
7516 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
7517 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
7518 }
7519
7520#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
7521 /* Do delayed EFLAGS calculations. */
7522 if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitMemOp_Store || a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U))
7523 {
7524 if RT_CONSTEXPR_IF(a_fFlat)
7525 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7526 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
7527 fHstRegsNotToSave);
7528 else
7529 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7530 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
7531 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG)>(pReNative, off, &TlbState,
7532 fHstRegsNotToSave);
7533 }
7534 else if RT_CONSTEXPR_IF(a_fFlat)
7535 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState,
7536 fHstRegsNotToSave);
7537 else
7538 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7539 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
7540 fHstRegsNotToSave);
7541#endif
7542
7543 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7544 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7545
7546 /* Done setting up parameters, make the call. */
7547 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
7548
7549 /*
7550 * Put the result in the right register if this is a fetch.
7551 */
7552 if RT_CONSTEXPR_IF(a_enmOp != kIemNativeEmitMemOp_Store)
7553 {
7554#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7555 if RT_CONSTEXPR_IF(a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U))
7556 {
7557 Assert(a_enmOp == kIemNativeEmitMemOp_Fetch);
7558
7559 /* Sync the value on the stack with the host register assigned to the variable. */
7560 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
7561 }
7562 else
7563#endif
7564 {
7565 Assert(idxRegValueFetch == pVarValue->idxReg);
7566 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
7567 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
7568 }
7569 }
7570
7571#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7572 /* Restore variables and guest shadow registers to volatile registers. */
7573 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7574 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7575#endif
7576
7577#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7578 if (pReNative->Core.offPc)
7579 {
7580 /*
7581 * Time to restore the program counter to its original value.
7582 */
7583 /* Allocate a temporary PC register. */
7584 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7585 kIemNativeGstRegUse_ForUpdate);
7586
7587 /* Restore the original value. */
7588 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7589 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7590
7591 /* Free and flush the PC register. */
7592 iemNativeRegFreeTmp(pReNative, idxPcReg);
7593 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7594 }
7595#endif
7596
7597#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7598 if (!TlbState.fSkip)
7599 {
7600 /* end of TlbMiss - Jump to the done label. */
7601 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7602 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7603
7604 /*
7605 * TlbLookup:
7606 */
7607 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, a_cbMem, a_fAlignMaskAndCtl,
7608 a_enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
7609 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
7610
7611 /*
7612 * Emit code to do the actual storing / fetching.
7613 */
7614 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7615# ifdef IEM_WITH_TLB_STATISTICS
7616 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7617 a_enmOp == kIemNativeEmitMemOp_Store
7618 ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
7619 : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
7620# endif
7621 switch (a_enmOp)
7622 {
7623 case kIemNativeEmitMemOp_Store:
7624 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
7625 {
7626 switch (a_cbMem)
7627 {
7628 case 1:
7629 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7630 break;
7631 case 2:
7632 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7633 break;
7634 case 4:
7635 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7636 break;
7637 case 8:
7638 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7639 break;
7640#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7641 case sizeof(RTUINT128U):
7642 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7643 break;
7644 case sizeof(RTUINT256U):
7645 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7646 break;
7647#endif
7648 default:
7649 AssertFailed();
7650 }
7651 }
7652 else
7653 {
7654 switch (a_cbMem)
7655 {
7656 case 1:
7657 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
7658 idxRegMemResult, TlbState.idxReg1);
7659 break;
7660 case 2:
7661 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7662 idxRegMemResult, TlbState.idxReg1);
7663 break;
7664 case 4:
7665 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7666 idxRegMemResult, TlbState.idxReg1);
7667 break;
7668 case 8:
7669 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
7670 idxRegMemResult, TlbState.idxReg1);
7671 break;
7672 default:
7673 AssertFailed();
7674 }
7675 }
7676 break;
7677
7678 case kIemNativeEmitMemOp_Fetch:
7679 case kIemNativeEmitMemOp_Fetch_Zx_U16:
7680 case kIemNativeEmitMemOp_Fetch_Zx_U32:
7681 case kIemNativeEmitMemOp_Fetch_Zx_U64:
7682 switch (a_cbMem)
7683 {
7684 case 1:
7685 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7686 break;
7687 case 2:
7688 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7689 break;
7690 case 4:
7691 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7692 break;
7693 case 8:
7694 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7695 break;
7696#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7697 case sizeof(RTUINT128U):
7698 /*
7699 * No need to sync back the register with the stack, this is done by the generic variable handling
7700 * code if there is a register assigned to a variable and the stack must be accessed.
7701 */
7702 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7703 break;
7704 case sizeof(RTUINT256U):
7705 /*
7706 * No need to sync back the register with the stack, this is done by the generic variable handling
7707 * code if there is a register assigned to a variable and the stack must be accessed.
7708 */
7709 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7710 break;
7711#endif
7712 default:
7713 AssertFailed();
7714 }
7715 break;
7716
7717 case kIemNativeEmitMemOp_Fetch_Sx_U16:
7718 Assert(a_cbMem == 1);
7719 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7720 break;
7721
7722 case kIemNativeEmitMemOp_Fetch_Sx_U32:
7723 Assert(a_cbMem == 1 || a_cbMem == 2);
7724 if (a_cbMem == 1)
7725 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7726 else
7727 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7728 break;
7729
7730 case kIemNativeEmitMemOp_Fetch_Sx_U64:
7731 switch (a_cbMem)
7732 {
7733 case 1:
7734 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7735 break;
7736 case 2:
7737 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7738 break;
7739 case 4:
7740 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7741 break;
7742 default:
7743 AssertFailed();
7744 }
7745 break;
7746
7747 default:
7748 AssertFailed();
7749 }
7750
7751 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7752
7753 /*
7754 * TlbDone:
7755 */
7756 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7757
7758 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7759
7760# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7761 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7762 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7763# endif
7764 }
7765#else
7766 RT_NOREF(idxLabelTlbMiss);
7767#endif
7768
7769 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
7770 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7771 return off;
7772}
7773
7774
7775
7776/*********************************************************************************************************************************
7777* Memory fetches (IEM_MEM_FETCH_XXX). *
7778*********************************************************************************************************************************/
7779
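/* Usage sketch: each wrapper below bakes the access size, alignment mask/control and operation
 * into iemNativeEmitMemFetchStoreDataCommon's template arguments.  A hypothetical microcode
 * fragment using one of them (IEM_MC_BEGIN/IEM_MC_END framing elided) might read:
 * @code
 *      IEM_MC_LOCAL(uint16_t, u16Value);
 *      IEM_MC_FETCH_MEM_U16(u16Value, pVCpu->iem.s.iEffSeg, GCPtrEffSrc);
 *      IEM_MC_STORE_GREG_U16(IEM_GET_MODRM_REG(pVCpu, bRm), u16Value);
 * @endcode
 */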
7780/* 8-bit segmented: */
7781#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
7782 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch>( \
7783 pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7784
7785#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7786 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16>( \
7787 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7788
7789#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7790 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32>( \
7791 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7792
7793#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7794 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64>( \
7795 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7796
7797#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7798 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16>(\
7799 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7800
7801#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7802 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7803 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7804
7805#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7806 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64>(\
7807 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7808
7809/* 16-bit segmented: */
7810#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7811 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7812 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7813
7814#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7815 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7816 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7817
7818#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7819 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32>(\
7820 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7821
7822#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7823 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64>(\
7824 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7825
7826#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7827 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7828 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7829
7830#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7831 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64>(\
7832 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7833
7834
7835/* 32-bit segmented: */
7836#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7837 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7838 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7839
7840#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7841 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7842 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7843
7844#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7845 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64>(\
7846 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7847
7848#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7849 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64>(\
7850 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7851
7852#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
7853 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7854 pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7855
7856#define IEM_MC_FETCH_MEM_I16_DISP(a_i16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7857 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7858 pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr, \
7859 a_offDisp)
7860
7861#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
7862 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7863 pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7864
7865#define IEM_MC_FETCH_MEM_I32_DISP(a_i32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7866 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7867 pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7868
7869#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
7870 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7871 pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7872
7873AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
7874#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
7875 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch>(\
7876 pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7877
7878
7879/* 64-bit segmented: */
7880#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7881 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7882 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7883
7884AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
7885#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
7886 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch>(\
7887 pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7888
7889
7890/* 8-bit flat: */
7891#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
7892 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, true>(\
7893 pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7894
7895#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
7896 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, true>(\
7897 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7898
7899#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
7900 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, true>(\
7901 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7902
7903#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
7904 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, true>(\
7905 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7906
7907#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
7908 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, true>(\
7909 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7910
7911#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
7912 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7913 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7914
7915#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7916 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, true>(\
7917 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7918
7919
7920/* 16-bit flat: */
7921#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7922 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7923 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7924
7925#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7926 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7927 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7928
7929#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
7930 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, true>(\
7931 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7932
7933#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
7934 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, true>(\
7935 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7936
7937#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
7938 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7939 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7940
7941#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
7942 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, true>(\
7943 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7944
7945/* 32-bit flat: */
7946#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7947 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7948 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7949
7950#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
7951 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7952 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7953
7954#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
7955 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, true>(\
7956 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7957
7958#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
7959 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, true>(\
7960 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7961
7962#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
7963 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7964 pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7965
7966#define IEM_MC_FETCH_MEM_FLAT_I16_DISP(a_i16Dst, a_GCPtrMem, a_offDisp) \
7967 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7968 pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7969
7970#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
7971 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7972 pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7973
7974#define IEM_MC_FETCH_MEM_FLAT_I32_DISP(a_i32Dst, a_GCPtrMem, a_offDisp) \
7975 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7976 pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7977
7978#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
7979 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7980 pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7981
7982#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
7983 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7984 pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7985
7986
7987/* 64-bit flat: */
7988#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7989 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7990 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7991
7992#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
7993 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7994 pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7995
7996#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7997/* 128-bit segmented: */
7998#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
7999 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch>(\
8000 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
8001
8002#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
8003 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
8004 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8005 kIemNativeEmitMemOp_Fetch>(\
8006 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
8007
8008AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
8009#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
8010 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), \
8011 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8012 kIemNativeEmitMemOp_Fetch>(\
8013 pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
8014
8015#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
8016 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch>(\
8017 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
8018
8019#define IEM_MC_FETCH_MEM_XMM_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
8020 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch>(\
8021 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
8022
8023
8024/* 128-bit flat: */
8025#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
8026 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8027 pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
8028
8029#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
8030 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
8031 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8032 kIemNativeEmitMemOp_Fetch, true>(\
8033 pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
8034
8035#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
8036 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), \
8037 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8038 kIemNativeEmitMemOp_Fetch, true>(\
8039 pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
8040
8041#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
8042 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8043 pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
8044
8045#define IEM_MC_FETCH_MEM_FLAT_XMM_NO_AC(a_uXmmDst, a_GCPtrMem) \
8046 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8047 pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
8048
8049/* 256-bit segmented: */
8050#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
8051 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch>(\
8052 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8053
8054#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
8055 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch>(\
8056 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8057
8058#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
8059 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8060 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, \
8061 kIemNativeEmitMemOp_Fetch>(\
8062 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
8063
8064#define IEM_MC_FETCH_MEM_YMM_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
8065 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch>(\
8066 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8067
8068
8069/* 256-bit flat: */
8070#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
8071 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8072 pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8073
8074#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
8075 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8076 pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8077
8078#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
8079 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8080 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, \
8081 kIemNativeEmitMemOp_Fetch, true>(\
8082 pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
8083
8084#define IEM_MC_FETCH_MEM_FLAT_YMM_NO_AC(a_uYmmDst, a_GCPtrMem) \
8085 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8086 pReNative, off, a_uYmmDst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8087
8088#endif
8089
8090
8091/*********************************************************************************************************************************
8092* Memory stores (IEM_MEM_STORE_XXX). *
8093*********************************************************************************************************************************/
8094
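/* These mirror the fetch wrappers above: same common worker, but with kIemNativeEmitMemOp_Store
 * and the corresponding iemNativeHlpMemStoreDataUxx / iemNativeHlpMemFlatStoreDataUxx helpers. */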
8095#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
8096 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store>(\
8097 pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8098
8099#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
8100 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store>(\
8101 pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8102
8103#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
8104 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store>(\
8105 pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8106
8107#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
8108 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store>(\
8109 pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8110
8111
8112#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
8113 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, true>(\
8114 pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8115
8116#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
8117 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, true>(\
8118 pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8119
8120#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
8121 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, true>(\
8122 pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8123
8124#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
8125 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, true>(\
8126 pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8127
8128
8129#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
8130 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint8_t)>(\
8131 pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8132
8133#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
8134 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint16_t)>(\
8135 pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8136
8137#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
8138 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint32_t)>(\
8139 pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8140
8141#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
8142 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint64_t)>(\
8143 pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8144
8145
8146#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
8147 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint8_t), true>(\
8148 pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8149
8150#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
8151 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint16_t), true>(\
8152 pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8153
8154#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
8155 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint32_t), true>(\
8156 pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8157
8158#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
8159 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint64_t), true>(\
8160 pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8161
8162/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
8163 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
8164template<uint8_t const a_cbMem, bool a_fFlat = false>
8165DECL_INLINE_THROW(uint32_t)
8166iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
8167 uint8_t idxVarGCPtrMem, uintptr_t pfnFunction, uint8_t idxInstr)
8168{
8169 /*
8170 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
8171 * to do the grunt work.
8172 */
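    /* (Presumably the point of the temporary variable is that the common worker can then use its
       immediate-value paths: the inline TLB-hit code stores the constant via the
       iemNativeEmitStoreImm*ByGprEx emitters and the TlbMiss path loads it straight into the
       argument register.) */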
8173 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, a_cbMem, uValueConst);
8174 off = iemNativeEmitMemFetchStoreDataCommon<a_cbMem, a_cbMem - 1,
8175 kIemNativeEmitMemOp_Store,
8176 a_fFlat>(pReNative, off, idxVarConstValue, iSegReg,
8177 idxVarGCPtrMem, pfnFunction, idxInstr);
8178 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
8179 return off;
8180}
8181
8182
8183#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8184# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
8185 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
8186 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8187 kIemNativeEmitMemOp_Store>(\
8188 pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
8189
8190# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
8191 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store>(\
8192 pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
8193
8194# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
8195 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store>(\
8196 pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
8197
8198# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
8199 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8200 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, \
8201 kIemNativeEmitMemOp_Store>(\
8202 pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8203
8204
8205# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
8206 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
8207 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8208 kIemNativeEmitMemOp_Store, true>(\
8209 pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, \
8210 pCallEntry->idxInstr)
8211
8212# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
8213 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, true>(\
8214 pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
8215
8216# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
8217 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, true>(\
8218 pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
8219
8220# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
8221 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8222 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
8223 true>(\
8224 pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8225#endif
8226
8227
8228
8229/*********************************************************************************************************************************
8230* Stack Accesses. *
8231*********************************************************************************************************************************/
8232#define IEM_MC_PUSH_U16(a_u16Value) \
8233 off = iemNativeEmitStackPush<16, 0, 0>(pReNative, off, a_u16Value, (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
8234#define IEM_MC_PUSH_U32(a_u32Value) \
8235 off = iemNativeEmitStackPush<32, 0, 0>(pReNative, off, a_u32Value, (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
8236#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
8237 off = iemNativeEmitStackPush<32, 0, 1>(pReNative, off, a_uSegVal, (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
8238#define IEM_MC_PUSH_U64(a_u64Value) \
8239 off = iemNativeEmitStackPush<64, 0, 0>(pReNative, off, a_u64Value, (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
8240
8241#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
8242 off = iemNativeEmitStackPush<16, 32, 0>(pReNative, off, a_u16Value, (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8243#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
8244 off = iemNativeEmitStackPush<32, 32, 0>(pReNative, off, a_u32Value, (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
8245#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
8246 off = iemNativeEmitStackPush<32, 32, 1>(pReNative, off, a_u32Value, (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
8247
8248#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
8249 off = iemNativeEmitStackPush<16, 64, 0>(pReNative, off, a_u16Value, (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8250#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
8251 off = iemNativeEmitStackPush<64, 64, 0>(pReNative, off, a_u64Value, (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
8252
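/* Usage sketch: a hypothetical microcode fragment for a 16-bit 'push reg' (IEM_MC_BEGIN/IEM_MC_END
 * framing elided) might read:
 * @code
 *      IEM_MC_LOCAL(uint16_t, u16Value);
 *      IEM_MC_FETCH_GREG_U16(u16Value, iReg);
 *      IEM_MC_PUSH_U16(u16Value);
 * @endcode
 * The FLAT32/FLAT64 variants are presumably only instantiated for the flat execution modes (the
 * emitter below asserts as much), which lets it skip the SS.ATTR.D based effective-SP handling.
 */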
8253
8254/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
8255template<uint8_t const a_cBitsVar, uint8_t const a_cBitsFlat, bool a_fIsSegReg = false>
8256DECL_INLINE_THROW(uint32_t)
8257iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uintptr_t pfnFunction, uint8_t idxInstr)
8258{
8259 /*
8260 * Assert sanity.
8261 */
8262 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
8263 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
8264#ifdef VBOX_STRICT
8265 uint32_t const cTmplArgs = RT_MAKE_U32_FROM_U8(a_cBitsVar, a_cBitsFlat, a_fIsSegReg, 0);
8266 if (a_cBitsFlat != 0)
8267 {
8268 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8269 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8270 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8271 Assert( pfnFunction
8272 == ( cTmplArgs == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8273 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
8274 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
8275 : cTmplArgs == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8276 : cTmplArgs == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
8277 : UINT64_C(0xc000b000a0009000) ));
8278 }
8279 else
8280 Assert( pfnFunction
8281 == ( cTmplArgs == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
8282 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
8283 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
8284 : cTmplArgs == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
8285 : UINT64_C(0xc000b000a0009000) ));
8286#endif
8287
8288#ifdef VBOX_STRICT
8289 /*
8290 * Check that the fExec flags we've got make sense.
8291 */
8292 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8293#endif
8294
8295 /*
8296 * To keep things simple we have to commit any pending writes first as we
8297 * may end up making calls.
8298 */
8299 /** @todo we could postpone this till we make the call and reload the
8300 * registers after returning from the call. Not sure if that's sensible or
8301 * not, though. */
8302 off = iemNativeRegFlushPendingWrites(pReNative, off);
8303
8304 /*
8305 * First we calculate the new RSP and the effective stack pointer value.
8306 * For 64-bit mode and flat 32-bit these two are the same.
8307 * (Code structure is very similar to that of PUSH)
8308 */
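    /*
     * Sketch of what the SP update below boils down to (an assumption based on the helper names and
     * the SS.ATTR.D test, not the exact emitted code):
     * @code
     *      if (a_cBitsFlat == 64)      RSP -= cbMem;   EffSp = RSP;
     *      else if (a_cBitsFlat == 32) ESP -= cbMem;   EffSp = ESP;
     *      else if (SS.ATTR.D)         ESP -= cbMem;   EffSp = ESP;    // 32-bit stack
     *      else                        SP  -= cbMem;   EffSp = SP;     // 16-bit stack, low word only
     * @endcode
     * EffSp is what is fed to the TLB lookup and to the store helper as the (SS relative) address.
     */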
8309 uint8_t const cbMem = a_cBitsVar / 8;
8310 bool const fIsIntelSeg = a_fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
8311 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
8312 ? cbMem : sizeof(uint16_t);
8313 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8314 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8315 uint8_t const idxRegEffSp = a_cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8316 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8317 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
8318 {
8319 Assert(idxRegEffSp == idxRegRsp);
8320 Assert(a_cBitsFlat == 32 || a_cBitsFlat == 64);
8321 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8322 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
8323 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
8324 else
8325 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
8326 }
8327 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8328 {
8329 Assert(idxRegEffSp != idxRegRsp);
8330 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8331 kIemNativeGstRegUse_ReadOnly);
8332#ifdef RT_ARCH_AMD64
8333 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8334#else
8335 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8336#endif
8337 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8338 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8339 offFixupJumpToUseOtherBitSp = off;
8340 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8341 {
8342 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8343 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8344 }
8345 else
8346 {
8347 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8348 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8349 }
8350 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8351 }
8352 /* SpUpdateEnd: */
8353 uint32_t const offLabelSpUpdateEnd = off;
8354
8355 /*
8356 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
8357 * we're skipping lookup).
8358 */
8359 uint8_t const iSegReg = a_cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8360 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
8361 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8362 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8363 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8364 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8365 : UINT32_MAX;
8366 uint8_t const idxRegValue = !TlbState.fSkip
8367 && pVarValue->enmKind != kIemNativeVarKind_Immediate
8368 ? iemNativeVarRegisterAcquireInitedWithPref(pReNative, idxVarValue, &off,
8369 IEMNATIVE_CALL_ARG2_GREG)
8370 : UINT8_MAX;
8371 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
8372
8373
8374 if (!TlbState.fSkip)
8375 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8376 else
8377 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8378
8379 /*
8380 * Use16BitSp:
8381 */
8382 if RT_CONSTEXPR_IF(a_cBitsFlat == 0)
8383 {
8384#ifdef RT_ARCH_AMD64
8385 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8386#else
8387 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8388#endif
8389 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8390 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8391 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8392 else
8393 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8394 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8395 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8396 }
8397
8398 /*
8399 * TlbMiss:
8400 *
8401 * Call helper to do the pushing.
8402 */
8403 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8404
8405#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8406 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8407#else
8408 RT_NOREF(idxInstr);
8409#endif
8410
8411 /* Save variables in volatile registers. */
8412 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8413 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8414 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
8415 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
8416 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8417
8418 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
8419 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
8420 {
8421 /* Swap them using ARG0 as temp register: */
8422 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
8423 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
8424 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
8425 }
8426 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
8427 {
8428 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
8429 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
8430 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8431
8432 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
8433 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8434 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8435 }
8436 else
8437 {
8438 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
8439 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8440
8441 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
8442 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
8443 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
8444 }
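    /* Note: the three cases above differ only in the order the two arguments are loaded, so that
       neither source register is clobbered before it has been read (ARG0 doubles as scratch when
       the value and EffSp registers would collide). */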
8445
8446#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8447 /* Do delayed EFLAGS calculations. */
8448 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
8449 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
8450#endif
8451
8452 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8453 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8454
8455 /* Done setting up parameters, make the call. */
8456 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
8457
8458 /* Restore variables and guest shadow registers to volatile registers. */
8459 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8460 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8461
8462#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8463 if (!TlbState.fSkip)
8464 {
8465 /* end of TlbMiss - Jump to the done label. */
8466 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8467 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8468
8469 /*
8470 * TlbLookup:
8471 */
8472 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
8473 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8474
8475 /*
8476 * Emit code to do the actual storing / fetching.
8477 */
8478 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
8479# ifdef IEM_WITH_TLB_STATISTICS
8480 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8481 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8482# endif
8483 if (idxRegValue != UINT8_MAX)
8484 {
8485 switch (cbMemAccess)
8486 {
8487 case 2:
8488 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8489 break;
8490 case 4:
8491 if (!fIsIntelSeg)
8492 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8493 else
8494 {
8495 /* Intel real mode segment push. The 10890XE adds the 2nd half of EFLAGS to a
8496 PUSH FS in real mode, so we have to try to emulate that here.
8497 We borrow the now unused idxReg1 from the TLB lookup code here. */
8498 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
8499 kIemNativeGstReg_EFlags);
8500 if (idxRegEfl != UINT8_MAX)
8501 {
8502#ifdef RT_ARCH_AMD64
8503 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
8504 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8505 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8506#else
8507 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
8508 off, TlbState.idxReg1, idxRegEfl,
8509 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8510#endif
8511 iemNativeRegFreeTmp(pReNative, idxRegEfl);
8512 }
8513 else
8514 {
8515 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
8516 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
8517 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8518 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8519 }
8520 /* ASSUMES the upper half of idxRegValue is ZERO. */
8521 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
8522 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
8523 }
8524 break;
8525 case 8:
8526 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8527 break;
8528 default:
8529 AssertFailed();
8530 }
8531 }
8532 else
8533 {
8534 switch (cbMemAccess)
8535 {
8536 case 2:
8537 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
8538 idxRegMemResult, TlbState.idxReg1);
8539 break;
8540 case 4:
8541 Assert(!a_fIsSegReg);
8542 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
8543 idxRegMemResult, TlbState.idxReg1);
8544 break;
8545 case 8:
8546 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
8547 break;
8548 default:
8549 AssertFailed();
8550 }
8551 }
8552
8553 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8554 TlbState.freeRegsAndReleaseVars(pReNative);
8555
8556 /*
8557 * TlbDone:
8558 *
8559 * Commit the new RSP value.
8560 */
8561 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8562 }
8563#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8564
8565#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8566 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
8567#endif
8568 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8569 if (idxRegEffSp != idxRegRsp)
8570 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8571
8572 /* The value variable is implicitly flushed. */
8573 if (idxRegValue != UINT8_MAX)
8574 iemNativeVarRegisterRelease(pReNative, idxVarValue);
8575 iemNativeVarFreeLocal(pReNative, idxVarValue);
8576
8577 return off;
8578}
8579
8580
8581
8582#define IEM_MC_POP_GREG_U16(a_iGReg) \
8583 off = iemNativeEmitStackPopGReg<16, 0>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
8584#define IEM_MC_POP_GREG_U32(a_iGReg) \
8585 off = iemNativeEmitStackPopGReg<32, 0>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
8586#define IEM_MC_POP_GREG_U64(a_iGReg) \
8587 off = iemNativeEmitStackPopGReg<64, 0>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
8588
8589#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
8590 off = iemNativeEmitStackPopGReg<16, 32>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8591#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
8592 off = iemNativeEmitStackPopGReg<32, 32>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
8593
8594#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
8595 off = iemNativeEmitStackPopGReg<16, 64>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8596#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
8597 off = iemNativeEmitStackPopGReg<64, 64>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
8598
8599
8600DECL_FORCE_INLINE_THROW(uint32_t)
8601iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
8602 uint8_t idxRegTmp)
8603{
8604 /* Use16BitSp: */
8605#ifdef RT_ARCH_AMD64
8606 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8607 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
8608 RT_NOREF(idxRegTmp);
8609#else
8610 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
8611 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
8612 /* add tmp, regrsp, #cbMem */
8613 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
8614 /* and tmp, tmp, #0xffff */
8615 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
8616 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
8617 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
8618 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
8619#endif
8620 return off;
8621}
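/* Illustrative example (not emitted code): for a 2-byte POP with SP=0xfffe the effective
   address is SS:0xfffe and the new low word of RSP becomes (0xfffe + 2) & 0xffff = 0x0000,
   i.e. the 16-bit stack pointer wraps within the segment while bits 63:16 of RSP are left
   untouched on both the AMD64 and the ARM64 code path above. */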
8622
8623
8624DECL_FORCE_INLINE(uint32_t)
8625iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
8626{
8627 /* Use32BitSp: */
8628 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8629 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
8630 return off;
8631}
8632
8633
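/*
 * Rough overview of the code emitted by iemNativeEmitStackPopGReg below: the inline path
 * first computes the effective stack pointer and advances RSP (Use16BitSp/Use32BitSp for
 * non-FLAT modes, selected by SS.D), then does the TLB lookup and loads the value at the
 * TlbLookup label, while the TlbMiss path calls the pfnFunction helper instead; both paths
 * meet again at TlbDone where the popped value is committed to the target register and the
 * RSP update is completed (FLAT modes do the addition only here).  This mirrors the
 * structure of the push emitters above.
 */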
8634/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
8635template<uint8_t const a_cBitsVar, uint8_t const a_cBitsFlat>
8636DECL_INLINE_THROW(uint32_t)
8637iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg, uintptr_t pfnFunction, uint8_t idxInstr)
8638{
8639 /*
8640 * Assert sanity.
8641 */
8642 Assert(idxGReg < 16);
8643#ifdef VBOX_STRICT
8644 if (a_cBitsFlat != 0)
8645 {
8646 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8647 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8648 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8649 Assert( pfnFunction
8650 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 32) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8651 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 32) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
8652 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 64) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8653 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 64) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
8654 : UINT64_C(0xc000b000a0009000) ));
8655 }
8656 else
8657 Assert( pfnFunction
8658 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
8659 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
8660 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
8661 : UINT64_C(0xc000b000a0009000) ));
8662#endif
8663
8664#ifdef VBOX_STRICT
8665 /*
8666 * Check that the fExec flags we've got make sense.
8667 */
8668 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8669#endif
8670
8671 /*
8672 * To keep things simple we have to commit any pending writes first as we
8673 * may end up making calls.
8674 */
8675 off = iemNativeRegFlushPendingWrites(pReNative, off);
8676
8677 /*
8678 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
8679 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
8680 * directly as the effective stack pointer.
8681 * (Code structure is very similar to that of PUSH)
8682 */
8683 uint8_t const cbMem = a_cBitsVar / 8;
8684 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8685 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8686 uint8_t const idxRegEffSp = a_cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8687 /** @todo can do a better job picking the register here. For cbMem >= 4 this
8688 * will be the resulting register value. */
8689 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
8690
8691 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8692 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
8693 {
8694 Assert(idxRegEffSp == idxRegRsp);
8695 Assert(a_cBitsFlat == 32 || a_cBitsFlat == 64);
8696 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8697 }
8698 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8699 {
8700 Assert(idxRegEffSp != idxRegRsp);
8701 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8702 kIemNativeGstRegUse_ReadOnly);
8703#ifdef RT_ARCH_AMD64
8704 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8705#else
8706 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8707#endif
8708 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8709 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8710 offFixupJumpToUseOtherBitSp = off;
8711 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8712 {
8713/** @todo can skip idxRegRsp updating when popping ESP. */
8714 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8715 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8716 }
8717 else
8718 {
8719 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8720 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8721 }
8722 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8723 }
8724 /* SpUpdateEnd: */
8725 uint32_t const offLabelSpUpdateEnd = off;
8726
8727 /*
8728 * Okay, now prepare for the TLB lookup and jump to the lookup code (or to
8729 * TlbMiss if we're skipping the lookup).
8730 */
8731 uint8_t const iSegReg = a_cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8732 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
8733 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8734 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8735 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8736 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8737 : UINT32_MAX;
8738
8739 if (!TlbState.fSkip)
8740 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8741 else
8742 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
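 /* Note: when TlbState.fSkip is set no inline lookup code is emitted at all, so the access
    goes unconditionally through the TlbMiss helper call below. */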
8743
8744 /*
8745 * Use16BitSp:
8746 */
8747 if RT_CONSTEXPR_IF(a_cBitsFlat == 0)
8748 {
8749#ifdef RT_ARCH_AMD64
8750 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8751#else
8752 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8753#endif
8754 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8755 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8756 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8757 else
8758 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8759 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8760 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8761 }
8762
8763 /*
8764 * TlbMiss:
8765 *
8766 * Call helper to do the popping.
8767 */
8768 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8769
8770#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8771 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8772#else
8773 RT_NOREF(idxInstr);
8774#endif
8775
8776 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8777 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8778 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
8779 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8780
8781
8782 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
8783 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8784 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8785
8786#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8787 /* Do delayed EFLAGS calculations. */
8788 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
8789#endif
8790
8791 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8792 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8793
8794 /* Done setting up parameters, make the call. */
8795 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
8796
8797 /* Move the return register content to idxRegMemResult. */
8798 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8799 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8800
8801 /* Restore variables and guest shadow registers to volatile registers. */
8802 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8803 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8804
8805#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8806 if (!TlbState.fSkip)
8807 {
8808 /* end of TlbMiss - Jump to the done label. */
8809 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8810 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8811
8812 /*
8813 * TlbLookup:
8814 */
8815 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
8816 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8817
8818 /*
8819 * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
8820 */
8821 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8822# ifdef IEM_WITH_TLB_STATISTICS
8823 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8824 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8825# endif
8826 switch (cbMem)
8827 {
8828 case 2:
8829 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8830 break;
8831 case 4:
8832 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8833 break;
8834 case 8:
8835 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8836 break;
8837 default:
8838 AssertFailed();
8839 }
8840
8841 TlbState.freeRegsAndReleaseVars(pReNative);
8842
8843 /*
8844 * TlbDone:
8845 *
8846 * Set the new RSP value (FLAT accesses need to calculate it first) and
8847 * commit the popped register value.
8848 */
8849 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8850 }
8851#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8852
8853 if (idxGReg != X86_GREG_xSP)
8854 {
8855 /* Set the register. */
8856 if (cbMem >= sizeof(uint32_t))
8857 {
8858#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8859 AssertMsg( pReNative->idxCurCall == 0
8860 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
8861 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName,
8862 iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
8863#endif
8864 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
8865#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8866 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
8867#endif
8868#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8869 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
8870 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8871#endif
8872 }
8873 else
8874 {
8875 Assert(cbMem == sizeof(uint16_t));
8876 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
8877 kIemNativeGstRegUse_ForUpdate);
8878 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
8879#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8880 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8881#endif
8882 iemNativeRegFreeTmp(pReNative, idxRegDst);
8883 }
8884
8885 /* Complete RSP calculation for FLAT mode. */
8886 if (idxRegEffSp == idxRegRsp)
8887 {
8888 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
8889 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8890 else
8891 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8892 }
8893 }
8894 else
8895 {
8896 /* We're popping RSP, ESP or SP. Only the latter needs a bit of extra work, of course. */
8897 if (cbMem == sizeof(uint64_t))
8898 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
8899 else if (cbMem == sizeof(uint32_t))
8900 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
8901 else
8902 {
8903 if (idxRegEffSp == idxRegRsp)
8904 {
8905 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
8906 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8907 else
8908 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8909 }
8910 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
8911 }
8912 }
8913
8914#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8915 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
8916#endif
8917
8918 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8919 if (idxRegEffSp != idxRegRsp)
8920 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8921 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8922
8923 return off;
8924}
8925
8926
8927
8928/*********************************************************************************************************************************
8929* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
8930*********************************************************************************************************************************/
8931
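/*
 * Overview: the IEM_MC_MEM_[FLAT_]MAP_Uxx_{ATOMIC,RW,WO,RO} statements below all expand to
 * iemNativeEmitMemMapCommon, parameterized by access size, access type, alignment mask and
 * whether the address is flat (no segment register).  Each mapping yields a host pointer
 * variable plus a bUnmapInfo byte which the matching IEM_MC_MEM_COMMIT_AND_UNMAP_xxx
 * statement further down consumes.
 */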
8932#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8933 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/>(\
8934 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
8935
8936#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8937 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/>(\
8938 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
8939
8940#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8941 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/>(\
8942 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
8943
8944#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8945 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/>(\
8946 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
8947
8948
8949#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8950 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8951 pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
8952
8953#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8954 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8955 pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
8956
8957#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8958 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8959 pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8960
8961#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8962 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8963 pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
8964
8965#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8966 off = iemNativeEmitMemMapCommon<sizeof(int16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8967 pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8968
8969
8970#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8971 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8972 pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
8973
8974#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8975 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8976 pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
8977
8978#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8979 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8980 pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8981
8982#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8983 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8984 pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
8985
8986#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8987 off = iemNativeEmitMemMapCommon<sizeof(int32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8988 pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
8989
8990
8991#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8992 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
8993 pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
8994
8995#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8996 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
8997 pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
8998#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8999 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
9000 pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
9001
9002#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9003 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
9004 pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
9005
9006#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9007 off = iemNativeEmitMemMapCommon<sizeof(int64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
9008 pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
9009
9010
9011#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9012 off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
9013 pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
9014
9015#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9016 off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, \
9017 sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/ /** @todo check BCD align */>(\
9018 pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
9019
9020
9021#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9022 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
9023 pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
9024
9025#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9026 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
9027 pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
9028
9029#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9030 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
9031 pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
9032
9033#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9034 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
9035 pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
9036
9037
9038
9039#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9040 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, true>(\
9041 pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
9042
9043#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9044 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, true>(\
9045 pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
9046
9047#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9048 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, true>(\
9049 pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
9050
9051#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9052 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, true>(\
9053 pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
9054
9055
9056#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9057 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9058 pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
9059
9060#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9061 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9062 pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
9063
9064#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9065 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9066 pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9067
9068#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9069 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9070 pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
9071
9072#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
9073 off = iemNativeEmitMemMapCommon<sizeof(int16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9074 pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9075
9076
9077#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9078 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9079 pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
9080
9081#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9082 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9083 pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
9084
9085#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9086 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9087 pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9088
9089#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9090 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9091 pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
9092
9093#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
9094 off = iemNativeEmitMemMapCommon<sizeof(int32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9095 pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9096
9097
9098#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9099 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9100 pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
9101
9102#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9103 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9104 pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
9105
9106#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9107 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9108 pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9109
9110#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9111 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9112 pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
9113
9114#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
9115 off = iemNativeEmitMemMapCommon<sizeof(int64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9116 pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9117
9118
9119#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
9120 off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9121 pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
9122
9123#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
9124 off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, \
9125 sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/ /** @todo check BCD align */, true>(\
9126 pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
9127
9128
9129#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9130 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
9131 pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
9132
9133#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9134 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
9135 pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
9136
9137#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9138 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
9139 pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
9140
9141#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9142 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
9143 pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
9144
9145
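/*
 * Common emitter for the IEM_MC_MEM_[FLAT_]MAP_xxx statements above.  Template parameters:
 *   a_cbMem            - access size in bytes (1, 2, 4, 8, 10 or 16 for the current users).
 *   a_fAccess          - IEM_ACCESS_DATA_R/W/RW/ATOMIC; must match the pfnFunction helper
 *                        and is handed to the inline TLB lookup code.
 *   a_fAlignMaskAndCtl - alignment mask and control flags passed on to iemNativeEmitTlbLookup.
 *   a_fFlat            - true for the FLAT variants, in which case iSegReg must be UINT8_MAX.
 */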
9146template<uint8_t const a_cbMem, uint32_t const a_fAccess, uint32_t const a_fAlignMaskAndCtl, bool a_fFlat = false>
9147DECL_INLINE_THROW(uint32_t)
9148iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
9149 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uintptr_t pfnFunction, uint8_t idxInstr)
9150{
9151 /*
9152 * Assert sanity.
9153 */
9154 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
9155 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
9156 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
9157 && pVarMem->cbVar == sizeof(void *),
9158 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9159
9160 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9161 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9162 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
9163 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
9164 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9165
9166 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
9167 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
9168 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
9169 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
9170 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9171
9172 Assert(!a_fFlat ? iSegReg < 6 : iSegReg == UINT8_MAX);
9173
9174 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
9175
9176#ifdef VBOX_STRICT
9177# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
9178 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
9179 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
9180 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
9181 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
9182# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
9183 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
9184 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
9185 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
9186
9187 if RT_CONSTEXPR_IF(a_fFlat)
9188 {
9189 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
9190 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
9191 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
9192 switch (a_cbMem)
9193 {
9194 case 1:
9195 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU8));
9196 Assert(!a_fAlignMaskAndCtl);
9197 break;
9198 case 2:
9199 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU16));
9200 Assert(a_fAlignMaskAndCtl < 2);
9201 break;
9202 case 4:
9203 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU32));
9204 Assert(a_fAlignMaskAndCtl < 4);
9205 break;
9206 case 8:
9207 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU64));
9208 Assert(a_fAlignMaskAndCtl < 8);
9209 break;
9210 case 10:
9211 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
9212 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
9213 Assert((a_fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9214 Assert(a_fAlignMaskAndCtl < 8);
9215 break;
9216 case 16:
9217 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU128));
9218 Assert(a_fAlignMaskAndCtl < 16);
9219 break;
9220# if 0
9221 case 32:
9222 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemFlatMapDataU256));
9223 Assert(a_fAlignMaskAndCtl < 32);
9224 break;
9225 case 64:
9226 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemFlatMapDataU512));
9227 Assert(a_fAlignMaskAndCtl < 64);
9228 break;
9229# endif
9230 default: AssertFailed(); break;
9231 }
9232 }
9233 else
9234 {
9235 Assert(iSegReg < 6);
9236 switch (a_cbMem)
9237 {
9238 case 1:
9239 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU8));
9240 Assert(!a_fAlignMaskAndCtl);
9241 break;
9242 case 2:
9243 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU16));
9244 Assert(a_fAlignMaskAndCtl < 2);
9245 break;
9246 case 4:
9247 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU32));
9248 Assert(a_fAlignMaskAndCtl < 4);
9249 break;
9250 case 8:
9251 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU64));
9252 Assert(a_fAlignMaskAndCtl < 8);
9253 break;
9254 case 10:
9255 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
9256 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
9257 Assert((a_fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9258 Assert(a_fAlignMaskAndCtl < 8);
9259 break;
9260 case 16:
9261 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU128));
9262 Assert(a_fAlignMaskAndCtl < 16);
9263 break;
9264# if 0
9265 case 32:
9266 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemMapDataU256));
9267 Assert(a_fAlignMaskAndCtl < 32);
9268 break;
9269 case 64:
9270 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemMapDataU512));
9271 Assert(a_fAlignMaskAndCtl < 64);
9272 break;
9273# endif
9274 default: AssertFailed(); break;
9275 }
9276 }
9277# undef IEM_MAP_HLP_FN
9278# undef IEM_MAP_HLP_FN_NO_AT
9279#endif
9280
9281#ifdef VBOX_STRICT
9282 /*
9283 * Check that the fExec flags we've got make sense.
9284 */
9285 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
9286#endif
9287
9288 /*
9289 * To keep things simple we have to commit any pending writes first as we
9290 * may end up making calls.
9291 */
9292 off = iemNativeRegFlushPendingWrites(pReNative, off);
9293
9294#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9295 /*
9296 * Move/spill/flush stuff out of call-volatile registers.
9297 * This is the easy way out. We could contain this to the tlb-miss branch
9298 * by saving and restoring active stuff here.
9299 */
9300 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
9301 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
9302#endif
9303
9304 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
9305 while the tlb-miss codepath will temporarily put it on the stack.
9306 Set the type to stack here so we don't need to do it twice below. */
9307 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
9308 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
9309 /** @todo use a tmp register from TlbState, since they'll be free after tlb
9310 * lookup is done. */
9311
9312 /*
9313 * Define labels and allocate the result register (trying for the return
9314 * register if we can).
9315 */
9316 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
9317 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
9318 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
9319 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
9320 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, a_cbMem);
9321 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
9322 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
9323 : UINT32_MAX;
9324
9325 /*
9326 * Jump to the TLB lookup code.
9327 */
9328 if (!TlbState.fSkip)
9329 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
9330
9331 /*
9332 * TlbMiss:
9333 *
9334 * Call helper to do the mapping.
9335 * We flush all guest register shadow copies here.
9336 */
9337 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
9338
9339#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
9340 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9341#else
9342 RT_NOREF(idxInstr);
9343#endif
9344
9345#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9346 /* Save variables in volatile registers. */
9347 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
9348 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
9349#endif
9350
9351 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
9352 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*cbAppend*/,
9353#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9354 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
9355#else
9356 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9357#endif
9358
9359 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
9360 if RT_CONSTEXPR_IF(!a_fFlat)
9361 {
9362 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
9363 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
9364 }
9365
9366#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
9367 /* Do delayed EFLAGS calculations. */
9368 if RT_CONSTEXPR_IF(a_fFlat)
9369 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
9370 fHstRegsNotToSave);
9371 else
9372 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
9373 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG)>(pReNative, off, &TlbState,
9374 fHstRegsNotToSave);
9375#endif
9376
9377 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
9378 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
9379 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
9380
9381 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9382 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9383
9384 /* Done setting up parameters, make the call. */
9385 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
9386
9387 /*
9388 * Put the output in the right registers.
9389 */
9390 Assert(idxRegMemResult == pVarMem->idxReg);
9391 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
9392 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
9393
9394#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9395 /* Restore variables and guest shadow registers to volatile registers. */
9396 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
9397 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
9398#endif
9399
9400 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
9401 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
9402
9403#ifdef IEMNATIVE_WITH_TLB_LOOKUP
9404 if (!TlbState.fSkip)
9405 {
9406 /* end of TlbMiss - Jump to the done label. */
9407 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
9408 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
9409
9410 /*
9411 * TlbLookup:
9412 */
9413 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, a_cbMem, a_fAlignMaskAndCtl, a_fAccess,
9414 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
9415# ifdef IEM_WITH_TLB_STATISTICS
9416 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
9417 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
9418# endif
9419
9420 /* [idxVarUnmapInfo] = 0; */
9421 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
9422
9423 /*
9424 * TlbDone:
9425 */
9426 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
9427
9428 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
9429
9430# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9431 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
9432 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9433# endif
9434 }
9435#else
9436 RT_NOREF(idxLabelTlbMiss);
9437#endif
9438
9439 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9440 iemNativeVarRegisterRelease(pReNative, idxVarMem);
9441
9442 return off;
9443}
9444
9445
9446#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
9447 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, \
9448 pCallEntry->idxInstr, IEM_ACCESS_DATA_ATOMIC)
9449
9450#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
9451 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, \
9452 pCallEntry->idxInstr, IEM_ACCESS_DATA_RW)
9453
9454#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
9455 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, \
9456 pCallEntry->idxInstr, IEM_ACCESS_DATA_W)
9457
9458#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
9459 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, \
9460 pCallEntry->idxInstr, IEM_ACCESS_DATA_R)
9461
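/*
 * Emitter for the IEM_MC_MEM_COMMIT_AND_UNMAP_xxx statements above: the generated code tests
 * the bUnmapInfo byte produced by the mapping and only calls the commit-and-unmap helper when
 * it is non-zero.  The inline TLB-hit path always leaves it zero and, as noted below, most
 * helper mappings should too, so the call is the exception rather than the rule (presumably
 * bounce-buffered or otherwise special accesses).
 */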
9462DECL_INLINE_THROW(uint32_t)
9463iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
9464 uintptr_t pfnFunction, uint8_t idxInstr, uint32_t fAccess)
9465{
9466 /*
9467 * Assert sanity.
9468 */
9469 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9470#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
9471 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9472#endif
9473 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
9474 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
9475 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
9476#ifdef VBOX_STRICT
9477 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
9478 {
9479 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
9480 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
9481 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
9482 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
9483 case IEM_ACCESS_TYPE_WRITE:
9484 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
9485 case IEM_ACCESS_TYPE_READ:
9486 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
9487 default: AssertFailed();
9488 }
9489#else
9490 RT_NOREF(fAccess);
9491#endif
9492
9493 /*
9494 * To keep things simple we have to commit any pending writes first as we
9495 * may end up making calls (there shouldn't be any at this point, so this
9496 * is just for consistency).
9497 */
9498 /** @todo we could postpone this till we make the call and reload the
9499 * registers after returning from the call. Not sure if that's sensible or
9500 * not, though. */
9501 off = iemNativeRegFlushPendingWrites(pReNative, off);
9502
9503 /*
9504 * Move/spill/flush stuff out of call-volatile registers.
9505 *
9506 * We exclude any register holding the bUnmapInfo variable, as we'll be
9507 * checking it after returning from the call and will free it afterwards.
9508 */
9509 /** @todo save+restore active registers and maybe guest shadows in miss
9510 * scenario. */
9511 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
9512 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
9513
9514 /*
9515 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
9516 * to call the unmap helper function.
9517 *
9518 * The likelihood of it being zero is higher than for the TLB hit when doing
9519 * the mapping, as a TLB miss for a well aligned and unproblematic memory
9520 * access should also end up with a mapping that won't need special unmapping.
9521 */
9522 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
9523 * should speed up things for the pure interpreter as well when TLBs
9524 * are enabled. */
9525#ifdef RT_ARCH_AMD64
9526 if (pVarUnmapInfo->idxReg == UINT8_MAX)
9527 {
9528 /* test byte [rbp - xxx], 0ffh */
9529 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9530 pbCodeBuf[off++] = 0xf6;
9531 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
9532 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9533 pbCodeBuf[off++] = 0xff;
9534 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9535 }
9536 else
9537#endif
9538 {
9539 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInitedWithPref(pReNative, idxVarUnmapInfo, &off,
9540 IEMNATIVE_CALL_ARG1_GREG);
9541 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
9542 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9543 }
9544 uint32_t const offJmpFixup = off;
9545 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
9546
9547 /*
9548 * Call the unmap helper function.
9549 */
9550#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
9551 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9552#else
9553 RT_NOREF(idxInstr);
9554#endif
9555
9556 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
9557 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
9558 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9559
9560 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9561 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9562
9563 /* Done setting up parameters, make the call.
9564 Note! Since we can only end up here if we took a TLB miss, any postponed EFLAGS
9565 calculations have been done there already. Thus, a_fSkipEflChecks = true. */
9566 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
9567
9568 /* The bUnmapInfo variable is implicitly freed by these MCs. */
9569 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
9570
9571 /*
9572 * Done, just fixup the jump for the non-call case.
9573 */
9574 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
9575
9576 return off;
9577}
9578
9579
9580
9581/*********************************************************************************************************************************
9582* State and Exceptions *
9583*********************************************************************************************************************************/
9584
9585#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9586#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9587
9588#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9589#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9590#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9591
9592#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9593#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9594#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9595
9596
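/*
 * With IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS this brings the host FP control/status
 * registers into a known state before recompiled SSE/AVX code may change guest state: the
 * original host MXCSR (AMD64) resp. FPCR (ARM64) is saved the first time round, the sticky
 * exception flags are cleared, and on AMD64 a copy of the guest MXCSR with all exceptions
 * masked is loaded so the recompiled code cannot raise host FP exceptions.  Without that
 * config option this is a no-op.
 */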
9597DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
9598{
9599#ifndef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
9600 RT_NOREF(pReNative, fForChange);
9601#else
9602 if ( !(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED)
9603 && fForChange)
9604 {
9605# ifdef RT_ARCH_AMD64
9606
9607 /* Need to save the host MXCSR the first time, and clear the exception flags. */
9608 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9609 {
9610 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9611
9612 /* stmxcsr */
9613 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9614 pbCodeBuf[off++] = X86_OP_REX_B;
9615 pbCodeBuf[off++] = 0x0f;
9616 pbCodeBuf[off++] = 0xae;
9617 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 3, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9618 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9619 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9620 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9621 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9622 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9623
9624 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9625 }
9626
9627 uint8_t const idxRegTmp = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
9628 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
9629 kIemNativeGstRegUse_ReadOnly);
9630
9631 /*
9632 * Mask all exceptions, clear the exception status flags and load the result into MXCSR,
9633 * taking a detour through memory here because ldmxcsr/stmxcsr don't support
9634 * a register source/target (sigh).
9635 */
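 /* Roughly equivalent C for the code emitted below (guest_mxcsr stands for the value held
    in idxRegMxCsr; uRegMxcsrTmp is just the scratch field in VMCPU used as the memory
    operand, since ldmxcsr only accepts an m32 operand):
        pVCpu->iem.s.uRegMxcsrTmp = (guest_mxcsr | X86_MXCSR_XCPT_MASK) & ~X86_MXCSR_XCPT_FLAGS;
        ldmxcsr(&pVCpu->iem.s.uRegMxcsrTmp);
    i.e. all SIMD exceptions masked and all sticky exception flags cleared. */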
9636 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr);
9637 off = iemNativeEmitOrGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, X86_MXCSR_XCPT_MASK);
9638 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, ~X86_MXCSR_XCPT_FLAGS);
9639 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9640
9641 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9642
9643 /* ldmxcsr */
9644 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9645 pbCodeBuf[off++] = X86_OP_REX_B;
9646 pbCodeBuf[off++] = 0x0f;
9647 pbCodeBuf[off++] = 0xae;
9648 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 2, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9649 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9650 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9651 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9652 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9653 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9654
9655 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9656 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9657
9658# elif defined(RT_ARCH_ARM64)
9659 uint8_t const idxRegTmp = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
9660
9661 /* Need to save the host floating point control register the first time, and clear FPSR. */
9662 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9663 {
9664 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9665 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(ARMV8_A64_REG_XZR, ARMV8_AARCH64_SYSREG_FPSR);
9666 pu32CodeBuf[off++] = Armv8A64MkInstrMrs(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9667 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9668 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9669 }
9670
9671 /*
9672 * Translate MXCSR to FPCR.
9673 *
9674 * Unfortunately we can't emulate the exact behavior of MXCSR as we can't take
9675 * FEAT_AFP on arm64 for granted (my M2 MacBook doesn't have it). So we can't map
9676 * MXCSR.DAZ to FPCR.FIZ and MXCSR.FZ to FPCR.FZ with FPCR.AH being set.
9677 * We can only use FPCR.FZ which will flush inputs _and_ output de-normals to zero.
9678 */
9679 /** @todo Check the host supported flags (needs additional work to get the host features from CPUM)
9680 * and implement alternate handling if FEAT_AFP is present. */
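 /* Practical consequence of using only FPCR.FZ: a guest that sets just MXCSR.DAZ will
    also see output denormals flushed to zero, and one that sets just MXCSR.FZ will also
    have denormal inputs treated as zero, since FPCR.FZ covers both directions. */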
9681 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
9682 kIemNativeGstRegUse_ReadOnly);
9683
9684 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
9685
9686 /* First make sure that there is nothing set for the upper 16-bits (X86_MXCSR_MM, which we don't emulate right now). */
9687 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegTmp, idxRegMxCsr);
9688
9689 /* If either MXCSR.FZ or MXCSR.DAZ is set FPCR.FZ will be set. */
9690 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(IEMNATIVE_REG_FIXED_TMP0, idxRegTmp, X86_MXCSR_DAZ_BIT, 1);
9691 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegTmp, X86_MXCSR_FZ_BIT);
9692 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(idxRegTmp, idxRegTmp, IEMNATIVE_REG_FIXED_TMP0);
9693 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegTmp, idxRegTmp, ARMV8_FPCR_FZ_BIT);
9694
9695 /*
9696 * Init the rounding mode, the layout differs between MXCSR.RM[14:13] and FPCR.RMode[23:22]:
9697 *
9698 * Value MXCSR FPCR
9699 * 0 RN RN
9700 * 1 R- R+
9701 * 2 R+ R-
9702 * 3 RZ RZ
9703 *
9704 * Conversion can be achieved by switching bit positions
9705 */
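 /* Worked example for the table above: MXCSR.RC = 01b (round towards -infinity) must
    become FPCR.RMode = 10b, while 00b (nearest) and 11b (towards zero) stay unchanged;
    swapping the two rounding-control bits, as the two single-bit BFI inserts below do,
    is therefore sufficient. */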
9706 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT);
9707 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 14, 1);
9708 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT + 1);
9709 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 13, 1);
9710
9711 /* Write the value to FPCR. */
9712 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9713
9714 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9715 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9716 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9717# else
9718# error "Port me"
9719# endif
9720 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
9721 }
9722#endif
9723 return off;
9724}
9725
9726
9727
9728/*********************************************************************************************************************************
9729* Emitters for FPU related operations. *
9730*********************************************************************************************************************************/
9731
9732#define IEM_MC_FETCH_FCW(a_u16Fcw) \
9733 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
9734
9735/** Emits code for IEM_MC_FETCH_FCW. */
9736DECL_INLINE_THROW(uint32_t)
9737iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9738{
9739 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9740 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9741
9742 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9743
9744 /* Allocate a temporary FCW register. */
9745 /** @todo eliminate extra register */
9746 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
9747 kIemNativeGstRegUse_ReadOnly);
9748
9749 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
9750
9751 /* Free but don't flush the FCW register. */
9752 iemNativeRegFreeTmp(pReNative, idxFcwReg);
9753 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9754
9755 return off;
9756}
9757
9758
9759#define IEM_MC_FETCH_FSW(a_u16Fsw) \
9760 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
9761
9762/** Emits code for IEM_MC_FETCH_FSW. */
9763DECL_INLINE_THROW(uint32_t)
9764iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9765{
9766 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9767 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9768
9769 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9770 /* Allocate a temporary FSW register. */
9771 /** @todo eliminate extra register */
9772 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
9773 kIemNativeGstRegUse_ReadOnly);
9774
9775 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
9776
9777 /* Free but don't flush the FSW register. */
9778 iemNativeRegFreeTmp(pReNative, idxFswReg);
9779 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9780
9781 return off;
9782}
9783
9784
9785
9786#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9787
9788
9789/*********************************************************************************************************************************
9790* Emitters for SSE/AVX specific operations. *
9791*********************************************************************************************************************************/
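/*
 * Note: the emitters in this section all follow the same basic pattern: allocate the
 * guest SIMD register(s) involved (ForFullWrite when every bit gets overwritten,
 * ForUpdate for partial element stores, ReadOnly for fetches), acquire a host register
 * for the IEM_MC local variable where one is involved, emit the actual
 * load/store/broadcast, and finally free the temporary allocations without forcing a
 * flush back to CPUMCTX.
 */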
9792
9793#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
9794 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
9795
9796/** Emits code for IEM_MC_COPY_XREG_U128. */
9797DECL_INLINE_THROW(uint32_t)
9798iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
9799{
9800 /* This is a nop if the source and destination registers are the same. */
9801 if (iXRegDst != iXRegSrc)
9802 {
9803 /* Allocate destination and source register. */
9804 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
9805 kIemNativeGstSimdRegLdStSz_Low128,
9806 kIemNativeGstRegUse_ForFullWrite);
9807 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
9808 kIemNativeGstSimdRegLdStSz_Low128,
9809 kIemNativeGstRegUse_ReadOnly);
9810
9811 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9812
9813 /* Free but don't flush the source and destination register. */
9814 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9815 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9816 }
9817
9818 return off;
9819}
9820
9821
9822#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
9823 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
9824
9825/** Emits code for IEM_MC_FETCH_XREG_U128. */
9826DECL_INLINE_THROW(uint32_t)
9827iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
9828{
9829 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9830 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9831
9832 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9833 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9834
9835 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9836
9837 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9838
9839 /* Free but don't flush the source register. */
9840 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9841 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9842
9843 return off;
9844}
9845
9846
9847#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
9848 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
9849
9850#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
9851 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
9852
9853/** Emits code for IEM_MC_FETCH_XREG_U64/IEM_MC_FETCH_XREG_R64. */
9854DECL_INLINE_THROW(uint32_t)
9855iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
9856{
9857 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9858 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9859
9860 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9861 kIemNativeGstSimdRegLdStSz_Low128,
9862 kIemNativeGstRegUse_ReadOnly);
9863
9864 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9865 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9866
9867 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9868
9869 /* Free but don't flush the source register. */
9870 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9871 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9872
9873 return off;
9874}
9875
9876
9877#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
9878 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
9879
9880#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
9881 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
9882
9883/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
9884DECL_INLINE_THROW(uint32_t)
9885iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
9886{
9887 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9888 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9889
9890 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9891 kIemNativeGstSimdRegLdStSz_Low128,
9892 kIemNativeGstRegUse_ReadOnly);
9893
9894 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9895 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9896
9897 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9898
9899 /* Free but don't flush the source register. */
9900 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9901 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9902
9903 return off;
9904}
9905
9906
9907#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
9908 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
9909
9910/** Emits code for IEM_MC_FETCH_XREG_U16. */
9911DECL_INLINE_THROW(uint32_t)
9912iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
9913{
9914 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9915 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9916
9917 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9918 kIemNativeGstSimdRegLdStSz_Low128,
9919 kIemNativeGstRegUse_ReadOnly);
9920
9921 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9922 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9923
9924 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
9925
9926 /* Free but don't flush the source register. */
9927 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9928 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9929
9930 return off;
9931}
9932
9933
9934#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
9935 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
9936
9937/** Emits code for IEM_MC_FETCH_XREG_U8. */
9938DECL_INLINE_THROW(uint32_t)
9939iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
9940{
9941 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9942 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
9943
9944 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9945 kIemNativeGstSimdRegLdStSz_Low128,
9946 kIemNativeGstRegUse_ReadOnly);
9947
9948 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9949 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9950
9951 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
9952
9953 /* Free but don't flush the source register. */
9954 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9955 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9956
9957 return off;
9958}
9959
9960
9961#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
9962 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
9963
9964AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
9965#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
9966 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
9967
9968
9969/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
9970DECL_INLINE_THROW(uint32_t)
9971iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9972{
9973 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9974 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9975
9976 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9977 kIemNativeGstSimdRegLdStSz_Low128,
9978 kIemNativeGstRegUse_ForFullWrite);
9979 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
9980
9981 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
9982
9983 /* Free but don't flush the source register. */
9984 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9985 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
9986
9987 return off;
9988}
9989
9990
9991#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
9992 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
9993
9994#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
9995 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
9996
9997#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
9998 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
9999
10000#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
10001 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
10002
10003#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
10004 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
10005
10006#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
10007 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
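/* Note: the R32/R64 variants above reuse the integer element store path since RTFLOAT32U
   and RTFLOAT64U have the same size as uint32_t/uint64_t; they always target element 0. */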
10008
10009/** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8 as well as IEM_MC_STORE_XREG_R64/IEM_MC_STORE_XREG_R32. */
10010DECL_INLINE_THROW(uint32_t)
10011iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
10012 uint8_t cbLocal, uint8_t iElem)
10013{
10014 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10015 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
10016
10017#ifdef VBOX_STRICT
10018 switch (cbLocal)
10019 {
10020 case sizeof(uint64_t): Assert(iElem < 2); break;
10021 case sizeof(uint32_t): Assert(iElem < 4); break;
10022 case sizeof(uint16_t): Assert(iElem < 8); break;
10023 case sizeof(uint8_t): Assert(iElem < 16); break;
10024 default: AssertFailed();
10025 }
10026#endif
10027
10028 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10029 kIemNativeGstSimdRegLdStSz_Low128,
10030 kIemNativeGstRegUse_ForUpdate);
10031 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxDstVar, &off);
10032
10033 switch (cbLocal)
10034 {
10035 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10036 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10037 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10038 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10039 default: AssertFailed();
10040 }
10041
10042 /* Free but don't flush the source register. */
10043 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10044 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10045
10046 return off;
10047}
10048
10049
10050#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
10051 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
10052
10053/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
10054DECL_INLINE_THROW(uint32_t)
10055iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10056{
10057 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10058 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10059
10060 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10061 kIemNativeGstSimdRegLdStSz_Low128,
10062 kIemNativeGstRegUse_ForUpdate);
10063 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxDstVar, &off);
10064
10065 /* Zero the vector register first, then store the 64-bit value to the lower 64 bits. */
10066 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10067 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10068
10069 /* Free but don't flush the source register. */
10070 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10071 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10072
10073 return off;
10074}
10075
10076
10077#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
10078 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
10079
10080/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
10081DECL_INLINE_THROW(uint32_t)
10082iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10083{
10084 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10085 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10086
10087 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10088 kIemNativeGstSimdRegLdStSz_Low128,
10089 kIemNativeGstRegUse_ForUpdate);
10090 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxDstVar, &off);
10091
10092 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
10093 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10094 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10095
10096 /* Free but don't flush the source register. */
10097 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10098 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10099
10100 return off;
10101}
10102
10103
10104#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
10105 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
10106
10107/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
10108DECL_INLINE_THROW(uint32_t)
10109iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
10110 uint8_t idxSrcVar, uint8_t iDwSrc)
10111{
10112 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10113 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10114
10115 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10116 kIemNativeGstSimdRegLdStSz_Low128,
10117 kIemNativeGstRegUse_ForUpdate);
10118 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10119
10120 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
10121 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
10122
10123 /* Free but don't flush the destination register. */
10124 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10125 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10126
10127 return off;
10128}
10129
10130
10131#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10132 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10133
10134/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
10135DECL_INLINE_THROW(uint32_t)
10136iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10137{
10138 /*
10139 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10140 * if iYRegDst gets allocated first for the full write it won't load the
10141 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
10142 * duplicated from the already allocated host register for iYRegDst containing
10143 * garbage. This will be caught by the guest register value checking in debug
10144 * builds.
10145 */
10146 if (iYRegDst != iYRegSrc)
10147 {
10148 /* Allocate destination and source register. */
10149 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10150 kIemNativeGstSimdRegLdStSz_256,
10151 kIemNativeGstRegUse_ForFullWrite);
10152 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10153 kIemNativeGstSimdRegLdStSz_Low128,
10154 kIemNativeGstRegUse_ReadOnly);
10155
10156 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10157 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10158
10159 /* Free but don't flush the source and destination register. */
10160 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10161 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10162 }
10163 else
10164 {
10165 /* This effectively only clears the upper 128-bits of the register. */
10166 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10167 kIemNativeGstSimdRegLdStSz_High128,
10168 kIemNativeGstRegUse_ForFullWrite);
10169
10170 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10171
10172 /* Free but don't flush the destination register. */
10173 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10174 }
10175
10176 return off;
10177}
10178
10179
10180#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10181 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10182
10183/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
10184DECL_INLINE_THROW(uint32_t)
10185iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10186{
10187 /*
10188 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10189 * if iYRegDst gets allocated first for the full write it won't load the
10190 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
10191 * duplicated from the already allocated host register for iYRegDst containing
10192 * garbage. This will be catched by the guest register value checking in debug
10193 * builds. iYRegSrc == iYRegDst would effectively only clear any upper 256-bits
10194 * for a zmm register we don't support yet, so this is just a nop.
10195 */
10196 if (iYRegDst != iYRegSrc)
10197 {
10198 /* Allocate destination and source register. */
10199 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10200 kIemNativeGstSimdRegLdStSz_256,
10201 kIemNativeGstRegUse_ReadOnly);
10202 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10203 kIemNativeGstSimdRegLdStSz_256,
10204 kIemNativeGstRegUse_ForFullWrite);
10205
10206 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10207
10208 /* Free but don't flush the source and destination register. */
10209 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10210 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10211 }
10212
10213 return off;
10214}
10215
10216
10217#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
10218 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
10219
10220/** Emits code for IEM_MC_FETCH_YREG_U128. */
10221DECL_INLINE_THROW(uint32_t)
10222iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
10223{
10224 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10225 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
10226
10227 Assert(iDQWord <= 1);
10228 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10229 iDQWord == 1
10230 ? kIemNativeGstSimdRegLdStSz_High128
10231 : kIemNativeGstSimdRegLdStSz_Low128,
10232 kIemNativeGstRegUse_ReadOnly);
10233
10234 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10235 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10236
10237 if (iDQWord == 1)
10238 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10239 else
10240 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10241
10242 /* Free but don't flush the source register. */
10243 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10244 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10245
10246 return off;
10247}
10248
10249
10250#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
10251 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
10252
10253/** Emits code for IEM_MC_FETCH_YREG_U64. */
10254DECL_INLINE_THROW(uint32_t)
10255iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
10256{
10257 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10258 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10259
10260 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10261 iQWord >= 2
10262 ? kIemNativeGstSimdRegLdStSz_High128
10263 : kIemNativeGstSimdRegLdStSz_Low128,
10264 kIemNativeGstRegUse_ReadOnly);
10265
10266 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10267 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10268
10269 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
10270
10271 /* Free but don't flush the source register. */
10272 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10273 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10274
10275 return off;
10276}
10277
10278
10279#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
10280 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
10281
10282/** Emits code for IEM_MC_FETCH_YREG_U32. */
10283DECL_INLINE_THROW(uint32_t)
10284iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
10285{
10286 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10287 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10288
10289 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10290 iDWord >= 4
10291 ? kIemNativeGstSimdRegLdStSz_High128
10292 : kIemNativeGstSimdRegLdStSz_Low128,
10293 kIemNativeGstRegUse_ReadOnly);
10294
10295 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10296 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10297
10298 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
10299
10300 /* Free but don't flush the source register. */
10301 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10302 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10303
10304 return off;
10305}
10306
10307
10308#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
10309 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
10310
10311/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
10312DECL_INLINE_THROW(uint32_t)
10313iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10314{
10315 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10316 kIemNativeGstSimdRegLdStSz_High128,
10317 kIemNativeGstRegUse_ForFullWrite);
10318
10319 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10320
10321 /* Free but don't flush the register. */
10322 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10323
10324 return off;
10325}
10326
10327
10328#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
10329 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
10330
10331/** Emits code for IEM_MC_STORE_YREG_U128. */
10332DECL_INLINE_THROW(uint32_t)
10333iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
10334{
10335 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10336 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10337
10338 Assert(iDQword <= 1);
10339 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10340 iDQword == 0
10341 ? kIemNativeGstSimdRegLdStSz_Low128
10342 : kIemNativeGstSimdRegLdStSz_High128,
10343 kIemNativeGstRegUse_ForFullWrite);
10344
10345 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10346
10347 if (iDQword == 0)
10348 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10349 else
10350 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
10351
10352 /* Free but don't flush the source register. */
10353 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10354 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10355
10356 return off;
10357}
10358
10359
10360#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10361 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10362
10363/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
10364DECL_INLINE_THROW(uint32_t)
10365iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10366{
10367 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10368 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10369
10370 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10371 kIemNativeGstSimdRegLdStSz_256,
10372 kIemNativeGstRegUse_ForFullWrite);
10373
10374 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10375
10376 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10377 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10378
10379 /* Free but don't flush the source register. */
10380 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10381 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10382
10383 return off;
10384}
10385
10386
10387#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
10388 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
10389
10390/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
10391DECL_INLINE_THROW(uint32_t)
10392iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10393{
10394 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10395 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10396
10397 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10398 kIemNativeGstSimdRegLdStSz_256,
10399 kIemNativeGstRegUse_ForFullWrite);
10400
10401 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10402
10403 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10404 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10405
10406 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10407 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10408
10409 return off;
10410}
10411
10412
10413#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
10414 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
10415
10416/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
10417DECL_INLINE_THROW(uint32_t)
10418iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10419{
10420 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10421 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10422
10423 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10424 kIemNativeGstSimdRegLdStSz_256,
10425 kIemNativeGstRegUse_ForFullWrite);
10426
10427 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10428
10429 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10430 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10431
10432 /* Free but don't flush the source register. */
10433 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10434 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10435
10436 return off;
10437}
10438
10439
10440#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
10441 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
10442
10443/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
10444DECL_INLINE_THROW(uint32_t)
10445iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10446{
10447 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10448 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10449
10450 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10451 kIemNativeGstSimdRegLdStSz_256,
10452 kIemNativeGstRegUse_ForFullWrite);
10453
10454 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10455
10456 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10457 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10458
10459 /* Free but don't flush the source register. */
10460 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10461 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10462
10463 return off;
10464}
10465
10466
10467#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
10468 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
10469
10470/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
10471DECL_INLINE_THROW(uint32_t)
10472iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10473{
10474 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10475 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10476
10477 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10478 kIemNativeGstSimdRegLdStSz_256,
10479 kIemNativeGstRegUse_ForFullWrite);
10480
10481 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10482
10483 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10484 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10485
10486 /* Free but don't flush the source register. */
10487 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10488 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10489
10490 return off;
10491}
10492
10493
10494#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
10495 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
10496
10497/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
10498DECL_INLINE_THROW(uint32_t)
10499iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10500{
10501 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10502 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10503
10504 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10505 kIemNativeGstSimdRegLdStSz_256,
10506 kIemNativeGstRegUse_ForFullWrite);
10507
10508 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10509
10510 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10511
10512 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10513 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10514
10515 return off;
10516}
10517
10518
10519#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
10520 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
10521
10522/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
10523DECL_INLINE_THROW(uint32_t)
10524iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10525{
10526 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10527 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10528
10529 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10530 kIemNativeGstSimdRegLdStSz_256,
10531 kIemNativeGstRegUse_ForFullWrite);
10532
10533 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10534
10535 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10536
10537 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10538 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10539
10540 return off;
10541}
10542
10543
10544#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10545 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10546
10547/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
10548DECL_INLINE_THROW(uint32_t)
10549iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10550{
10551 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10552 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10553
10554 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10555 kIemNativeGstSimdRegLdStSz_256,
10556 kIemNativeGstRegUse_ForFullWrite);
10557
10558 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10559
10560 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10561
10562 /* Free but don't flush the source register. */
10563 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10564 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10565
10566 return off;
10567}
10568
10569
10570#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10571 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10572
10573/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
10574DECL_INLINE_THROW(uint32_t)
10575iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10576{
10577 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10578 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10579
10580 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10581 kIemNativeGstSimdRegLdStSz_256,
10582 kIemNativeGstRegUse_ForFullWrite);
10583
10584 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10585
10586 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10587
10588 /* Free but don't flush the source register. */
10589 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10590 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10591
10592 return off;
10593}
10594
10595
10596#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10597 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10598
10599/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
10600DECL_INLINE_THROW(uint32_t)
10601iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10602{
10603 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10604 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10605
10606 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10607 kIemNativeGstSimdRegLdStSz_256,
10608 kIemNativeGstRegUse_ForFullWrite);
10609
10610 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10611
10612 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
10613
10614 /* Free but don't flush the source register. */
10615 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10616 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10617
10618 return off;
10619}
10620
10621
10622#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10623 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10624
10625/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
10626DECL_INLINE_THROW(uint32_t)
10627iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10628{
10629 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10630 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10631
10632 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10633 kIemNativeGstSimdRegLdStSz_256,
10634 kIemNativeGstRegUse_ForFullWrite);
10635
10636 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10637
10638 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10639 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
10640
10641 /* Free but don't flush the source register. */
10642 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10643 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10644
10645 return off;
10646}
10647
10648
10649#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10650 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10651
10652/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
10653DECL_INLINE_THROW(uint32_t)
10654iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10655{
10656 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10657 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10658
10659 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10660 kIemNativeGstSimdRegLdStSz_256,
10661 kIemNativeGstRegUse_ForFullWrite);
10662
10663 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10664
10665 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10666 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10667
10668 /* Free but don't flush the source register. */
10669 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10670 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10671
10672 return off;
10673}
10674
10675
10676#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
10677 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
10678
10679/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
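/* Net effect of the code below: dst[63:0] = a_u64Local, dst[127:64] = a_iYRegSrcHx[127:64],
   dst[255:128] = 0. */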
10680DECL_INLINE_THROW(uint32_t)
10681iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
10682{
10683 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10684 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10685
10686 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10687 kIemNativeGstSimdRegLdStSz_256,
10688 kIemNativeGstRegUse_ForFullWrite);
10689 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10690 kIemNativeGstSimdRegLdStSz_Low128,
10691 kIemNativeGstRegUse_ReadOnly);
10692 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10693
10694 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10695 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10696 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10697
10698 /* Free but don't flush the source and destination registers. */
10699 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10700 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10701 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10702
10703 return off;
10704}
10705
10706
10707#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
10708 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
10709
10710/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
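/* Net effect of the code below: dst[63:0] = a_iYRegSrcHx[63:0], dst[127:64] = a_u64Local,
   dst[255:128] = 0. */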
10711DECL_INLINE_THROW(uint32_t)
10712iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
10713{
10714 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10715 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10716
10717 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10718 kIemNativeGstSimdRegLdStSz_256,
10719 kIemNativeGstRegUse_ForFullWrite);
10720 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10721 kIemNativeGstSimdRegLdStSz_Low128,
10722 kIemNativeGstRegUse_ReadOnly);
10723 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10724
10725 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10726 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
10727 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10728
10729 /* Free but don't flush the source and destination registers. */
10730 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10731 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10732 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10733
10734 return off;
10735}
10736
10737
10738#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
10739 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
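/* Each bit set in a_bMask zeroes the corresponding dword of the XMM register; e.g. a mask
   of 0x3 clears the low 64 bits while leaving the upper two dwords untouched. */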
10740
10741
10742/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
10743DECL_INLINE_THROW(uint32_t)
10744iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
10745{
10746 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10747 kIemNativeGstSimdRegLdStSz_Low128,
10748 kIemNativeGstRegUse_ForUpdate);
10749
10750 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
10751 if (bImm8Mask & RT_BIT(0))
10752 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
10753 if (bImm8Mask & RT_BIT(1))
10754 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
10755 if (bImm8Mask & RT_BIT(2))
10756 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
10757 if (bImm8Mask & RT_BIT(3))
10758 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
10759
10760 /* Free but don't flush the destination register. */
10761 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10762
10763 return off;
10764}
10765
10766
10767#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
10768 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
10769
10770#define IEM_MC_FETCH_YREG_YMM(a_uYmmDst, a_iYRegSrc) \
10771 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_uYmmDst, a_iYRegSrc)
10772
10773/** Emits code for IEM_MC_FETCH_YREG_U256/IEM_MC_FETCH_YREG_YMM. */
10774DECL_INLINE_THROW(uint32_t)
10775iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
10776{
10777 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10778 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
10779
10780 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10781 kIemNativeGstSimdRegLdStSz_256,
10782 kIemNativeGstRegUse_ReadOnly);
10783 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10784
10785 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
10786
10787 /* Free but don't flush the source register. */
10788 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10789 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10790
10791 return off;
10792}
10793
10794
10795#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
10796 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
10797
10798#define IEM_MC_STORE_YREG_YMM_ZX_VLMAX(a_iYRegDst, a_uYmmSrc) \
10799 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_uYmmSrc)
10800
10801/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX/IEM_MC_STORE_YREG_YMM_ZX_VLMAX. */
10802DECL_INLINE_THROW(uint32_t)
10803iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
10804{
10805 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10806 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10807
10808 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10809 kIemNativeGstSimdRegLdStSz_256,
10810 kIemNativeGstRegUse_ForFullWrite);
10811 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10812
10813 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
10814
10815 /* Free but don't flush the source register. */
10816 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10817 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10818
10819 return off;
10820}
10821
10822
10823#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
10824 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
10825
10826
10827/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
10828DECL_INLINE_THROW(uint32_t)
10829iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
10830 uint8_t idxSrcVar, uint8_t iDwSrc)
10831{
10832 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10833 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10834
10835 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10836 iDwDst < 4
10837 ? kIemNativeGstSimdRegLdStSz_Low128
10838 : kIemNativeGstSimdRegLdStSz_High128,
10839 kIemNativeGstRegUse_ForUpdate);
10840 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10841 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10842
10843 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
10844 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
10845
10846 /* Free but don't flush the destination register. */
10847 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10848 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10849 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10850
10851 return off;
10852}
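/* Guest-level sketch (illustrative, RTUINT256U member names): a single dword is copied across,
        <guest YMM register a_iYRegDst>.au32[a_iDwDst] = a_u256Value.au32[a_iDwSrc];
   Only the 128-bit half containing a_iDwDst is loaded for update; everything else is preserved. */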
10853
10854
10855#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
10856 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
10857
10858
10859/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
10860DECL_INLINE_THROW(uint32_t)
10861iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
10862 uint8_t idxSrcVar, uint8_t iQwSrc)
10863{
10864 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10865 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10866
10867 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10868 iQwDst < 2
10869 ? kIemNativeGstSimdRegLdStSz_Low128
10870 : kIemNativeGstSimdRegLdStSz_High128,
10871 kIemNativeGstRegUse_ForUpdate);
10872 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10873 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
10874
10875 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
10876 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
10877
10878 /* Free but don't flush the destination register. */
10879 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10880 iemNativeRegFreeTmp(pReNative, idxRegTmp);
10881 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10882
10883 return off;
10884}
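/* Guest-level sketch (illustrative): same pattern as the dword variant above, but per qword,
        <guest YMM register a_iYRegDst>.au64[a_iQwDst] = a_u256Value.au64[a_iQwSrc];
   with only the affected 128-bit half loaded for update. */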
10885
10886
10887#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
10888 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
10889
10890
10891/** Emits code for IEM_MC_STORE_YREG_U64. */
10892DECL_INLINE_THROW(uint32_t)
10893iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
10894{
10895 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10896 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10897
10898 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10899 iQwDst < 2
10900 ? kIemNativeGstSimdRegLdStSz_Low128
10901 : kIemNativeGstSimdRegLdStSz_High128,
10902 kIemNativeGstRegUse_ForUpdate);
10903
10904 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10905
10906 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
10907
10908 /* Free but don't flush the destination register. */
10909 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10910 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10911
10912 return off;
10913}
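/* Guest-level sketch (illustrative): the 64-bit source variable is stored into one qword element,
        <guest YMM register a_iYRegDst>.au64[a_iQword] = a_u64Value;
   leaving the remaining elements of the register untouched (ForUpdate). */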
10914
10915
10916#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
10917 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
10918
10919/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
10920DECL_INLINE_THROW(uint32_t)
10921iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10922{
10923 RT_NOREF(pReNative, iYReg);
10924 /** @todo Needs to be implemented when support for AVX-512 is added. */
10925 return off;
10926}
10927
10928
10929
10930/*********************************************************************************************************************************
10931* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
10932*********************************************************************************************************************************/
10933
10934/**
10935 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
10936 */
10937DECL_INLINE_THROW(uint32_t)
10938iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs, uint8_t idxInstr)
10939{
10940 /* Grab the MXCSR register; it must not be call volatile or we would end up freeing it when setting up the call below. */
10941 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
10942 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
10943 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
10944
10945#if 0 /* This is not required right now as the called helper will set up the SSE/AVX state if it is an assembly one. */
10946 /*
10947 * Need to do the FPU preparation.
10948 */
10949 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
10950#endif
10951
10952 /*
10953 * Do all the call setup and cleanup.
10954 */
10955 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS,
10956 false /*fFlushPendingWrites*/);
10957
10958 /*
10959 * Load the MXCSR register into the first argument and mask out the current exception flags.
10960 */
10961 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
10962 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
10963
10964 /*
10965 * Make the call.
10966 */
10967 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnAImpl);
10968
10969 /*
10970 * The updated MXCSR is in the return register; update the exception status flags.
10971 *
10972 * The return register is marked allocated as a temporary because it is required for the
10973 * exception generation check below.
10974 */
10975 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG)));
10976 uint8_t const idxRegTmp = iemNativeRegMarkAllocated(pReNative, IEMNATIVE_CALL_RET_GREG, kIemNativeWhat_Tmp);
10977 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegTmp);
10978
10979#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
10980 /* Writeback the MXCSR register value (there is no delayed writeback for such registers at the moment). */
10981 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
10982#endif
10983
10984 /*
10985 * Make sure we don't have any outstanding guest register writes as we may
10986 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
10987 */
10988 off = iemNativeRegFlushPendingWrites(pReNative, off);
10989
10990#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
10991 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
10992#else
10993 RT_NOREF(idxInstr);
10994#endif
10995
10996 /** @todo r=aeichner ANDN from BMI1 would save us a temporary and additional instruction here but I don't
10997 * want to assume the existence of this instruction at the moment. */
10998 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
10999
11000 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp2, idxRegTmp);
11001 /* tmp &= X86_MXCSR_XCPT_MASK */
11002 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
11003 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
11004 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
11005 /* tmp = ~tmp */
11006 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
11007 /* tmp &= mxcsr */
11008 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegTmp2);
11009 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseSseAvxFpRelated>(pReNative, off, idxRegTmp,
11010 X86_MXCSR_XCPT_FLAGS);
11011
11012 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
11013 iemNativeRegFreeTmp(pReNative, idxRegTmp);
11014 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
11015
11016 return off;
11017}
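/* Sketch of the logic the common worker above emits (illustrative pseudo-C; the local names here
   are invented for the example, only the X86_MXCSR_* constants are real):
        uint32_t const fMxCsrOld = <guest MXCSR>;
        uint32_t const fMxCsrRet = pfnAImpl(fMxCsrOld & ~X86_MXCSR_XCPT_FLAGS, ...);  // helper returns updated MXCSR
        <guest MXCSR>            = fMxCsrOld | fMxCsrRet;                             // accumulate exception flags
        uint32_t const fUnmasked = fMxCsrRet & ~((fMxCsrRet & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT);
        if (fUnmasked & X86_MXCSR_XCPT_FLAGS)
            <exit the TB via kIemNativeLabelType_RaiseSseAvxFpRelated>;
   All pending guest register writes are flushed before the check so CPUMCTX is consistent if an
   exception is raised. */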
11018
11019
11020#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
11021 off = iemNativeEmitCallSseAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
11022
11023/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
11024DECL_INLINE_THROW(uint32_t)
11025iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
11026{
11027 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11028 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11029 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
11030}
11031
11032
11033#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
11034 off = iemNativeEmitCallSseAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
11035
11036/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
11037DECL_INLINE_THROW(uint32_t)
11038iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
11039 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
11040{
11041 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11042 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11043 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
11044 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
11045}
11046
11047
11048/*********************************************************************************************************************************
11049* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
11050*********************************************************************************************************************************/
11051
11052#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
11053 off = iemNativeEmitCallAvxAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))
11054
11055/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
11056DECL_INLINE_THROW(uint32_t)
11057iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
11058{
11059 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11060 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11061 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
11062}
11063
11064
11065#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
11066 off = iemNativeEmitCallAvxAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
11067
11068/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
11069DECL_INLINE_THROW(uint32_t)
11070iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
11071 uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
11072{
11073 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11074 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11075 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
11076 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
11077}
11078
11079
11080#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
11081
11082
11083/*********************************************************************************************************************************
11084* Include instruction emitters. *
11085*********************************************************************************************************************************/
11086#include "target-x86/IEMAllN8veEmit-x86.h"
11087