VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@ 106443

Last change on this file since 106443 was 106443, checked in by vboxsync, 6 weeks ago

VMM/IEM: Reduced the number of arguments for iemNativeEmitTlbLookup. bugref:10720

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 543.3 KB
1/* $Id: IEMAllN8veRecompFuncs.h 106443 2024-10-17 12:02:12Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62#include "target-x86/IEMAllN8veEmit-x86.h"
63
64
65/*
66 * Narrow down configs here to avoid wasting time on unused configs.
67 * Note! Same checks in IEMAllThrdRecompiler.cpp.
68 */
69
70#ifndef IEM_WITH_CODE_TLB
71# error The code TLB must be enabled for the recompiler.
72#endif
73
74#ifndef IEM_WITH_DATA_TLB
75# error The data TLB must be enabled for the recompiler.
76#endif
77
78#ifndef IEM_WITH_SETJMP
79# error The setjmp approach must be enabled for the recompiler.
80#endif
81
82#if defined(IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS) && !defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
83# error "IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS requires IEMNATIVE_WITH_SIMD_REG_ALLOCATOR"
84#endif
85
86
87/*********************************************************************************************************************************
88* Code emitters for flushing pending guest register writes and sanity checks *
89*********************************************************************************************************************************/
90
91#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
92
93# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
94/**
95 * Updates IEMCPU::uPcUpdatingDebug.
96 */
97DECL_INLINE_THROW(uint32_t) iemNativeEmitPcDebugAdd(PIEMRECOMPILERSTATE pReNative, uint32_t off, int64_t offDisp, uint8_t cBits)
98{
99# ifdef RT_ARCH_AMD64
100 if (pReNative->Core.fDebugPcInitialized && cBits >= 32)
101 {
102 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
103 if ((int32_t)offDisp == offDisp || cBits != 64)
104 {
105 /* add [q]word [pVCpu->iem.s.uPcUpdatingDebug], imm32/imm8 */
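             /* Opcode 0x83 takes a sign-extended imm8 and 0x81 a full imm32 (ModRM.reg = 0 selects ADD in
                that group); the REX.W emitted when cBits == 64 widens the memory add to a qword. */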
106 if (cBits == 64)
107 pCodeBuf[off++] = X86_OP_REX_W;
108 pCodeBuf[off++] = (int8_t)offDisp == offDisp ? 0x83 : 0x81;
109 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
110 if ((int8_t)offDisp == offDisp)
111 pCodeBuf[off++] = (int8_t)offDisp;
112 else
113 {
114 *(int32_t *)&pCodeBuf[off] = (int32_t)offDisp;
115 off += sizeof(int32_t);
116 }
117 }
118 else
119 {
120 /* mov tmp0, imm64 */
121 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, offDisp);
122
123 /* add [pVCpu->iem.s.uPcUpdatingDebug], tmp0 */
124 if (cBits == 64)
125 pCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 >= 8 ? X86_OP_REX_R : 0);
126 else if (IEMNATIVE_REG_FIXED_TMP0 >= 8)
127 pCodeBuf[off++] = X86_OP_REX_R;
128 pCodeBuf[off++] = 0x01;
129 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0 & 7,
130 RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
131 }
132 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
133 return off;
134 }
135# endif
136
137 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
138 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 12);
139
140 if (pReNative->Core.fDebugPcInitialized)
141 {
142 Log4(("uPcUpdatingDebug+=%ld cBits=%d off=%#x\n", offDisp, cBits, off));
143 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
144 }
145 else
146 {
147 Log4(("uPcUpdatingDebug=rip+%ld cBits=%d off=%#x\n", offDisp, cBits, off));
148 pReNative->Core.fDebugPcInitialized = true;
149 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
150 }
151
152 if (cBits == 64)
153 off = iemNativeEmitAddGprImmEx(pCodeBuf, off, idxTmpReg, offDisp, IEMNATIVE_REG_FIXED_TMP0);
154 else
155 {
156 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxTmpReg, (int32_t)offDisp, IEMNATIVE_REG_FIXED_TMP0);
157 if (cBits == 16)
158 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, UINT16_MAX);
159 }
160
161 off = iemNativeEmitStoreGprToVCpuU64Ex(pCodeBuf, off, idxTmpReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug),
162 IEMNATIVE_REG_FIXED_TMP0);
163
164 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
165 iemNativeRegFreeTmp(pReNative, idxTmpReg);
166 return off;
167}
168
169
170# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
171DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
172{
173 /* Compare the shadow with the context value, they should match. */
174 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
175 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
176 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
177 return off;
178}
179# endif
180
181#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
182
183/**
184 * Flushes delayed write of a specific guest register.
185 *
186 * This must be called prior to calling CImpl functions and any helpers that use
187 * the guest state (like raising exceptions) and such.
188 *
189 * This optimization has not yet been implemented. The first target would be
190 * RIP updates, since these are the most common ones.
191 */
192DECL_INLINE_THROW(uint32_t)
193iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
194{
195#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
196 /* If for whatever reason it is possible to reference the PC register at some point we need to do the writeback here first. */
197#endif
198
199#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
200#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
201 if ( enmClass == kIemNativeGstRegRef_EFlags
202 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
203 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
204#else
205 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
206#endif
207
208 if ( enmClass == kIemNativeGstRegRef_Gpr
209 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
210 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
211#endif
212
213#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
214 if ( enmClass == kIemNativeGstRegRef_XReg
215 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
216 {
217 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
218 /* Flush the shadows as the register needs to be reloaded (there is no guarantee right now that the referenced register doesn't change). */
219 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
220
221 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
222 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
223 }
224#endif
225 RT_NOREF(pReNative, enmClass, idxReg);
226 return off;
227}
228
229
230
231/*********************************************************************************************************************************
232* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
233*********************************************************************************************************************************/
234
235#undef IEM_MC_BEGIN /* unused */
236#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
237 { \
238 Assert(pReNative->Core.bmVars == 0); \
239 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
240 Assert(pReNative->Core.bmStack == 0); \
241 pReNative->fMc = (a_fMcFlags); \
242 pReNative->fCImpl = (a_fCImplFlags); \
243 pReNative->cArgsX = (a_cArgsIncludingHidden)
244
245/** We have to get to the end in recompilation mode, as otherwise we won't
246 * generate code for all the IEM_MC_IF_XXX branches. */
247#define IEM_MC_END() \
248 iemNativeVarFreeAll(pReNative); \
249 } return off
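
/* Illustrative use only (the generated function around such a block is hypothetical, not part of
 * this file):
 *     IEM_MC_BEGIN_EX(IEM_MC_F_64BIT, 0, 0);
 *     IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(cbInstr, VINF_SUCCESS);
 *     IEM_MC_END();
 * IEM_MC_BEGIN_EX opens a scope and resets the variable/stack bookkeeping, the body emits native
 * code by advancing 'off', and IEM_MC_END() frees all variables and returns the final 'off'. */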
250
251
252
253/*********************************************************************************************************************************
254* Liveness Stubs *
255*********************************************************************************************************************************/
256
257#define IEM_MC_LIVENESS_GREG_INPUT(a_iGReg) ((void)0)
258#define IEM_MC_LIVENESS_GREG_CLOBBER(a_iGReg) ((void)0)
259#define IEM_MC_LIVENESS_GREG_MODIFY(a_iGReg) ((void)0)
260
261#define IEM_MC_LIVENESS_MREG_INPUT(a_iMReg) ((void)0)
262#define IEM_MC_LIVENESS_MREG_CLOBBER(a_iMReg) ((void)0)
263#define IEM_MC_LIVENESS_MREG_MODIFY(a_iMReg) ((void)0)
264
265#define IEM_MC_LIVENESS_XREG_INPUT(a_iXReg) ((void)0)
266#define IEM_MC_LIVENESS_XREG_CLOBBER(a_iXReg) ((void)0)
267#define IEM_MC_LIVENESS_XREG_MODIFY(a_iXReg) ((void)0)
268
269#define IEM_MC_LIVENESS_MXCSR_INPUT() ((void)0)
270#define IEM_MC_LIVENESS_MXCSR_CLOBBER() ((void)0)
271#define IEM_MC_LIVENESS_MXCSR_MODIFY() ((void)0)
272
273
274/*********************************************************************************************************************************
275* Native Emitter Support. *
276*********************************************************************************************************************************/
277
278#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
279
280#define IEM_MC_NATIVE_ELSE() } else {
281
282#define IEM_MC_NATIVE_ENDIF() } ((void)0)
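
/* Note: a_fSupportedHosts is a mask of RT_ARCH_VAL_XXX values and RT_ARCH_VAL is a compile-time
 * constant for the host, so the if/else above folds to a constant condition and the compiler can
 * drop the branch that doesn't apply to this build. */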
283
284
285#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
286 off = a_fnEmitter(pReNative, off)
287
288#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
289 off = a_fnEmitter(pReNative, off, (a0))
290
291#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
292 off = a_fnEmitter(pReNative, off, (a0), (a1))
293
294#define IEM_MC_NATIVE_EMIT_2_EX(a_fnEmitter, a0, a1) \
295 off = a_fnEmitter(pReNative, off, pCallEntry->idxInstr, (a0), (a1))
296
297#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
298 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
299
300#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
301 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
302
303#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
304 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
305
306#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
307 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
308
309#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
310 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
311
312#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
313 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
314
315
316#ifndef RT_ARCH_AMD64
317# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
318#else
319/** @note This is a naive approach that ASSUMES that the register isn't
320 * allocated, so it only works safely for the first allocation(s) in
321 * an MC block. */
322# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
323 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
324
325DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg,
326 uint32_t off, bool fAllocated);
327
328DECL_INLINE_THROW(uint32_t)
329iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
330{
331 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
332 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
333 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
334
335# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
336 /* Must flush the register if it holds pending writes. */
337 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
338 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
339 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
340# endif
341
342 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off, false /*fAllocated*/);
343 return off;
344}
345
346#endif /* RT_ARCH_AMD64 */
347
348
349
350/*********************************************************************************************************************************
351* Emitters for standalone C-implementation deferals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
352*********************************************************************************************************************************/
353
354#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
355 pReNative->fMc = 0; \
356 pReNative->fCImpl = (a_fFlags); \
357 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, \
358 a_cbInstr) /** @todo not used ... */
359
360
361#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
362 pReNative->fMc = 0; \
363 pReNative->fCImpl = (a_fFlags); \
364 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
365
366DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
367 uint8_t idxInstr, uint64_t a_fGstShwFlush,
368 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
369{
370 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
371}
372
373
374#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
375 pReNative->fMc = 0; \
376 pReNative->fCImpl = (a_fFlags); \
377 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
378 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
379
380DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
381 uint8_t idxInstr, uint64_t a_fGstShwFlush,
382 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
383{
384 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
385}
386
387
388#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
389 pReNative->fMc = 0; \
390 pReNative->fCImpl = (a_fFlags); \
391 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
392 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
393
394DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
395 uint8_t idxInstr, uint64_t a_fGstShwFlush,
396 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
397 uint64_t uArg2)
398{
399 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
400}
401
402
403
404/*********************************************************************************************************************************
405* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
406*********************************************************************************************************************************/
407
408/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
409 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
410DECL_INLINE_THROW(uint32_t)
411iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
412{
413 /*
414 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
415 * return with special status code and make the execution loop deal with
416 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
417 * exception and won't continue execution. While execution could continue
418 * w/o interruption when only CPUMCTX_DBG_DBGF_MASK is set, it will probably
419 * drop into the debugger anyway, so it's not worth the effort of trying to
420 * service it here and we just lump it in with the handling of the others.
421 *
422 * To simplify the code and the register state management even more (wrt
423 * the immediate in the AND operation), we always update the flags and skip
424 * the conditional jump associated with the extra check.
425 */
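 /* In short: exit the TB via the ReturnWithFlags path if TF or any debug-hit/DBGF bit is set,
    otherwise just clear RF and the interrupt-inhibit bits and write EFLAGS back below. */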
426 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
427 <= UINT32_MAX);
428#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
429 AssertMsg( pReNative->idxCurCall == 0
430 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
431 IEMLIVENESSBIT_IDX_EFL_OTHER)),
432 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1],
433 IEMLIVENESSBIT_IDX_EFL_OTHER)));
434#endif
435
436 /*
437 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label
438 * any pending register writes must be flushed.
439 */
440 off = iemNativeRegFlushPendingWrites(pReNative, off);
441
442 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
443 RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER),
444 RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER));
445 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_ReturnWithFlags>(pReNative, off, idxEflReg,
446 X86_EFL_TF
447 | CPUMCTX_DBG_HIT_DRX_MASK
448 | CPUMCTX_DBG_DBGF_MASK);
449 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
450 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
451
452 /* Free but don't flush the EFLAGS register. */
453 iemNativeRegFreeTmp(pReNative, idxEflReg);
454
455 return off;
456}
457
458
459/** Helper for iemNativeEmitFinishInstructionWithStatus. */
460DECLINLINE(RTGCPHYS) iemNativeCallEntryToGCPhysPc(PCIEMTB pTb, PCIEMTHRDEDCALLENTRY pCallEntry)
461{
462 unsigned const offOpcodes = pCallEntry->offOpcode;
463 unsigned const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
464 for (unsigned idxRange = 0; idxRange < cRanges; idxRange++)
465 {
466 unsigned const offRange = offOpcodes - (unsigned)pTb->aRanges[idxRange].offOpcodes;
467 if (offRange < (unsigned)pTb->aRanges[idxRange].cbOpcodes)
468 return iemTbGetRangePhysPageAddr(pTb, idxRange) + offRange + pTb->aRanges[idxRange].offPhysPage;
469 }
470 AssertFailedReturn(NIL_RTGCPHYS);
471}
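
/* Example: with aRanges[0] covering opcode bytes [0,16) and aRanges[1] covering [16,48), an
 * offOpcode of 20 lands in range 1 at offset 4, i.e. the result is that range's physical page
 * address + offPhysPage + 4. Ranges starting beyond offOpcode wrap to huge offsets in the
 * unsigned subtraction and thus also fail the bounds check. */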
472
473
474/** The VINF_SUCCESS dummy. */
475template<int const a_rcNormal, bool const a_fIsJump>
476DECL_FORCE_INLINE_THROW(uint32_t)
477iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry,
478 int32_t const offJump)
479{
480 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
481 if (a_rcNormal != VINF_SUCCESS)
482 {
483#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
484 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
485#else
486 RT_NOREF_PV(pCallEntry);
487#endif
488
489 /* As this code returns from the TB any pending register writes must be flushed. */
490 off = iemNativeRegFlushPendingWrites(pReNative, off);
491
492 /*
493 * If we're in a conditional, mark the current branch as exiting so we
494 * can disregard its state when we hit the IEM_MC_ENDIF.
495 */
496 iemNativeMarkCurCondBranchAsExiting(pReNative);
497
498 /*
499 * Use the lookup table for getting to the next TB quickly.
500 * Note! In this code path there can only be one entry at present.
501 */
502 uint8_t const idxTbLookupFirst = IEM_TB_LOOKUP_TAB_GET_IDX(pCallEntry->uTbLookup);
503 PCIEMTB const pTbOrg = pReNative->pTbOrg;
504 Assert(idxTbLookupFirst < pTbOrg->cTbLookupEntries);
505 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1);
506
507#if 0
508 /* Update IEMCPU::ppTbLookupEntryR3 to get the best lookup effect. */
509 PIEMTB * const ppTbLookupFirst = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTbOrg, idxTbLookupFirst);
510 Assert(IEM_TB_LOOKUP_TAB_GET_SIZE(pCallEntry->uTbLookup) == 1); /* large stuff later/never */
511 off = iemNativeEmitStoreImmToVCpuU64(pReNative, off, (uintptr_t)ppTbLookupFirst,
512 RT_UOFFSETOF(VMCPU, iem.s.ppTbLookupEntryR3));
513
514 return iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnBreak);
515
516#else
517 /* Load the index as argument #1 for the helper call at the given label. */
518 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxTbLookupFirst);
519
520 /*
521 * Figure out the physical address of the current instruction and see
522 * whether the next instruction we're about to execute is in the same
523 * page so we can optimistically skip TLB loading.
524 *
525 * - This is safe for all cases in FLAT mode.
526 * - In segmented modes it is complicated, given that a negative
527 * jump may underflow EIP and a forward jump may overflow or run into
528 * CS.LIM and trigger a #GP. The only thing we can get away with
529 * now at compile time is forward jumps w/o CS.LIM checks, since the
530 * lack of CS.LIM checks means we're good for the entire physical page
531 * we're executing on and another 15 bytes before we run into CS.LIM.
532 */
533 if ( IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)
534# if 0 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. See also iemNativeHlpReturnBreakViaLookup. */
535 || !(pTbOrg->fFlags & IEMTB_F_CS_LIM_CHECKS)
536# endif
537 )
538 {
539 RTGCPHYS const GCPhysPcCurrent = iemNativeCallEntryToGCPhysPc(pTbOrg, pCallEntry);
540 RTGCPHYS const GCPhysPcNext = GCPhysPcCurrent + pCallEntry->cbOpcode + (int64_t)(a_fIsJump ? offJump : 0);
541 if ( (GCPhysPcNext >> GUEST_PAGE_SHIFT) == (GCPhysPcCurrent >> GUEST_PAGE_SHIFT)
542 && GUEST_PAGE_SIZE - (GCPhysPcCurrent & GUEST_PAGE_OFFSET_MASK) >= pCallEntry->cbOpcode /* 0xfff: je -56h */ )
543
544 {
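            /* Same guest page and the current instruction doesn't straddle a page boundary, so the
               physical address of the next instruction is known here at compile time. */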
545 /* Load the next GCPhysPc into the 3rd argument for the helper call. */
546 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, GCPhysPcNext);
547
548 /* Load the key lookup flags into the 2nd argument for the helper call.
549 - This is safe wrt CS limit checking since we're only here for FLAT modes.
550 - ASSUMING that this isn't a STI or POPF instruction, we can exclude any
551 interrupt shadow.
552 - The NMI inhibiting is more questionable, though... */
553 /** @todo We don't implement NMI blocking atm, except via VT-x/AMD-V.
554 * Should we copy it into fExec to simplify this? OTOH, it's just a
555 * couple of extra instructions if EFLAGS are already in a register. */
556 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG,
557 (pReNative->fExec & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE);
558
559 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
560 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookup>(pReNative, off);
561 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithIrq>(pReNative, off);
562 }
563 }
564 if (pReNative->idxLastCheckIrqCallNo != UINT32_MAX)
565 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithTlb>(pReNative, off);
566 return iemNativeEmitTbExit<kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq>(pReNative, off);
567#endif
568 }
569 return off;
570}
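
/* Exit selection above: the ...ViaLookup exits get a precomputed GCPhysPcNext and can skip the
 * TLB lookup, the ...WithTlb exits must translate the new PC first, and the ...WithIrq variants
 * are chosen when no recent IRQ check was emitted for this TB so the exit helper performs that
 * check as well. */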
571
572
573#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
574 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
575 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
576
577#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
578 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
579 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
580 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
581
582/** Same as iemRegAddToRip64AndFinishingNoFlags. */
583DECL_INLINE_THROW(uint32_t)
584iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
585{
586#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
587# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
588 if (!pReNative->Core.offPc)
589 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
590# endif
591
592 /* Allocate a temporary PC register. */
593 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
594
595 /* Perform the addition and store the result. */
596 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
597 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
598
599 /* Free but don't flush the PC register. */
600 iemNativeRegFreeTmp(pReNative, idxPcReg);
601#endif
602
603#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
604 pReNative->Core.offPc += cbInstr;
605 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
606# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
607 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 64);
608 off = iemNativeEmitPcDebugCheck(pReNative, off);
609# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
610 off = iemNativePcAdjustCheck(pReNative, off);
611# endif
612 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
613#endif
614
615 return off;
616}
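
/* With IEMNATIVE_WITH_DELAYED_PC_UPDATING the guest RIP is not written back here at all: the
 * instruction length is only accumulated in Core.offPc, and the actual store is emitted later
 * when pending guest writes are flushed (or directly by the jump emitters further down). */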
617
618
619#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
620 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
621 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
622
623#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
624 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
625 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
626 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
627
628/** Same as iemRegAddToEip32AndFinishingNoFlags. */
629DECL_INLINE_THROW(uint32_t)
630iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
631{
632#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
633# ifdef IEMNATIVE_REG_FIXED_PC_DBG
634 if (!pReNative->Core.offPc)
635 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
636# endif
637
638 /* Allocate a temporary PC register. */
639 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
640
641 /* Perform the addition and store the result. */
642 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
643 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
644
645 /* Free but don't flush the PC register. */
646 iemNativeRegFreeTmp(pReNative, idxPcReg);
647#endif
648
649#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
650 pReNative->Core.offPc += cbInstr;
651 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
652# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
653 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 32);
654 off = iemNativeEmitPcDebugCheck(pReNative, off);
655# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
656 off = iemNativePcAdjustCheck(pReNative, off);
657# endif
658 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
659#endif
660
661 return off;
662}
663
664
665#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
666 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
667 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
668
669#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
670 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
671 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
672 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, false /*a_fIsJump*/>(pReNative, off, pCallEntry, 0)
673
674/** Same as iemRegAddToIp16AndFinishingNoFlags. */
675DECL_INLINE_THROW(uint32_t)
676iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
677{
678#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
679# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
680 if (!pReNative->Core.offPc)
681 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
682# endif
683
684 /* Allocate a temporary PC register. */
685 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
686
687 /* Perform the addition and store the result. */
688 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
689 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
690 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
691
692 /* Free but don't flush the PC register. */
693 iemNativeRegFreeTmp(pReNative, idxPcReg);
694#endif
695
696#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
697 pReNative->Core.offPc += cbInstr;
698 Log4(("offPc=%#RX64 cbInstr=%#x off=%#x\n", pReNative->Core.offPc, cbInstr, off));
699# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
700 off = iemNativeEmitPcDebugAdd(pReNative, off, cbInstr, 16);
701 off = iemNativeEmitPcDebugCheck(pReNative, off);
702# elif defined(IEMNATIVE_REG_FIXED_PC_DBG)
703 off = iemNativePcAdjustCheck(pReNative, off);
704# endif
705 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
706#endif
707
708 return off;
709}
710
711
712/*********************************************************************************************************************************
713* Common code for changing PC/RIP/EIP/IP. *
714*********************************************************************************************************************************/
715
716/**
717 * Emits code to check if the content of @a idxAddrReg is a canonical address,
718 * raising a \#GP(0) if it isn't.
719 *
720 * @returns New code buffer offset, UINT32_MAX on failure.
721 * @param pReNative The native recompile state.
722 * @param off The code buffer offset.
723 * @param idxAddrReg The host register with the address to check.
724 * @param idxInstr The current instruction.
725 */
726DECL_FORCE_INLINE_THROW(uint32_t)
727iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxAddrReg, uint8_t idxInstr)
728{
729 /*
730 * Make sure we don't have any outstanding guest register writes as we may
731 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
732 */
733 off = iemNativeRegFlushPendingWrites(pReNative, off);
734
735#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
736 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
737#else
738 RT_NOREF(idxInstr);
739#endif
740
741#ifdef RT_ARCH_AMD64
742 /*
743 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
744 * return raisexcpt();
745 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
746 */
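    /* Bits 63:32 of a canonical address are either 0x00000000..0x00007fff or 0xffff8000..0xffffffff
       (the sign extension of bit 47); adding 0x8000 maps both ranges into 0x0000..0xffff, so the
       final >> 16 yields zero exactly for canonical addresses. */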
747 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
748
749 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
750 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
751 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
752 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
753 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
754
755 iemNativeRegFreeTmp(pReNative, iTmpReg);
756
757#elif defined(RT_ARCH_ARM64)
758 /*
759 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
760 * return raisexcpt();
761 * ----
762 * mov x1, 0x800000000000
763 * add x1, x0, x1
764 * cmp xzr, x1, lsr 48
765 * b.ne .Lraisexcpt
766 */
767 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
768
769 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
770 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
771 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
772 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
773
774 iemNativeRegFreeTmp(pReNative, iTmpReg);
775
776#else
777# error "Port me"
778#endif
779 return off;
780}
781
782
783/**
784 * Emits code to check if the content of @a idxAddrReg is a canonical address,
785 * raising a \#GP(0) if it isn't.
786 *
787 * Caller makes sure everything is flushed, except maybe PC.
788 *
789 * @returns New code buffer offset, UINT32_MAX on failure.
790 * @param pReNative The native recompile state.
791 * @param off The code buffer offset.
792 * @param idxAddrReg The host register with the address to check.
793 * @param offDisp The relative displacement that has already been
794 * added to idxAddrReg and must be subtracted if
795 * raising a \#GP(0).
796 * @param idxInstr The current instruction.
797 */
798DECL_FORCE_INLINE_THROW(uint32_t)
799iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(PIEMRECOMPILERSTATE pReNative, uint32_t off,
800 uint8_t idxAddrReg, int64_t offDisp, uint8_t idxInstr)
801{
802#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
803 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
804#endif
805
806#ifdef RT_ARCH_AMD64
807 /*
808 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
809 * return raisexcpt();
810 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
811 */
812 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
813
814 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
815 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
816 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
817 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
818
819#elif defined(RT_ARCH_ARM64)
820 /*
821 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
822 * return raisexcpt();
823 * ----
824 * mov x1, 0x800000000000
825 * add x1, x0, x1
826 * cmp xzr, x1, lsr 48
827 * b.ne .Lraisexcpt
828 */
829 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
830
831 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
832 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
833 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
834#else
835# error "Port me"
836#endif
837
838 /* Jump to the #GP code (hoping static prediction considers forward branches as not-taken). */
839 uint32_t const offFixup1 = off;
840 off = iemNativeEmitJnzToFixed(pReNative, off, off /*8-bit jump suffices*/);
841
842 /* jump .Lnoexcept; Skip the #GP code. */
843 uint32_t const offFixup2 = off;
844 off = iemNativeEmitJmpToFixed(pReNative, off, off /*8-bit jump suffices*/);
845
846 /* .Lraisexcpt: */
847 iemNativeFixupFixedJump(pReNative, offFixup1, off);
848#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
849 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
850#else
851 RT_NOREF(idxInstr);
852#endif
853
854 /* Undo the PC adjustment and store the old PC value. */
855 off = iemNativeEmitSubGprImm(pReNative, off, idxAddrReg, offDisp, iTmpReg);
856 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxAddrReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
857
858 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
859
860 /* .Lnoexcept: */
861 iemNativeFixupFixedJump(pReNative, offFixup2, off);
862
863 iemNativeRegFreeTmp(pReNative, iTmpReg);
864 return off;
865}
866
867
868/**
869 * Emits code to check if the content of @a idxAddrReg is a canonical address,
870 * raising a \#GP(0) if it isn't.
871 *
872 * Caller makes sure everything is flushed, except maybe PC.
873 *
874 * @returns New code buffer offset, UINT32_MAX on failure.
875 * @param pReNative The native recompile state.
876 * @param off The code buffer offset.
877 * @param idxAddrReg The host register with the address to check.
878 * @param idxOldPcReg Register holding the old PC that offPc is relative
879 * to if available, otherwise UINT8_MAX.
880 * @param idxInstr The current instruction.
881 */
882DECL_FORCE_INLINE_THROW(uint32_t)
883iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
884 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
885{
886#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
887 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
888#endif
889
890#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
891# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
892 if (!pReNative->Core.offPc)
893# endif
894 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
895#else
896 RT_NOREF(idxInstr);
897#endif
898
899#ifdef RT_ARCH_AMD64
900 /*
901 * if ((((uint32_t)(a_u64Addr >> 32) + UINT32_C(0x8000)) >> 16) != 0)
902 * return raisexcpt();
903 * ---- this variant avoids loading a 64-bit immediate, but is one instruction longer.
904 */
905 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
906
907 off = iemNativeEmitLoadGprFromGpr(pReNative, off, iTmpReg, idxAddrReg);
908 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 32);
909 off = iemNativeEmitAddGpr32Imm(pReNative, off, iTmpReg, (int32_t)0x8000);
910 off = iemNativeEmitShiftGprRight(pReNative, off, iTmpReg, 16);
911
912#elif defined(RT_ARCH_ARM64)
913 /*
914 * if ((((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000)) >> 48) != 0)
915 * return raisexcpt();
916 * ----
917 * mov x1, 0x800000000000
918 * add x1, x0, x1
919 * cmp xzr, x1, lsr 48
920 * b.ne .Lraisexcpt
921 */
922 uint8_t const iTmpReg = iemNativeRegAllocTmp(pReNative, &off);
923
924 off = iemNativeEmitLoadGprImm64(pReNative, off, iTmpReg, UINT64_C(0x800000000000));
925 off = iemNativeEmitAddTwoGprs(pReNative, off, iTmpReg, idxAddrReg);
926 off = iemNativeEmitCmpArm64(pReNative, off, ARMV8_A64_REG_XZR, iTmpReg, true /*f64Bit*/, 48 /*cShift*/, kArmv8A64InstrShift_Lsr);
927#else
928# error "Port me"
929#endif
930
931#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
932 if (pReNative->Core.offPc)
933 {
934 /** @todo On x86, it is said that conditional jumps forward are statically
935 * predicted as not taken, so this isn't a very good construct.
936 * Investigate whether it makes sense to invert it and add another
937 * jump. Also, find out wtf the static predictor does here on arm! */
938 uint32_t const offFixup = off;
939 off = iemNativeEmitJzToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
940
941 /* .Lraisexcpt: */
942# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
943 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr), iTmpReg);
944# endif
945 /* We need to update cpum.GstCtx.rip. */
946 if (idxOldPcReg == UINT8_MAX)
947 {
948 idxOldPcReg = iTmpReg;
949 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
950 }
951 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
952 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
953
954 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
955 iemNativeFixupFixedJump(pReNative, offFixup, off);
956 }
957 else
958#endif
959 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_RaiseGp0>(pReNative, off);
960
961 iemNativeRegFreeTmp(pReNative, iTmpReg);
962
963 return off;
964}
965
966
967/**
968 * Emits code to check that the content of @a idxAddrReg is within the limit
969 * of CS, raising a \#GP(0) if it isn't.
970 *
971 * @returns New code buffer offset; throws VBox status code on error.
972 * @param pReNative The native recompile state.
973 * @param off The code buffer offset.
974 * @param idxAddrReg The host register (32-bit) with the address to
975 * check.
976 * @param idxInstr The current instruction.
977 */
978DECL_FORCE_INLINE_THROW(uint32_t)
979iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(PIEMRECOMPILERSTATE pReNative, uint32_t off,
980 uint8_t idxAddrReg, uint8_t idxInstr)
981{
982 /*
983 * Make sure we don't have any outstanding guest register writes as we may
984 * raise an #GP(0) and all guest register must be up to date in CPUMCTX.
985 */
986 off = iemNativeRegFlushPendingWrites(pReNative, off);
987
988#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
989 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
990#else
991 RT_NOREF(idxInstr);
992#endif
993
994 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
995 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
996 kIemNativeGstRegUse_ReadOnly);
997
998 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
999 off = iemNativeEmitTbExitJa<kIemNativeLabelType_RaiseGp0>(pReNative, off);
1000
1001 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
1002 return off;
1003}
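
/* Note: this is a plain unsigned compare against the cached CS limit, i.e. #GP(0) is raised
 * whenever the 32-bit target address is strictly above CS.LIM. */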
1004
1005
1006
1007
1008/**
1009 * Emits code to check that the content of @a idxAddrReg is within the limit
1010 * of CS, raising a \#GP(0) if it isn't.
1011 *
1012 * Caller makes sure everything is flushed, except maybe PC.
1013 *
1014 * @returns New code buffer offset; throws VBox status code on error.
1015 * @param pReNative The native recompile state.
1016 * @param off The code buffer offset.
1017 * @param idxAddrReg The host register (32-bit) with the address to
1018 * check.
1019 * @param idxOldPcReg Register holding the old PC that offPc is relative
1020 * to if available, otherwise UINT8_MAX.
1021 * @param idxInstr The current instruction.
1022 */
1023DECL_FORCE_INLINE_THROW(uint32_t)
1024iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1025 uint8_t idxAddrReg, uint8_t idxOldPcReg, uint8_t idxInstr)
1026{
1027#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1028 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
1029#endif
1030
1031#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1032# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1033 if (!pReNative->Core.offPc)
1034# endif
1035 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1036#else
1037 RT_NOREF(idxInstr);
1038#endif
1039
1040 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
1041 (IEMNATIVEGSTREG)(kIemNativeGstReg_SegLimitFirst + X86_SREG_CS),
1042 kIemNativeGstRegUse_ReadOnly);
1043
1044 off = iemNativeEmitCmpGpr32WithGpr(pReNative, off, idxAddrReg, idxRegCsLim);
1045#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1046 if (pReNative->Core.offPc)
1047 {
1048 uint32_t const offFixup = off;
1049 off = iemNativeEmitJbeToFixed(pReNative, off, off + 16 /*8-bit suffices*/);
1050
1051 /* Raising a GP(0), but first we need to update cpum.GstCtx.rip. */
1052 if (idxOldPcReg == UINT8_MAX)
1053 {
1054 idxOldPcReg = idxAddrReg;
1055 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1056 }
1057 off = iemNativeEmitAddGprImm(pReNative, off, idxOldPcReg, pReNative->Core.offPc);
1058 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxOldPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1059# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1060 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1061# endif
1062 off = iemNativeEmitTbExit<kIemNativeLabelType_RaiseGp0, false /*a_fActuallyExitingTb*/>(pReNative, off);
1063 iemNativeFixupFixedJump(pReNative, offFixup, off);
1064 }
1065 else
1066#endif
1067 off = iemNativeEmitTbExitJa<kIemNativeLabelType_RaiseGp0>(pReNative, off);
1068
1069 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
1070 return off;
1071}
1072
1073
1074/*********************************************************************************************************************************
1075* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
1076*********************************************************************************************************************************/
1077
1078#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1079 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1080 (a_enmEffOpSize), pCallEntry->idxInstr); \
1081 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1082
1083#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1084 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1085 (a_enmEffOpSize), pCallEntry->idxInstr); \
1086 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1087 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1088
1089#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
1090 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1091 IEMMODE_16BIT, pCallEntry->idxInstr); \
1092 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1093
1094#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1095 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1096 IEMMODE_16BIT, pCallEntry->idxInstr); \
1097 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1098 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1099
1100#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
1101 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1102 IEMMODE_64BIT, pCallEntry->idxInstr); \
1103 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1104
1105#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1106 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1107 IEMMODE_64BIT, pCallEntry->idxInstr); \
1108 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1109 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1110
1111
1112#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1113 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1114 (a_enmEffOpSize), pCallEntry->idxInstr); \
1115 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1116
1117#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1118 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1119 (a_enmEffOpSize), pCallEntry->idxInstr); \
1120 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1121 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1122
1123#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG(a_i16, a_cbInstr, a_rcNormal) \
1124 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1125 IEMMODE_16BIT, pCallEntry->idxInstr); \
1126 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1127
1128#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1129 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1130 IEMMODE_16BIT, pCallEntry->idxInstr); \
1131 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1132 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1133
1134#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG(a_i32, a_cbInstr, a_rcNormal) \
1135 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1136 IEMMODE_64BIT, pCallEntry->idxInstr); \
1137 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1138
1139#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_INTRAPG_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1140 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1141 IEMMODE_64BIT, pCallEntry->idxInstr); \
1142 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1143 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1144
1145/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
1146 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
1147 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
1148template<bool const a_fWithinPage>
1149DECL_INLINE_THROW(uint32_t)
1150iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1151 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1152{
1153 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
1154#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1155 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1156 if (a_fWithinPage && enmEffOpSize == IEMMODE_64BIT)
1157 {
1158 /* No #GP checking required, just update offPc and get on with it. */
1159 pReNative->Core.offPc += (int64_t)offDisp + cbInstr;
1160# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1161 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1162# endif
1163 }
1164 else
1165#endif
1166 {
1167 /* Flush all but PC iff we're doing a 64-bit update here and this isn't within a page. */
1168 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT && !a_fWithinPage))
1169 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1170
1171 /* Allocate a temporary PC register. */
1172 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1173 kIemNativeGstRegUse_ForUpdate);
1174
1175 /* Perform the addition. */
1176 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr + pReNative->Core.offPc);
1177
1178 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
1179 {
1180 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't.
1181 We can skip this if the target is within the same page. */
1182 if (!a_fWithinPage)
1183 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithDisp(pReNative, off, idxPcReg,
1184 (int64_t)offDisp + cbInstr, idxInstr);
1185 }
1186 else
1187 {
1188 /* Just truncate the result to 16-bit IP. */
1189 Assert(enmEffOpSize == IEMMODE_16BIT);
1190 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1191 }
1192
1193#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1194# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1195 off = iemNativeEmitPcDebugAdd(pReNative, off, (int64_t)offDisp + cbInstr, enmEffOpSize == IEMMODE_64BIT ? 64 : 16);
1196 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1197# endif
1198 /* Since we've already got the new PC value in idxPcReg, we can just as
1199 well write it out and reset offPc to zero. Otherwise, we'd need to use
1200 a copy of the shadow PC, which will cost another move instruction here. */
1201# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS)
1202 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
1203 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
1204 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
1205 Log4(("iemNativeEmitRip64RelativeJumpAndFinishingNoFlags: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u cCondDepth=%d\n",
1206 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped, pReNative->cCondDepth));
1207 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
1208# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1209 iemNativeDbgInfoAddNativeOffset(pReNative, off);
1210 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
1211# endif
1212# endif
1213 pReNative->Core.offPc = 0;
1214#endif
1215
1216 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1217
1218 /* Free but don't flush the PC register. */
1219 iemNativeRegFreeTmp(pReNative, idxPcReg);
1220 }
1221 return off;
1222}
1223
1224
1225#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1226 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1227 (a_enmEffOpSize), pCallEntry->idxInstr); \
1228 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1229
1230#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1231 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1232 (a_enmEffOpSize), pCallEntry->idxInstr); \
1233 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1234 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1235
1236#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
1237 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1238 IEMMODE_16BIT, pCallEntry->idxInstr); \
1239 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1240
1241#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1242 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1243 IEMMODE_16BIT, pCallEntry->idxInstr); \
1244 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1245 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1246
1247#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
1248 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1249 IEMMODE_32BIT, pCallEntry->idxInstr); \
1250 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1251
1252#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1253 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<false>(pReNative, off, (a_cbInstr), (a_i32), \
1254 IEMMODE_32BIT, pCallEntry->idxInstr); \
1255 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1256 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1257
1258
1259#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1260 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1261 (a_enmEffOpSize), pCallEntry->idxInstr); \
1262 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1263
1264#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
1265 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
1266 (a_enmEffOpSize), pCallEntry->idxInstr); \
1267 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1268 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1269
1270#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT(a_i16, a_cbInstr, a_rcNormal) \
1271 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1272 IEMMODE_16BIT, pCallEntry->idxInstr); \
1273 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1274
1275#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1276 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
1277 IEMMODE_16BIT, pCallEntry->idxInstr); \
1278 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1279 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1280
1281#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT(a_i32, a_cbInstr, a_rcNormal) \
1282 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1283 IEMMODE_32BIT, pCallEntry->idxInstr); \
1284 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1285
1286#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_FLAT_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1287 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags<true>(pReNative, off, (a_cbInstr), (a_i32), \
1288 IEMMODE_32BIT, pCallEntry->idxInstr); \
1289 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1290 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (a_i32))
1291
1292/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
1293 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
1294 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
1295template<bool const a_fFlat>
1296DECL_INLINE_THROW(uint32_t)
1297iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
1298 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
1299{
1300 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
1301#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1302 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1303#endif
1304
1305 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1306 if (!a_fFlat || enmEffOpSize == IEMMODE_16BIT)
1307 {
1308 off = iemNativeRegFlushPendingWrites(pReNative, off);
1309#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1310 Assert(pReNative->Core.offPc == 0);
1311#endif
1312 }
1313
1314 /* Allocate a temporary PC register. */
1315 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1316
1317 /* Perform the addition. */
1318 /* Note: Core.offPc stays zero when delayed PC updating is disabled, so a single addition covers both configurations. */
1319 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr + (int32_t)pReNative->Core.offPc);
1323
1324 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
1325 if (enmEffOpSize == IEMMODE_16BIT)
1326 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1327
1328 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
1329 if (!a_fFlat)
1330 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1331
1332 /* Commit it. */
1333#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1334 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, enmEffOpSize == IEMMODE_32BIT ? 32 : 16);
1335 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1336#endif
1337
1338 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1339#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1340 pReNative->Core.offPc = 0;
1341#endif
1342
1343 /* Free but don't flush the PC register. */
1344 iemNativeRegFreeTmp(pReNative, idxPcReg);
1345
1346 return off;
1347}
1348
1349
1350#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
1351 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1352 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1353
1354#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
1355 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
1356 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1357 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int8_t)(a_i8))
1358
1359#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
1360 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1361 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1362
1363#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
1364 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
1365 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1366 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, (int16_t)(a_i16))
1367
1368#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
1369 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1370 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1371
1372#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
1373 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
1374 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
1375 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal, true /*a_fIsJump*/>(pReNative, off, pCallEntry, a_i32)
1376
1377/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
1378DECL_INLINE_THROW(uint32_t)
1379iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1380 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
1381{
1382 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1383 off = iemNativeRegFlushPendingWrites(pReNative, off);
1384
1385#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1386 Assert(pReNative->Core.offPc == 0);
1387 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1388#endif
1389
1390 /* Allocate a temporary PC register. */
1391 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
1392
1393 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
1394 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
1395 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1396 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
1397#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1398 off = iemNativeEmitPcDebugAdd(pReNative, off, offDisp + cbInstr, 16);
1399 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
1400#endif
1401 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1402
1403 /* Free but don't flush the PC register. */
1404 iemNativeRegFreeTmp(pReNative, idxPcReg);
1405
1406 return off;
1407}
1408
1409
1410
1411/*********************************************************************************************************************************
1412* Emitters for changing PC/RIP/EIP/IP with a indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
1413*********************************************************************************************************************************/
1414
1415/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
1416#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
1417 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1418
1419/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
1420#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
1421 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1422
1423/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
1424#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
1425 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1426
1427/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
1428 * clears flags. */
1429#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
1430 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
1431 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1432
1433/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
1434 * clears flags. */
1435#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
1436 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
1437 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1438
1439/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
1440 * clears flags. */
1441#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
1442 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
1443 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1444
1445#undef IEM_MC_SET_RIP_U16_AND_FINISH
1446
1447
1448/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
1449#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
1450 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1451
1452/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
1453#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
1454 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1455
1456/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
1457 * clears flags. */
1458#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
1459 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
1460 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1461
1462/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
1463 * and clears flags. */
1464#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
1465 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
1466 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1467
1468#undef IEM_MC_SET_RIP_U32_AND_FINISH
1469
1470
1471/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
1472#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
1473 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1474
1475/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
1476 * and clears flags. */
1477#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
1478 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
1479 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1480
1481#undef IEM_MC_SET_RIP_U64_AND_FINISH
1482
1483
1484/** Same as iemRegRipJumpU16AndFinishNoFlags,
1485 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
1486DECL_INLINE_THROW(uint32_t)
1487iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
1488 uint8_t idxInstr, uint8_t cbVar)
1489{
1490 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1491 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1492
1493 /* If we can't rule out a #GP(0) below, flush all dirty registers except for
1494 PC, which will be handled specially by the two workers below if they raise a #GP. */
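    /* Note: a #GP(0) is only possible here for a full 64-bit target (which may be non-canonical)
       or for a 16/32-bit target in non-flat mode (which must be checked against CS.LIM). */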
1495 bool const fMayRaiseGp0 = (f64Bit && cbVar > sizeof(uint32_t)) || (!f64Bit && !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1496 uint8_t const idxOldPcReg = fMayRaiseGp0
1497 ? iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc)
1498 : UINT8_MAX;
1499 if (fMayRaiseGp0)
1500 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/);
1501
1502 /* Get a register with the new PC loaded from idxVarPc.
1503 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1504 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
1505
1506 /* Check that the target is within CS.LIM / is canonical (may #GP(0) + exit TB). */
1507 if (fMayRaiseGp0)
1508 {
1509 if (f64Bit)
1510 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1511 else
1512 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0WithOldPc(pReNative, off, idxPcReg, idxOldPcReg, idxInstr);
1513 }
1514
1515 /* Store the result. */
1516 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1517
1518#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1519 pReNative->Core.offPc = 0;
1520 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1521# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1522 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1523 pReNative->Core.fDebugPcInitialized = true;
1524 Log4(("uPcUpdatingDebug=rip off=%#x\n", off));
1525# endif
1526#endif
1527
1528 if (idxOldPcReg != UINT8_MAX)
1529 iemNativeRegFreeTmp(pReNative, idxOldPcReg);
1530 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1531 /** @todo implicitly free the variable? */
1532
1533 return off;
1534}
1535
1536
1537
1538/*********************************************************************************************************************************
1539* Emitters for changing PC/RIP/EIP/IP with an indirect or relative call (IEM_MC_IND_CALL_UXX_AND_FINISH) (requires stack emitters). *
1540*********************************************************************************************************************************/
1541
1542/** @todo These helpers belong to the stack push API naturally but we already need them up here (we could of course move
1543 * this below the stack emitters but then this is not close to the rest of the PC/RIP handling...). */
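/**
 * Emits code to step the stack pointer for a push on a 16-bit stack (SS.ATTR.D clear): the
 * low 16 bits of RSP are decremented by cbMem (bits 63:16 are left untouched) and the
 * zero-extended 16-bit result is placed in idxRegEffSp as the effective stack pointer for
 * the store.
 *
 * E.g. with SP=0x0002 and cbMem=2 the new SP and the effective SP are both 0x0000; with
 * SP=0x0000 the value wraps to 0xFFFE within the low 16 bits.
 */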
1544DECL_FORCE_INLINE_THROW(uint32_t)
1545iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1546{
1547 /* Use16BitSp: */
1548#ifdef RT_ARCH_AMD64
1549 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
1550 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1551#else
1552 /* sub regeff, regrsp, #cbMem */
1553 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
1554 /* and regeff, regeff, #0xffff */
1555 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
1556 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
1557 /* bfi regrsp, regeff, #0, #16 - copies bits 15:0 from idxRegEffSp into bits 15:0 of idxRegRsp, keeping the other RSP bits as is. */
1558 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
1559#endif
1560 return off;
1561}
1562
1563
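/**
 * Likewise for a 32-bit stack (SS.ATTR.D set): ESP is simply decremented by cbMem and the
 * result doubles as the effective stack pointer.
 */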
1564DECL_FORCE_INLINE(uint32_t)
1565iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
1566{
1567 /* Use32BitSp: */
1568 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
1569 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
1570 return off;
1571}
1572
1573
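/**
 * Emits code to push the value in idxRegPc (the return RIP/EIP/IP) onto the guest stack.
 *
 * a_cBitsVar is the width of the pushed value (16/32/64) and a_cBitsFlat is 0 for segmented
 * stacks or 32/64 for flat ones.  The fast path goes through the data TLB (unless the
 * lookup is being skipped); on a miss the given pfnFunction helper is called instead.
 */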
1574template<uint8_t const a_cBitsVar, uint8_t const a_cBitsFlat>
1575DECL_INLINE_THROW(uint32_t)
1576iemNativeEmitStackPushRip(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxRegPc,
1577 uintptr_t pfnFunction, uint8_t idxInstr)
1578{
1579 AssertCompile(a_cBitsVar == 16 || a_cBitsVar == 32 || a_cBitsVar == 64);
1580 AssertCompile(a_cBitsFlat == 0 || a_cBitsFlat == 32 || a_cBitsFlat == 64);
1581
1582 /*
1583 * Assert sanity.
1584 */
1585#ifdef VBOX_STRICT
1586 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
1587 {
1588 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
1589 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
1590 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
1591 Assert( pfnFunction
1592 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 32) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1593 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 32) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
1594 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 64) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
1595 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 64) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
1596 : UINT64_C(0xc000b000a0009000) ));
1597 }
1598 else
1599 Assert( pfnFunction
1600 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
1601 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
1602 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
1603 : UINT64_C(0xc000b000a0009000) ));
1604#endif
1605
1606#ifdef VBOX_STRICT
1607 /*
1608 * Check that the fExec flags we've got make sense.
1609 */
1610 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
1611#endif
1612
1613 /*
1614 * To keep things simple we have to commit any pending writes first as we
1615 * may end up making calls.
1616 */
1617 /** @todo we could postpone this till we make the call and reload the
1618 * registers after returning from the call. Not sure if that's sensible or
1619 * not, though. */
1620 off = iemNativeRegFlushPendingWrites(pReNative, off);
1621
1622 /*
1623 * First we calculate the new RSP and the effective stack pointer value.
1624 * For 64-bit mode and flat 32-bit these two are the same.
1625 * (Code structure is very similar to that of PUSH)
1626 */
1627 RT_CONSTEXPR
1628 uint8_t const cbMem = a_cBitsVar / 8;
1629 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
1630 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
1631 uint8_t const idxRegEffSp = a_cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
1632 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
1633 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
1634 {
1635 Assert(idxRegEffSp == idxRegRsp);
1636 Assert(a_cBitsFlat == 32 || a_cBitsFlat == 64);
1637 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
1638 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
1639 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
1640 else
1641 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
1642 }
1643 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
1644 {
1645 Assert(idxRegEffSp != idxRegRsp);
1646 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
1647 kIemNativeGstRegUse_ReadOnly);
1648#ifdef RT_ARCH_AMD64
1649 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1650#else
1651 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1652#endif
1653 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
1654 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
1655 offFixupJumpToUseOtherBitSp = off;
1656 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1657 {
1658 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
1659 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1660 }
1661 else
1662 {
1663 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
1664 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1665 }
1666 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1667 }
1668 /* SpUpdateEnd: */
1669 uint32_t const offLabelSpUpdateEnd = off;
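    /* Note: the alternative stack-size path is emitted out of line below (see the Use16BitSp
       block) and jumps back to this point, so both paths continue with the TLB lookup. */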
1670
1671 /*
1672 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
1673 * we're skipping lookup).
1674 */
1675 uint8_t const iSegReg = a_cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
1676 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
1677 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1678 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
1679 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1680 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1681 : UINT32_MAX;
1682 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1683
1684
1685 if (!TlbState.fSkip)
1686 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1687 else
1688 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
1689
1690 /*
1691 * Use16BitSp:
1692 */
1693 if RT_CONSTEXPR_IF(a_cBitsFlat == 0)
1694 {
1695#ifdef RT_ARCH_AMD64
1696 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
1697#else
1698 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
1699#endif
1700 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
1701 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
1702 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1703 else
1704 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
1705 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
1706 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1707 }
1708
1709 /*
1710 * TlbMiss:
1711 *
1712 * Call helper to do the pushing.
1713 */
1714 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
1715
1716#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1717 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1718#else
1719 RT_NOREF(idxInstr);
1720#endif
1721
1722 /* Save variables in volatile registers. */
1723 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
1724 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
1725 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
1726 | (RT_BIT_32(idxRegPc));
1727 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1728
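    /* Load the call arguments, taking care not to overwrite a source register before it has
       been read (hence the three different orderings / the swap via ARG0 below). */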
1729 if ( idxRegPc == IEMNATIVE_CALL_ARG1_GREG
1730 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
1731 {
1732 /* Swap them using ARG0 as temp register: */
1733 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
1734 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
1735 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
1736 }
1737 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
1738 {
1739 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc (first!) */
1740 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1741
1742 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
1743 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
1744 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1745 }
1746 else
1747 {
1748 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
1749 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
1750
1751 /* IEMNATIVE_CALL_ARG2_GREG = idxRegPc */
1752 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxRegPc);
1753 }
1754
1755#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
1756 /* Do delayed EFLAGS calculations. */
1757 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
1758 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
1759#endif
1760
1761 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1762 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1763
1764 /* Done setting up parameters, make the call. */
1765 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
1766
1767 /* Restore variables and guest shadow registers to volatile registers. */
1768 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1769 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
1770
1771#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1772 if (!TlbState.fSkip)
1773 {
1774 /* end of TlbMiss - Jump to the done label. */
1775 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1776 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1777
1778 /*
1779 * TlbLookup:
1780 */
1781 off = iemNativeEmitTlbLookup<true, cbMem, cbMem - 1, IEM_ACCESS_TYPE_WRITE>(pReNative, off, &TlbState, iSegReg,
1782 idxLabelTlbLookup, idxLabelTlbMiss,
1783 idxRegMemResult);
1784
1785 /*
1786 * Emit code to do the actual storing / fetching.
1787 */
1788 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
1789# ifdef IEM_WITH_TLB_STATISTICS
1790 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
1791 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
1792# endif
1793 AssertCompile(cbMem == 2 || cbMem == 4 || cbMem == 8);
1794 if RT_CONSTEXPR_IF(cbMem == 2)
1795 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1796 else if RT_CONSTEXPR_IF(cbMem == 4)
1797 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1798 else
1799 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegPc, idxRegMemResult);
1800
1801 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
1802 TlbState.freeRegsAndReleaseVars(pReNative);
1803
1804 /*
1805 * TlbDone:
1806 *
1807 * Commit the new RSP value.
1808 */
1809 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1810 }
1811#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
1812
1813#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
1814 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
1815#endif
1816 iemNativeRegFreeTmp(pReNative, idxRegRsp);
1817 if (idxRegEffSp != idxRegRsp)
1818 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
1819
1820 return off;
1821}
1822
1823
1824/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets. */
1825#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr) \
1826 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1827
1828/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for pre-386 targets that checks and
1829 * clears flags. */
1830#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1831 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC16(a_u16NewIP, a_cbInstr); \
1832 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1833
1834/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets. */
1835#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr) \
1836 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
1837
1838/** Variant of IEM_MC_IND_CALL_U16_AND_FINISH for 386+ targets that checks and
1839 * clears flags. */
1840#define IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP, a_cbInstr) \
1841 IEM_MC_IND_CALL_U16_AND_FINISH_THREADED_PC32(a_u16NewIP, a_cbInstr); \
1842 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1843
1844#undef IEM_MC_IND_CALL_U16_AND_FINISH
1845
1846
1847/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets. */
1848#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr) \
1849 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
1850
1851/** Variant of IEM_MC_IND_CALL_U32_AND_FINISH for 386+ targets that checks and
1852 * clears flags. */
1853#define IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP, a_cbInstr) \
1854 IEM_MC_IND_CALL_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP, a_cbInstr); \
1855 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1856
1857#undef IEM_MC_IND_CALL_U32_AND_FINISH
1858
1859
1860/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1861 * an extra parameter, for use in 64-bit code. */
1862#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr) \
1863 off = iemNativeEmitRipIndirectCallNoFlags(pReNative, off, a_cbInstr, (a_u64NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
1864
1865
1866/** Variant of IEM_MC_IND_CALL_U64_AND_FINISH with instruction length as
1867 * an extra parameter, for use in 64-bit code and we need to check and clear
1868 * flags. */
1869#define IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewIP, a_cbInstr) \
1870 IEM_MC_IND_CALL_U64_AND_FINISH_THREADED_PC64(a_u64NewIP, a_cbInstr); \
1871 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1872
1873#undef IEM_MC_IND_CALL_U64_AND_FINISH
1874
1875/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
1876 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
1877DECL_INLINE_THROW(uint32_t)
1878iemNativeEmitRipIndirectCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxVarPc, bool f64Bit,
1879 uint8_t idxInstr, uint8_t cbVar)
1880{
1881 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
1882 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
1883
1884 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
1885 off = iemNativeRegFlushPendingWrites(pReNative, off);
1886
1887#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1888 Assert(pReNative->Core.offPc == 0);
1889 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
1890#endif
1891
1892 /* Get a register with the new PC loaded from idxVarPc.
1893 Note! This ASSUMES that the high bits of the GPR are zeroed. */
1894 uint8_t const idxNewPcReg = iemNativeVarRegisterAcquire(pReNative, idxVarPc, &off);
1895
1896 /* Check limit (may #GP(0) + exit TB). */
1897 if (!f64Bit)
1898/** @todo we can skip this test in FLAT 32-bit mode. */
1899 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1900 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
1901 else if (cbVar > sizeof(uint32_t))
1902 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxNewPcReg, idxInstr);
1903
1904#if 1
1905 /* Allocate a temporary PC register, we don't want it shadowed. */
1906 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1907 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
1908#else
1909 /* Allocate a temporary PC register. */
1910 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate,
1911 true /*fNoVolatileRegs*/);
1912#endif
1913
1914 /* Perform the addition and push the variable to the guest stack. */
1915 /** @todo Add flat variants for the PC32 cases. */
1916 switch (cbVar)
1917 {
1918 case sizeof(uint16_t):
1919 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1920 /* Truncate the result to 16-bit IP. */
1921 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
1922 off = iemNativeEmitStackPushRip<16, 0>(pReNative, off, idxPcReg, (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
1923 break;
1924 case sizeof(uint32_t):
1925 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
1926 /** @todo In FLAT mode we can use the flat variant. */
1927 off = iemNativeEmitStackPushRip<32, 0>(pReNative, off, idxPcReg, (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
1928 break;
1929 case sizeof(uint64_t):
1930 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
1931 off = iemNativeEmitStackPushRip<64, 64>(pReNative, off, idxPcReg, (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
1932 break;
1933 default:
1934 AssertFailed();
1935 }
1936
1937 /* RSP got changed, so do this again. */
1938 off = iemNativeRegFlushPendingWrites(pReNative, off);
1939
1940 /* Store the result. */
1941 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
1942#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
1943 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxNewPcReg, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
1944 pReNative->Core.fDebugPcInitialized = true;
1945 Log4(("uPcUpdatingDebug=rip/indirect-call off=%#x\n", off));
1946#endif
1947
1948#if 1
1949 /* Need to transfer the shadow information to the new RIP register. */
1950 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxNewPcReg, kIemNativeGstReg_Pc, off);
1951#else
1952 /* Sync the new PC. */
1953 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcReg, idxNewPcReg);
1954#endif
1955 iemNativeVarRegisterRelease(pReNative, idxVarPc);
1956 iemNativeRegFreeTmp(pReNative, idxPcReg);
1957 /** @todo implicitly free the variable? */
1958
1959 return off;
1960}
1961
1962
1963/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1964 * an extra parameter, for use in 16-bit code on a pre-386 CPU. */
1965#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr) \
1966 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1967
1968/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1969 * an extra parameter, for use in 16-bit code on a pre-386 CPU and we need to check and clear
1970 * flags. */
1971#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr) \
1972 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr); \
1973 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1974
1975/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1976 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1977#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr) \
1978 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1979
1980/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1981 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1982 * flags. */
1983#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr) \
1984 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr); \
1985 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1986
1987/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1988 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
1989#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr) \
1990 off = iemNativeEmitRipRelativeCallS16NoFlags(pReNative, off, a_cbInstr, (a_i16), pCallEntry->idxInstr)
1991
1992/** Variant of IEM_MC_REL_CALL_S16_AND_FINISH with instruction length as
1993 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
1994 * flags. */
1995#define IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr) \
1996 IEM_MC_REL_CALL_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr); \
1997 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
1998
1999#undef IEM_MC_REL_CALL_S16_AND_FINISH
2000
2001/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2002 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2003DECL_INLINE_THROW(uint32_t)
2004iemNativeEmitRipRelativeCallS16NoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int16_t offDisp,
2005 uint8_t idxInstr)
2006{
2007 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2008 off = iemNativeRegFlushPendingWrites(pReNative, off);
2009
2010#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2011 Assert(pReNative->Core.offPc == 0);
2012 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2013#endif
2014
2015 /* Allocate a temporary PC register. */
2016 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2017 kIemNativeGstRegUse_Calculation, true /*fNoVolatileRegs*/);
2018 uint8_t const idxPcRegNew = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
2019
2020 /* Calculate the return address (idxPcRegOld) and from it the new IP (idxPcRegNew). */
2021 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2022 /* Truncate the result to 16-bit IP. */
2023 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegOld);
2024 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2025 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2026
2027 /* Truncate the result to 16-bit IP. */
2028 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcRegNew);
2029
2030 /* Check limit (may #GP(0) + exit TB). */
2031 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2032
2033 /* Perform the addition and push the variable to the guest stack. */
2034 off = iemNativeEmitStackPushRip<16, 0>(pReNative, off, idxPcRegOld, (uintptr_t)iemNativeHlpStackStoreU16, idxInstr);
2035
2036 /* RSP got changed, so flush again. */
2037 off = iemNativeRegFlushPendingWrites(pReNative, off);
2038
2039 /* Store the result. */
2040 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2041#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2042 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2043 pReNative->Core.fDebugPcInitialized = true;
2044 Log4(("uPcUpdatingDebug=rip/rel-call-16 off=%#x offDisp=%d\n", off, offDisp));
2045#endif
2046
2047 /* Need to transfer the shadow information to the new RIP register. */
2048 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2049 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2050 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2051
2052 return off;
2053}
2054
2055
2056/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2057 * an extra parameter, for use in 16-bit and 32-bit code on 386+. */
2058#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr) \
2059 off = iemNativeEmitEip32RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i32), pCallEntry->idxInstr)
2060
2061/** Variant of IEM_MC_REL_CALL_S32_AND_FINISH with instruction length as
2062 * an extra parameter, for use in 16-bit and 32-bit code on 386+ and we need to check and clear
2063 * flags. */
2064#define IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr) \
2065 IEM_MC_REL_CALL_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr); \
2066 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2067
2068#undef IEM_MC_REL_CALL_S32_AND_FINISH
2069
2070/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2071 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2072DECL_INLINE_THROW(uint32_t)
2073iemNativeEmitEip32RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int32_t offDisp,
2074 uint8_t idxInstr)
2075{
2076 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2077 off = iemNativeRegFlushPendingWrites(pReNative, off);
2078
2079#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2080 Assert(pReNative->Core.offPc == 0);
2081 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2082#endif
2083
2084 /* Allocate a temporary PC register. */
2085 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2086 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2087 uint8_t const idxPcRegNew = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
2088
2089 /* Update the EIP to get the return address. */
2090 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcRegOld, cbInstr);
2091
2092 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2093 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxPcRegNew, idxPcRegOld);
2094 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcRegNew, offDisp);
2095 /** @todo we can skip this test in FLAT 32-bit mode. */
2096 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2097
2098 /* Push the return address onto the guest stack. */
2099 /** @todo Can avoid the stack limit checks in FLAT 32-bit mode. */
2100 off = iemNativeEmitStackPushRip<32, 0>(pReNative, off, idxPcRegOld, (uintptr_t)iemNativeHlpStackStoreU32, idxInstr);
2101
2102 /* RSP got changed, so do this again. */
2103 off = iemNativeRegFlushPendingWrites(pReNative, off);
2104
2105 /* Store the result. */
2106 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2107#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2108 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2109 pReNative->Core.fDebugPcInitialized = true;
2110 Log4(("uPcUpdatingDebug=eip/rel-call-32 off=%#x offDisp=%d\n", off, offDisp));
2111#endif
2112
2113 /* Need to transfer the shadow information to the new RIP register. */
2114 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2115 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2116 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2117
2118 return off;
2119}
2120
2121
2122/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2123 * an extra parameter, for use in 64-bit code. */
2124#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr) \
2125 off = iemNativeEmitRip64RelativeCallNoFlags(pReNative, off, a_cbInstr, (a_i64), pCallEntry->idxInstr)
2126
2127/** Variant of IEM_MC_REL_CALL_S64_AND_FINISH with instruction length as
2128 * an extra parameter, for use in 64-bit code and we need to check and clear
2129 * flags. */
2130#define IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i64, a_cbInstr) \
2131 IEM_MC_REL_CALL_S64_AND_FINISH_THREADED_PC64(a_i64, a_cbInstr); \
2132 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2133
2134#undef IEM_MC_REL_CALL_S64_AND_FINISH
2135
2136/** Same as iemRegIp16RelativeCallS16AndFinishNoFlags,
2137 * iemRegEip32RelativeCallS32AndFinishNoFlags and iemRegRip64RelativeCallS64AndFinishNoFlags. */
2138DECL_INLINE_THROW(uint32_t)
2139iemNativeEmitRip64RelativeCallNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, int64_t offDisp,
2140 uint8_t idxInstr)
2141{
2142 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
2143 off = iemNativeRegFlushPendingWrites(pReNative, off);
2144
2145#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2146 Assert(pReNative->Core.offPc == 0);
2147 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
2148#endif
2149
2150 /* Allocate a temporary PC register. */
2151 uint8_t const idxPcRegOld = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
2152 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
2153 uint8_t const idxPcRegNew = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
2154
2155 /* Update the RIP to get the return address. */
2156 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcRegOld, cbInstr);
2157
2158 /* Load address, add the displacement and check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2159 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxPcRegNew, idxPcRegOld);
2160 off = iemNativeEmitAddGprImm(pReNative, off, idxPcRegNew, offDisp);
2161 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcRegNew, idxInstr);
2162
2163 /* Push the return address onto the guest stack. */
2164 off = iemNativeEmitStackPushRip<64, 64>(pReNative, off, idxPcRegOld, (uintptr_t)iemNativeHlpStackFlatStoreU64, idxInstr);
2165
2166 /* RSP got changed, so do this again. */
2167 off = iemNativeRegFlushPendingWrites(pReNative, off);
2168
2169 /* Store the result. */
2170 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2171#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2172 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcRegNew, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2173 pReNative->Core.fDebugPcInitialized = true;
2174 Log4(("uPcUpdatingDebug=rip/rel-call-64 off=%#x offDisp=%ld\n", off, offDisp));
2175#endif
2176
2177 /* Need to transfer the shadow information to the new RIP register. */
2178 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxPcRegNew, kIemNativeGstReg_Pc, off);
2179 iemNativeRegFreeTmp(pReNative, idxPcRegNew);
2180 iemNativeRegFreeTmp(pReNative, idxPcRegOld);
2181
2182 return off;
2183}
2184
2185
2186/*********************************************************************************************************************************
2187* Emitters for changing PC/RIP/EIP/IP with a RETN (Iw) instruction (IEM_MC_RETN_AND_FINISH) (requires stack emitters). *
2188*********************************************************************************************************************************/
2189
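/**
 * Emits code to compute the effective stack pointer and the updated SP for a near return on
 * a 16-bit stack: the effective SP is the zero-extended low 16 bits of RSP, and SP is then
 * advanced by cbMem plus the immediate operand (cbPopAdd), wrapping within 16 bits while
 * leaving RSP bits 63:16 untouched.
 *
 * E.g. 'retn 4' with SP=0xFFFC and cbMem=2 gives an effective SP of 0xFFFC and a new SP of
 * (0xFFFC + 2 + 4) & 0xffff = 0x0002.
 */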
2190DECL_FORCE_INLINE_THROW(uint32_t)
2191iemNativeEmitStackPopForRetnUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2192 uint16_t cbPopAdd, uint8_t idxRegTmp)
2193{
2194 /* Use16BitSp: */
2195#ifdef RT_ARCH_AMD64
2196 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2197 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
2198 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbPopAdd); /* ASSUMES this does NOT modify bits [63:16]! */
2199 RT_NOREF(idxRegTmp);
2200
2201#elif defined(RT_ARCH_ARM64)
2202 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
2203 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
2204 /* add tmp, regrsp, #(cbMem + cbPopAdd) - done in two steps if the immediate doesn't fit in 12 bits. */
2205 uint16_t const cbCombined = cbMem + cbPopAdd;
2206 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbCombined & (RT_BIT_32(12) - 1U), false /*f64Bit*/);
2207 if (cbCombined >= RT_BIT_32(12))
2208 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegTmp, cbCombined >> 12,
2209 false /*f64Bit*/, false /*fSetFlags*/, true /*fShift12*/);
2210 /* and tmp, tmp, #0xffff */
2211 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
2212 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
2213 /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
2214 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
2215
2216#else
2217# error "Port me"
2218#endif
2219 return off;
2220}
2221
2222
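/**
 * Likewise for a 32-bit stack: the effective stack pointer is the current ESP and ESP is
 * then advanced by cbMem + cbPopAdd.
 */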
2223DECL_FORCE_INLINE_THROW(uint32_t)
2224iemNativeEmitStackPopForRetnUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
2225 uint16_t cbPopAdd)
2226{
2227 /* Use32BitSp: */
2228 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
2229 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem + cbPopAdd);
2230 return off;
2231}
2232
2233
2234/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets. */
2235#define IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_cbPopArgs, a_cbInstr) \
2236 off = iemNativeEmitRetn<IEMMODE_16BIT, false>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr)
2237
2238/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets. */
2239#define IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2240 Assert((a_enmEffOpSize) == IEMMODE_32BIT || (a_enmEffOpSize) == IEMMODE_16BIT); \
2241 off = (a_enmEffOpSize) == IEMMODE_32BIT \
2242 ? iemNativeEmitRetn<IEMMODE_32BIT, false>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr) \
2243 : iemNativeEmitRetn<IEMMODE_16BIT, false>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr)
2244
2245/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code. */
2246#define IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2247 Assert((a_enmEffOpSize) == IEMMODE_64BIT || (a_enmEffOpSize) == IEMMODE_16BIT); \
2248 off = (a_enmEffOpSize) == IEMMODE_64BIT \
2249 ? iemNativeEmitRetn<IEMMODE_64BIT, true>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr) \
2250 : iemNativeEmitRetn<IEMMODE_16BIT, true>(pReNative, off, (a_cbInstr), (a_cbPopArgs), pCallEntry->idxInstr)
2251
2252/** Variant of IEM_MC_RETN_AND_FINISH for pre-386 targets that checks and
2253 * clears flags. */
2254#define IEM_MC_RETN_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbPopArgs, a_cbInstr) \
2255 IEM_MC_RETN_AND_FINISH_THREADED_PC16(a_cbPopArgs, a_cbInstr); \
2256 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2257
2258/** Variant of IEM_MC_RETN_AND_FINISH for 386+ targets that checks and
2259 * clears flags. */
2260#define IEM_MC_RETN_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2261 IEM_MC_RETN_AND_FINISH_THREADED_PC32(a_cbPopArgs, a_cbInstr, a_enmEffOpSize); \
2262 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2263
2264/** Variant of IEM_MC_RETN_AND_FINISH for use in 64-bit code that checks and
2265 * clears flags. */
2266#define IEM_MC_RETN_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbPopArgs, a_cbInstr, a_enmEffOpSize) \
2267 IEM_MC_RETN_AND_FINISH_THREADED_PC64(a_cbPopArgs, a_cbInstr, a_enmEffOpSize); \
2268 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
2269
2270/** IEM_MC[|_FLAT32|_FLAT64]_RETN_AND_FINISH */
2271template<IEMMODE const a_enmEffOpSize, bool const a_f64Bit>
2272DECL_INLINE_THROW(uint32_t)
2273iemNativeEmitRetn(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint16_t cbPopArgs, uint8_t idxInstr)
2274{
2275 RT_NOREF(cbInstr);
2276 AssertCompile(a_enmEffOpSize == IEMMODE_64BIT || a_enmEffOpSize == IEMMODE_32BIT || a_enmEffOpSize == IEMMODE_16BIT);
2277
2278#ifdef VBOX_STRICT
2279 /*
2280 * Check that the fExec flags we've got make sense.
2281 */
2282 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
2283#endif
2284
2285 /*
2286 * To keep things simple we have to commit any pending writes first as we
2287 * may end up making calls.
2288 */
2289 off = iemNativeRegFlushPendingWrites(pReNative, off);
2290
2291 /*
2292 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
2293 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
2294 * directly as the effective stack pointer.
2295 *
2296 * (Code structure is very similar to that of PUSH)
2297 *
2298 * Note! As a simplification, we treat opsize overridden returns (o16 ret)
2299 * in FLAT 32-bit mode as if we weren't in FLAT mode since these
2300 * aren't commonly used (or useful) and thus not in need of optimizing.
2301 *
2302 * Note! For non-flat modes the guest RSP is not allocated for update but
2303 * rather for calculation as the shadowed register would remain modified
2304 * even if the return address throws a #GP(0) due to being outside the
2305 * CS limit causing a wrong stack pointer value in the guest (see the
2306 * near return testcase in bs3-cpu-basic-2). If no exception is thrown
2307 * the shadowing is transferred to the new register returned by
2308 * iemNativeRegAllocTmpForGuestReg() at the end.
2309 */
2310 RT_CONSTEXPR
2311 uint8_t const cbMem = a_enmEffOpSize == IEMMODE_64BIT
2312 ? sizeof(uint64_t)
2313 : a_enmEffOpSize == IEMMODE_32BIT
2314 ? sizeof(uint32_t)
2315 : sizeof(uint16_t);
2316/** @todo the basic flatness could be detected by the threaded compiler step
2317 * like for the other macros... worth it? */
2318 bool const fFlat = a_enmEffOpSize == IEMMODE_64BIT
2319 || (a_enmEffOpSize == IEMMODE_32BIT /* see note */ && IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
2320 uintptr_t const pfnFunction = a_enmEffOpSize == IEMMODE_64BIT
2321 ? (uintptr_t)iemNativeHlpStackFlatFetchU64
2322 : fFlat
2323 ? (uintptr_t)iemNativeHlpStackFlatFetchU32
2324 : a_enmEffOpSize == IEMMODE_32BIT
2325 ? (uintptr_t)iemNativeHlpStackFetchU32
2326 : (uintptr_t)iemNativeHlpStackFetchU16;
2327 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
2328 fFlat ? kIemNativeGstRegUse_ForUpdate
2329 : kIemNativeGstRegUse_Calculation,
2330 true /*fNoVolatileRegs*/);
2331 uint8_t const idxRegEffSp = fFlat ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
2332 /** @todo can do a better job picking the register here. For cbMem >= 4 this
2333 * will be the resulting register value. */
2334 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
2335
2336 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
2337 if (fFlat)
2338 Assert(idxRegEffSp == idxRegRsp);
2339 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
2340 {
2341 Assert(idxRegEffSp != idxRegRsp);
2342 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
2343 kIemNativeGstRegUse_ReadOnly);
2344#ifdef RT_ARCH_AMD64
2345 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2346#else
2347 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2348#endif
2349 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
2350 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
2351 offFixupJumpToUseOtherBitSp = off;
2352 if RT_CONSTEXPR_IF(a_enmEffOpSize == IEMMODE_32BIT)
2353 {
2354 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
2355 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs);
2356 }
2357 else
2358 {
2359 Assert(a_enmEffOpSize == IEMMODE_16BIT);
2360 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
2361 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs,
2362 idxRegMemResult);
2363 }
2364 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2365 }
2366 /* SpUpdateEnd: */
2367 uint32_t const offLabelSpUpdateEnd = off;
2368
2369 /*
2370     * Okay, now prepare for the TLB lookup and jump to the lookup code (or
2371     * to TlbMiss if we're skipping the lookup).
2372 */
2373 uint8_t const iSegReg = fFlat ? UINT8_MAX : X86_SREG_SS;
2374 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
2375 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
2376 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
2377 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
2378 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
2379 : UINT32_MAX;
2380
2381 if (!TlbState.fSkip)
2382 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
2383 else
2384 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
2385
2386 /*
2387     * UseOtherBitSp:
2388 */
2389 if (!fFlat)
2390 {
2391#ifdef RT_ARCH_AMD64
2392 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2393#else
2394 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2395#endif
2396 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
2397 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
2398 off = iemNativeEmitStackPopForRetnUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs,
2399 idxRegMemResult);
2400 else
2401 off = iemNativeEmitStackPopForRetnUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, cbPopArgs);
2402 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
2403 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2404 }
2405
2406 /*
2407 * TlbMiss:
2408 *
2409     * Call helper to do the popping.
2410 */
2411 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
2412
2413#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2414 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2415#else
2416 RT_NOREF(idxInstr);
2417#endif
2418
2419 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
2420 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
2421 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
2422 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
2423
2424
2425 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
2426 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
2427 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
2428
2429#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2430 /* Do delayed EFLAGS calculations. */
2431 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
2432#endif
2433
2434 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
2435 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2436
2437 /* Done setting up parameters, make the call. */
2438 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
2439
2440 /* Move the return register content to idxRegMemResult. */
2441 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
2442 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
2443
2444 /* Restore variables and guest shadow registers to volatile registers. */
2445 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
2446 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
2447
2448#ifdef IEMNATIVE_WITH_TLB_LOOKUP
2449 if (!TlbState.fSkip)
2450 {
2451 /* end of TlbMiss - Jump to the done label. */
2452 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
2453 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
2454
2455 /*
2456 * TlbLookup:
2457 */
2458 off = iemNativeEmitTlbLookup<true, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ>(pReNative, off, &TlbState, iSegReg,
2459 idxLabelTlbLookup, idxLabelTlbMiss,
2460 idxRegMemResult);
2461
2462 /*
2463         * Emit code to load the value (idxRegMemResult holds the address going in and the value coming out).
2464 */
2465 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
2466# ifdef IEM_WITH_TLB_STATISTICS
2467 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
2468 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
2469# endif
2470 switch (cbMem)
2471 {
2472 case 2:
2473 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2474 break;
2475 case 4:
2476 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2477 break;
2478 case 8:
2479 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
2480 break;
2481 default:
2482 AssertFailed();
2483 }
2484
2485 TlbState.freeRegsAndReleaseVars(pReNative);
2486
2487 /*
2488 * TlbDone:
2489 *
2490         * Set the new RSP value (FLAT accesses need to calculate it first) and
2491         * commit the popped return address.
2492 */
2493 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
2494 }
2495#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
2496
2497 /* Check limit before committing RIP and RSP (may #GP(0) + exit TB). */
2498 if RT_CONSTEXPR_IF(!a_f64Bit)
2499/** @todo we can skip this test in FLAT 32-bit mode. */
2500 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2501 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
2502 else if RT_CONSTEXPR_IF(a_enmEffOpSize == IEMMODE_64BIT)
2503 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxRegMemResult, idxInstr);
2504
2505 /* Complete RSP calculation for FLAT mode. */
2506 if (idxRegEffSp == idxRegRsp)
2507 {
2508 if RT_CONSTEXPR_IF(a_enmEffOpSize == IEMMODE_64BIT)
2509 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRsp, sizeof(uint64_t) + cbPopArgs);
2510 else
2511 {
2512 Assert(a_enmEffOpSize == IEMMODE_32BIT);
2513 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxRegRsp, sizeof(uint32_t) + cbPopArgs);
2514 }
2515 }
2516
2517 /* Commit the result and clear any current guest shadows for RIP. */
2518 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
2519 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
2520 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, kIemNativeGstReg_Pc, off);
2521#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2522 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
2523 pReNative->Core.fDebugPcInitialized = true;
2524 Log4(("uPcUpdatingDebug=rip/ret off=%#x\n", off));
2525#endif
2526
2527 /* Need to transfer the shadowing information to the host register containing the updated value now. */
2528 if (!fFlat)
2529 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegRsp, IEMNATIVEGSTREG_GPR(X86_GREG_xSP), off);
2530
2531 iemNativeRegFreeTmp(pReNative, idxRegRsp);
2532 if (idxRegEffSp != idxRegRsp)
2533 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
2534 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
2535 return off;
2536}
2537
2538
2539/*********************************************************************************************************************************
2540* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
2541*********************************************************************************************************************************/
2542
2543#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
2544 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2545
2546/**
2547 * Emits code to check if a \#NM exception should be raised.
2548 *
2549 * @returns New code buffer offset, UINT32_MAX on failure.
2550 * @param pReNative The native recompile state.
2551 * @param off The code buffer offset.
2552 * @param idxInstr The current instruction.
2553 */
2554DECL_INLINE_THROW(uint32_t)
2555iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2556{
2557#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2558 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
2559
2560 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
2561 {
2562#endif
2563 /*
2564 * Make sure we don't have any outstanding guest register writes as we may
2565         * raise an #NM and all guest registers must be up to date in CPUMCTX.
2566 */
2567 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2568 off = iemNativeRegFlushPendingWrites(pReNative, off);
2569
2570#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2571 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2572#else
2573 RT_NOREF(idxInstr);
2574#endif
2575
2576 /* Allocate a temporary CR0 register. */
2577 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2578 kIemNativeGstRegUse_ReadOnly);
2579
2580 /*
2581 * if (cr0 & (X86_CR0_EM | X86_CR0_TS) != 0)
2582 * return raisexcpt();
2583 */
2584 /* Test and jump. */
2585 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseNm>(pReNative, off, idxCr0Reg,
2586 X86_CR0_EM | X86_CR0_TS);
2587
2588 /* Free but don't flush the CR0 register. */
2589 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2590
2591#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2592 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
2593 }
2594 else
2595 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
2596#endif
2597
2598 return off;
2599}
2600
2601
2602#define IEM_MC_MAYBE_RAISE_WAIT_DEVICE_NOT_AVAILABLE() \
2603 off = iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
2604
2605/**
2606 * Emits code to check if a \#NM exception should be raised for the WAIT/FWAIT instruction.
2607 *
2608 * @returns New code buffer offset, UINT32_MAX on failure.
2609 * @param pReNative The native recompile state.
2610 * @param off The code buffer offset.
2611 * @param idxInstr The current instruction.
2612 */
2613DECL_INLINE_THROW(uint32_t)
2614iemNativeEmitMaybeRaiseWaitDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2615{
2616#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2617 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckPotential);
2618
2619 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE))
2620 {
2621#endif
2622 /*
2623 * Make sure we don't have any outstanding guest register writes as we may
2624         * raise an #NM and all guest registers must be up to date in CPUMCTX.
2625 */
2626 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
2627 off = iemNativeRegFlushPendingWrites(pReNative, off);
2628
2629#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2630 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2631#else
2632 RT_NOREF(idxInstr);
2633#endif
2634
2635 /* Allocate a temporary CR0 register. */
2636 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0,
2637 kIemNativeGstRegUse_Calculation);
2638
2639 /*
2640 * if (cr0 & (X86_CR0_MP | X86_CR0_TS) == (X86_CR0_MP | X86_CR0_TS))
2641 * return raisexcpt();
2642 */
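        /* Illustration: unlike the plain #NM check above, which faults if either
           EM or TS is set, WAIT/FWAIT only faults when MP and TS are BOTH set,
           which is why we AND with (MP | TS) and then compare for equality
           instead of testing for any set bit. */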
2643 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2644 /* Test and jump. */
2645 off = iemNativeEmitTbExitIfGpr32EqualsImm<kIemNativeLabelType_RaiseNm>(pReNative, off, idxCr0Reg, X86_CR0_MP | X86_CR0_TS);
2646
2647 /* Free the CR0 register. */
2648 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2649
2650#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2651 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_WAIT_DEVICE_NOT_AVAILABLE;
2652 }
2653 else
2654 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeWaitDeviceNotAvailXcptCheckOmitted);
2655#endif
2656
2657 return off;
2658}
2659
2660
2661#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
2662 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
2663
2664/**
2665 * Emits code to check if a \#MF exception should be raised.
2666 *
2667 * @returns New code buffer offset, UINT32_MAX on failure.
2668 * @param pReNative The native recompile state.
2669 * @param off The code buffer offset.
2670 * @param idxInstr The current instruction.
2671 */
2672DECL_INLINE_THROW(uint32_t)
2673iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2674{
2675 /*
2676 * Make sure we don't have any outstanding guest register writes as we may
2677     * raise an #MF and all guest registers must be up to date in CPUMCTX.
2678 */
2679 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
2680 off = iemNativeRegFlushPendingWrites(pReNative, off);
2681
2682#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2683 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2684#else
2685 RT_NOREF(idxInstr);
2686#endif
2687
2688 /* Allocate a temporary FSW register. */
2689 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
2690 kIemNativeGstRegUse_ReadOnly);
2691
2692 /*
2693 * if (FSW & X86_FSW_ES != 0)
2694 * return raisexcpt();
2695 */
2696 /* Test and jump. */
2697 off = iemNativeEmitTbExitIfBitSetInGpr<kIemNativeLabelType_RaiseMf>(pReNative, off, idxFpuFswReg, X86_FSW_ES_BIT);
2698
2699 /* Free but don't flush the FSW register. */
2700 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
2701
2702 return off;
2703}
2704
2705
2706#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
2707 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2708
2709/**
2710 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
2711 *
2712 * @returns New code buffer offset, UINT32_MAX on failure.
2713 * @param pReNative The native recompile state.
2714 * @param off The code buffer offset.
2715 * @param idxInstr The current instruction.
2716 */
2717DECL_INLINE_THROW(uint32_t)
2718iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2719{
2720#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2721 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
2722
2723 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
2724 {
2725#endif
2726 /*
2727 * Make sure we don't have any outstanding guest register writes as we may
2728         * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2729 */
2730 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2731 off = iemNativeRegFlushPendingWrites(pReNative, off);
2732
2733#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2734 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2735#else
2736 RT_NOREF(idxInstr);
2737#endif
2738
2739 /* Allocate a temporary CR0 and CR4 register. */
2740 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2741 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2742 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2743
2744 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
2745#ifdef RT_ARCH_AMD64
2746 /*
2747 * We do a modified test here:
2748 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
2749 * else { goto RaiseSseRelated; }
2750 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
2751         * all targets except the 386, which doesn't support SSE, so this should
2752 * be a safe assumption.
2753 */
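        /*
         * Worked example (illustration only): X86_CR0_EM=0x04, X86_CR0_TS=0x08
         * and X86_CR4_OSFXSR=0x200.  With OSFXSR=1, EM=0 and TS=0 the sequence
         * below computes ((0x200 | cr0) & 0x20c) ^ 0x200 = 0, i.e. the likely
         * path; any other combination leaves at least one bit set and we take
         * the RaiseSseRelated exit.
         */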
2754 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2755 1+6+3+3+7+7+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2756 //pCodeBuf[off++] = 0xcc;
2757 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
2758 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
2759 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
2760 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
2761 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
2762 off = iemNativeEmitTbExitJccEx<kIemNativeLabelType_RaiseSseRelated>(pReNative, pCodeBuf, off, kIemNativeInstrCond_ne);
2763
2764#elif defined(RT_ARCH_ARM64)
2765 /*
2766 * We do a modified test here:
2767 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
2768 * else { goto RaiseSseRelated; }
2769 */
2770 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2771 1+5 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2772 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2773 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
2774 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
2775 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
2776 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2777 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
2778 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
2779 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
2780 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_RaiseSseRelated>(pReNative, pCodeBuf, off,
2781 idxTmpReg, false /*f64Bit*/);
2782
2783#else
2784# error "Port me!"
2785#endif
2786
2787 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2788 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2789 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2790 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2791
2792#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2793 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
2794 }
2795 else
2796 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
2797#endif
2798
2799 return off;
2800}
2801
2802
2803#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
2804 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
2805
2806/**
2807 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
2808 *
2809 * @returns New code buffer offset, UINT32_MAX on failure.
2810 * @param pReNative The native recompile state.
2811 * @param off The code buffer offset.
2812 * @param idxInstr The current instruction.
2813 */
2814DECL_INLINE_THROW(uint32_t)
2815iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
2816{
2817#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2818 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
2819
2820 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
2821 {
2822#endif
2823 /*
2824 * Make sure we don't have any outstanding guest register writes as we may
2825         * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
2826 */
2827 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
2828 off = iemNativeRegFlushPendingWrites(pReNative, off);
2829
2830#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2831 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2832#else
2833 RT_NOREF(idxInstr);
2834#endif
2835
2836 /* Allocate a temporary CR0, CR4 and XCR0 register. */
2837 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
2838 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
2839 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
2840 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
2841
2842 /*
2843 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
2844 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
2845 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
2846 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
2847 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
2848 * { likely }
2849 * else { goto RaiseAvxRelated; }
2850 */
2851#ifdef RT_ARCH_AMD64
2852 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
2853 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
2854 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
2855 ^ 0x1a) ) { likely }
2856 else { goto RaiseAvxRelated; } */
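        /*
         * Worked example (illustration only): XSAVE_C_SSE is bit 1 and
         * XSAVE_C_YMM is bit 2, so the XOR constant below evaluates to
         * ((4 | 2) << 2) | 2 = 0x1a.  The 'all good' state (TS=0, OSXSAVE=1,
         * SSE=1, YMM=1) assembles to exactly 0b11010 = 0x1a in idxTmpReg, so
         * the XOR leaves zero; anything else is non-zero and we take the
         * RaiseAvxRelated exit.
         */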
2857 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2858 1+6+3+5+3+5+3+7+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2859 //pCodeBuf[off++] = 0xcc;
2860 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
2861 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
2862 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
2863 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2864 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
2865 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
2866 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
2867 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
2868 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
2869 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
2870 off = iemNativeEmitTbExitJccEx<kIemNativeLabelType_RaiseAvxRelated>(pReNative, pCodeBuf, off, kIemNativeInstrCond_ne);
2871
2872#elif defined(RT_ARCH_ARM64)
2873 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
2874 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
2875 else { goto RaiseAvxRelated; } */
2876 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off,
2877 1+6 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
2878 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
2879 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
2880 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
2881 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
2882 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
2883 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
2884 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
2885 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
2886 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
2887 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
2888 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
2889 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_RaiseAvxRelated>(pReNative, pCodeBuf, off,
2890 idxTmpReg, false /*f64Bit*/);
2891
2892#else
2893# error "Port me!"
2894#endif
2895
2896 iemNativeRegFreeTmp(pReNative, idxTmpReg);
2897 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
2898 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
2899 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
2900#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2901 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2902 }
2903 else
2904 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
2905#endif
2906
2907 return off;
2908}
2909
2910
2911#define IEM_MC_RAISE_DIVIDE_ERROR_IF_LOCAL_IS_ZERO(a_uVar) \
2912 off = iemNativeEmitRaiseDivideErrorIfLocalIsZero(pReNative, off, a_uVar, pCallEntry->idxInstr)
2913
2914/**
2915 * Emits code to raise a \#DE if a local variable is zero.
2916 *
2917 * @returns New code buffer offset, UINT32_MAX on failure.
2918 * @param pReNative The native recompile state.
2919 * @param off The code buffer offset.
2920 * @param idxVar The variable to check. This must be 32-bit (EFLAGS).
2921 * @param idxInstr The current instruction.
2922 */
2923DECL_INLINE_THROW(uint32_t)
2924iemNativeEmitRaiseDivideErrorIfLocalIsZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxInstr)
2925{
2926 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2927 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, sizeof(uint32_t));
2928
2929    /* Make sure we don't have any outstanding guest register writes as we may raise a #DE. */
2930 off = iemNativeRegFlushPendingWrites(pReNative, off);
2931
2932 /* Set the instruction number if we're counting. */
2933#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2934 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2935#else
2936 RT_NOREF(idxInstr);
2937#endif
2938
2939 /* Do the job we're here for. */
2940 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off);
2941 off = iemNativeEmitTbExitIfGprIsZero<kIemNativeLabelType_RaiseDe>(pReNative, off, idxVarReg, false /*f64Bit*/);
2942 iemNativeVarRegisterRelease(pReNative, idxVar);
2943
2944 return off;
2945}
2946
2947
2948#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
2949 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
2950
2951/**
2952 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
2953 *
2954 * @returns New code buffer offset, UINT32_MAX on failure.
2955 * @param pReNative The native recompile state.
2956 * @param off The code buffer offset.
2957 * @param idxInstr The current instruction.
2958 * @param idxVarEffAddr Index of the variable containing the effective address to check.
2959 * @param cbAlign The alignment in bytes to check against.
2960 */
2961DECL_INLINE_THROW(uint32_t)
2962iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr,
2963 uint8_t idxVarEffAddr, uint8_t cbAlign)
2964{
2965 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
2966 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
2967
2968 /*
2969 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
2970 */
2971 off = iemNativeRegFlushPendingWrites(pReNative, off);
2972
2973#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2974 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
2975#else
2976 RT_NOREF(idxInstr);
2977#endif
2978
2979 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
2980 off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseGp0>(pReNative, off, idxVarReg, cbAlign - 1);
2981 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
2982
2983 return off;
2984}
2985
2986
2987/*********************************************************************************************************************************
2988* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
2989*********************************************************************************************************************************/
2990
2991/**
2992 * Pushes an IEM_MC_IF_XXX onto the condition stack.
2993 *
2994 * @returns Pointer to the condition stack entry on success, NULL on failure
2995 * (too many nestings)
2996 */
2997DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative)
2998{
2999 uint32_t const idxStack = pReNative->cCondDepth;
3000 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
3001
3002 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
3003 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
3004
3005 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
3006 pEntry->fInElse = false;
3007 pEntry->fIfExitTb = false;
3008 pEntry->fElseExitTb = false;
3009 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
3010 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
3011
3012 return pEntry;
3013}
3014
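/*
 * Usage sketch (illustration only, not taken from any specific MC block): the
 * entries pushed here pair up with the IEM_MC_IF_XXX / IEM_MC_ELSE /
 * IEM_MC_ENDIF macros below, which MC blocks use roughly like this:
 *
 *      IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
 *          ...                               // if-block; state snapshotted
 *      } IEM_MC_ELSE() {
 *          ...                               // else-block; snapshot restored
 *      } IEM_MC_ENDIF();                     // branch states merged, entry popped
 *
 * Each IF pushes an entry, ELSE switches it to the else part, and ENDIF
 * reconciles the register state of the two branches and pops the entry.
 */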
3015
3016/**
3017 * Start of the if-block, snapshotting the register and variable state.
3018 */
3019DECL_INLINE_THROW(void)
3020iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
3021{
3022 Assert(offIfBlock != UINT32_MAX);
3023 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3024 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3025 Assert(!pEntry->fInElse);
3026
3027    /* Define the start of the IF block if requested or for disassembly purposes. */
3028 if (idxLabelIf != UINT32_MAX)
3029 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
3030#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3031 else
3032 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
3033#else
3034 RT_NOREF(offIfBlock);
3035#endif
3036
3037 /* Copy the initial state so we can restore it in the 'else' block. */
3038 pEntry->InitialState = pReNative->Core;
3039}
3040
3041
3042#define IEM_MC_ELSE() } while (0); \
3043 off = iemNativeEmitElse(pReNative, off); \
3044 do {
3045
3046/** Emits code related to IEM_MC_ELSE. */
3047DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3048{
3049 /* Check sanity and get the conditional stack entry. */
3050 Assert(off != UINT32_MAX);
3051 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3052 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3053 Assert(!pEntry->fInElse);
3054
3055    /* We can skip the dirty register flushing and the jump to the endif label
3056       if the if-branch already jumped to a TB exit. */
3057 if (!pEntry->fIfExitTb)
3058 {
3059#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) && 0
3060 /* Writeback any dirty shadow registers. */
3061 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
3062 * in one of the branches and leave guest registers already dirty before the start of the if
3063 * block alone. */
3064 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3065#endif
3066
3067 /* Jump to the endif. */
3068 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
3069 }
3070# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3071 else
3072 Assert(pReNative->Core.offPc == 0);
3073# endif
3074
3075 /* Define the else label and enter the else part of the condition. */
3076 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3077 pEntry->fInElse = true;
3078
3079 /* Snapshot the core state so we can do a merge at the endif and restore
3080 the snapshot we took at the start of the if-block. */
3081 pEntry->IfFinalState = pReNative->Core;
3082 pReNative->Core = pEntry->InitialState;
3083
3084 return off;
3085}
3086
3087
3088#define IEM_MC_ENDIF() } while (0); \
3089 off = iemNativeEmitEndIf(pReNative, off)
3090
3091/** Emits code related to IEM_MC_ENDIF. */
3092DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3093{
3094 /* Check sanity and get the conditional stack entry. */
3095 Assert(off != UINT32_MAX);
3096 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
3097 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
3098
3099#if defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) && 0
3100 off = iemNativeRegFlushDirtyGuest(pReNative, off);
3101#endif
3102
3103 /*
3104 * If either of the branches exited the TB, we can take the state from the
3105 * other branch and skip all the merging headache.
3106 */
3107 bool fDefinedLabels = false;
3108 if (pEntry->fElseExitTb || pEntry->fIfExitTb)
3109 {
3110#ifdef VBOX_STRICT
3111 Assert(pReNative->cCondDepth == 1); /* Assuming this only happens in simple conditional structures. */
3112        Assert(pEntry->fElseExitTb != pEntry->fIfExitTb); /* Assuming we don't have any code where both branches exit. */
3113 PCIEMNATIVECORESTATE const pExitCoreState = pEntry->fIfExitTb && pEntry->fInElse
3114 ? &pEntry->IfFinalState : &pReNative->Core;
3115# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3116 Assert(pExitCoreState->bmGstRegShadowDirty == 0);
3117# endif
3118# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3119 Assert(pExitCoreState->offPc == 0);
3120# endif
3121 RT_NOREF(pExitCoreState);
3122#endif
3123
3124 if (!pEntry->fIfExitTb)
3125 {
3126 Assert(pEntry->fInElse);
3127 pReNative->Core = pEntry->IfFinalState;
3128 }
3129 }
3130 else
3131 {
3132 /*
3133         * Now we have to find common ground between the current state and the
3134         * state at the end of the other branch. Use the smallest common
3135         * denominator and just drop anything that isn't the same in both states.
3136 */
3137 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
3138 * which is why we're doing this at the end of the else-block.
3139         * But we'd need more info about the future for that to be worth the effort. */
3140 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
3141#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
3142 AssertMsgStmt(pReNative->Core.offPc == pOther->offPc,
3143 ("Core.offPc=%#RX64 pOther->offPc=%#RX64\n", pReNative->Core.offPc, pOther->offPc),
3144 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3145#endif
3146
3147 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
3148 {
3149#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3150 /*
3151             * If the two branches differ in which shadow registers are dirty, we
3152             * flush the ones dirty only in the current branch here and deal with
3153             * the ones dirty only in the other branch further down (the 'tail').
3154 */
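            /* Worked example (illustration only): if EAX is dirty only in this
               branch and EDX only in the other, fGstRegDirtyDiff has both bits
               set; fGstRegDirtyHead = EAX and is flushed right below, while
               fGstRegDirtyTail = EDX and is flushed later on the path the
               other branch takes to the endif label. */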
3155 uint64_t const fGstRegDirtyOther = pOther->bmGstRegShadowDirty;
3156 uint64_t const fGstRegDirtyThis = pReNative->Core.bmGstRegShadowDirty;
3157 uint64_t const fGstRegDirtyDiff = fGstRegDirtyOther ^ fGstRegDirtyThis;
3158 uint64_t const fGstRegDirtyHead = fGstRegDirtyThis & fGstRegDirtyDiff;
3159 uint64_t fGstRegDirtyTail = fGstRegDirtyOther & fGstRegDirtyDiff;
3160 if (!fGstRegDirtyDiff)
3161 { /* likely */ }
3162 else
3163 {
3164 //uint64_t const fGstRegDirtyHead = pReNative->Core.bmGstRegShadowDirty & fGstRegDirtyDiff;
3165 if (fGstRegDirtyHead)
3166 {
3167 Log12(("iemNativeEmitEndIf: flushing dirty guest registers in current branch: %RX64\n", fGstRegDirtyHead));
3168 off = iemNativeRegFlushDirtyGuest(pReNative, off, fGstRegDirtyHead);
3169 }
3170 }
3171#endif
3172
3173 /*
3174 * Shadowed guest registers.
3175 *
3176 * We drop any shadows where the two states disagree about where
3177             * things are kept. We may end up flushing more dirty registers
3178             * here if the two branches keep things in different registers.
3179 */
3180 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
3181 if (fGstRegs)
3182 {
3183 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
3184 do
3185 {
3186 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
3187 fGstRegs &= ~RT_BIT_64(idxGstReg);
3188
3189 uint8_t const idxCurHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
3190 uint8_t const idxOtherHstReg = pOther->aidxGstRegShadows[idxGstReg];
3191 if ( idxCurHstReg != idxOtherHstReg
3192 || !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg)))
3193 {
3194#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3195 Log12(("iemNativeEmitEndIf: dropping gst %s (%d) from hst %s (other %d/%#RX64)\n",
3196 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3197 idxOtherHstReg, pOther->bmGstRegShadows));
3198#else
3199 Log12(("iemNativeEmitEndIf: dropping %s gst %s (%d) from hst %s (other %d/%#RX64/%s)\n",
3200 pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "_dirty_" : "clean",
3201 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, g_apszIemNativeHstRegNames[idxCurHstReg],
3202 idxOtherHstReg, pOther->bmGstRegShadows,
3203 pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg) ? "dirty" : "clean"));
3204 if (pOther->bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3205 fGstRegDirtyTail |= RT_BIT_64(idxGstReg);
3206 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg))
3207 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3208#endif
3209 iemNativeRegClearGstRegShadowingOne(pReNative, idxCurHstReg, (IEMNATIVEGSTREG)idxGstReg, off);
3210 }
3211 } while (fGstRegs);
3212 }
3213 else
3214 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
3215
3216#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3217 /*
3218 * Generate jumpy code for flushing dirty registers from the other
3219 * branch that aren't dirty in the current one.
3220 */
3221 if (!fGstRegDirtyTail)
3222 { /* likely */ }
3223 else
3224 {
3225 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeEndIfOtherBranchDirty);
3226 Log12(("iemNativeEmitEndIf: Dirty register only in the other branch: %#RX64 - BAD!\n", fGstRegDirtyTail));
3227
3228 /* First the current branch has to jump over the dirty flushing from the other branch. */
3229 uint32_t const offFixup1 = off;
3230 off = iemNativeEmitJmpToFixed(pReNative, off, off + 10);
3231
3232 /* Put the endif and maybe else label here so the other branch ends up here. */
3233 if (!pEntry->fInElse)
3234 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3235 else
3236 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3237 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3238 fDefinedLabels = true;
3239
3240 /* Flush the dirty guest registers from the other branch. */
3241 while (fGstRegDirtyTail)
3242 {
3243 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegDirtyTail) - 1;
3244 fGstRegDirtyTail &= ~RT_BIT_64(idxGstReg);
3245 Log12(("iemNativeEmitEndIf: tail flushing %s (%d) from other branch %d (cur %d/%#RX64)\n",
3246 g_aGstShadowInfo[idxGstReg].pszName, idxGstReg, pOther->aidxGstRegShadows[idxGstReg],
3247 pReNative->Core.aidxGstRegShadows[idxGstReg], pReNative->Core.bmGstRegShadows));
3248
3249 off = iemNativeRegFlushPendingWriteEx(pReNative, off, (PIEMNATIVECORESTATE)pOther, (IEMNATIVEGSTREG)idxGstReg);
3250
3251 /* Mismatching shadowing should've been dropped in the previous step already. */
3252 Assert( !(pReNative->Core.bmGstRegShadows & RT_BIT_64(idxGstReg))
3253 || pReNative->Core.aidxGstRegShadows[idxGstReg] == pOther->aidxGstRegShadows[idxGstReg]);
3254 }
3255
3256 /* Here is the actual endif label, fixup the above jump to land here. */
3257 iemNativeFixupFixedJump(pReNative, offFixup1, off);
3258 }
3259#endif
3260
3261 /*
3262 * Check variables next. For now we must require them to be identical
3263 * or stuff we can recreate. (No code is emitted here.)
3264 */
3265 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
3266#ifdef VBOX_STRICT
3267 uint32_t const offAssert = off;
3268#endif
3269 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
3270 if (fVars)
3271 {
3272 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
3273 do
3274 {
3275 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
3276 fVars &= ~RT_BIT_32(idxVar);
3277
3278 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
3279 {
3280 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
3281 continue;
3282 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
3283 {
3284 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3285 if (idxHstReg != UINT8_MAX)
3286 {
3287 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3288 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3289 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
3290 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3291 }
3292 continue;
3293 }
3294 }
3295 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
3296 continue;
3297
3298 /* Irreconcilable, so drop it. */
3299 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
3300 if (idxHstReg != UINT8_MAX)
3301 {
3302 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3303 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3304 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
3305 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3306 }
3307 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
3308 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
3309 } while (fVars);
3310 }
3311 Assert(off == offAssert);
3312
3313 /*
3314             * Finally, check that the host register allocations match.
3315 */
3316 AssertMsgStmt((pReNative->Core.bmHstRegs & (pReNative->Core.bmHstRegs ^ pOther->bmHstRegs)) == 0,
3317 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
3318 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
3319 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
3320 }
3321 }
3322
3323 /*
3324 * Define the endif label and maybe the else one if we're still in the 'if' part.
3325 */
3326 if (!fDefinedLabels)
3327 {
3328 if (!pEntry->fInElse)
3329 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
3330 else
3331 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
3332 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
3333 }
3334
3335 /* Pop the conditional stack.*/
3336 pReNative->cCondDepth -= 1;
3337
3338 return off;
3339}
3340
3341
3342/**
3343 * Helper function to convert X86_EFL_xxx masks to liveness masks.
3344 *
3345 * The compiler should be able to figure this out at compile time, so sprinkling
3346 * constexpr wherever possible here to nudge it along.
3347 */
3348template<uint32_t const a_fEfl>
3349RT_CONSTEXPR uint64_t iemNativeEflagsToLivenessMask(void)
3350{
3351 return (a_fEfl & ~X86_EFL_STATUS_BITS ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OTHER) : 0)
3352 | (a_fEfl & X86_EFL_CF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_CF) : 0)
3353 | (a_fEfl & X86_EFL_PF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_PF) : 0)
3354 | (a_fEfl & X86_EFL_AF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_AF) : 0)
3355 | (a_fEfl & X86_EFL_ZF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_ZF) : 0)
3356 | (a_fEfl & X86_EFL_SF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_SF) : 0)
3357 | (a_fEfl & X86_EFL_OF ? RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_OF) : 0);
3358}
3359
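/*
 * Example (illustration only):
 *      iemNativeEflagsToLivenessMask<X86_EFL_ZF | X86_EFL_CF>()
 * evaluates to RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_ZF) | RT_BIT_64(IEMLIVENESSBIT_IDX_EFL_CF),
 * while any non-status bit (e.g. X86_EFL_DF) maps to IEMLIVENESSBIT_IDX_EFL_OTHER.
 */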
3360
3361/**
3362 * Helper function to convert a single X86_EFL_xxxx value to bit number.
3363 *
3364 * The compiler should be able to figure this out at compile time, so sprinkling
3365 * constexpr wherever possible here to nudge it along.
3366 */
3367template<uint32_t const a_fEfl>
3368RT_CONSTEXPR unsigned iemNativeEflagsToSingleBitNo(void)
3369{
3370 AssertCompile( a_fEfl == X86_EFL_CF
3371 || a_fEfl == X86_EFL_PF
3372 || a_fEfl == X86_EFL_AF
3373 || a_fEfl == X86_EFL_ZF
3374 || a_fEfl == X86_EFL_SF
3375 || a_fEfl == X86_EFL_OF
3376 || a_fEfl == X86_EFL_DF);
3377 return a_fEfl == X86_EFL_CF ? X86_EFL_CF_BIT
3378 : a_fEfl == X86_EFL_PF ? X86_EFL_PF_BIT
3379 : a_fEfl == X86_EFL_AF ? X86_EFL_AF_BIT
3380 : a_fEfl == X86_EFL_ZF ? X86_EFL_ZF_BIT
3381 : a_fEfl == X86_EFL_SF ? X86_EFL_SF_BIT
3382 : a_fEfl == X86_EFL_OF ? X86_EFL_OF_BIT
3383 : X86_EFL_DF_BIT;
3384}
3385
3386
3387#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
3388 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits), iemNativeEflagsToLivenessMask<a_fBits>()); \
3389 do {
3390
3391/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
3392DECL_INLINE_THROW(uint32_t)
3393iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl, uint64_t fLivenessEflBits)
3394{
3395 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fBitsInEfl);
3396 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3397 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3398
3399 /* Get the eflags. */
3400 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3401
3402 /* Test and jump. */
3403 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3404
3405 /* Free but don't flush the EFlags register. */
3406 iemNativeRegFreeTmp(pReNative, idxEflReg);
3407
3408 /* Make a copy of the core state now as we start the if-block. */
3409 iemNativeCondStartIfBlock(pReNative, off);
3410
3411 return off;
3412}
3413
3414
3415#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
3416 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits), iemNativeEflagsToLivenessMask<a_fBits>()); \
3417 do {
3418
3419/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
3420DECL_INLINE_THROW(uint32_t)
3421iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl, uint64_t fLivenessEflBits)
3422{
3423 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fBitsInEfl);
3424 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
3425 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3426
3427 /* Get the eflags. */
3428 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3429
3430 /* Test and jump. */
3431 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
3432
3433 /* Free but don't flush the EFlags register. */
3434 iemNativeRegFreeTmp(pReNative, idxEflReg);
3435
3436 /* Make a copy of the core state now as we start the if-block. */
3437 iemNativeCondStartIfBlock(pReNative, off);
3438
3439 return off;
3440}
3441
3442
3443#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
3444 off = iemNativeEmitIfEflagsBitSet(pReNative, off, iemNativeEflagsToSingleBitNo<a_fBit>(), \
3445 iemNativeEflagsToLivenessMask<a_fBit>()); \
3446 do {
3447
3448/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
3449DECL_INLINE_THROW(uint32_t)
3450iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, unsigned iBitNo, uint64_t fLivenessEflBit)
3451{
3452 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3453 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3454 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3455
3456 /* Get the eflags. */
3457 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBit);
3458
3459 /* Test and jump. */
3460 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3461
3462 /* Free but don't flush the EFlags register. */
3463 iemNativeRegFreeTmp(pReNative, idxEflReg);
3464
3465 /* Make a copy of the core state now as we start the if-block. */
3466 iemNativeCondStartIfBlock(pReNative, off);
3467
3468 return off;
3469}
3470
3471
3472#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
3473 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, iemNativeEflagsToSingleBitNo<a_fBit>(), \
3474 iemNativeEflagsToLivenessMask<a_fBit>()); \
3475 do {
3476
3477/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
3478DECL_INLINE_THROW(uint32_t)
3479iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, unsigned iBitNo, uint64_t fLivenessEflBit)
3480{
3481 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3482 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3483 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3484
3485 /* Get the eflags. */
3486 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBit);
3487
3488 /* Test and jump. */
3489 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3490
3491 /* Free but don't flush the EFlags register. */
3492 iemNativeRegFreeTmp(pReNative, idxEflReg);
3493
3494 /* Make a copy of the core state now as we start the if-block. */
3495 iemNativeCondStartIfBlock(pReNative, off);
3496
3497 return off;
3498}
3499
3500
3501#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
3502 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, false /*fInverted*/, \
3503 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3504 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3505 iemNativeEflagsToLivenessMask<a_fBit1 | a_fBit2>()); \
3506 do {
3507
3508#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
3509 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, true /*fInverted*/, \
3510 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3511 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3512 iemNativeEflagsToLivenessMask<a_fBit1 | a_fBit2>()); \
3513 do {
3514
3515/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
3516DECL_INLINE_THROW(uint32_t)
3517iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3518 bool fInverted, unsigned iBitNo1, unsigned iBitNo2, uint64_t fLivenessEflBits)
3519{
3520 Assert(iBitNo1 != iBitNo2);
3521 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3522 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3523 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3524
3525 /* Get the eflags. */
3526 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3527
3528#ifdef RT_ARCH_AMD64
3529 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, RT_BIT_64(iBitNo1));
3530
3531 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3532 if (iBitNo1 > iBitNo2)
3533 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3534 else
3535 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3536 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3537
3538#elif defined(RT_ARCH_ARM64)
3539 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3540 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3541
3542 /* and tmpreg, eflreg, #1<<iBitNo1 */
3543 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3544
3545    /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3546 if (iBitNo1 > iBitNo2)
3547 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3548 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3549 else
3550 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3551 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3552
3553 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3554
3555#else
3556# error "Port me"
3557#endif
3558
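    /* Worked example (illustration only): for IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF)
       we get iBitNo1=7 and iBitNo2=11; tmpreg isolates SF, shifts it up to bit 11 and
       XORs with EFLAGS, so bit iBitNo2 of tmpreg ends up as SF ^ OF, i.e. set exactly
       when the two flags differ. */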
3559    /* Test (bit iBitNo2 is set in tmpreg if the flags are not equal) and jump. */
3560 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3561 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3562
3563 /* Free but don't flush the EFlags and tmp registers. */
3564 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3565 iemNativeRegFreeTmp(pReNative, idxEflReg);
3566
3567 /* Make a copy of the core state now as we start the if-block. */
3568 iemNativeCondStartIfBlock(pReNative, off);
3569
3570 return off;
3571}
3572
3573
3574#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
3575 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, false /*fInverted*/, \
3576 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3577 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3578 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3579 iemNativeEflagsToLivenessMask<a_fBit | a_fBit1 | a_fBit2>()); \
3580 do {
3581
3582#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
3583 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, true /*fInverted*/, \
3584 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3585 iemNativeEflagsToSingleBitNo<a_fBit1>(), \
3586 iemNativeEflagsToSingleBitNo<a_fBit2>(), \
3587 iemNativeEflagsToLivenessMask<a_fBit | a_fBit1 | a_fBit2>()); \
3588 do {
3589
3590/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
3591 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
3592DECL_INLINE_THROW(uint32_t)
3593iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fInverted,
3594 unsigned iBitNo, unsigned iBitNo1, unsigned iBitNo2, uint64_t fLivenessEflBits)
3595{
3596 Assert(iBitNo1 != iBitNo);
3597 Assert(iBitNo2 != iBitNo);
3598 Assert(iBitNo2 != iBitNo1);
3599 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo) | RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3600 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo) | RT_BIT_32(iBitNo1) | RT_BIT_32(iBitNo2));
3601 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3602
3603    /* We need an if-block label for the inverted variant. */
3604 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
3605 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
3606
3607 /* Get the eflags. */
3608 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBits);
3609
3610#ifdef RT_ARCH_AMD64
3611 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, RT_BIT_64(iBitNo1)); /* This must come before we jump anywhere! */
3612#elif defined(RT_ARCH_ARM64)
3613 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3614#endif
3615
3616 /* Check for the lone bit first. */
3617 if (!fInverted)
3618 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
3619 else
3620 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
3621
3622 /* Then extract and compare the other two bits. */
3623#ifdef RT_ARCH_AMD64
3624 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3625 if (iBitNo1 > iBitNo2)
3626 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
3627 else
3628 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
3629 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
3630
3631#elif defined(RT_ARCH_ARM64)
3632 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3633
3634 /* and tmpreg, eflreg, #1<<iBitNo1 */
3635 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
3636
3637 /* eor tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
3638 if (iBitNo1 > iBitNo2)
3639 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3640 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
3641 else
3642 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
3643 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
3644
3645 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3646
3647#else
3648# error "Port me"
3649#endif
3650
3651 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
3652 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
3653 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
3654
3655 /* Free but don't flush the EFlags and tmp registers. */
3656 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3657 iemNativeRegFreeTmp(pReNative, idxEflReg);
3658
3659 /* Make a copy of the core state now as we start the if-block. */
3660 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
3661
3662 return off;
3663}
3664
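/*
 * Note (illustrative only, not part of the recompiler): the and/shift/xor
 * sequence emitted above - and by the two-bits-equal emitter preceding it -
 * is a branchless "are these two EFLAGS bits equal" test.  With fEfl standing
 * for the guest EFLAGS value, the generated native code computes the
 * equivalent of:
 *
 *      uint32_t uTmp = fEfl & RT_BIT_32(iBitNo1);  // isolate bit #1
 *      if (iBitNo1 > iBitNo2)
 *          uTmp >>= iBitNo1 - iBitNo2;             // move it to bit #2's position
 *      else
 *          uTmp <<= iBitNo2 - iBitNo1;
 *      uTmp ^= fEfl;                               // bit #2 is now (bit1 != bit2)
 *      bool const fNotEqual = RT_BOOL(uTmp & RT_BIT_32(iBitNo2));
 *
 * so a single test of bit iBitNo2 in the temporary decides the branch, e.g.
 * the SF == OF part of the 'greater'/'less' style conditions.
 */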
3665
3666#define IEM_MC_IF_CX_IS_NZ() \
3667 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
3668 do {
3669
3670/** Emits code for IEM_MC_IF_CX_IS_NZ. */
3671DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3672{
3673 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3674
3675 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3676 kIemNativeGstRegUse_ReadOnly);
3677 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
3678 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3679
3680 iemNativeCondStartIfBlock(pReNative, off);
3681 return off;
3682}
3683
3684
3685#define IEM_MC_IF_ECX_IS_NZ() \
3686 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
3687 do {
3688
3689#define IEM_MC_IF_RCX_IS_NZ() \
3690 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
3691 do {
3692
3693/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
3694DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3695{
3696 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3697
3698 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3699 kIemNativeGstRegUse_ReadOnly);
3700 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
3701 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3702
3703 iemNativeCondStartIfBlock(pReNative, off);
3704 return off;
3705}
3706
3707
3708#define IEM_MC_IF_CX_IS_NOT_ONE() \
3709 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
3710 do {
3711
3712/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
3713DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
3714{
3715 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3716
3717 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3718 kIemNativeGstRegUse_ReadOnly);
3719#ifdef RT_ARCH_AMD64
3720 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3721#else
3722 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3723 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3724 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3725#endif
3726 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3727
3728 iemNativeCondStartIfBlock(pReNative, off);
3729 return off;
3730}
3731
3732
3733#define IEM_MC_IF_ECX_IS_NOT_ONE() \
3734 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
3735 do {
3736
3737#define IEM_MC_IF_RCX_IS_NOT_ONE() \
3738 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
3739 do {
3740
3741/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
3742DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
3743{
3744 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3745
3746 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3747 kIemNativeGstRegUse_ReadOnly);
3748 if (f64Bit)
3749 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3750 else
3751 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3752 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3753
3754 iemNativeCondStartIfBlock(pReNative, off);
3755 return off;
3756}
3757
3758
3759#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3760 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, \
3761 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3762 iemNativeEflagsToLivenessMask<a_fBit>()); \
3763 do {
3764
3765#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3766 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, \
3767 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3768 iemNativeEflagsToLivenessMask<a_fBit>()); \
3769 do {
3770
3771/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
3772 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3773DECL_INLINE_THROW(uint32_t)
3774iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
3775 bool fCheckIfSet, unsigned iBitNo, uint64_t fLivenessEflBit)
3776{
3777 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3778 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3779 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3780
3781 /* We have to load both RCX and EFLAGS before we can start branching;
3782 otherwise we'll end up in the else-block with an inconsistent
3783 register allocator state.
3784 Doing EFLAGS first as it's more likely to be loaded, right? */
3785 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEflBit);
3786 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3787 kIemNativeGstRegUse_ReadOnly);
3788
3789 /** @todo we could reduce this to a single branch instruction by spending a
3790 * temporary register and some setnz stuff. Not sure if loops are
3791 * worth it. */
3792 /* Check CX. */
3793#ifdef RT_ARCH_AMD64
3794 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3795#else
3796 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3797 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
3798 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3799#endif
3800
3801 /* Check the EFlags bit. */
3802 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3803 !fCheckIfSet /*fJmpIfSet*/);
3804
3805 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3806 iemNativeRegFreeTmp(pReNative, idxEflReg);
3807
3808 iemNativeCondStartIfBlock(pReNative, off);
3809 return off;
3810}
3811
3812
3813#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3814 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, false /*f64Bit*/, \
3815 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3816 iemNativeEflagsToLivenessMask<a_fBit>()); \
3817 do {
3818
3819#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3820 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, false /*f64Bit*/, \
3821 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3822 iemNativeEflagsToLivenessMask<a_fBit>()); \
3823 do {
3824
3825#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
3826 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, true /*fCheckIfSet*/, true /*f64Bit*/, \
3827 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3828 iemNativeEflagsToLivenessMask<a_fBit>()); \
3829 do {
3830
3831#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
3832 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, false /*fCheckIfSet*/, true /*f64Bit*/, \
3833 iemNativeEflagsToSingleBitNo<a_fBit>(), \
3834 iemNativeEflagsToLivenessMask<a_fBit>()); \
3835 do {
3836
3837/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
3838 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
3839 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
3840 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
3841DECL_INLINE_THROW(uint32_t)
3842iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fCheckIfSet, bool f64Bit,
3843 unsigned iBitNo, uint64_t fLivenessEFlBit)
3844
3845{
3846 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, RT_BIT_32(iBitNo));
3847 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, RT_BIT_32(iBitNo));
3848 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3849
3850 /* We have to load both RCX and EFLAGS before we can start branching;
3851 otherwise we'll end up in the else-block with an inconsistent
3852 register allocator state.
3853 Doing EFLAGS first as it's more likely to be loaded, right? */
3854 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsReadOnly(pReNative, &off, fLivenessEFlBit);
3855 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
3856 kIemNativeGstRegUse_ReadOnly);
3857
3858 /** @todo we could reduce this to a single branch instruction by spending a
3859 * temporary register and some setnz stuff. Not sure if loops are
3860 * worth it. */
3861 /* Check RCX/ECX. */
3862 if (f64Bit)
3863 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3864 else
3865 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
3866
3867 /* Check the EFlags bit. */
3868 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
3869 !fCheckIfSet /*fJmpIfSet*/);
3870
3871 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
3872 iemNativeRegFreeTmp(pReNative, idxEflReg);
3873
3874 iemNativeCondStartIfBlock(pReNative, off);
3875 return off;
3876}
3877
3878
3879#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
3880 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
3881 do {
3882
3883/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
3884DECL_INLINE_THROW(uint32_t)
3885iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
3886{
3887 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3888
3889 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
3890 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
3891 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
3892 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
3893
3894 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
3895
3896 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
3897
3898 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
3899
3900 iemNativeCondStartIfBlock(pReNative, off);
3901 return off;
3902}
3903
3904
3905#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
3906 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
3907 do {
3908
3909/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
3910DECL_INLINE_THROW(uint32_t)
3911iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
3912{
3913 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative);
3914 Assert(iGReg < 16);
3915
3916 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3917 kIemNativeGstRegUse_ReadOnly);
3918
3919 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
3920
3921 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
3922
3923 iemNativeCondStartIfBlock(pReNative, off);
3924 return off;
3925}
3926
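/*
 * Note (hypothetical usage sketch): each IEM_MC_IF_XXX macro above ends in
 * 'do {' and is closed by the matching IEM_MC_ELSE()/IEM_MC_ENDIF() macros
 * (defined elsewhere in the recompiler sources), so a recompiled MC block
 * reads like structured C while the emitters lay down the native branches
 * via iemNativeCondPushIf/iemNativeCondStartIfBlock:
 *
 *      IEM_MC_IF_GREG_BIT_SET(X86_GREG_xAX, 3) {
 *          // statements recompiled into the if-block
 *      } IEM_MC_ELSE() {
 *          // statements recompiled into the else-block
 *      } IEM_MC_ENDIF();
 *
 * The MC block itself is made up for illustration; only the macro names are
 * real.
 */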
3927
3928
3929/*********************************************************************************************************************************
3930* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
3931*********************************************************************************************************************************/
3932
3933#define IEM_MC_NOREF(a_Name) \
3934 RT_NOREF_PV(a_Name)
3935
3936#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
3937 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
3938
3939#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
3940 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
3941
3942#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
3943 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
3944
3945#define IEM_MC_LOCAL(a_Type, a_Name) \
3946 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
3947
3948#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
3949 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
3950
3951#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
3952 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
3953
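/*
 * Note (illustrative): inside a recompiled MC block a declaration like
 *
 *      IEM_MC_LOCAL(uint32_t, u32Tmp);
 *
 * therefore expands to
 *
 *      uint8_t const u32Tmp = iemNativeVarAlloc(pReNative, sizeof(uint32_t));
 *
 * i.e. 'u32Tmp' becomes a packed index into pReNative->Core.aVars rather than
 * a host C variable holding the value; later IEM_MC statements hand that index
 * to the emitters, which bind it to a host register or stack slot on demand.
 * The IEM_MC_ARG variants work the same way but additionally record the call
 * argument slot (a_iArg).
 */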
3954
3955/**
3956 * Sets the host register for @a idxVarRc to @a idxReg.
3957 *
3958 * Any guest register shadowing will be implicitly dropped by this call.
3959 *
3960 * The variable must not have any register associated with it (causes
3961 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
3962 * implied.
3963 *
3964 * @returns idxReg
3965 * @param pReNative The recompiler state.
3966 * @param idxVar The variable.
3967 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
3968 * @param off For recording in debug info.
3969 * @param fAllocated Set if the register is already allocated, false if not.
3970 *
3971 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
3972 */
3973DECL_INLINE_THROW(uint8_t)
3974iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off, bool fAllocated)
3975{
3976 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3977 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3978 Assert(!pVar->fRegAcquired);
3979 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3980 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
3981 AssertStmt(RT_BOOL(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)) == fAllocated,
3982 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
3983
3984 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
3985 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
3986
3987 iemNativeVarSetKindToStack(pReNative, idxVar);
3988 pVar->idxReg = idxReg;
3989
3990 return idxReg;
3991}
3992
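/*
 * Typical use of iemNativeVarRegisterSet (illustrative; mirrors the AIMPL
 * call emitter further down): after a helper call the result already sits in
 * IEMNATIVE_CALL_RET_GREG, so instead of copying it the result variable is
 * simply bound to that register:
 *
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnHelper);
 *      iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG,
 *                              off, false); // fAllocated = false
 *
 * ('pfnHelper' and 'idxVarRc' are placeholders for whatever the caller has.)
 */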
3993
3994/**
3995 * A convenient helper function.
3996 */
3997DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
3998 uint8_t idxReg, uint32_t *poff)
3999{
4000 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff, false /*fAllocated*/);
4001 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
4002 return idxReg;
4003}
4004
4005
4006/**
4007 * This is called by IEM_MC_END() to clean up all variables.
4008 */
4009DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
4010{
4011 uint32_t const bmVars = pReNative->Core.bmVars;
4012 if (bmVars != 0)
4013 iemNativeVarFreeAllSlow(pReNative, bmVars);
4014 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
4015 Assert(pReNative->Core.bmStack == 0);
4016}
4017
4018
4019#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
4020
4021/**
4022 * This is called by IEM_MC_FREE_LOCAL.
4023 */
4024DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4025{
4026 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4027 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
4028 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
4029}
4030
4031
4032#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
4033
4034/**
4035 * This is called by IEM_MC_FREE_ARG.
4036 */
4037DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
4038{
4039 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4040 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
4041 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
4042}
4043
4044
4045#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
4046
4047/**
4048 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
4049 */
4050DECL_INLINE_THROW(uint32_t)
4051iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
4052{
4053 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
4054 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
4055 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4056 Assert( pVarDst->cbVar == sizeof(uint16_t)
4057 || pVarDst->cbVar == sizeof(uint32_t));
4058
4059 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
4060 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
4061 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
4062 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
4063 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4064
4065 Assert(pVarDst->cbVar < pVarSrc->cbVar);
4066
4067 /*
4068 * Special case for immediates.
4069 */
4070 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
4071 {
4072 switch (pVarDst->cbVar)
4073 {
4074 case sizeof(uint16_t):
4075 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
4076 break;
4077 case sizeof(uint32_t):
4078 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
4079 break;
4080 default: AssertFailed(); break;
4081 }
4082 }
4083 else
4084 {
4085 /*
4086 * The generic solution for now.
4087 */
4088 /** @todo optimize this by having the python script make sure the source
4089 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
4090 * statement. Then we could just transfer the register assignments. */
4091 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
4092 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
4093 switch (pVarDst->cbVar)
4094 {
4095 case sizeof(uint16_t):
4096 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
4097 break;
4098 case sizeof(uint32_t):
4099 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
4100 break;
4101 default: AssertFailed(); break;
4102 }
4103 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
4104 iemNativeVarRegisterRelease(pReNative, idxVarDst);
4105 }
4106 return off;
4107}
4108
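/*
 * Note (illustrative): the narrowing assignment above keeps only the low bits
 * of the source, e.g. for a 32-bit source and a 16-bit destination
 *
 *      src = 0x12345678  ->  dst = 0x5678
 *
 * which the immediate path achieves via the (uint16_t)/(uint32_t) casts and
 * the stack/register path via the LoadGprFromGpr16/32 emitters.
 */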
4109
4110
4111/*********************************************************************************************************************************
4112* Emitters for IEM_MC_CALL_CIMPL_XXX *
4113*********************************************************************************************************************************/
4114
4115/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
4116DECL_INLINE_THROW(uint32_t)
4117iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
4118 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
4119
4120{
4121 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
4122 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
4123
4124#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4125 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
4126 when a call clobbers any of the relevant control registers. */
4127# if 1
4128 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
4129 {
4130 /* Likely as long as call+ret are done via cimpl. */
4131 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
4132 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
4133 }
4134 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
4135 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4136 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
4137 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4138 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
4139 else
4140 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4141 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4142 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4143
4144# else
4145 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
4146 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
4147 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
4148 pReNative->fSimdRaiseXcptChecksEmitted = 0;
4149 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
4150 || pfnCImpl == (uintptr_t)iemCImpl_callf
4151 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
4152 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
4153 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
4154 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
4155 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
4156# endif
4157
4158# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
4159 /* Mark the host floating point control register as not synced if MXCSR is modified. */
4160 if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_MxCsr))
4161 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
4162# endif
4163#endif
4164
4165 /*
4166 * Do all the call setup and cleanup.
4167 */
4168 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
4169
4170 /*
4171 * Load the two or three hidden arguments.
4172 */
4173#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4174 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
4175 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4176 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
4177#else
4178 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
4179 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
4180#endif
4181
4182 /*
4183 * Make the call and check the return code.
4184 *
4185 * Shadow PC copies are always flushed here, other stuff depends on flags.
4186 * Segment and general purpose registers are explicitly flushed via the
4187 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
4188 * macros.
4189 */
4190 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
4191#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
4192 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
4193#endif
4194 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
4195 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
4196 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
4197 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
4198
4199#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
4200 pReNative->Core.fDebugPcInitialized = false;
4201 Log4(("fDebugPcInitialized=false cimpl off=%#x (v1)\n", off));
4202#endif
4203
4204 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
4205}
4206
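/*
 * Note (illustrative): ignoring the Windows/AMD64 strict-build special case,
 * the code generated above amounts to making this C call from the recompiled
 * translation block:
 *
 *      VBOXSTRICTRC rcStrict = pfnCImpl(pVCpu, cbInstr, a0, ...);
 *
 * with pVCpu and cbInstr loaded into IEMNATIVE_CALL_ARG0/1_GREG as the hidden
 * arguments, the shadowed PC (and normally EFLAGS) copies flushed according
 * to fGstShwFlush, and the returned status checked by
 * iemNativeEmitCheckCallRetAndPassUp.  On Windows/AMD64 strict builds the
 * VBOXSTRICTRC return travels via a hidden stack slot instead (see the
 * VBOXSTRICTRC_STRICT_ENABLED branches above).
 */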
4207
4208#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
4209 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
4210
4211/** Emits code for IEM_MC_CALL_CIMPL_1. */
4212DECL_INLINE_THROW(uint32_t)
4213iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4214 uintptr_t pfnCImpl, uint8_t idxArg0)
4215{
4216 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4217 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
4218}
4219
4220
4221#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
4222 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
4223
4224/** Emits code for IEM_MC_CALL_CIMPL_2. */
4225DECL_INLINE_THROW(uint32_t)
4226iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4227 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
4228{
4229 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4230 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4231 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
4232}
4233
4234
4235#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
4236 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4237 (uintptr_t)a_pfnCImpl, a0, a1, a2)
4238
4239/** Emits code for IEM_MC_CALL_CIMPL_3. */
4240DECL_INLINE_THROW(uint32_t)
4241iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4242 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4243{
4244 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4245 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4246 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4247 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
4248}
4249
4250
4251#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
4252 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4253 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
4254
4255/** Emits code for IEM_MC_CALL_CIMPL_4. */
4256DECL_INLINE_THROW(uint32_t)
4257iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4258 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4259{
4260 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4261 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4262 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4263 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4264 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
4265}
4266
4267
4268#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
4269 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
4270 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
4271
4272/** Emits code for IEM_MC_CALL_CIMPL_5. */
4273DECL_INLINE_THROW(uint32_t)
4274iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
4275 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
4276{
4277 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
4278 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
4279 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
4280 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
4281 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
4282 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
4283}
4284
4285
4286/** Recompiler debugging: Flush guest register shadow copies. */
4287#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
4288
4289
4290
4291/*********************************************************************************************************************************
4292* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
4293*********************************************************************************************************************************/
4294
4295/**
4296 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
4297 */
4298DECL_INLINE_THROW(uint32_t)
4299iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4300 uintptr_t pfnAImpl, uint8_t cArgs)
4301{
4302 if (idxVarRc != UINT8_MAX)
4303 {
4304 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
4305 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
4306 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
4307 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
4308 }
4309
4310 /*
4311 * Do all the call setup and cleanup.
4312 *
4313 * Only pending guest register writes in call volatile registers need flushing here, as
4314 * assembly helpers can't throw and don't access anything living in CPUMCTX; they only
4315 * access their parameters. Call volatile registers are always flushed by iemNativeEmitCallCommon()
4316 * regardless of the fFlushPendingWrites parameter.
4317 */
4318 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
4319
4320 /*
4321 * Make the call and update the return code variable if we've got one.
4322 */
4323 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnAImpl);
4324 if (idxVarRc != UINT8_MAX)
4325 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off, false /*fAllocated*/);
4326
4327 return off;
4328}
4329
4330
4331
4332#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
4333 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
4334
4335#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
4336 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
4337
4338/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
4339DECL_INLINE_THROW(uint32_t)
4340iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
4341{
4342 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
4343}
4344
4345
4346#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
4347 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
4348
4349#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
4350 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
4351
4352/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
4353DECL_INLINE_THROW(uint32_t)
4354iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
4355{
4356 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4357 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
4358}
4359
4360
4361#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
4362 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
4363
4364#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
4365 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
4366
4367/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
4368DECL_INLINE_THROW(uint32_t)
4369iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4370 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
4371{
4372 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4373 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4374 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
4375}
4376
4377
4378#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
4379 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
4380
4381#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
4382 IEM_MC_LOCAL(a_rcType, a_rc); \
4383 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
4384
4385/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
4386DECL_INLINE_THROW(uint32_t)
4387iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4388 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
4389{
4390 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4391 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4392 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4393 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
4394}
4395
4396
4397#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
4398 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4399
4400#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
4401 IEM_MC_LOCAL(a_rcType, a_rc); \
4402 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
4403
4404/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
4405DECL_INLINE_THROW(uint32_t)
4406iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
4407 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
4408{
4409 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
4410 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
4411 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
4412 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
4413 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
4414}
4415
4416
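/*
 * Note (hypothetical usage sketch): the argument allocators earlier in this
 * file and the AIMPL call emitters above combine roughly like this in an MC
 * block (the worker function name is a stand-in, not a real symbol):
 *
 *      IEM_MC_ARG(uint64_t *, pu64Dst, 0);
 *      IEM_MC_ARG(uint64_t,   u64Src,  1);
 *      IEM_MC_ARG(uint32_t *, pEFlags, 2);
 *      ...
 *      IEM_MC_CALL_VOID_AIMPL_3(pfnSomeWorkerU64, pu64Dst, u64Src, pEFlags);
 *
 * Each IEM_MC_ARG binds a recompiler variable to argument slot N, and the
 * call emitter has iemNativeEmitCallCommon() load those variables into the
 * host calling convention registers before emitting the actual call.
 */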
4417
4418/*********************************************************************************************************************************
4419* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
4420*********************************************************************************************************************************/
4421
4422#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
4423 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
4424
4425#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4426 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
4427
4428#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4429 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
4430
4431#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4432 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
4433
4434
4435/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
4436 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
4437DECL_INLINE_THROW(uint32_t)
4438iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
4439{
4440 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4441 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4442 Assert(iGRegEx < 20);
4443
4444 /* Same discussion as in iemNativeEmitFetchGregU16 */
4445 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4446 kIemNativeGstRegUse_ReadOnly);
4447
4448 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4449 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4450
4451 /* The value is zero-extended to the full 64-bit host register width. */
4452 if (iGRegEx < 16)
4453 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4454 else
4455 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4456
4457 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4458 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4459 return off;
4460}
4461
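/*
 * Note (illustrative): with the extended register encoding used by the
 * threaded functions, indices 16..19 select AH/CH/DH/BH, i.e. bits 15:8 of
 * the first four GPRs.  With uGst standing for the full 64-bit guest register
 * value, the emitted code computes:
 *
 *      uint8_t const  bValue = iGRegEx < 16
 *                            ? (uint8_t)uGst          // AL, CL, ..., R15L
 *                            : (uint8_t)(uGst >> 8);  // AH, CH, DH, BH
 *      uint64_t const uDst   = bValue;                // zero-extended to 64 bits
 */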
4462
4463#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
4464 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
4465
4466#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
4467 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
4468
4469#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
4470 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
4471
4472/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
4473DECL_INLINE_THROW(uint32_t)
4474iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
4475{
4476 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4477 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4478 Assert(iGRegEx < 20);
4479
4480 /* Same discussion as in iemNativeEmitFetchGregU16 */
4481 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4482 kIemNativeGstRegUse_ReadOnly);
4483
4484 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4485 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4486
4487 if (iGRegEx < 16)
4488 {
4489 switch (cbSignExtended)
4490 {
4491 case sizeof(uint16_t):
4492 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4493 break;
4494 case sizeof(uint32_t):
4495 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4496 break;
4497 case sizeof(uint64_t):
4498 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
4499 break;
4500 default: AssertFailed(); break;
4501 }
4502 }
4503 else
4504 {
4505 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
4506 switch (cbSignExtended)
4507 {
4508 case sizeof(uint16_t):
4509 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4510 break;
4511 case sizeof(uint32_t):
4512 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4513 break;
4514 case sizeof(uint64_t):
4515 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
4516 break;
4517 default: AssertFailed(); break;
4518 }
4519 }
4520
4521 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4522 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4523 return off;
4524}
4525
4526
4527
4528#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
4529 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
4530
4531#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
4532 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4533
4534#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
4535 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4536
4537/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
4538DECL_INLINE_THROW(uint32_t)
4539iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4540{
4541 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4542 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4543 Assert(iGReg < 16);
4544
4545 /*
4546 * We can either just load the low 16 bits of the GPR into a host register
4547 * for the variable, or we can do so via a shadow copy host register. The
4548 * latter will avoid having to reload it if it's being stored later, but
4549 * will waste a host register if it isn't touched again. Since we don't
4550 * know what's going to happen, we choose the latter for now.
4551 */
4552 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4553 kIemNativeGstRegUse_ReadOnly);
4554
4555 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4556 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4557 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4558 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4559
4560 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4561 return off;
4562}
4563
4564#define IEM_MC_FETCH_GREG_I16(a_i16Dst, a_iGReg) \
4565 off = iemNativeEmitFetchGregI16(pReNative, off, a_i16Dst, a_iGReg)
4566
4567/** Emits code for IEM_MC_FETCH_GREG_I16. */
4568DECL_INLINE_THROW(uint32_t)
4569iemNativeEmitFetchGregI16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4570{
4571 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4572 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(int16_t));
4573 Assert(iGReg < 16);
4574
4575 /*
4576 * We can either just load the low 16 bits of the GPR into a host register
4577 * for the variable, or we can do so via a shadow copy host register. The
4578 * latter will avoid having to reload it if it's being stored later, but
4579 * will waste a host register if it isn't touched again. Since we don't
4580 * know what's going to happen, we choose the latter for now.
4581 */
4582 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4583 kIemNativeGstRegUse_ReadOnly);
4584
4585 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4586 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4587#ifdef RT_ARCH_AMD64
4588 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4589#elif defined(RT_ARCH_ARM64) /* Note! There are no 16-bit registers on ARM; we emulate that through 32-bit registers, which requires sign extension. */
4590 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4591#endif
4592 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4593
4594 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4595 return off;
4596}
4597
4598
4599#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
4600 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
4601
4602#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
4603 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
4604
4605/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
4606DECL_INLINE_THROW(uint32_t)
4607iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
4608{
4609 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4610 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
4611 Assert(iGReg < 16);
4612
4613 /*
4614 * We can either just load the low 16 bits of the GPR into a host register
4615 * for the variable, or we can do so via a shadow copy host register. The
4616 * latter will avoid having to reload it if it's being stored later, but
4617 * will waste a host register if it isn't touched again. Since we don't
4618 * know what's going to happen, we choose the latter for now.
4619 */
4620 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4621 kIemNativeGstRegUse_ReadOnly);
4622
4623 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4624 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4625 if (cbSignExtended == sizeof(uint32_t))
4626 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4627 else
4628 {
4629 Assert(cbSignExtended == sizeof(uint64_t));
4630 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
4631 }
4632 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4633
4634 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4635 return off;
4636}
4637
4638
4639#define IEM_MC_FETCH_GREG_I32(a_i32Dst, a_iGReg) \
4640 off = iemNativeEmitFetchGregU32(pReNative, off, a_i32Dst, a_iGReg, sizeof(uint32_t))
4641
4642#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
4643 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
4644
4645#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
4646 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
4647
4648/** Emits code for IEM_MC_FETCH_GREG_U32. */
4649DECL_INLINE_THROW(uint32_t)
4650iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
4651{
4652 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4653 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
4654 Assert(iGReg < 16);
4655
4656 /*
4657 * We can either just load the low 32 bits of the GPR into a host register
4658 * for the variable, or we can do so via a shadow copy host register. The
4659 * latter will avoid having to reload it if it's being stored later, but
4660 * will waste a host register if it isn't touched again. Since we don't
4661 * know what's going to happen, we choose the latter for now.
4662 */
4663 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4664 kIemNativeGstRegUse_ReadOnly);
4665
4666 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4667 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4668 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4669 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4670
4671 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4672 return off;
4673}
4674
4675
4676#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
4677 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
4678
4679/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
4680DECL_INLINE_THROW(uint32_t)
4681iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4682{
4683 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4684 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4685 Assert(iGReg < 16);
4686
4687 /*
4688 * We can either just load the low 32 bits of the GPR into a host register
4689 * for the variable, or we can do so via a shadow copy host register. The
4690 * latter will avoid having to reload it if it's being stored later, but
4691 * will waste a host register if it isn't touched again. Since we don't
4692 * know what's going to happen, we choose the latter for now.
4693 */
4694 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4695 kIemNativeGstRegUse_ReadOnly);
4696
4697 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4698 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4699 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
4700 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4701
4702 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4703 return off;
4704}
4705
4706
4707#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
4708 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4709
4710#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
4711 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
4712
4713/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
4714 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
4715DECL_INLINE_THROW(uint32_t)
4716iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
4717{
4718 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4719 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
4720 Assert(iGReg < 16);
4721
4722 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4723 kIemNativeGstRegUse_ReadOnly);
4724
4725 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4726 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4727 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
4728 /** @todo name the register a shadow one already? */
4729 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4730
4731 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
4732 return off;
4733}
4734
4735
4736#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4737#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
4738 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
4739
4740/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
4741DECL_INLINE_THROW(uint32_t)
4742iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
4743{
4744 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4745 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
4746 Assert(iGRegLo < 16 && iGRegHi < 16);
4747
4748 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
4749 kIemNativeGstRegUse_ReadOnly);
4750 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
4751 kIemNativeGstRegUse_ReadOnly);
4752
4753 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4754 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
4755 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
4756 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
4757
4758 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
4759 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
4760 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
4761 return off;
4762}
4763#endif
4764
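/*
 * Note (illustrative): the pair fetch above packs the two guest GPRs into the
 * 128-bit destination variable as
 *
 *      u128Dst.s.Lo = <guest GPR iGRegLo>;   // 64-bit lane 0
 *      u128Dst.s.Hi = <guest GPR iGRegHi>;   // 64-bit lane 1
 *
 * i.e. lane 0 of the SIMD register holds the low qword and lane 1 the high
 * qword, matching the RTUINT128U layout of the variable.
 */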
4765
4766/*********************************************************************************************************************************
4767* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
4768*********************************************************************************************************************************/
4769
4770#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
4771 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
4772
4773/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
4774DECL_INLINE_THROW(uint32_t)
4775iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
4776{
4777 Assert(iGRegEx < 20);
4778 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4779 kIemNativeGstRegUse_ForUpdate);
4780#ifdef RT_ARCH_AMD64
4781 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
4782
4783 /* To the lowest byte of the register: mov r8, imm8 */
4784 if (iGRegEx < 16)
4785 {
4786 if (idxGstTmpReg >= 8)
4787 pbCodeBuf[off++] = X86_OP_REX_B;
4788 else if (idxGstTmpReg >= 4)
4789 pbCodeBuf[off++] = X86_OP_REX;
4790 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4791 pbCodeBuf[off++] = u8Value;
4792 }
4793 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can; otherwise we rotate. */
4794 else if (idxGstTmpReg < 4)
4795 {
4796 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
4797 pbCodeBuf[off++] = u8Value;
4798 }
4799 else
4800 {
4801 /* ror reg64, 8 */
4802 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4803 pbCodeBuf[off++] = 0xc1;
4804 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4805 pbCodeBuf[off++] = 8;
4806
4807 /* mov reg8, imm8 */
4808 if (idxGstTmpReg >= 8)
4809 pbCodeBuf[off++] = X86_OP_REX_B;
4810 else if (idxGstTmpReg >= 4)
4811 pbCodeBuf[off++] = X86_OP_REX;
4812 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
4813 pbCodeBuf[off++] = u8Value;
4814
4815 /* rol reg64, 8 */
4816 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4817 pbCodeBuf[off++] = 0xc1;
4818 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4819 pbCodeBuf[off++] = 8;
4820 }
4821
4822#elif defined(RT_ARCH_ARM64)
4823 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
4824 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
4825 if (iGRegEx < 16)
4826 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
4827 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
4828 else
4829 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
4830 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
4831 iemNativeRegFreeTmp(pReNative, idxImmReg);
4832
4833#else
4834# error "Port me!"
4835#endif
4836
4837 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4838
4839#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4840 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4841#endif
4842
4843 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4844 return off;
4845}
4846
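/*
 * Note (illustrative): the ror/mov/rol sequence above is just a way of
 * reaching bits 15:8 when the shadow register has no addressable 'xH' byte
 * encoding; in plain C terms it performs
 *
 *      uGst = (uGst >> 8) | (uGst << 56);               // ror 8: bits 15:8 -> 7:0
 *      uGst = (uGst & ~(uint64_t)0xff) | u8Value;       // replace the low byte
 *      uGst = (uGst << 8) | (uGst >> 56);               // rol 8: restore order
 *
 * leaving bits 63:16 and 7:0 of the guest register untouched - the same
 * effect as the single BFI instruction on the ARM64 path.
 */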
4847
4848#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
4849 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
4850
4851/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
4852DECL_INLINE_THROW(uint32_t)
4853iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
4854{
4855 Assert(iGRegEx < 20);
4856 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4857
4858 /*
4859 * If it's a constant value (unlikely) we treat this as a
4860 * IEM_MC_STORE_GREG_U8_CONST statement.
4861 */
4862 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
4863 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
4864 { /* likely */ }
4865 else
4866 {
4867 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
4868 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
4869 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
4870 }
4871
4872 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
4873 kIemNativeGstRegUse_ForUpdate);
4874 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxValueVar, &off);
4875
4876#ifdef RT_ARCH_AMD64
4877 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
4878 if (iGRegEx < 16)
4879 {
4880 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
4881 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4882 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4883 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4884 pbCodeBuf[off++] = X86_OP_REX;
4885 pbCodeBuf[off++] = 0x8a;
4886 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4887 }
4888 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can; otherwise we rotate. */
4889 else if (idxGstTmpReg < 4 && idxVarReg < 4)
4890 {
4891 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
4892 pbCodeBuf[off++] = 0x8a;
4893 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
4894 }
4895 else
4896 {
4897 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
4898
4899 /* ror reg64, 8 */
4900 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4901 pbCodeBuf[off++] = 0xc1;
4902 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
4903 pbCodeBuf[off++] = 8;
4904
4905 /* mov reg8, reg8(r/m) */
4906 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
4907 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
4908 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
4909 pbCodeBuf[off++] = X86_OP_REX;
4910 pbCodeBuf[off++] = 0x8a;
4911 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
4912
4913 /* rol reg64, 8 */
4914 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
4915 pbCodeBuf[off++] = 0xc1;
4916 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
4917 pbCodeBuf[off++] = 8;
4918 }
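     /* Same ror/mov/rol trick as in iemNativeEmitStoreGregU8Const above: only bits 15:8 of the guest
        register receive the variable's low byte; everything else is preserved. */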
4919
4920#elif defined(RT_ARCH_ARM64)
4921 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
4922 or
4923 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
4924 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4925 if (iGRegEx < 16)
4926 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
4927 else
4928 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
4929
4930#else
4931# error "Port me!"
4932#endif
4933 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4934
4935 iemNativeVarRegisterRelease(pReNative, idxValueVar);
4936
4937#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4938 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
4939#endif
4940 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4941 return off;
4942}
4943
4944
4945
4946#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
4947 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
4948
4949/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
4950DECL_INLINE_THROW(uint32_t)
4951iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
4952{
4953 Assert(iGReg < 16);
4954 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
4955 kIemNativeGstRegUse_ForUpdate);
4956#ifdef RT_ARCH_AMD64
4957 /* mov reg16, imm16 */
4958 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4959 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
4960 if (idxGstTmpReg >= 8)
4961 pbCodeBuf[off++] = X86_OP_REX_B;
4962 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
4963 pbCodeBuf[off++] = RT_BYTE1(uValue);
4964 pbCodeBuf[off++] = RT_BYTE2(uValue);
4965
4966#elif defined(RT_ARCH_ARM64)
4967 /* movk xdst, #uValue, lsl #0 */
4968 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4969 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
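     /* MOVK with LSL #0 replaces only bits 15:0 of the destination and leaves all other bits
        untouched, which is exactly the 16-bit store semantics needed here. */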
4970
4971#else
4972# error "Port me!"
4973#endif
4974
4975 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4976
4977#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
4978 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
4979#endif
4980 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
4981 return off;
4982}
4983
4984
4985#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
4986 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
4987
4988/** Emits code for IEM_MC_STORE_GREG_U16. */
4989DECL_INLINE_THROW(uint32_t)
4990iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
4991{
4992 Assert(iGReg < 16);
4993 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
4994
4995 /*
4996 * If it's a constant value (unlikely) we treat this as a
4997 * IEM_MC_STORE_GREG_U16_CONST statement.
4998 */
4999 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5000 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5001 { /* likely */ }
5002 else
5003 {
5004 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5005 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5006 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
5007 }
5008
5009 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5010 kIemNativeGstRegUse_ForUpdate);
5011
5012#ifdef RT_ARCH_AMD64
5013 /* mov reg16, reg16 or [mem16] */
5014 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
5015 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5016 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
5017 {
5018 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
5019 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
5020 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
5021 pbCodeBuf[off++] = 0x8b;
5022 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
5023 }
5024 else
5025 {
5026 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
5027 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
5028 if (idxGstTmpReg >= 8)
5029 pbCodeBuf[off++] = X86_OP_REX_R;
5030 pbCodeBuf[off++] = 0x8b;
5031 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
5032 }
5033
5034#elif defined(RT_ARCH_ARM64)
5035 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
5036 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxValueVar, &off);
5037 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5038 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
5039 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5040
5041#else
5042# error "Port me!"
5043#endif
5044
5045 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5046
5047#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5048 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5049#endif
5050 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5051 return off;
5052}
5053
5054
5055#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
5056 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
5057
5058/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
5059DECL_INLINE_THROW(uint32_t)
5060iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
5061{
5062 Assert(iGReg < 16);
5063 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5064 kIemNativeGstRegUse_ForFullWrite);
5065 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
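     /* The uint32_t constant is zero-extended when loaded as a 64-bit immediate, matching the x86-64
        rule that a 32-bit GPR write clears bits 63:32; the whole 64-bit guest value is produced,
        hence the ForFullWrite allocation above. */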
5066#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5067 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5068#endif
5069 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5070 return off;
5071}
5072
5073
5074#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
5075 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
5076
5077#define IEM_MC_STORE_GREG_I32(a_iGReg, a_i32Value) \
5078 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_i32Value)
5079
5080/** Emits code for IEM_MC_STORE_GREG_U32/IEM_MC_STORE_GREG_I32. */
5081DECL_INLINE_THROW(uint32_t)
5082iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5083{
5084 Assert(iGReg < 16);
5085 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5086
5087 /*
5088 * If it's a constant value (unlikely) we treat this as a
5089 * IEM_MC_STORE_GREG_U32_CONST statement.
5090 */
5091 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5092 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5093 { /* likely */ }
5094 else
5095 {
5096 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5097 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5098 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
5099 }
5100
5101 /*
5102 * For the rest we allocate a guest register for the variable and write
5103 * it to the CPUMCTX structure.
5104 */
5105 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5106#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5107 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5108#else
5109 RT_NOREF(idxVarReg);
5110#endif
5111#ifdef VBOX_STRICT
5112 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
5113#endif
5114 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5115 return off;
5116}
5117
5118
5119#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
5120 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
5121
5122/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
5123DECL_INLINE_THROW(uint32_t)
5124iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
5125{
5126 Assert(iGReg < 16);
5127 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5128 kIemNativeGstRegUse_ForFullWrite);
5129 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
5130#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5131 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5132#endif
5133 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5134 return off;
5135}
5136
5137
5138#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
5139 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
5140
5141#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
5142 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
5143
5144/** Emits code for IEM_MC_STORE_GREG_U64/IEM_MC_STORE_GREG_I64. */
5145DECL_INLINE_THROW(uint32_t)
5146iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
5147{
5148 Assert(iGReg < 16);
5149 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
5150
5151 /*
5152 * If it's a constant value (unlikely) we treat this as a
5153 * IEM_MC_STORE_GREG_U64_CONST statement.
5154 */
5155 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
5156 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
5157 { /* likely */ }
5158 else
5159 {
5160 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
5161 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5162 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
5163 }
5164
5165 /*
5166 * For the rest we allocate a guest register for the variable and write
5167 * it to the CPUMCTX structure.
5168 */
5169 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
5170#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5171 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5172#else
5173 RT_NOREF(idxVarReg);
5174#endif
5175 iemNativeVarRegisterRelease(pReNative, idxValueVar);
5176 return off;
5177}
5178
5179
5180#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
5181 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
5182
5183/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
5184DECL_INLINE_THROW(uint32_t)
5185iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
5186{
5187 Assert(iGReg < 16);
5188 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5189 kIemNativeGstRegUse_ForUpdate);
5190 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
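     /* A 32-bit move of the register onto itself zero-extends, i.e. clears bits 63:32 while keeping
        the low dword - which is all IEM_MC_CLEAR_HIGH_GREG_U64 requires. */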
5191#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5192 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5193#endif
5194 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5195 return off;
5196}
5197
5198
5199#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5200#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
5201 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
5202
5203/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
5204DECL_INLINE_THROW(uint32_t)
5205iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
5206{
5207 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
5208 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
5209 Assert(iGRegLo < 16 && iGRegHi < 16);
5210
5211 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
5212 kIemNativeGstRegUse_ForFullWrite);
5213 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
5214 kIemNativeGstRegUse_ForFullWrite);
5215
5216 iemNativeVarSetKindToStack(pReNative, idxDstVar);
5217 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
5218 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
5219 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
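     /* Lane 0 of the 128-bit variable supplies the low guest register, lane 1 the high one. */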
5220
5221 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
5222 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
5223 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
5224 return off;
5225}
5226#endif
5227
5228
5229/*********************************************************************************************************************************
5230* General purpose register manipulation (add, sub). *
5231*********************************************************************************************************************************/
5232
5233#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
5234 off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
5235
5236/** Emits code for IEM_MC_ADD_GREG_U16. */
5237DECL_INLINE_THROW(uint32_t)
5238iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
5239{
5240 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5241 kIemNativeGstRegUse_ForUpdate);
5242
5243#ifdef RT_ARCH_AMD64
5244 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5245 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5246 if (idxGstTmpReg >= 8)
5247 pbCodeBuf[off++] = X86_OP_REX_B;
5248 if (uAddend == 1)
5249 {
5250 pbCodeBuf[off++] = 0xff; /* inc */
5251 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5252 }
5253 else
5254 {
5255 pbCodeBuf[off++] = 0x81;
5256 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5257 pbCodeBuf[off++] = uAddend;
5258 pbCodeBuf[off++] = 0;
5259 }
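     /* With the operand size prefix the 0x81 /0 form takes a 16-bit immediate, emitted little endian;
        uAddend is at most 255, so the high byte is always zero. */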
5260
5261#else
5262 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5263 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5264
5265 /* add tmp, gstgrp, uAddend */
5266 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
5267
5268 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg into bits 15:0 of idxGstTmpReg. */
5269 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5270
5271 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5272#endif
5273
5274 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5275
5276#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5277 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5278#endif
5279
5280 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5281 return off;
5282}
5283
5284
5285#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
5286 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5287
5288#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
5289 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5290
5291/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
5292DECL_INLINE_THROW(uint32_t)
5293iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
5294{
5295 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5296 kIemNativeGstRegUse_ForUpdate);
5297
5298#ifdef RT_ARCH_AMD64
5299 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5300 if (f64Bit)
5301 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5302 else if (idxGstTmpReg >= 8)
5303 pbCodeBuf[off++] = X86_OP_REX_B;
5304 if (uAddend == 1)
5305 {
5306 pbCodeBuf[off++] = 0xff; /* inc */
5307 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5308 }
5309 else if (uAddend < 128)
5310 {
5311 pbCodeBuf[off++] = 0x83; /* add */
5312 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5313 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5314 }
5315 else
5316 {
5317 pbCodeBuf[off++] = 0x81; /* add */
5318 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
5319 pbCodeBuf[off++] = RT_BYTE1(uAddend);
5320 pbCodeBuf[off++] = 0;
5321 pbCodeBuf[off++] = 0;
5322 pbCodeBuf[off++] = 0;
5323 }
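     /* The 0x83 form sign-extends its 8-bit immediate, so addends 128..255 would be misread as
        negative; they use the 0x81 form with a full little-endian imm32 instead (upper three
        bytes zero). */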
5324
5325#else
5326 /* add gstgrp, gstgrp, uAddend */
5327 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5328 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
5329
5330#endif
5331
5332 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5333
5334#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5335 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5336#endif
5337
5338 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5339 return off;
5340}
5341
5342
5343
5344#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
5345 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
5346
5347/** Emits code for IEM_MC_SUB_GREG_U16. */
5348DECL_INLINE_THROW(uint32_t)
5349iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
5350{
5351 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5352 kIemNativeGstRegUse_ForUpdate);
5353
5354#ifdef RT_ARCH_AMD64
5355 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
5356 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5357 if (idxGstTmpReg >= 8)
5358 pbCodeBuf[off++] = X86_OP_REX_B;
5359 if (uSubtrahend == 1)
5360 {
5361 pbCodeBuf[off++] = 0xff; /* dec */
5362 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5363 }
5364 else
5365 {
5366 pbCodeBuf[off++] = 0x81;
5367 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5368 pbCodeBuf[off++] = uSubtrahend;
5369 pbCodeBuf[off++] = 0;
5370 }
5371
5372#else
5373 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5374 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
5375
5376 /* sub tmp, gstgrp, uSubtrahend */
5377 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
5378
5379 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg into bits 15:0 of idxGstTmpReg. */
5380 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
5381
5382 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5383#endif
5384
5385 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5386
5387#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5388 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5389#endif
5390
5391 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5392 return off;
5393}
5394
5395
5396#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
5397 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
5398
5399#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
5400 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
5401
5402/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
5403DECL_INLINE_THROW(uint32_t)
5404iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
5405{
5406 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5407 kIemNativeGstRegUse_ForUpdate);
5408
5409#ifdef RT_ARCH_AMD64
5410 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
5411 if (f64Bit)
5412 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
5413 else if (idxGstTmpReg >= 8)
5414 pbCodeBuf[off++] = X86_OP_REX_B;
5415 if (uSubtrahend == 1)
5416 {
5417 pbCodeBuf[off++] = 0xff; /* dec */
5418 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
5419 }
5420 else if (uSubtrahend < 128)
5421 {
5422 pbCodeBuf[off++] = 0x83; /* sub */
5423 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5424 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5425 }
5426 else
5427 {
5428 pbCodeBuf[off++] = 0x81; /* sub */
5429 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
5430 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
5431 pbCodeBuf[off++] = 0;
5432 pbCodeBuf[off++] = 0;
5433 pbCodeBuf[off++] = 0;
5434 }
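     /* Same imm8 sign-extension consideration as in iemNativeEmitAddGregU32U64: subtrahends 128..255
        need the 0x81 form with a 32-bit immediate. */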
5435
5436#else
5437 /* sub tmp, gstgrp, uSubtrahend */
5438 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5439 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
5440
5441#endif
5442
5443 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5444
5445#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5446 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5447#endif
5448
5449 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5450 return off;
5451}
5452
5453
5454#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
5455 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5456
5457#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
5458 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5459
5460#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
5461 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5462
5463#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
5464 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5465
5466/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
5467DECL_INLINE_THROW(uint32_t)
5468iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5469{
5470#ifdef VBOX_STRICT
5471 switch (cbMask)
5472 {
5473 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5474 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5475 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5476 case sizeof(uint64_t): break;
5477 default: AssertFailedBreak();
5478 }
5479#endif
5480
5481 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5482 kIemNativeGstRegUse_ForUpdate);
5483
5484 switch (cbMask)
5485 {
5486 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5487 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
5488 break;
5489 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
5490 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
5491 break;
5492 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5493 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5494 break;
5495 case sizeof(uint64_t):
5496 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
5497 break;
5498 default: AssertFailedBreak();
5499 }
5500
5501 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5502
5503#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5504 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5505#endif
5506
5507 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5508 return off;
5509}
5510
5511
5512#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
5513 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
5514
5515#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
5516 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
5517
5518#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
5519 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
5520
5521#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
5522 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
5523
5524/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
5525DECL_INLINE_THROW(uint32_t)
5526iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
5527{
5528#ifdef VBOX_STRICT
5529 switch (cbMask)
5530 {
5531 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5532 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5533 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5534 case sizeof(uint64_t): break;
5535 default: AssertFailedBreak();
5536 }
5537#endif
5538
5539 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
5540 kIemNativeGstRegUse_ForUpdate);
5541
5542 switch (cbMask)
5543 {
5544 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
5545 case sizeof(uint16_t):
5546 case sizeof(uint64_t):
5547 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
5548 break;
5549 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
5550 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
5551 break;
5552 default: AssertFailedBreak();
5553 }
5554
5555 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5556
5557#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5558 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
5559#endif
5560
5561 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
5562 return off;
5563}
5564
5565
5566/*********************************************************************************************************************************
5567* Local/Argument variable manipulation (add, sub, and, or). *
5568*********************************************************************************************************************************/
5569
5570#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
5571 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5572
5573#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
5574 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5575
5576#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
5577 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5578
5579#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
5580 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5581
5582
5583#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
5584 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
5585
5586#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
5587 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
5588
5589#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
5590 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
5591
5592/** Emits code for AND'ing a local and a constant value. */
5593DECL_INLINE_THROW(uint32_t)
5594iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5595{
5596#ifdef VBOX_STRICT
5597 switch (cbMask)
5598 {
5599 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5600 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5601 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5602 case sizeof(uint64_t): break;
5603 default: AssertFailedBreak();
5604 }
5605#endif
5606
5607 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5608 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5609
5610 if (cbMask <= sizeof(uint32_t))
5611 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
5612 else
5613 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
5614
5615 iemNativeVarRegisterRelease(pReNative, idxVar);
5616 return off;
5617}
5618
5619
5620#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
5621 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
5622
5623#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
5624 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
5625
5626#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
5627 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
5628
5629#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
5630 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
5631
5632/** Emits code for OR'ing a local and a constant value. */
5633DECL_INLINE_THROW(uint32_t)
5634iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
5635{
5636#ifdef VBOX_STRICT
5637 switch (cbMask)
5638 {
5639 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
5640 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
5641 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
5642 case sizeof(uint64_t): break;
5643 default: AssertFailedBreak();
5644 }
5645#endif
5646
5647 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5648 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
5649
5650 if (cbMask <= sizeof(uint32_t))
5651 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
5652 else
5653 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
5654
5655 iemNativeVarRegisterRelease(pReNative, idxVar);
5656 return off;
5657}
5658
5659
5660#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
5661 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
5662
5663#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
5664 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
5665
5666#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
5667 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
5668
5669/** Emits code for reversing the byte order in a local value. */
5670DECL_INLINE_THROW(uint32_t)
5671iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
5672{
5673 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5674 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5675
5676 switch (cbLocal)
5677 {
5678 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
5679 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
5680 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
5681 default: AssertFailedBreak();
5682 }
5683
5684 iemNativeVarRegisterRelease(pReNative, idxVar);
5685 return off;
5686}
5687
5688
5689#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
5690 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5691
5692#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
5693 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5694
5695#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
5696 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5697
5698/** Emits code for shifting left a local value. */
5699DECL_INLINE_THROW(uint32_t)
5700iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5701{
5702#ifdef VBOX_STRICT
5703 switch (cbLocal)
5704 {
5705 case sizeof(uint8_t): Assert(cShift < 8); break;
5706 case sizeof(uint16_t): Assert(cShift < 16); break;
5707 case sizeof(uint32_t): Assert(cShift < 32); break;
5708 case sizeof(uint64_t): Assert(cShift < 64); break;
5709 default: AssertFailedBreak();
5710 }
5711#endif
5712
5713 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5714 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5715
5716 if (cbLocal <= sizeof(uint32_t))
5717 {
5718 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
5719 if (cbLocal < sizeof(uint32_t))
5720 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
5721 cbLocal == sizeof(uint16_t)
5722 ? UINT32_C(0xffff)
5723 : UINT32_C(0xff));
5724 }
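     /* For 8- and 16-bit locals the 32-bit shift can carry bits past the local's width, so the
        result is masked back down to the declared size. */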
5725 else
5726 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
5727
5728 iemNativeVarRegisterRelease(pReNative, idxVar);
5729 return off;
5730}
5731
5732
5733#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
5734 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
5735
5736#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
5737 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
5738
5739#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
5740 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
5741
5742/** Emits code for arithmetically shifting right a local value. */
5743DECL_INLINE_THROW(uint32_t)
5744iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
5745{
5746#ifdef VBOX_STRICT
5747 switch (cbLocal)
5748 {
5749 case sizeof(int8_t): Assert(cShift < 8); break;
5750 case sizeof(int16_t): Assert(cShift < 16); break;
5751 case sizeof(int32_t): Assert(cShift < 32); break;
5752 case sizeof(int64_t): Assert(cShift < 64); break;
5753 default: AssertFailedBreak();
5754 }
5755#endif
5756
5757 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5758 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5759
5760 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
5761 if (cbLocal == sizeof(uint8_t))
5762 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
5763 else if (cbLocal == sizeof(uint16_t))
5764 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
5765
5766 if (cbLocal <= sizeof(uint32_t))
5767 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
5768 else
5769 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
5770
5771 iemNativeVarRegisterRelease(pReNative, idxVar);
5772 return off;
5773}
5774
5775
5776#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
5777 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
5778
5779#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
5780 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
5781
5782#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
5783 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
5784
5785/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
5786DECL_INLINE_THROW(uint32_t)
5787iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
5788{
5789 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
5790 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
5791 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
5792 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
5793
5794 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
5795 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquireInited(pReNative, idxVarEffAddr, &off);
5796
5797 /* Need to sign extend the value. */
5798 if (cbLocal <= sizeof(uint32_t))
5799 {
5800/** @todo ARM64: In case of boredom, the extended add instruction can do the
5801 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
5802 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
5803
5804 switch (cbLocal)
5805 {
5806 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
5807 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
5808 default: AssertFailed();
5809 }
5810
5811 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
5812 iemNativeRegFreeTmp(pReNative, idxRegTmp);
5813 }
5814 else
5815 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
5816
5817 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
5818 iemNativeVarRegisterRelease(pReNative, idxVar);
5819 return off;
5820}
5821
5822
5823
5824/*********************************************************************************************************************************
5825* EFLAGS *
5826*********************************************************************************************************************************/
5827
5828#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
5829# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
5830#else
5831# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
5832 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
5833
5834DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
5835{
5836 if (fEflOutput)
5837 {
5838 PVMCPUCC const pVCpu = pReNative->pVCpu;
5839# ifndef IEMLIVENESS_EXTENDED_LAYOUT
5840 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
5841 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
5842 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
5843# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5844 if (fEflOutput & (a_fEfl)) \
5845 { \
5846 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
5847 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5848 else \
5849 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5850 } else do { } while (0)
5851# else
5852 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
5853 IEMLIVENESSBIT const LivenessClobbered = { IEMLIVENESS_STATE_GET_WILL_BE_CLOBBERED_SET(pLivenessEntry) };
5854 IEMLIVENESSBIT const LivenessDelayable = { IEMLIVENESS_STATE_GET_CAN_BE_POSTPONED_SET(pLivenessEntry) };
5855# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
5856 if (fEflOutput & (a_fEfl)) \
5857 { \
5858 if (LivenessClobbered.a_fLivenessMember) \
5859 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
5860 else if (LivenessDelayable.a_fLivenessMember) \
5861 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
5862 else \
5863 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
5864 } else do { } while (0)
5865# endif
5866 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
5867 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
5868 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
5869 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
5870 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
5871 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
5872 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
5873# undef CHECK_FLAG_AND_UPDATE_STATS
5874 }
5875 RT_NOREF(fEflInput);
5876}
5877#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5878
5879#undef IEM_MC_FETCH_EFLAGS /* should not be used */
5880#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5881 off = iemNativeEmitFetchEFlags<a_fEflInput, iemNativeEflagsToLivenessMask<a_fEflInput>(),\
5882 a_fEflOutput, iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags)
5883
5884/** Handles IEM_MC_FETCH_EFLAGS_EX. */
5885template<uint32_t const a_fEflInput, uint64_t const a_fLivenessEflInput,
5886 uint32_t const a_fEflOutput, uint64_t const a_fLivenessEflOutput>
5887DECL_INLINE_THROW(uint32_t)
5888iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags)
5889{
5890 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
5891 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5892 /** @todo fix NOT AssertCompile(a_fEflInput != 0 || a_fEflOutput != 0); */
5893
5894#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5895# ifdef VBOX_STRICT
5896 if ( pReNative->idxCurCall != 0
5897 && (a_fEflInput != 0 || a_fEflOutput != 0) /* for NOT these are both zero for now. */)
5898 {
5899 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5900 RT_CONSTEXPR uint32_t const fBoth = a_fEflInput | a_fEflOutput;
5901# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
5902 AssertMsg( !(fBoth & (a_fElfConst)) \
5903 || (!(a_fEflInput & (a_fElfConst)) \
5904 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5905 : !(a_fEflOutput & (a_fElfConst)) \
5906 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5907 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
5908 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5909 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
5910 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
5911 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
5912 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
5913 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
5914 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
5915 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
5916# undef ASSERT_ONE_EFL
5917 }
5918# endif
5919#endif
5920
5921 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, a_fEflInput);
5922 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, a_fEflInput);
5923
5924 /** @todo This could be prettier...*/
5925 /** @todo Also, the shadowing+liveness handling of EFlags is currently
5926 * problematic, but I'll try tackle that soon (@bugref{10720}). */
5927 PCIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarEFlags)];
5928 Assert(pVar->enmKind == kIemNativeVarKind_Invalid || pVar->enmKind == kIemNativeVarKind_Stack);
5929 Assert(pVar->idxReg == UINT8_MAX);
5930 if (pVar->uArgNo >= IEMNATIVE_CALL_ARG_GREG_COUNT)
5931 {
5932 /** @todo We could use kIemNativeGstRegUse_ReadOnly here when fOutput is
5933 * zero, but since iemNativeVarRegisterSet clears the shadowing,
5934 * that's counterproductive... */
5935 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
5936 a_fLivenessEflInput, a_fLivenessEflOutput);
5937 iemNativeVarRegisterSet(pReNative, idxVarEFlags, idxGstReg, off, true /*fAllocated*/);
5938 }
5939 else
5940 {
5941 /* Register argument variable: Avoid assertions in generic call code and load it the traditional way. */
5942 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off);
5943 uint8_t const idxGstReg = iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent(pReNative, &off,
5944 a_fLivenessEflInput, a_fLivenessEflOutput);
5945 if (idxGstReg != UINT8_MAX)
5946 {
5947 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstReg);
5948 iemNativeRegFreeTmp(pReNative, idxGstReg);
5949 }
5950 else
5951 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxVarReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
5952 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
5953 }
5954 return off;
5955}
5956
5957
5958
5959/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
5960 * start using it with custom native code emission (inlining assembly
5961 * instruction helpers). */
5962#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
5963#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5964 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5965 off = iemNativeEmitCommitEFlags<true /*a_fUpdateSkippingAndPostponing*/, a_fEflOutput, \
5966 iemNativeEflagsToLivenessMask<a_fEflInput>(), \
5967 iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags, a_fEflInput)
5968
5969#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
5970#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
5971 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
5972 off = iemNativeEmitCommitEFlags<false /*a_fUpdateSkippingAndPostponing*/, a_fEflOutput, \
5973 iemNativeEflagsToLivenessMask<a_fEflInput>(), \
5974 iemNativeEflagsToLivenessMask<a_fEflOutput>()>(pReNative, off, a_EFlags, a_fEflInput)
5975
5976/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
5977template<bool const a_fUpdateSkippingAndPostponing, uint32_t const a_fEflOutput,
5978 uint64_t const a_fLivenessEflInputBits, uint64_t const a_fLivenessEflOutputBits>
5979DECL_INLINE_THROW(uint32_t)
5980iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflInput)
5981{
5982 uint8_t const idxReg = iemNativeVarRegisterAcquireInited(pReNative, idxVarEFlags, &off);
5983 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
5984
5985#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
5986# ifdef VBOX_STRICT
5987 if ( pReNative->idxCurCall != 0
5988 && (a_fLivenessEflInputBits != 0 || a_fLivenessEflOutputBits != 0) /* for NOT these are both zero for now. */)
5989 {
5990 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
5991# define ASSERT_ONE_EFL(a_idxField) \
5992 if RT_CONSTEXPR_IF(((a_fLivenessEflInputBits | a_fLivenessEflOutputBits) & RT_BIT_64(a_idxField)) != 0) \
5993 AssertMsg(!(a_fLivenessEflInputBits & RT_BIT_64(a_idxField)) \
5994 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5995 : !(a_fLivenessEflOutputBits & RT_BIT_64(a_idxField)) \
5996 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
5997 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)), \
5998 ("%s - %u\n", #a_idxField, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
5999 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
6000 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
6001 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
6002 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
6003 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
6004 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
6005 ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
6006# undef ASSERT_ONE_EFL
6007 }
6008# endif
6009#endif
6010
6011#ifdef VBOX_STRICT
6012 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
6013 uint32_t offFixup = off;
6014 off = iemNativeEmitJnzToFixed(pReNative, off, off);
6015 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
6016 iemNativeFixupFixedJump(pReNative, offFixup, off);
6017
6018 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
6019 offFixup = off;
6020 off = iemNativeEmitJzToFixed(pReNative, off, off);
6021 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
6022 iemNativeFixupFixedJump(pReNative, offFixup, off);
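     /* The two strict checks above raise a debug break (codes 0x2001/0x2002) if the committed value
        has the reserved always-one flag (bit 1) cleared or any reserved always-zero flag set. */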
6023
6024 /** @todo validate that only bits in the a_fEflOutput mask changed. */
6025#endif
6026
6027#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6028 if RT_CONSTEXPR_IF(a_fUpdateSkippingAndPostponing)
6029 {
6030 Assert(!(pReNative->fSkippingEFlags & fEflInput)); RT_NOREF(fEflInput);
6031 if (pReNative->fSkippingEFlags)
6032 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitCommitEFlags)\n",
6033 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~(a_fEflOutput & X86_EFL_STATUS_BITS) ));
6034 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6035 pReNative->fSkippingEFlags = 0;
6036 else
6037 pReNative->fSkippingEFlags &= ~(a_fEflOutput & X86_EFL_STATUS_BITS);
6038# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6039 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6040 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6041 else
6042 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(a_fEflOutput & X86_EFL_STATUS_BITS),
6043 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6044# endif
6045 IEMNATIVE_CLEAR_POSTPONED_EFLAGS(pReNative, a_fEflOutput);
6046 }
6047#endif
6048
6049 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
6050 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
6051 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
6052 return off;
6053}
6054
6055
6056typedef enum IEMNATIVEMITEFLOP
6057{
6058 kIemNativeEmitEflOp_Set,
6059 kIemNativeEmitEflOp_Clear,
6060 kIemNativeEmitEflOp_Flip
6061} IEMNATIVEMITEFLOP;
6062
6063#define IEM_MC_SET_EFL_BIT(a_fBit) \
6064 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Set, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6065
6066#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
6067 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Clear, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6068
6069#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
6070 off = iemNativeEmitModifyEFlagsBit<kIemNativeEmitEflOp_Flip, a_fBit, iemNativeEflagsToLivenessMask<a_fBit>()>(pReNative, off)
6071
6072/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
6073template<IEMNATIVEMITEFLOP const a_enmOp, uint32_t const a_fEflBit, uint64_t const a_fLivenessEflBit>
6074DECL_INLINE_THROW(uint32_t) iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6075{
6076 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestEFlagsForUpdate(pReNative, &off,
6077 a_enmOp == kIemNativeEmitEflOp_Flip
6078 ? a_fLivenessEflBit : 0,
6079 a_fLivenessEflBit);
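     /* Flip is a read-modify-write of the bit and therefore declares it as a liveness input too,
        whereas Set and Clear only produce the bit. */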
6080
6081 /* Using 'if constexpr' forces code elimination in debug builds with VC. */
6082 if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Set)
6083 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, a_fEflBit);
6084 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Clear)
6085 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~a_fEflBit);
6086 else if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitEflOp_Flip)
6087 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, a_fEflBit);
6088 else
6089 AssertCompile( a_enmOp == kIemNativeEmitEflOp_Set /* AssertCompile(false) works with VC 2019 but not clang 15. */
6090 || a_enmOp == kIemNativeEmitEflOp_Clear
6091 || a_enmOp == kIemNativeEmitEflOp_Flip);
6092
6093 /** @todo No delayed writeback for EFLAGS right now. */
6094 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
6095
6096 /* Free but don't flush the EFLAGS register. */
6097 iemNativeRegFreeTmp(pReNative, idxEflReg);
6098
6099#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6100 /* Clear the bit in the skipped mask if we're clobbering and it's a status bit. */
6101 if RT_CONSTEXPR_IF( (a_enmOp == kIemNativeEmitEflOp_Set || a_enmOp == kIemNativeEmitEflOp_Clear)
6102 && (a_fEflBit & X86_EFL_STATUS_BITS))
6103 {
6104 if (pReNative->fSkippingEFlags)
6105 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitModifyEFlagsBit)\n",
6106 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~a_fEflBit ));
6107 pReNative->fSkippingEFlags &= ~a_fEflBit;
6108# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6109 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~a_fEflBit, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6110# endif
6111 }
6112#endif
6113
6114 return off;
6115}
6116
6117
6118/*********************************************************************************************************************************
6119* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
6120*********************************************************************************************************************************/
6121
6122#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
6123 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
6124
6125#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
6126 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
6127
6128#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
6129 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
6130
6131
6132/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
6133 * IEM_MC_FETCH_SREG_ZX_U64. */
6134DECL_INLINE_THROW(uint32_t)
6135iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
6136{
6137 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
6138 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
6139 Assert(iSReg < X86_SREG_COUNT);
6140
6141 /*
6142 * For now, we will not create a shadow copy of a selector. The rationale
6143 * is that, since we do not recompile the popping and loading of segment
6144 * registers and the IEM_MC_FETCH_SREG_U* MCs are only used for
6145 * pushing and moving to registers, there is only a small chance that the
6146 * shadow copy will be accessed again before the register is reloaded. One
6147 * scenario would be nested calls in 16-bit code, but I doubt it's worth
6148 * the extra register pressure atm.
6149 *
6150 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
6151 * and iemNativeVarRegisterAcquire for a load scenario. We only have the
6152 * store scenario covered at present (r160730).
6153 */
6154 iemNativeVarSetKindToStack(pReNative, idxDstVar);
6155 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
6156 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
6157 iemNativeVarRegisterRelease(pReNative, idxDstVar);
6158 return off;
6159}
6160
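/*
 * Conceptually (sketch only, not part of the original source) the code
 * emitted above boils down to:
 *      a_u16Dst = pVCpu->cpum.GstCtx.aSRegs[iSReg].Sel;
 * The 16-bit load zero-extends into the full host register, which is what
 * makes the ZX_U32 and ZX_U64 variants come for free.
 */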
6161
6162
6163/*********************************************************************************************************************************
6164* Register references. *
6165*********************************************************************************************************************************/
6166
6167#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
6168 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
6169
6170#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
6171 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
6172
6173/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
6174DECL_INLINE_THROW(uint32_t)
6175iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
6176{
6177 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
6178 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6179 Assert(iGRegEx < 20);
6180
6181 if (iGRegEx < 16)
6182 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6183 else
6184 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
6185
6186 /* If we've delayed writing back the register value, flush it now. */
6187 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
6188
6189 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6190 if (!fConst)
6191 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
6192
6193 return off;
6194}
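
/*
 * Note (added illustration): the extended GPR index used above follows the
 * usual IEM byte-register convention, i.e.
 *      iGRegEx  0..15 -> low byte of GPR 0..15 (AL, CL, ..., R15B),
 *      iGRegEx 16..19 -> high byte of GPR 0..3 (AH, CH, DH, BH),
 * which is why iGRegEx & 15 recovers the underlying GPR in both branches.
 */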
6195
6196#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
6197 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
6198
6199#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
6200 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
6201
6202#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
6203 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
6204
6205#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
6206 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
6207
6208#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
6209 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
6210
6211#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
6212 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
6213
6214#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
6215 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
6216
6217#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
6218 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
6219
6220#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
6221 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
6222
6223#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
6224 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
6225
6226/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
6227DECL_INLINE_THROW(uint32_t)
6228iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
6229{
6230 Assert(iGReg < 16);
6231 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
6232 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6233
6234 /* If we've delayed writing back the register value, flush it now. */
6235 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
6236
6237 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6238 if (!fConst)
6239 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
6240
6241 return off;
6242}
6243
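/*
 * Added note: the flush-on-non-const pattern above exists because a writable
 * reference lets the IEM_MC block modify the guest register through the
 * returned pointer, behind the register allocator's back. Any host register
 * shadowing that guest register would then hold a stale value, so the shadow
 * is dropped up front; const references only read and may keep their shadows.
 */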
6244
6245#undef IEM_MC_REF_EFLAGS /* should not be used. */
6246#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
6247 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
6248 off = iemNativeEmitRefEFlags<a_fEflOutput>(pReNative, off, a_pEFlags, a_fEflInput)
6249
6250/** Handles IEM_MC_REF_EFLAGS. */
6251template<uint32_t const a_fEflOutput>
6252DECL_INLINE_THROW(uint32_t)
6253iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput)
6254{
6255 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
6256 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6257
6258#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
6259 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, fEflInput);
6260 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
6261 if (pReNative->fSkippingEFlags)
6262 Log5(("EFLAGS: fSkippingEFlags %#x -> %#x (iemNativeEmitRefEFlags)\n",
6263 pReNative->fSkippingEFlags, pReNative->fSkippingEFlags & ~a_fEflOutput ));
6264 pReNative->fSkippingEFlags &= ~a_fEflOutput;
6265# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6266
6267 /* Updating the skipping according to the outputs is a little early, but
6268 we don't have any other hooks for references atm. */
6269 if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
6270 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6271 else if RT_CONSTEXPR_IF((a_fEflOutput & X86_EFL_STATUS_BITS) != 0)
6272 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(a_fEflOutput & X86_EFL_STATUS_BITS),
6273 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
6274# endif
6275
6276 /* This ASSUMES that EFLAGS references are not taken before use. */
6277 IEMNATIVE_CLEAR_POSTPONED_EFLAGS(pReNative, a_fEflOutput);
6278
6279#endif
6280 RT_NOREF(fEflInput);
6281
6282 /* If we've delayed writing back the register value, flush it now. */
6283 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
6284
6285 /* If there is a shadow copy of guest EFLAGS, flush it now. */
6286 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
6287
6288 return off;
6289}
6290
6291
6292/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
6293 * different code from the threaded recompiler, maybe it would be helpful. For now
6294 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
6295#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
6296
6297
6298#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
6299 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
6300
6301#define IEM_MC_REF_XREG_XMM(a_puXmmDst, a_iXReg) \
6302 off = iemNativeEmitRefXregXxx(pReNative, off, a_puXmmDst, a_iXReg, false /*fConst*/)
6303
6304#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
6305 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
6306
6307#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
6308 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
6309
6310#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6311/* Just being paranoid here. */
6312# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
6313AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
6314AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
6315AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
6316AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
6317# endif
6318AssertCompileMemberOffset(X86XMMREG, au64, 0);
6319AssertCompileMemberOffset(X86XMMREG, au32, 0);
6320AssertCompileMemberOffset(X86XMMREG, ar64, 0);
6321AssertCompileMemberOffset(X86XMMREG, ar32, 0);
6322
6323# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
6324 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
6325# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
6326 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
6327# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
6328 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
6329# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
6330 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
6331#endif
6332
6333/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
6334DECL_INLINE_THROW(uint32_t)
6335iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
6336{
6337 Assert(iXReg < 16);
6338 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
6339 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
6340
6341 /* If we've delayed writing back the register value, flush it now. */
6342 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
6343
6344#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6345 /* If it's not a const reference we need to flush the shadow copy of the register now. */
6346 if (!fConst)
6347 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
6348#else
6349 RT_NOREF(fConst);
6350#endif
6351
6352 return off;
6353}
6354
6355
6356
6357/*********************************************************************************************************************************
6358* Effective Address Calculation *
6359*********************************************************************************************************************************/
6360#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
6361 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
6362
6363/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
6364 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
6365DECL_INLINE_THROW(uint32_t)
6366iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6367 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
6368{
6369 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6370
6371 /*
6372 * Handle the disp16 form with no registers first.
6373 *
6374 * Convert to an immediate value, as that'll delay the register allocation
6375 * and assignment till the memory access / call / whatever and we can use
6376 * a more appropriate register (or none at all).
6377 */
6378 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
6379 {
6380 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
6381 return off;
6382 }
6383
6384 /* Determine the displacement. */
6385 uint16_t u16EffAddr;
6386 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6387 {
6388 case 0: u16EffAddr = 0; break;
6389 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
6390 case 2: u16EffAddr = u16Disp; break;
6391 default: AssertFailedStmt(u16EffAddr = 0);
6392 }
6393
6394 /* Determine the registers involved. */
6395 uint8_t idxGstRegBase;
6396 uint8_t idxGstRegIndex;
6397 switch (bRm & X86_MODRM_RM_MASK)
6398 {
6399 case 0:
6400 idxGstRegBase = X86_GREG_xBX;
6401 idxGstRegIndex = X86_GREG_xSI;
6402 break;
6403 case 1:
6404 idxGstRegBase = X86_GREG_xBX;
6405 idxGstRegIndex = X86_GREG_xDI;
6406 break;
6407 case 2:
6408 idxGstRegBase = X86_GREG_xBP;
6409 idxGstRegIndex = X86_GREG_xSI;
6410 break;
6411 case 3:
6412 idxGstRegBase = X86_GREG_xBP;
6413 idxGstRegIndex = X86_GREG_xDI;
6414 break;
6415 case 4:
6416 idxGstRegBase = X86_GREG_xSI;
6417 idxGstRegIndex = UINT8_MAX;
6418 break;
6419 case 5:
6420 idxGstRegBase = X86_GREG_xDI;
6421 idxGstRegIndex = UINT8_MAX;
6422 break;
6423 case 6:
6424 idxGstRegBase = X86_GREG_xBP;
6425 idxGstRegIndex = UINT8_MAX;
6426 break;
6427#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
6428 default:
6429#endif
6430 case 7:
6431 idxGstRegBase = X86_GREG_xBX;
6432 idxGstRegIndex = UINT8_MAX;
6433 break;
6434 }
6435
6436 /*
6437 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
6438 */
6439 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6440 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6441 kIemNativeGstRegUse_ReadOnly);
6442 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
6443 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6444 kIemNativeGstRegUse_ReadOnly)
6445 : UINT8_MAX;
6446#ifdef RT_ARCH_AMD64
6447 if (idxRegIndex == UINT8_MAX)
6448 {
6449 if (u16EffAddr == 0)
6450 {
6451 /* movzx ret, base */
6452 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
6453 }
6454 else
6455 {
6456 /* lea ret32, [base64 + disp32] */
6457 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6458 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6459 if (idxRegRet >= 8 || idxRegBase >= 8)
6460 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6461 pbCodeBuf[off++] = 0x8d;
6462 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6463 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
6464 else
6465 {
6466 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
6467 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6468 }
6469 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6470 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6471 pbCodeBuf[off++] = 0;
6472 pbCodeBuf[off++] = 0;
6473 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6474
6475 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6476 }
6477 }
6478 else
6479 {
6480 /* lea ret32, [index64 + base64 (+ disp32)] */
6481 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6482 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6483 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6484 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6485 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6486 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6487 pbCodeBuf[off++] = 0x8d;
6488 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
6489 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6490 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
6491 if (bMod == X86_MOD_MEM4)
6492 {
6493 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
6494 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
6495 pbCodeBuf[off++] = 0;
6496 pbCodeBuf[off++] = 0;
6497 }
6498 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6499 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
6500 }
6501
6502#elif defined(RT_ARCH_ARM64)
6503 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6504 if (u16EffAddr == 0)
6505 {
6506 if (idxRegIndex == UINT8_MAX)
6507 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
6508 else
6509 {
6510 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
6511 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6512 }
6513 }
6514 else
6515 {
6516 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
6517 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
6518 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
6519 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6520 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
6521 else
6522 {
6523 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
6524 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6525 }
6526 if (idxRegIndex != UINT8_MAX)
6527 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
6528 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
6529 }
6530
6531#else
6532# error "port me"
6533#endif
6534
6535 if (idxRegIndex != UINT8_MAX)
6536 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6537 iemNativeRegFreeTmp(pReNative, idxRegBase);
6538 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6539 return off;
6540}
6541
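/*
 * Worked example (sketch, not from the original source): for bRm=0x42, i.e.
 * mod=1 and rm=2, the tables above pick base=BP and index=SI, and u16Disp
 * carries the disp8 byte (say 0xF4, i.e. -12 after sign extension).  The
 * emitted code then computes the classic [bp+si+disp8] form with 16-bit
 * wrap-around:
 *      uint16_t GCPtrEff = (uint16_t)(uBp + uSi - 12);
 * where uBp and uSi stand for the guest RBP/RSI values held in the read-only
 * temporaries allocated above.
 */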
6542
6543#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
6544 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
6545
6546/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
6547 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
6548DECL_INLINE_THROW(uint32_t)
6549iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
6550 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
6551{
6552 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6553
6554 /*
6555 * Handle the disp32 form with no registers first.
6556 *
6557 * Convert to an immediate value, as that'll delay the register allocation
6558 * and assignment till the memory access / call / whatever and we can use
6559 * a more appropriate register (or none at all).
6560 */
6561 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6562 {
6563 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
6564 return off;
6565 }
6566
6567 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
6568 uint32_t u32EffAddr = 0;
6569 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6570 {
6571 case 0: break;
6572 case 1: u32EffAddr = (int8_t)u32Disp; break;
6573 case 2: u32EffAddr = u32Disp; break;
6574 default: AssertFailed();
6575 }
6576
6577 /* Get the register (or SIB) value. */
6578 uint8_t idxGstRegBase = UINT8_MAX;
6579 uint8_t idxGstRegIndex = UINT8_MAX;
6580 uint8_t cShiftIndex = 0;
6581 switch (bRm & X86_MODRM_RM_MASK)
6582 {
6583 case 0: idxGstRegBase = X86_GREG_xAX; break;
6584 case 1: idxGstRegBase = X86_GREG_xCX; break;
6585 case 2: idxGstRegBase = X86_GREG_xDX; break;
6586 case 3: idxGstRegBase = X86_GREG_xBX; break;
6587 case 4: /* SIB */
6588 {
6589 /* index w/ scaling. */
6590 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6591 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6592 {
6593 case 0: idxGstRegIndex = X86_GREG_xAX; break;
6594 case 1: idxGstRegIndex = X86_GREG_xCX; break;
6595 case 2: idxGstRegIndex = X86_GREG_xDX; break;
6596 case 3: idxGstRegIndex = X86_GREG_xBX; break;
6597 case 4: cShiftIndex = 0; /*no index*/ break;
6598 case 5: idxGstRegIndex = X86_GREG_xBP; break;
6599 case 6: idxGstRegIndex = X86_GREG_xSI; break;
6600 case 7: idxGstRegIndex = X86_GREG_xDI; break;
6601 }
6602
6603 /* base */
6604 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
6605 {
6606 case 0: idxGstRegBase = X86_GREG_xAX; break;
6607 case 1: idxGstRegBase = X86_GREG_xCX; break;
6608 case 2: idxGstRegBase = X86_GREG_xDX; break;
6609 case 3: idxGstRegBase = X86_GREG_xBX; break;
6610 case 4:
6611 idxGstRegBase = X86_GREG_xSP;
6612 u32EffAddr += uSibAndRspOffset >> 8;
6613 break;
6614 case 5:
6615 if ((bRm & X86_MODRM_MOD_MASK) != 0)
6616 idxGstRegBase = X86_GREG_xBP;
6617 else
6618 {
6619 Assert(u32EffAddr == 0);
6620 u32EffAddr = u32Disp;
6621 }
6622 break;
6623 case 6: idxGstRegBase = X86_GREG_xSI; break;
6624 case 7: idxGstRegBase = X86_GREG_xDI; break;
6625 }
6626 break;
6627 }
6628 case 5: idxGstRegBase = X86_GREG_xBP; break;
6629 case 6: idxGstRegBase = X86_GREG_xSI; break;
6630 case 7: idxGstRegBase = X86_GREG_xDI; break;
6631 }
6632
6633 /*
6634 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6635 * the start of the function.
6636 */
6637 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6638 {
6639 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
6640 return off;
6641 }
6642
6643 /*
6644 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6645 */
6646 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6647 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6648 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6649 kIemNativeGstRegUse_ReadOnly);
6650 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6651 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6652 kIemNativeGstRegUse_ReadOnly);
6653
6654 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6655 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6656 {
6657 idxRegBase = idxRegIndex;
6658 idxRegIndex = UINT8_MAX;
6659 }
6660
6661#ifdef RT_ARCH_AMD64
6662 if (idxRegIndex == UINT8_MAX)
6663 {
6664 if (u32EffAddr == 0)
6665 {
6666 /* mov ret, base */
6667 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6668 }
6669 else
6670 {
6671 /* lea ret32, [base64 + disp32] */
6672 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6673 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6674 if (idxRegRet >= 8 || idxRegBase >= 8)
6675 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
6676 pbCodeBuf[off++] = 0x8d;
6677 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6678 if (idxRegBase != X86_GREG_x12 /*SIB*/)
6679 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
6680 else
6681 {
6682 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6683 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
6684 }
6685 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6686 if (bMod == X86_MOD_MEM4)
6687 {
6688 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6689 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6690 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6691 }
6692 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6693 }
6694 }
6695 else
6696 {
6697 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
6698 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
6699 if (idxRegBase == UINT8_MAX)
6700 {
6701 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
6702 if (idxRegRet >= 8 || idxRegIndex >= 8)
6703 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6704 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6705 pbCodeBuf[off++] = 0x8d;
6706 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
6707 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
6708 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6709 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6710 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6711 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6712 }
6713 else
6714 {
6715 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
6716 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
6717 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
6718 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
6719 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
6720 pbCodeBuf[off++] = 0x8d;
6721 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
6722 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
6723 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
6724 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
6725 if (bMod != X86_MOD_MEM0)
6726 {
6727 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
6728 if (bMod == X86_MOD_MEM4)
6729 {
6730 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
6731 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
6732 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
6733 }
6734 }
6735 }
6736 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6737 }
6738
6739#elif defined(RT_ARCH_ARM64)
6740 if (u32EffAddr == 0)
6741 {
6742 if (idxRegIndex == UINT8_MAX)
6743 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6744 else if (idxRegBase == UINT8_MAX)
6745 {
6746 if (cShiftIndex == 0)
6747 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
6748 else
6749 {
6750 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6751 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
6752 }
6753 }
6754 else
6755 {
6756 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6757 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
6758 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6759 }
6760 }
6761 else
6762 {
6763 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
6764 {
6765 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6766 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
6767 }
6768 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
6769 {
6770 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6771 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
6772 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
6773 }
6774 else
6775 {
6776 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
6777 if (idxRegBase != UINT8_MAX)
6778 {
6779 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6780 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
6781 }
6782 }
6783 if (idxRegIndex != UINT8_MAX)
6784 {
6785 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
6786 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
6787 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
6788 }
6789 }
6790
6791#else
6792# error "port me"
6793#endif
6794
6795 if (idxRegIndex != UINT8_MAX)
6796 iemNativeRegFreeTmp(pReNative, idxRegIndex);
6797 if (idxRegBase != UINT8_MAX)
6798 iemNativeRegFreeTmp(pReNative, idxRegBase);
6799 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6800 return off;
6801}
6802
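/*
 * Worked example (sketch, not from the original source): bRm=0x04 (mod=0,
 * rm=4 -> SIB) with the SIB byte 0x88 in uSibAndRspOffset decodes to scale=2,
 * index=ECX, base=EAX and no displacement, so the code above ends up
 * computing the equivalent of:
 *      uint32_t GCPtrEff = (uint32_t)(uEax + (uEcx << 2));
 * with uEax and uEcx denoting the guest RAX/RCX values held in read-only
 * temporary registers.
 */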
6803
6804#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6805 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6806 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6807
6808#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6809 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6810 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
6811
6812#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
6813 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
6814 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
6815
6816/**
6817 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
6818 *
6819 * @returns New off.
6820 * @param pReNative The native recompile state.
6821 * @param off The current code buffer offset.
6822 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
6823 * bit 4 to REX.X. The two bits are part of the
6824 * REG sub-field, which isn't needed in this
6825 * function.
6826 * @param uSibAndRspOffset Two parts:
6827 * - The first 8 bits make up the SIB byte.
6828 * - The next 8 bits are the fixed RSP/ESP offset
6829 * in case of a pop [xSP].
6830 * @param u32Disp The displacement byte/word/dword, if any.
6831 * @param cbInstr The size of the fully decoded instruction. Used
6832 * for RIP relative addressing.
6833 * @param idxVarRet The result variable number.
6834 * @param f64Bit Whether to use a 64-bit or 32-bit address size
6835 * when calculating the address.
6836 *
6837 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
6838 */
6839DECL_INLINE_THROW(uint32_t)
6840iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
6841 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
6842{
6843 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
6844
6845 /*
6846 * Special case the rip + disp32 form first.
6847 */
6848 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
6849 {
6850 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6851 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
6852 kIemNativeGstRegUse_ReadOnly);
6853 if (f64Bit)
6854 {
6855#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6856 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr + (int64_t)pReNative->Core.offPc;
6857#else
6858 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
6859#endif
6860#ifdef RT_ARCH_AMD64
6861 if ((int32_t)offFinalDisp == offFinalDisp)
6862 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
6863 else
6864 {
6865 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
6866 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
6867 }
6868#else
6869 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6870#endif
6871 }
6872 else
6873 {
6874# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
6875 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr + (int32_t)pReNative->Core.offPc;
6876# else
6877 int32_t const offFinalDisp = (int32_t)u32Disp + cbInstr;
6878# endif
6879 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, offFinalDisp);
6880 }
6881 iemNativeRegFreeTmp(pReNative, idxRegPc);
6882 iemNativeVarRegisterRelease(pReNative, idxVarRet);
6883 return off;
6884 }
6885
6886 /* Calculate the fixed displacement (more down in SIB.B=4 and SIB.B=5 on this). */
6887 int64_t i64EffAddr = 0;
6888 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
6889 {
6890 case 0: break;
6891 case 1: i64EffAddr = (int8_t)u32Disp; break;
6892 case 2: i64EffAddr = (int32_t)u32Disp; break;
6893 default: AssertFailed();
6894 }
6895
6896 /* Get the register (or SIB) value. */
6897 uint8_t idxGstRegBase = UINT8_MAX;
6898 uint8_t idxGstRegIndex = UINT8_MAX;
6899 uint8_t cShiftIndex = 0;
6900 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
6901 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
6902 else /* SIB: */
6903 {
6904 /* index w/ scaling. */
6905 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
6906 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
6907 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
6908 if (idxGstRegIndex == 4)
6909 {
6910 /* no index */
6911 cShiftIndex = 0;
6912 idxGstRegIndex = UINT8_MAX;
6913 }
6914
6915 /* base */
6916 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
6917 if (idxGstRegBase == 4)
6918 {
6919 /* pop [rsp] hack */
6920 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
6921 }
6922 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
6923 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
6924 {
6925 /* mod=0 and base=5 -> disp32, no base reg. */
6926 Assert(i64EffAddr == 0);
6927 i64EffAddr = (int32_t)u32Disp;
6928 idxGstRegBase = UINT8_MAX;
6929 }
6930 }
6931
6932 /*
6933 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
6934 * the start of the function.
6935 */
6936 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
6937 {
6938 if (f64Bit)
6939 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
6940 else
6941 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
6942 return off;
6943 }
6944
6945 /*
6946 * Now emit code that calculates:
6947 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6948 * or if !f64Bit:
6949 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
6950 */
6951 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
6952 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
6953 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
6954 kIemNativeGstRegUse_ReadOnly);
6955 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
6956 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
6957 kIemNativeGstRegUse_ReadOnly);
6958
6959 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
6960 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
6961 {
6962 idxRegBase = idxRegIndex;
6963 idxRegIndex = UINT8_MAX;
6964 }
6965
6966#ifdef RT_ARCH_AMD64
6967 uint8_t bFinalAdj;
6968 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
6969 bFinalAdj = 0; /* likely */
6970 else
6971 {
6972 /* pop [rsp] with a problematic disp32 value. Split out the
6973 RSP offset and add it separately afterwards (bFinalAdj). */
6974 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
6975 Assert(idxGstRegBase == X86_GREG_xSP);
6976 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
6977 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
6978 Assert(bFinalAdj != 0);
6979 i64EffAddr -= bFinalAdj;
6980 Assert((int32_t)i64EffAddr == i64EffAddr);
6981 }
6982 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
6983//pReNative->pInstrBuf[off++] = 0xcc;
6984
6985 if (idxRegIndex == UINT8_MAX)
6986 {
6987 if (u32EffAddr == 0)
6988 {
6989 /* mov ret, base */
6990 if (f64Bit)
6991 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
6992 else
6993 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
6994 }
6995 else
6996 {
6997 /* lea ret, [base + disp32] */
6998 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
6999 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
7000 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
7001 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7002 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
7003 | (f64Bit ? X86_OP_REX_W : 0);
7004 pbCodeBuf[off++] = 0x8d;
7005 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
7006 if (idxRegBase != X86_GREG_x12 /*SIB*/)
7007 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
7008 else
7009 {
7010 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
7011 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
7012 }
7013 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7014 if (bMod == X86_MOD_MEM4)
7015 {
7016 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7017 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7018 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7019 }
7020 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7021 }
7022 }
7023 else
7024 {
7025 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
7026 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
7027 if (idxRegBase == UINT8_MAX)
7028 {
7029 /* lea ret, [(index64 << cShiftIndex) + disp32] */
7030 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
7031 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7032 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
7033 | (f64Bit ? X86_OP_REX_W : 0);
7034 pbCodeBuf[off++] = 0x8d;
7035 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
7036 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
7037 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7038 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7039 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7040 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7041 }
7042 else
7043 {
7044 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
7045 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
7046 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
7047 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
7048 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
7049 | (f64Bit ? X86_OP_REX_W : 0);
7050 pbCodeBuf[off++] = 0x8d;
7051 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
7052 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
7053 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
7054 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
7055 if (bMod != X86_MOD_MEM0)
7056 {
7057 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
7058 if (bMod == X86_MOD_MEM4)
7059 {
7060 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
7061 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
7062 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
7063 }
7064 }
7065 }
7066 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7067 }
7068
7069 if (!bFinalAdj)
7070 { /* likely */ }
7071 else
7072 {
7073 Assert(f64Bit);
7074 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
7075 }
7076
7077#elif defined(RT_ARCH_ARM64)
7078 if (i64EffAddr == 0)
7079 {
7080 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7081 if (idxRegIndex == UINT8_MAX)
7082 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
7083 else if (idxRegBase != UINT8_MAX)
7084 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
7085 f64Bit, false /*fSetFlags*/, cShiftIndex);
7086 else
7087 {
7088 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
7089 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
7090 }
7091 }
7092 else
7093 {
7094 if (f64Bit)
7095 { /* likely */ }
7096 else
7097 i64EffAddr = (int32_t)i64EffAddr;
7098
7099 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
7100 {
7101 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7102 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
7103 }
7104 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
7105 {
7106 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7107 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
7108 }
7109 else
7110 {
7111 if (f64Bit)
7112 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
7113 else
7114 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
7115 if (idxRegBase != UINT8_MAX)
7116 {
7117 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7118 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
7119 }
7120 }
7121 if (idxRegIndex != UINT8_MAX)
7122 {
7123 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
7124 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
7125 f64Bit, false /*fSetFlags*/, cShiftIndex);
7126 }
7127 }
7128
7129#else
7130# error "port me"
7131#endif
7132
7133 if (idxRegIndex != UINT8_MAX)
7134 iemNativeRegFreeTmp(pReNative, idxRegIndex);
7135 if (idxRegBase != UINT8_MAX)
7136 iemNativeRegFreeTmp(pReNative, idxRegBase);
7137 iemNativeVarRegisterRelease(pReNative, idxVarRet);
7138 return off;
7139}
7140
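/*
 * Worked example (sketch, not from the original source): the RIP relative
 * form (mod=0, rm=5) handled at the top of the function resolves against the
 * address of the *next* instruction, which is why cbInstr is added to the
 * displacement:
 *      uint64_t GCPtrEff = pVCpu->cpum.GstCtx.rip + cbInstr + (int64_t)(int32_t)u32Disp;
 * plus pReNative->Core.offPc when delayed PC updating is active, because the
 * RIP value loaded from the guest context then still lags behind by that amount.
 */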
7141
7142/*********************************************************************************************************************************
7143* Memory fetches and stores common *
7144*********************************************************************************************************************************/
7145
7146typedef enum IEMNATIVEMITMEMOP
7147{
7148 kIemNativeEmitMemOp_Store = 0,
7149 kIemNativeEmitMemOp_Fetch,
7150 kIemNativeEmitMemOp_Fetch_Zx_U16,
7151 kIemNativeEmitMemOp_Fetch_Zx_U32,
7152 kIemNativeEmitMemOp_Fetch_Zx_U64,
7153 kIemNativeEmitMemOp_Fetch_Sx_U16,
7154 kIemNativeEmitMemOp_Fetch_Sx_U32,
7155 kIemNativeEmitMemOp_Fetch_Sx_U64
7156} IEMNATIVEMITMEMOP;
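
/*
 * Illustration (assumption, not taken from the source): a zero-extending byte
 * fetch such as IEM_MC_FETCH_MEM_U8_ZX_U32 would be instantiated with
 * a_cbMem=1 and a_enmOp=kIemNativeEmitMemOp_Fetch_Zx_U32, whereas a plain
 * IEM_MC_STORE_MEM_U32 uses a_cbMem=4 and a_enmOp=kIemNativeEmitMemOp_Store.
 */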
7157
7158/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
7159 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
7160 * (with iSegReg = UINT8_MAX). */
7161template<uint8_t const a_cbMem, uint32_t const a_fAlignMaskAndCtl, IEMNATIVEMITMEMOP const a_enmOp, bool a_fFlat = false>
7162DECL_INLINE_THROW(uint32_t)
7163iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
7164 uint8_t idxVarGCPtrMem, uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
7165{
7166 /*
7167 * Assert sanity.
7168 */
7169 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
7170 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
7171 Assert( a_enmOp != kIemNativeEmitMemOp_Store
7172 || pVarValue->enmKind == kIemNativeVarKind_Immediate
7173 || pVarValue->enmKind == kIemNativeVarKind_Stack);
7174 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7175 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
7176 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
7177 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
7178 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7179 Assert(!a_fFlat ? iSegReg < 6 : iSegReg == UINT8_MAX);
7180#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7181 AssertCompile( a_cbMem == 1 || a_cbMem == 2 || a_cbMem == 4 || a_cbMem == 8
7182 || a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U));
7183#else
7184 AssertCompile(a_cbMem == 1 || a_cbMem == 2 || a_cbMem == 4 || a_cbMem == 8);
7185#endif
7186 AssertCompile(!(a_fAlignMaskAndCtl & ~(UINT32_C(0xff) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)));
7187 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7188#ifdef VBOX_STRICT
7189 if (iSegReg == UINT8_MAX)
7190 {
7191 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7192 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7193 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7194 switch (a_cbMem)
7195 {
7196 case 1:
7197 Assert( pfnFunction
7198 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
7199 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7200 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7201 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7202 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
7203 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
7204 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
7205 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
7206 : UINT64_C(0xc000b000a0009000) ));
7207 Assert(!a_fAlignMaskAndCtl);
7208 break;
7209 case 2:
7210 Assert( pfnFunction
7211 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
7212 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7213 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7214 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
7215 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
7216 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
7217 : UINT64_C(0xc000b000a0009000) ));
7218 Assert(a_fAlignMaskAndCtl <= 1);
7219 break;
7220 case 4:
7221 Assert( pfnFunction
7222 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
7223 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7224 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
7225 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
7226 : UINT64_C(0xc000b000a0009000) ));
7227 Assert(a_fAlignMaskAndCtl <= 3);
7228 break;
7229 case 8:
7230 Assert( pfnFunction
7231 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
7232 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
7233 : UINT64_C(0xc000b000a0009000) ));
7234 Assert(a_fAlignMaskAndCtl <= 7);
7235 break;
7236#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7237 case sizeof(RTUINT128U):
7238 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7239 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
7240 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7241 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
7242 || ( a_enmOp == kIemNativeEmitMemOp_Store
7243 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7244 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
7245 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
7246 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
7247 ? (a_fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (a_fAlignMaskAndCtl & 0xff) == 15
7248 : a_fAlignMaskAndCtl <= 15U);
7249 break;
7250 case sizeof(RTUINT256U):
7251 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7252 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
7253 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
7254 || ( a_enmOp == kIemNativeEmitMemOp_Store
7255 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
7256 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
7257 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx
7258 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx
7259 ? (a_fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (a_fAlignMaskAndCtl & 0xff) == 31
7260 : a_fAlignMaskAndCtl <= 31);
7261 break;
7262#endif
7263 }
7264 }
7265 else
7266 {
7267 Assert(iSegReg < 6);
7268 switch (a_cbMem)
7269 {
7270 case 1:
7271 Assert( pfnFunction
7272 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
7273 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
7274 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7275 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7276 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
7277 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
7278 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
7279 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
7280 : UINT64_C(0xc000b000a0009000) ));
7281 Assert(!a_fAlignMaskAndCtl);
7282 break;
7283 case 2:
7284 Assert( pfnFunction
7285 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
7286 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
7287 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7288 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
7289 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
7290 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
7291 : UINT64_C(0xc000b000a0009000) ));
7292 Assert(a_fAlignMaskAndCtl <= 1);
7293 break;
7294 case 4:
7295 Assert( pfnFunction
7296 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
7297 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
7298 : a_enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
7299 : a_enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
7300 : UINT64_C(0xc000b000a0009000) ));
7301 Assert(a_fAlignMaskAndCtl <= 3);
7302 break;
7303 case 8:
7304 Assert( pfnFunction
7305 == ( a_enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
7306 : a_enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
7307 : UINT64_C(0xc000b000a0009000) ));
7308 Assert(a_fAlignMaskAndCtl <= 7);
7309 break;
7310#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7311 case sizeof(RTUINT128U):
7312 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7313 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
7314 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7315 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
7316 || ( a_enmOp == kIemNativeEmitMemOp_Store
7317 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7318 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
7319 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
7320 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
7321 ? (a_fAlignMaskAndCtl & (IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE)) && (a_fAlignMaskAndCtl & 0xff) == 15
7322 : a_fAlignMaskAndCtl <= 15);
7323 break;
7324 case sizeof(RTUINT256U):
7325 Assert( ( a_enmOp == kIemNativeEmitMemOp_Fetch
7326 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
7327 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
7328 || ( a_enmOp == kIemNativeEmitMemOp_Store
7329 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
7330 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
7331 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx
7332 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx
7333 ? (a_fAlignMaskAndCtl & IEM_MEMMAP_F_ALIGN_GP) && (a_fAlignMaskAndCtl & 0xff) == 31
7334 : a_fAlignMaskAndCtl <= 31);
7335 break;
7336#endif
7337 }
7338 }
7339#endif
7340
7341#ifdef VBOX_STRICT
7342 /*
7343 * Check that the fExec flags we've got make sense.
7344 */
7345 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7346#endif
7347
7348 /*
7349 * To keep things simple we have to commit any pending writes first as we
7350 * may end up making calls.
7351 */
7352 /** @todo we could postpone this till we make the call and reload the
7353 * registers after returning from the call. Not sure if that's sensible or
7354 * not, though. */
7355#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7356 off = iemNativeRegFlushPendingWrites(pReNative, off);
7357#else
7358 /* The program counter is treated differently for now. */
7359 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
7360#endif
7361
7362#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7363 /*
7364 * Move/spill/flush stuff out of call-volatile registers.
7365 * This is the easy way out. We could contain this to the tlb-miss branch
7366 * by saving and restoring active stuff here.
7367 */
7368 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7369#endif
7370
7371 /*
7372 * Define labels and allocate the result register (trying for the return
7373 * register if we can).
7374 */
7375 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7376#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7377 uint8_t idxRegValueFetch;
7378 if RT_CONSTEXPR_IF(a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U))
7379 idxRegValueFetch = a_enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7380 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
7381 else
7382 idxRegValueFetch = a_enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7383 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7384 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7385 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7386#else
7387 uint8_t const idxRegValueFetch = a_enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
7388 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7389 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
7390 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
7391#endif
7392 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, a_cbMem, offDisp);
7393
7394#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7395 uint8_t idxRegValueStore = UINT8_MAX;
7396
7397 if RT_CONSTEXPR_IF(a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U))
7398 idxRegValueStore = a_enmOp == kIemNativeEmitMemOp_Store
7399 && !TlbState.fSkip
7400 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7401 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
7402 : UINT8_MAX;
7403 else
7404 idxRegValueStore = a_enmOp == kIemNativeEmitMemOp_Store
7405 && !TlbState.fSkip
7406 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7407 ? iemNativeVarRegisterAcquireInited(pReNative, idxVarValue, &off)
7408 : UINT8_MAX;
7409
7410#else
7411 uint8_t const idxRegValueStore = a_enmOp == kIemNativeEmitMemOp_Store
7412 && !TlbState.fSkip
7413 && pVarValue->enmKind != kIemNativeVarKind_Immediate
7414 ? iemNativeVarRegisterAcquireInited(pReNative, idxVarValue, &off)
7415 : UINT8_MAX;
7416#endif
7417 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
7418 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7419 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7420 : UINT32_MAX;
7421
7422 /*
7423 * Jump to the TLB lookup code.
7424 */
7425 if (!TlbState.fSkip)
7426 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7427
7428 /*
7429 * TlbMiss:
7430 *
7431 * Call helper to do the fetching.
7432 * We flush all guest register shadow copies here.
7433 */
7434 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
7435
7436#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7437 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7438#else
7439 RT_NOREF(idxInstr);
7440#endif
7441
7442#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7443 if (pReNative->Core.offPc)
7444 {
7445 /*
7446 * Update the program counter but restore it at the end of the TlbMiss branch.
7447 * This should allow delaying more program counter updates for the TlbLookup and hit paths
7448 * which are hopefully much more frequent, reducing the amount of memory accesses.
7449 */
7450 /* Allocate a temporary PC register. */
7451/** @todo r=bird: This would technically need to be done up front as it's a register allocation. */
7452 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7453 kIemNativeGstRegUse_ForUpdate);
7454
7455 /* Perform the addition and store the result. */
7456 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7457 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7458# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
7459 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
7460# endif
7461
7462 /* Free and flush the PC register. */
7463 iemNativeRegFreeTmp(pReNative, idxPcReg);
7464 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7465 }
7466#endif
7467
7468#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7469 /* Save variables in volatile registers. */
7470 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
7471 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
7472 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
7473 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7474#endif
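    /* TLB-miss helper call setup: ARG0 = pVCpu, ARG1 = GCPtrMem (plus offDisp), ARG2 = iSegReg for
       segmented accesses or the value for flat stores, ARG3 = the value for segmented stores; for
       SIMD values the stack address of the value is passed instead of the value itself. */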
7475
7476 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
7477 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
7478#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7479 if RT_CONSTEXPR_IF(a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U))
7480 {
7481 /*
7482 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
7483 *
7484 * Note! There was a register variable assigned to the variable for the TlbLookup case above
7485 * which must not be freed or the value loaded into the register will not be synced into the register
7486         *       further down the road because the variable doesn't know it had a register assigned.
7487 *
7488 * Note! For loads it is not required to sync what is in the assigned register with the stack slot
7489 * as it will be overwritten anyway.
7490 */
7491 uint8_t const idxRegArgValue = a_fFlat ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7492 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
7493 a_enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
7494 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7495 }
7496 else
7497#endif
7498 if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitMemOp_Store)
7499 {
7500 uint8_t const idxRegArgValue = a_fFlat ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
7501 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
7502#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7503 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7504#else
7505 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
7506 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
7507#endif
7508 }
7509
7510 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
7511 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
7512#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7513 fVolGregMask);
7514#else
7515 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
7516#endif
7517
7518 if RT_CONSTEXPR_IF(!a_fFlat)
7519 {
7520 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
7521 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
7522 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
7523 }
7524
7525#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
7526 /* Do delayed EFLAGS calculations. */
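    /* Note: the template register mask covers the call argument registers already loaded above
       (ARG1, plus ARG2/ARG3 as applicable), so the postponed EFLAGS code leaves them alone. */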
7527 if RT_CONSTEXPR_IF(a_enmOp == kIemNativeEmitMemOp_Store || a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U))
7528 {
7529 if RT_CONSTEXPR_IF(a_fFlat)
7530 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7531 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
7532 fHstRegsNotToSave);
7533 else
7534 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7535 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
7536 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG)>(pReNative, off, &TlbState,
7537 fHstRegsNotToSave);
7538 }
7539 else if RT_CONSTEXPR_IF(a_fFlat)
7540 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState,
7541 fHstRegsNotToSave);
7542 else
7543 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
7544 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
7545 fHstRegsNotToSave);
7546#endif
7547
7548 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7549 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7550
7551 /* Done setting up parameters, make the call. */
7552 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
7553
7554 /*
7555 * Put the result in the right register if this is a fetch.
7556 */
7557 if RT_CONSTEXPR_IF(a_enmOp != kIemNativeEmitMemOp_Store)
7558 {
7559#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7560 if RT_CONSTEXPR_IF(a_cbMem == sizeof(RTUINT128U) || a_cbMem == sizeof(RTUINT256U))
7561 {
7562 Assert(a_enmOp == kIemNativeEmitMemOp_Fetch);
7563
7564 /* Sync the value on the stack with the host register assigned to the variable. */
7565 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
7566 }
7567 else
7568#endif
7569 {
7570 Assert(idxRegValueFetch == pVarValue->idxReg);
7571 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
7572 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
7573 }
7574 }
7575
7576#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7577 /* Restore variables and guest shadow registers to volatile registers. */
7578 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7579 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7580#endif
7581
7582#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
7583 if (pReNative->Core.offPc)
7584 {
7585 /*
7586 * Time to restore the program counter to its original value.
7587 */
7588 /* Allocate a temporary PC register. */
7589 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
7590 kIemNativeGstRegUse_ForUpdate);
7591
7592 /* Restore the original value. */
7593 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
7594 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
7595
7596 /* Free and flush the PC register. */
7597 iemNativeRegFreeTmp(pReNative, idxPcReg);
7598 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
7599 }
7600#endif
7601
7602#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7603 if (!TlbState.fSkip)
7604 {
7605 /* end of TlbMiss - Jump to the done label. */
7606 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7607 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7608
7609 /*
7610 * TlbLookup:
7611 */
7612 off = iemNativeEmitTlbLookup<true, a_cbMem, a_fAlignMaskAndCtl,
7613 a_enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ
7614 >(pReNative, off, &TlbState, iSegReg, idxLabelTlbLookup, idxLabelTlbMiss,
7615 idxRegMemResult, offDisp);
7616
7617 /*
7618 * Emit code to do the actual storing / fetching.
7619 */
7620 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
7621# ifdef IEM_WITH_TLB_STATISTICS
7622 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
7623 a_enmOp == kIemNativeEmitMemOp_Store
7624                                                  ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
7625                                                  : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
7626# endif
7627 switch (a_enmOp)
7628 {
7629 case kIemNativeEmitMemOp_Store:
7630 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
7631 {
7632 switch (a_cbMem)
7633 {
7634 case 1:
7635 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7636 break;
7637 case 2:
7638 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7639 break;
7640 case 4:
7641 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7642 break;
7643 case 8:
7644 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7645 break;
7646#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7647 case sizeof(RTUINT128U):
7648 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7649 break;
7650 case sizeof(RTUINT256U):
7651 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
7652 break;
7653#endif
7654 default:
7655 AssertFailed();
7656 }
7657 }
7658 else
7659 {
7660 switch (a_cbMem)
7661 {
7662 case 1:
7663 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
7664 idxRegMemResult, TlbState.idxReg1);
7665 break;
7666 case 2:
7667 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
7668 idxRegMemResult, TlbState.idxReg1);
7669 break;
7670 case 4:
7671 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
7672 idxRegMemResult, TlbState.idxReg1);
7673 break;
7674 case 8:
7675 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
7676 idxRegMemResult, TlbState.idxReg1);
7677 break;
7678 default:
7679 AssertFailed();
7680 }
7681 }
7682 break;
7683
7684 case kIemNativeEmitMemOp_Fetch:
7685 case kIemNativeEmitMemOp_Fetch_Zx_U16:
7686 case kIemNativeEmitMemOp_Fetch_Zx_U32:
7687 case kIemNativeEmitMemOp_Fetch_Zx_U64:
7688 switch (a_cbMem)
7689 {
7690 case 1:
7691 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7692 break;
7693 case 2:
7694 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7695 break;
7696 case 4:
7697 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7698 break;
7699 case 8:
7700 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7701 break;
7702#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7703 case sizeof(RTUINT128U):
7704 /*
7705                     * No need to sync the register back to the stack; the generic variable handling code
7706                     * does that whenever a variable has a register assigned and the stack must be accessed.
7707 */
7708 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7709 break;
7710 case sizeof(RTUINT256U):
7711 /*
7712                     * No need to sync the register back to the stack; the generic variable handling code
7713                     * does that whenever a variable has a register assigned and the stack must be accessed.
7714 */
7715 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7716 break;
7717#endif
7718 default:
7719 AssertFailed();
7720 }
7721 break;
7722
7723 case kIemNativeEmitMemOp_Fetch_Sx_U16:
7724 Assert(a_cbMem == 1);
7725 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7726 break;
7727
7728 case kIemNativeEmitMemOp_Fetch_Sx_U32:
7729 Assert(a_cbMem == 1 || a_cbMem == 2);
7730 if (a_cbMem == 1)
7731 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7732 else
7733 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7734 break;
7735
7736 case kIemNativeEmitMemOp_Fetch_Sx_U64:
7737 switch (a_cbMem)
7738 {
7739 case 1:
7740 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7741 break;
7742 case 2:
7743 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7744 break;
7745 case 4:
7746 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
7747 break;
7748 default:
7749 AssertFailed();
7750 }
7751 break;
7752
7753 default:
7754 AssertFailed();
7755 }
7756
7757 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
7758
7759 /*
7760 * TlbDone:
7761 */
7762 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7763
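        /* Both the TLB hit and miss paths converge here; release the lookup temporaries and the
           GCPtrMem variable register. */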
7764 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7765
7766# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7767 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7768 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7769# endif
7770 }
7771#else
7772 RT_NOREF(idxLabelTlbMiss);
7773#endif
7774
7775 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
7776 iemNativeVarRegisterRelease(pReNative, idxVarValue);
7777 return off;
7778}
7779
7780
7781
7782/*********************************************************************************************************************************
7783* Memory fetches (IEM_MEM_FETCH_XXX). *
7784*********************************************************************************************************************************/
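/* Each IEM_MC_FETCH_MEM_XXX macro below instantiates iemNativeEmitMemFetchStoreDataCommon with the access
   size, the alignment mask/control and the memory operation kind as template arguments (plus 'true' for
   the _FLAT variants), and passes the helper function to call on a TLB miss. */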
7785
7786/* 8-bit segmented: */
7787#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
7788 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch>( \
7789 pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7790
7791#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7792 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16>( \
7793 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7794
7795#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7796 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32>( \
7797 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7798
7799#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7800 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64>( \
7801 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
7802
7803#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7804 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16>(\
7805 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7806
7807#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7808 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7809 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7810
7811#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7812 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64>(\
7813 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7814
7815/* 16-bit segmented: */
7816#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
7817 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7818 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7819
7820#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7821 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7822 pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7823
7824#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7825 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32>(\
7826 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7827
7828#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7829 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64>(\
7830 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
7831
7832#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7833 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7834 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7835
7836#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7837 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64>(\
7838 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7839
7840
7841/* 32-bit segmented: */
7842#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
7843 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7844 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7845
7846#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7847 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7848 pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7849
7850#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7851 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64>(\
7852 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7853
7854#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7855 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64>(\
7856 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7857
7858#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
7859 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7860 pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7861
7862#define IEM_MC_FETCH_MEM_I16_DISP(a_i16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7863 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32>(\
7864 pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr, \
7865 a_offDisp)
7866
7867#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
7868 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7869 pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7870
7871#define IEM_MC_FETCH_MEM_I32_DISP(a_i32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
7872 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7873 pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7874
7875#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
7876 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7877 pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7878
7879AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
7880#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
7881 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch>(\
7882 pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
7883
7884
7885/* 64-bit segmented: */
7886#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
7887 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch>(\
7888 pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7889
7890AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
7891#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
7892 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch>(\
7893 pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
7894
7895
7896/* 8-bit flat: */
7897#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
7898 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch, true>(\
7899 pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7900
7901#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
7902 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U16, true>(\
7903 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7904
7905#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
7906 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U32, true>(\
7907 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7908
7909#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
7910 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Zx_U64, true>(\
7911 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
7912
7913#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
7914 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U16, true>(\
7915 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
7916
7917#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
7918 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7919 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
7920
7921#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
7922 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Fetch_Sx_U64, true>(\
7923 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
7924
7925
7926/* 16-bit flat: */
7927#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
7928 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7929 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7930
7931#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
7932 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7933 pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
7934
7935#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
7936 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, true>(\
7937 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7938
7939#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
7940 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, true>(\
7941 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
7942
7943#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
7944 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7945 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7946
7947#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
7948 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, true>(\
7949 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
7950
7951/* 32-bit flat: */
7952#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
7953 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7954 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7955
7956#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
7957 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7958 pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7959
7960#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
7961 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, true>(\
7962 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7963
7964#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
7965 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, true>(\
7966 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
7967
7968#define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
7969 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7970 pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
7971
7972#define IEM_MC_FETCH_MEM_FLAT_I16_DISP(a_i16Dst, a_GCPtrMem, a_offDisp) \
7973 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, true>(\
7974 pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr, a_offDisp)
7975
7976#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
7977 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7978 pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7979
7980#define IEM_MC_FETCH_MEM_FLAT_I32_DISP(a_i32Dst, a_GCPtrMem, a_offDisp) \
7981 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7982 pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
7983
7984#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
7985 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7986 pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7987
7988#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
7989 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7990 pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
7991
7992
7993/* 64-bit flat: */
7994#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
7995 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, true>(\
7996 pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
7997
7998#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
7999 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8000 pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
8001
8002#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8003/* 128-bit segmented: */
8004#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
8005 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch>(\
8006 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
8007
8008#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
8009 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
8010 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8011 kIemNativeEmitMemOp_Fetch>(\
8012 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
8013
8014AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
8015#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
8016 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), \
8017 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8018 kIemNativeEmitMemOp_Fetch>(\
8019 pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
8020
8021#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
8022 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch>(\
8023 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
8024
8025#define IEM_MC_FETCH_MEM_XMM_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
8026 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch>(\
8027 pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
8028
8029
8030/* 128-bit flat: */
8031#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
8032 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8033 pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
8034
8035#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
8036 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
8037 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8038 kIemNativeEmitMemOp_Fetch, true>(\
8039 pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
8040
8041#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
8042 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), \
8043 (sizeof(X86XMMREG) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8044 kIemNativeEmitMemOp_Fetch, true>(\
8045 pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
8046
8047#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
8048 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8049 pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
8050
8051#define IEM_MC_FETCH_MEM_FLAT_XMM_NO_AC(a_uXmmDst, a_GCPtrMem) \
8052 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8053 pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
8054
8055/* 256-bit segmented: */
8056#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
8057 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch>(\
8058 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8059
8060#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
8061 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch>(\
8062 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8063
8064#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
8065 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8066 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, \
8067 kIemNativeEmitMemOp_Fetch>(\
8068 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
8069
8070#define IEM_MC_FETCH_MEM_YMM_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
8071 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch>(\
8072 pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
8073
8074
8075/* 256-bit flat: */
8076#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
8077 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8078 pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8079
8080#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
8081 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8082 pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8083
8084#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
8085 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8086 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, \
8087 kIemNativeEmitMemOp_Fetch, true>(\
8088 pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
8089
8090#define IEM_MC_FETCH_MEM_FLAT_YMM_NO_AC(a_uYmmDst, a_GCPtrMem) \
8091 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(X86YMMREG), sizeof(X86YMMREG) - 1, kIemNativeEmitMemOp_Fetch, true>(\
8092 pReNative, off, a_uYmmDst, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
8093
8094#endif
8095
8096
8097/*********************************************************************************************************************************
8098* Memory stores (IEM_MEM_STORE_XXX). *
8099*********************************************************************************************************************************/
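/* Same pattern as the fetch macros above, only with kIemNativeEmitMemOp_Store and the corresponding data
   store helpers; the _CONST variants go through iemNativeEmitMemStoreConstDataCommon defined below. */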
8100
8101#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
8102 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store>(\
8103 pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8104
8105#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
8106 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store>(\
8107 pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8108
8109#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
8110 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store>(\
8111 pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8112
8113#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
8114 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store>(\
8115 pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8116
8117
8118#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
8119 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint8_t), 0 /*fAlignMaskAndCtl*/, kIemNativeEmitMemOp_Store, true>(\
8120 pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8121
8122#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
8123 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, true>(\
8124 pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8125
8126#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
8127 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, true>(\
8128 pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8129
8130#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
8131 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, true>(\
8132 pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8133
8134
8135#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
8136 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint8_t)>(\
8137 pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
8138
8139#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
8140 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint16_t)>(\
8141 pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
8142
8143#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
8144 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint32_t)>(\
8145 pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
8146
8147#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
8148 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint64_t)>(\
8149 pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
8150
8151
8152#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
8153 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint8_t), true>(\
8154 pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
8155
8156#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
8157 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint16_t), true>(\
8158 pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
8159
8160#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
8161 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint32_t), true>(\
8162 pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
8163
8164#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
8165 off = iemNativeEmitMemStoreConstDataCommon<sizeof(uint64_t), true>(\
8166 pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
8167
8168/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
8169 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
8170template<uint8_t const a_cbMem, bool a_fFlat = false>
8171DECL_INLINE_THROW(uint32_t)
8172iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
8173 uint8_t idxVarGCPtrMem, uintptr_t pfnFunction, uint8_t idxInstr)
8174{
8175 /*
8176 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
8177 * to do the grunt work.
8178 */
8179 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, a_cbMem, uValueConst);
8180 off = iemNativeEmitMemFetchStoreDataCommon<a_cbMem, a_cbMem - 1,
8181 kIemNativeEmitMemOp_Store,
8182 a_fFlat>(pReNative, off, idxVarConstValue, iSegReg,
8183 idxVarGCPtrMem, pfnFunction, idxInstr);
8184 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
8185 return off;
8186}
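/* For example, IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, 0) above expands to
   iemNativeEmitMemStoreConstDataCommon<sizeof(uint16_t)>(pReNative, off, 0, a_iSeg, a_GCPtrMem,
   (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr), i.e. the constant is wrapped in a
   temporary variable and the work is deferred to iemNativeEmitMemFetchStoreDataCommon above. */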
8187
8188
8189#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8190# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
8191 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
8192 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8193 kIemNativeEmitMemOp_Store>(\
8194 pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
8195
8196# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
8197 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store>(\
8198 pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
8199
8200# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
8201 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store>(\
8202 pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
8203
8204# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
8205 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8206 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, \
8207 kIemNativeEmitMemOp_Store>(\
8208 pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8209
8210
8211# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
8212 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), \
8213 (sizeof(RTUINT128U) - 1U) | IEM_MEMMAP_F_ALIGN_GP | IEM_MEMMAP_F_ALIGN_SSE, \
8214 kIemNativeEmitMemOp_Store, true>(\
8215 pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, \
8216 pCallEntry->idxInstr)
8217
8218# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
8219 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, true>(\
8220 pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
8221
8222# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
8223 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, true>(\
8224 pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
8225
8226# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
8227 off = iemNativeEmitMemFetchStoreDataCommon<sizeof(RTUINT256U), \
8228 (sizeof(RTUINT256U) - 1U) | IEM_MEMMAP_F_ALIGN_GP, kIemNativeEmitMemOp_Store, \
8229 true>(\
8230 pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
8231#endif
8232
8233
8234
8235/*********************************************************************************************************************************
8236* Stack Accesses. *
8237*********************************************************************************************************************************/
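/* The IEM_MC[|_FLAT32|_FLAT64]_PUSH_XXX macros below instantiate iemNativeEmitStackPush as
   <a_cBitsVar, a_cBitsFlat, a_fIsSegReg>: a_cBitsVar is the width of the value being pushed, a_cBitsFlat
   is 0 for a segmented (SS relative) stack and 32/64 for the flat variants, and a_fIsSegReg marks segment
   register pushes (which need the Intel real mode quirk handling further down). */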
8238#define IEM_MC_PUSH_U16(a_u16Value) \
8239 off = iemNativeEmitStackPush<16, 0, 0>(pReNative, off, a_u16Value, (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
8240#define IEM_MC_PUSH_U32(a_u32Value) \
8241 off = iemNativeEmitStackPush<32, 0, 0>(pReNative, off, a_u32Value, (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
8242#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
8243 off = iemNativeEmitStackPush<32, 0, 1>(pReNative, off, a_uSegVal, (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
8244#define IEM_MC_PUSH_U64(a_u64Value) \
8245 off = iemNativeEmitStackPush<64, 0, 0>(pReNative, off, a_u64Value, (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
8246
8247#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
8248 off = iemNativeEmitStackPush<16, 32, 0>(pReNative, off, a_u16Value, (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8249#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
8250 off = iemNativeEmitStackPush<32, 32, 0>(pReNative, off, a_u32Value, (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
8251#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
8252 off = iemNativeEmitStackPush<32, 32, 1>(pReNative, off, a_u32Value, (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
8253
8254#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
8255 off = iemNativeEmitStackPush<16, 64, 0>(pReNative, off, a_u16Value, (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
8256#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
8257 off = iemNativeEmitStackPush<64, 64, 0>(pReNative, off, a_u64Value, (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
8258
8259
8260/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
8261template<uint8_t const a_cBitsVar, uint8_t const a_cBitsFlat, bool a_fIsSegReg = false>
8262DECL_INLINE_THROW(uint32_t)
8263iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uintptr_t pfnFunction, uint8_t idxInstr)
8264{
8265 /*
8266 * Assert sanity.
8267 */
8268 AssertCompile(a_cBitsVar == 16 || a_cBitsVar == 32 || a_cBitsVar == 64);
8269 AssertCompile(a_cBitsFlat == 0 || a_cBitsFlat == 32 || a_cBitsFlat == 64);
8270 AssertCompile(!a_fIsSegReg || a_cBitsVar < 64);
8271 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
8272 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
8273#ifdef VBOX_STRICT
8274 uint32_t const cTmplArgs = RT_MAKE_U32_FROM_U8(a_cBitsVar, a_cBitsFlat, a_fIsSegReg, 0);
8275 if (a_cBitsFlat != 0)
8276 {
8277 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8278 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8279 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8280 Assert( pfnFunction
8281 == ( cTmplArgs == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8282 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
8283 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
8284 : cTmplArgs == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
8285 : cTmplArgs == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
8286 : UINT64_C(0xc000b000a0009000) ));
8287 }
8288 else
8289 Assert( pfnFunction
8290 == ( cTmplArgs == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
8291 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
8292 : cTmplArgs == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
8293 : cTmplArgs == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
8294 : UINT64_C(0xc000b000a0009000) ));
8295#endif
8296
8297#ifdef VBOX_STRICT
8298 /*
8299 * Check that the fExec flags we've got make sense.
8300 */
8301 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8302#endif
8303
8304 /*
8305 * To keep things simple we have to commit any pending writes first as we
8306 * may end up making calls.
8307 */
8308 /** @todo we could postpone this till we make the call and reload the
8309 * registers after returning from the call. Not sure if that's sensible or
8310 * not, though. */
8311 off = iemNativeRegFlushPendingWrites(pReNative, off);
8312
8313 /*
8314 * First we calculate the new RSP and the effective stack pointer value.
8315 * For 64-bit mode and flat 32-bit these two are the same.
8316     * (Code structure is very similar to that of the stack pop emitter.)
8317 */
8318 RT_CONSTEXPR
8319 uint8_t const cbMem = a_cBitsVar / 8;
8320 bool const fIsIntelSeg = a_fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
8321 uint8_t const cbMemAccess = !a_fIsSegReg || !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
8322 ? cbMem : sizeof(uint16_t);
8323 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8324 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8325 uint8_t const idxRegEffSp = a_cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8326 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
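    /* For a non-flat stack we don't know at compile time whether SS is a 16-bit or a 32-bit stack, so the
       variant matching the current CPU mode is emitted inline below and the other one is placed after the
       TLB lookup jump (Use16BitSp), reached via the conditional branch recorded in this fixup. */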
8327 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
8328 {
8329 Assert(idxRegEffSp == idxRegRsp);
8330 Assert(a_cBitsFlat == 32 || a_cBitsFlat == 64);
8331 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8332 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
8333 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
8334 else
8335 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
8336 }
8337 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8338 {
8339 Assert(idxRegEffSp != idxRegRsp);
8340 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8341 kIemNativeGstRegUse_ReadOnly);
8342#ifdef RT_ARCH_AMD64
8343 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8344#else
8345 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8346#endif
8347 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8348 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8349 offFixupJumpToUseOtherBitSp = off;
8350 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8351 {
8352 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8353 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8354 }
8355 else
8356 {
8357 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8358 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8359 }
8360 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8361 }
8362 /* SpUpdateEnd: */
8363 uint32_t const offLabelSpUpdateEnd = off;
8364
8365 /*
8366 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
8367 * we're skipping lookup).
8368 */
8369 uint8_t const iSegReg = a_cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8370 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
8371 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8372 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8373 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8374 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8375 : UINT32_MAX;
8376 uint8_t const idxRegValue = !TlbState.fSkip
8377 && pVarValue->enmKind != kIemNativeVarKind_Immediate
8378 ? iemNativeVarRegisterAcquireInitedWithPref(pReNative, idxVarValue, &off,
8379 IEMNATIVE_CALL_ARG2_GREG)
8380 : UINT8_MAX;
8381 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
8382
8383
8384 if (!TlbState.fSkip)
8385 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8386 else
8387 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8388
8389 /*
8390 * Use16BitSp:
8391 */
8392 if RT_CONSTEXPR_IF(a_cBitsFlat == 0)
8393 {
8394#ifdef RT_ARCH_AMD64
8395 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8396#else
8397 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8398#endif
8399 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8400 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8401 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8402 else
8403 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8404 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8405 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8406 }
8407
8408 /*
8409 * TlbMiss:
8410 *
8411 * Call helper to do the pushing.
8412 */
8413 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8414
8415#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8416 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8417#else
8418 RT_NOREF(idxInstr);
8419#endif
8420
8421 /* Save variables in volatile registers. */
8422 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8423 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8424 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
8425 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
8426 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8427
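    /* Stack store helper ABI: ARG0 = pVCpu, ARG1 = effective stack address, ARG2 = value to push; the
       shuffling below merely avoids clobbering argument registers that already hold one of the inputs. */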
8428 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
8429 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
8430 {
8431 /* Swap them using ARG0 as temp register: */
8432 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
8433 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
8434 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
8435 }
8436 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
8437 {
8438 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
8439 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
8440 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
8441
8442 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
8443 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8444 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8445 }
8446 else
8447 {
8448 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
8449 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8450
8451 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
8452 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
8453 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG));
8454 }
8455
8456#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8457 /* Do delayed EFLAGS calculations. */
8458 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)
8459 | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
8460#endif
8461
8462 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8463 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8464
8465 /* Done setting up parameters, make the call. */
8466 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
8467
8468 /* Restore variables and guest shadow registers to volatile registers. */
8469 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8470 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8471
8472#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8473 if (!TlbState.fSkip)
8474 {
8475 /* end of TlbMiss - Jump to the done label. */
8476 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8477 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8478
8479 /*
8480 * TlbLookup:
8481 */
8482 if (!a_fIsSegReg || cbMemAccess == cbMem)
8483 {
8484 Assert(cbMemAccess == cbMem);
8485 off = iemNativeEmitTlbLookup<true, cbMem, cbMem - 1, IEM_ACCESS_TYPE_WRITE>(pReNative, off, &TlbState,
8486 iSegReg, idxLabelTlbLookup,
8487 idxLabelTlbMiss, idxRegMemResult);
8488 }
8489 else
8490 {
8491 Assert(cbMemAccess == sizeof(uint16_t));
8492 off = iemNativeEmitTlbLookup<true, sizeof(uint16_t), sizeof(uint16_t) - 1,
8493 IEM_ACCESS_TYPE_WRITE>(pReNative, off, &TlbState, iSegReg,
8494 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
8495 }
8496
8497 /*
8498         * Emit code to do the actual storing.
8499 */
8500 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
8501# ifdef IEM_WITH_TLB_STATISTICS
8502 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8503 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8504# endif
8505 if (idxRegValue != UINT8_MAX)
8506 {
8507 switch (cbMemAccess)
8508 {
8509 case 2:
8510 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8511 break;
8512 case 4:
8513 if (!a_fIsSegReg || !fIsIntelSeg)
8514 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8515 else
8516 {
8517                     /* intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
8518                        PUSH FS in real mode, so we have to try to emulate that here.
8519                        We borrow the now unused idxReg1 from the TLB lookup code here. */
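                        /* What the code below builds in TlbState.idxReg1 is
                           (EFLAGS & 0xffff0000 & ~X86_EFL_RAZ_MASK) | SegmentSelector, written
                           back with a single 32-bit store (relying on the upper half of
                           idxRegValue being zero, see the ASSUMES note further down). */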
8520 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
8521 kIemNativeGstReg_EFlags);
8522 if (idxRegEfl != UINT8_MAX)
8523 {
8524#ifdef RT_ARCH_AMD64
8525 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
8526 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8527 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8528#else
8529 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
8530 off, TlbState.idxReg1, idxRegEfl,
8531 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8532#endif
8533 iemNativeRegFreeTmp(pReNative, idxRegEfl);
8534 }
8535 else
8536 {
8537 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
8538 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
8539 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
8540 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
8541 }
8542 /* ASSUMES the upper half of idxRegValue is ZERO. */
8543 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
8544 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
8545 }
8546 break;
8547 case 8:
8548 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
8549 break;
8550 default:
8551 AssertFailed();
8552 }
8553 }
8554 else
8555 {
8556 switch (cbMemAccess)
8557 {
8558 case 2:
8559 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
8560 idxRegMemResult, TlbState.idxReg1);
8561 break;
8562 case 4:
8563 Assert(!a_fIsSegReg);
8564 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
8565 idxRegMemResult, TlbState.idxReg1);
8566 break;
8567 case 8:
8568 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
8569 break;
8570 default:
8571 AssertFailed();
8572 }
8573 }
8574
8575 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8576 TlbState.freeRegsAndReleaseVars(pReNative);
8577
8578 /*
8579 * TlbDone:
8580 *
8581 * Commit the new RSP value.
8582 */
8583 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8584 }
8585#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8586
8587#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8588 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
8589#endif
8590 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8591 if (idxRegEffSp != idxRegRsp)
8592 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8593
8594    /* The value variable is implicitly flushed. */
8595 if (idxRegValue != UINT8_MAX)
8596 iemNativeVarRegisterRelease(pReNative, idxVarValue);
8597 iemNativeVarFreeLocal(pReNative, idxVarValue);
8598
8599 return off;
8600}
8601
8602
8603
8604#define IEM_MC_POP_GREG_U16(a_iGReg) \
8605 off = iemNativeEmitStackPopGReg<16, 0>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
8606#define IEM_MC_POP_GREG_U32(a_iGReg) \
8607 off = iemNativeEmitStackPopGReg<32, 0>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
8608#define IEM_MC_POP_GREG_U64(a_iGReg) \
8609 off = iemNativeEmitStackPopGReg<64, 0>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
8610
8611#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
8612 off = iemNativeEmitStackPopGReg<16, 32>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8613#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
8614 off = iemNativeEmitStackPopGReg<32, 32>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
8615
8616#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
8617 off = iemNativeEmitStackPopGReg<16, 64>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
8618#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
8619 off = iemNativeEmitStackPopGReg<64, 64>(pReNative, off, a_iGReg, (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
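/* Note: the template arguments to iemNativeEmitStackPopGReg below are
   <operand width in bits, flat address width in bits>, where a flat width of 0
   selects the segmented (SS relative) code path. */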
8620
8621
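/**
 * Emits the 16-bit stack pointer variant of the POP stack update: idxRegEffSp
 * receives the zero-extended low 16 bits of RSP, and SP (bits 15:0 of
 * idxRegRsp) is incremented by cbMem with 16-bit wrap-around while bits 63:16
 * of RSP are left untouched.  idxRegTmp is only needed by the ARM64 code path.
 */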
8622DECL_FORCE_INLINE_THROW(uint32_t)
8623iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
8624 uint8_t idxRegTmp)
8625{
8626 /* Use16BitSp: */
8627#ifdef RT_ARCH_AMD64
8628 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8629 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
8630 RT_NOREF(idxRegTmp);
8631#else
8632 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
8633 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
8634 /* add tmp, regrsp, #cbMem */
8635 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
8636 /* and tmp, tmp, #0xffff */
8637 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
8638 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
8639    /* bfi regrsp, regtmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
8640 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
8641#endif
8642 return off;
8643}
8644
8645
8646DECL_FORCE_INLINE(uint32_t)
8647iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
8648{
8649 /* Use32BitSp: */
8650 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
8651 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
8652 return off;
8653}
8654
8655
8656/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
8657template<uint8_t const a_cBitsVar, uint8_t const a_cBitsFlat>
8658DECL_INLINE_THROW(uint32_t)
8659iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg, uintptr_t pfnFunction, uint8_t idxInstr)
8660{
8661 /*
8662 * Assert sanity.
8663 */
8664 Assert(idxGReg < 16);
8665#ifdef VBOX_STRICT
8666 if (a_cBitsFlat != 0)
8667 {
8668 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
8669 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
8670 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
8671 Assert( pfnFunction
8672 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 32) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8673 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 32) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
8674 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 64) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
8675 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 64) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
8676 : UINT64_C(0xc000b000a0009000) ));
8677 }
8678 else
8679 Assert( pfnFunction
8680 == ( RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(16, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
8681 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(32, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
8682 : RT_MAKE_U16(a_cBitsVar, a_cBitsFlat) == RT_MAKE_U16(64, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
8683 : UINT64_C(0xc000b000a0009000) ));
8684#endif
8685
8686#ifdef VBOX_STRICT
8687 /*
8688 * Check that the fExec flags we've got make sense.
8689 */
8690 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
8691#endif
8692
8693 /*
8694 * To keep things simple we have to commit any pending writes first as we
8695 * may end up making calls.
8696 */
8697 off = iemNativeRegFlushPendingWrites(pReNative, off);
8698
8699 /*
8700 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
8701 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
8702 * directly as the effective stack pointer.
8703 * (Code structure is very similar to that of PUSH)
8704 */
8705 uint8_t const cbMem = a_cBitsVar / 8;
8706 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
8707 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8708 uint8_t const idxRegEffSp = a_cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
8709 /** @todo can do a better job picking the register here. For cbMem >= 4 this
8710 * will be the resulting register value. */
8711 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
8712
8713 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
8714 if RT_CONSTEXPR_IF(a_cBitsFlat != 0)
8715 {
8716 Assert(idxRegEffSp == idxRegRsp);
8717 Assert(a_cBitsFlat == 32 || a_cBitsFlat == 64);
8718 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
8719 }
8720 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
8721 {
8722 Assert(idxRegEffSp != idxRegRsp);
8723 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
8724 kIemNativeGstRegUse_ReadOnly);
8725#ifdef RT_ARCH_AMD64
8726 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8727#else
8728 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8729#endif
8730 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
8731 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
8732 offFixupJumpToUseOtherBitSp = off;
8733 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8734 {
8735/** @todo can skip idxRegRsp updating when popping ESP. */
8736 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
8737 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8738 }
8739 else
8740 {
8741 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
8742 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8743 }
8744 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8745 }
8746 /* SpUpdateEnd: */
8747 uint32_t const offLabelSpUpdateEnd = off;
8748
8749 /*
8750 * Okay, now prepare for TLB lookup and jump to code (or the TlbMiss if
8751 * we're skipping lookup).
8752 */
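    /* Rough shape of the code emitted from here on (mirrors the PUSH emitter):
           jmp  TlbLookup                  ; or jmp TlbMiss when TlbState.fSkip
       (alternate SP-width update block, jumps back to SpUpdateEnd above)
       TlbMiss:   save volatiles, call pfnFunction, move result, restore, jmp TlbDone
       TlbLookup: inline TLB probe, load the popped value via idxRegMemResult
       TlbDone:   commit RSP and the popped register value */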
8753 uint8_t const iSegReg = a_cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
8754 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
8755 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
8756 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
8757 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
8758 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
8759 : UINT32_MAX;
8760
8761 if (!TlbState.fSkip)
8762 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
8763 else
8764 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
8765
8766 /*
8767 * Use16BitSp:
8768 */
8769 if RT_CONSTEXPR_IF(a_cBitsFlat == 0)
8770 {
8771#ifdef RT_ARCH_AMD64
8772 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8773#else
8774 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
8775#endif
8776 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
8777 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
8778 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
8779 else
8780 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
8781 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
8782 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
8783 }
8784
8785 /*
8786 * TlbMiss:
8787 *
8788     * Call helper to do the popping.
8789 */
8790 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
8791
8792#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
8793 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
8794#else
8795 RT_NOREF(idxInstr);
8796#endif
8797
8798 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
8799 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
8800 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
8801 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
8802
8803
8804 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
8805 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
8806 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
8807
8808#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
8809 /* Do delayed EFLAGS calculations. */
8810 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG)>(pReNative, off, &TlbState, fHstRegsNotToSave);
8811#endif
8812
8813 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
8814 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
8815
8816 /* Done setting up parameters, make the call. */
8817 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
8818
8819 /* Move the return register content to idxRegMemResult. */
8820 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
8821 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
8822
8823 /* Restore variables and guest shadow registers to volatile registers. */
8824 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
8825 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
8826
8827#ifdef IEMNATIVE_WITH_TLB_LOOKUP
8828 if (!TlbState.fSkip)
8829 {
8830 /* end of TlbMiss - Jump to the done label. */
8831 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
8832 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
8833
8834 /*
8835 * TlbLookup:
8836 */
8837 off = iemNativeEmitTlbLookup<true, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ>(pReNative, off, &TlbState, iSegReg,
8838 idxLabelTlbLookup, idxLabelTlbMiss,
8839 idxRegMemResult);
8840
8841 /*
8842     * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
8843 */
8844 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
8845# ifdef IEM_WITH_TLB_STATISTICS
8846 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
8847 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
8848# endif
8849 switch (cbMem)
8850 {
8851 case 2:
8852 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8853 break;
8854 case 4:
8855 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8856 break;
8857 case 8:
8858 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
8859 break;
8860 default:
8861 AssertFailed();
8862 }
8863
8864 TlbState.freeRegsAndReleaseVars(pReNative);
8865
8866 /*
8867 * TlbDone:
8868 *
8869     * Set the new RSP value (FLAT accesses need to calculate it first) and
8870 * commit the popped register value.
8871 */
8872 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
8873 }
8874#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
8875
8876 if (idxGReg != X86_GREG_xSP)
8877 {
8878 /* Set the register. */
8879 if (cbMem >= sizeof(uint32_t))
8880 {
8881#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
8882 AssertMsg( pReNative->idxCurCall == 0
8883 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
8884 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName,
8885 iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
8886#endif
8887 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
8888#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8889 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
8890#endif
8891#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8892 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
8893 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8894#endif
8895 }
8896 else
8897 {
8898 Assert(cbMem == sizeof(uint16_t));
8899 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
8900 kIemNativeGstRegUse_ForUpdate);
8901 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
8902#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8903 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
8904#endif
8905 iemNativeRegFreeTmp(pReNative, idxRegDst);
8906 }
8907
8908 /* Complete RSP calculation for FLAT mode. */
8909 if (idxRegEffSp == idxRegRsp)
8910 {
8911 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
8912 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8913 else
8914 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8915 }
8916 }
8917 else
8918 {
8919        /* We're popping RSP, ESP or SP. Only the 16-bit (SP) case needs a bit of extra work, of course. */
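        /* For 64-bit and 32-bit operands the popped value simply replaces the whole
           RSP/ESP, so any prior increment is moot; only the 16-bit case completes the
           FLAT-mode increment first and then merges the popped word into SP. */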
8920 if (cbMem == sizeof(uint64_t))
8921 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
8922 else if (cbMem == sizeof(uint32_t))
8923 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
8924 else
8925 {
8926 if (idxRegEffSp == idxRegRsp)
8927 {
8928 if RT_CONSTEXPR_IF(a_cBitsFlat == 64)
8929 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
8930 else
8931 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
8932 }
8933 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
8934 }
8935 }
8936
8937#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
8938 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
8939#endif
8940
8941 iemNativeRegFreeTmp(pReNative, idxRegRsp);
8942 if (idxRegEffSp != idxRegRsp)
8943 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
8944 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
8945
8946 return off;
8947}
8948
8949
8950
8951/*********************************************************************************************************************************
8952* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
8953*********************************************************************************************************************************/
8954
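/* Each IEM_MC_MEM_[FLAT_]MAP_XXX statement below expands to a call to
   iemNativeEmitMemMapCommon with the element size, the IEM_ACCESS_DATA_XXX
   flags and the alignment mask as template arguments; the FLAT variants pass
   UINT8_MAX instead of a segment register and set a_fFlat to true. */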
8955#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8956 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/>(\
8957 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
8958
8959#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8960 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/>(\
8961 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
8962
8963#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8964 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/>(\
8965 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
8966
8967#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8968 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/>(\
8969 pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
8970
8971
8972#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8973 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8974 pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
8975
8976#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8977 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8978 pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
8979
8980#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8981 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8982 pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8983
8984#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8985 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8986 pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
8987
8988#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8989 off = iemNativeEmitMemMapCommon<sizeof(int16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/>(\
8990 pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
8991
8992
8993#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8994 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8995 pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
8996
8997#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
8998 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
8999 pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
9000
9001#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9002 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
9003 pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
9004
9005#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9006 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
9007 pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
9008
9009#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9010 off = iemNativeEmitMemMapCommon<sizeof(int32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/>(\
9011 pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
9012
9013
9014#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9015 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
9016 pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
9017
9018#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9019 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
9020 pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
9021#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9022 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
9023 pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
9024
9025#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9026 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
9027 pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
9028
9029#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9030 off = iemNativeEmitMemMapCommon<sizeof(int64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
9031 pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
9032
9033
9034#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9035 off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/>(\
9036 pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
9037
9038#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9039 off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, \
9040 sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/ /** @todo check BCD align */>(\
9041 pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
9042
9043
9044#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9045 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
9046 pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
9047
9048#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9049 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
9050 pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
9051
9052#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9053 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
9054 pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
9055
9056#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
9057 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/>(\
9058 pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
9059
9060
9061
9062#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9063 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMaskAndCtl*/, true>(\
9064 pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
9065
9066#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9067 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_RW, 0 /*fAlignMaskAndCtl*/, true>(\
9068 pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
9069
9070#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9071 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_W, 0 /*fAlignMaskAndCtl*/, true>(\
9072 pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
9073
9074#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
9075 off = iemNativeEmitMemMapCommon<sizeof(uint8_t), IEM_ACCESS_DATA_R, 0 /*fAlignMaskAndCtl*/, true>(\
9076 pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
9077
9078
9079#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9080 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9081 pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
9082
9083#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9084 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9085 pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
9086
9087#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9088 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9089 pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9090
9091#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
9092 off = iemNativeEmitMemMapCommon<sizeof(uint16_t), IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9093 pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
9094
9095#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
9096 off = iemNativeEmitMemMapCommon<sizeof(int16_t), IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9097 pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
9098
9099
9100#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9101 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9102 pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
9103
9104#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9105 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9106 pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
9107
9108#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9109 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9110 pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9111
9112#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
9113 off = iemNativeEmitMemMapCommon<sizeof(uint32_t), IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9114 pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
9115
9116#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
9117 off = iemNativeEmitMemMapCommon<sizeof(int32_t), IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9118 pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
9119
9120
9121#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9122 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9123 pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
9124
9125#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9126 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9127 pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
9128
9129#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9130 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9131 pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9132
9133#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
9134 off = iemNativeEmitMemMapCommon<sizeof(uint64_t), IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9135 pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
9136
9137#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
9138 off = iemNativeEmitMemMapCommon<sizeof(int64_t), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9139 pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
9140
9141
9142#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
9143 off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/, true>(\
9144 pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
9145
9146#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
9147 off = iemNativeEmitMemMapCommon<sizeof(RTFLOAT80U), IEM_ACCESS_DATA_W, \
9148 sizeof(uint64_t) - 1 /*fAlignMaskAndCtl*/ /** @todo check BCD align */, true>(\
9149 pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
9150
9151
9152#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9153 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
9154 pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
9155
9156#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9157 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
9158 pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
9159
9160#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9161 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
9162 pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
9163
9164#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
9165 off = iemNativeEmitMemMapCommon<sizeof(RTUINT128U), IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMaskAndCtl*/, true>(\
9166 pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
9167
9168
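/**
 * Common emitter for the IEM_MC_MEM_[FLAT_]MAP_XXX statements above.
 *
 * The TLB-hit path stores the host address directly into the a_pMem variable
 * and sets a_bUnmapInfo to zero, while the TLB-miss path calls pfnFunction
 * (one of the iemNativeHlpMem[Flat]MapDataXxx helpers) with pVCpu,
 * &bUnmapInfo, GCPtrMem and, for the segmented variants, iSegReg.
 */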
9169template<uint8_t const a_cbMem, uint32_t const a_fAccess, uint32_t const a_fAlignMaskAndCtl, bool a_fFlat = false>
9170DECL_INLINE_THROW(uint32_t)
9171iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
9172 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uintptr_t pfnFunction, uint8_t idxInstr)
9173{
9174 /*
9175 * Assert sanity.
9176 */
9177 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
9178 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
9179 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
9180 && pVarMem->cbVar == sizeof(void *),
9181 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9182
9183 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9184 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9185 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
9186 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
9187 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9188
9189 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
9190 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
9191 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
9192 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
9193 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
9194
9195 Assert(!a_fFlat ? iSegReg < 6 : iSegReg == UINT8_MAX);
9196
9197 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
9198
9199#ifdef VBOX_STRICT
9200# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
9201 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
9202 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
9203 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
9204 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
9205# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
9206 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
9207 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
9208 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
9209
9210 if RT_CONSTEXPR_IF(a_fFlat)
9211 {
9212 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
9213 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
9214 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
9215 switch (a_cbMem)
9216 {
9217 case 1:
9218 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU8));
9219 Assert(!a_fAlignMaskAndCtl);
9220 break;
9221 case 2:
9222 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU16));
9223 Assert(a_fAlignMaskAndCtl < 2);
9224 break;
9225 case 4:
9226 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU32));
9227 Assert(a_fAlignMaskAndCtl < 4);
9228 break;
9229 case 8:
9230 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU64));
9231 Assert(a_fAlignMaskAndCtl < 8);
9232 break;
9233 case 10:
9234 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
9235 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
9236 Assert((a_fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9237 Assert(a_fAlignMaskAndCtl < 8);
9238 break;
9239 case 16:
9240 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemFlatMapDataU128));
9241 Assert(a_fAlignMaskAndCtl < 16);
9242 break;
9243# if 0
9244 case 32:
9245 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemFlatMapDataU256));
9246 Assert(a_fAlignMaskAndCtl < 32);
9247 break;
9248 case 64:
9249 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemFlatMapDataU512));
9250 Assert(a_fAlignMaskAndCtl < 64);
9251 break;
9252# endif
9253 default: AssertFailed(); break;
9254 }
9255 }
9256 else
9257 {
9258 Assert(iSegReg < 6);
9259 switch (a_cbMem)
9260 {
9261 case 1:
9262 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU8));
9263 Assert(!a_fAlignMaskAndCtl);
9264 break;
9265 case 2:
9266 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU16));
9267 Assert(a_fAlignMaskAndCtl < 2);
9268 break;
9269 case 4:
9270 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU32));
9271 Assert(a_fAlignMaskAndCtl < 4);
9272 break;
9273 case 8:
9274 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU64));
9275 Assert(a_fAlignMaskAndCtl < 8);
9276 break;
9277 case 10:
9278 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
9279 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
9280 Assert((a_fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
9281 Assert(a_fAlignMaskAndCtl < 8);
9282 break;
9283 case 16:
9284 Assert(pfnFunction == IEM_MAP_HLP_FN(a_fAccess, iemNativeHlpMemMapDataU128));
9285 Assert(a_fAlignMaskAndCtl < 16);
9286 break;
9287# if 0
9288 case 32:
9289 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemMapDataU256));
9290 Assert(a_fAlignMaskAndCtl < 32);
9291 break;
9292 case 64:
9293 Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(a_fAccess, iemNativeHlpMemMapDataU512));
9294 Assert(a_fAlignMaskAndCtl < 64);
9295 break;
9296# endif
9297 default: AssertFailed(); break;
9298 }
9299 }
9300# undef IEM_MAP_HLP_FN
9301# undef IEM_MAP_HLP_FN_NO_AT
9302#endif
9303
9304#ifdef VBOX_STRICT
9305 /*
9306 * Check that the fExec flags we've got make sense.
9307 */
9308 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
9309#endif
9310
9311 /*
9312 * To keep things simple we have to commit any pending writes first as we
9313 * may end up making calls.
9314 */
9315 off = iemNativeRegFlushPendingWrites(pReNative, off);
9316
9317#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9318 /*
9319 * Move/spill/flush stuff out of call-volatile registers.
9320 * This is the easy way out. We could contain this to the tlb-miss branch
9321 * by saving and restoring active stuff here.
9322 */
9323 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
9324 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
9325#endif
9326
9327 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
9328 while the tlb-miss codepath will temporarily put it on the stack.
9329       Set the type to stack here so we don't need to do it twice below. */
9330 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
9331 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
9332 /** @todo use a tmp register from TlbState, since they'll be free after tlb
9333 * lookup is done. */
9334
9335 /*
9336 * Define labels and allocate the result register (trying for the return
9337 * register if we can).
9338 */
9339 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
9340 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
9341 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
9342 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
9343 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, a_cbMem);
9344 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
9345 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
9346 : UINT32_MAX;
9347
9348 /*
9349 * Jump to the TLB lookup code.
9350 */
9351 if (!TlbState.fSkip)
9352 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
9353
9354 /*
9355 * TlbMiss:
9356 *
9357 * Call helper to do the fetching.
9358 * We flush all guest register shadow copies here.
9359 */
9360 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
9361
9362#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
9363 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9364#else
9365 RT_NOREF(idxInstr);
9366#endif
9367
9368#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9369 /* Save variables in volatile registers. */
9370 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
9371 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
9372#endif
9373
9374 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
9375    off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*offAddend*/,
9376#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9377 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
9378#else
9379 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9380#endif
9381
9382 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
9383 if RT_CONSTEXPR_IF(!a_fFlat)
9384 {
9385 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
9386 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
9387 }
9388
9389#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
9390 /* Do delayed EFLAGS calculations. */
9391 if RT_CONSTEXPR_IF(a_fFlat)
9392 off = iemNativeDoPostponedEFlagsAtTlbMiss<RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)>(pReNative, off, &TlbState,
9393 fHstRegsNotToSave);
9394 else
9395 off = iemNativeDoPostponedEFlagsAtTlbMiss< RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
9396 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG)>(pReNative, off, &TlbState,
9397 fHstRegsNotToSave);
9398#endif
9399
9400 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
9401 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
9402 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
9403
9404 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9405 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9406
9407 /* Done setting up parameters, make the call. */
9408 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
9409
9410 /*
9411 * Put the output in the right registers.
9412 */
9413 Assert(idxRegMemResult == pVarMem->idxReg);
9414 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
9415 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
9416
9417#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9418 /* Restore variables and guest shadow registers to volatile registers. */
9419 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
9420 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
9421#endif
9422
9423 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
9424 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
9425
9426#ifdef IEMNATIVE_WITH_TLB_LOOKUP
9427 if (!TlbState.fSkip)
9428 {
9429        /* end of TlbMiss - Jump to the done label. */
9430 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
9431 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
9432
9433 /*
9434 * TlbLookup:
9435 */
9436 off = iemNativeEmitTlbLookup<true, a_cbMem, a_fAlignMaskAndCtl, a_fAccess>(pReNative, off, &TlbState, iSegReg,
9437 idxLabelTlbLookup, idxLabelTlbMiss,
9438 idxRegMemResult);
9439# ifdef IEM_WITH_TLB_STATISTICS
9440 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
9441 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
9442# endif
9443
9444 /* [idxVarUnmapInfo] = 0; */
9445 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
9446
9447 /*
9448 * TlbDone:
9449 */
9450 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
9451
9452 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
9453
9454# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
9455 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
9456 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9457# endif
9458 }
9459#else
9460 RT_NOREF(idxLabelTlbMiss);
9461#endif
9462
9463 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9464 iemNativeVarRegisterRelease(pReNative, idxVarMem);
9465
9466 return off;
9467}
9468
9469
9470#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
9471 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, \
9472 pCallEntry->idxInstr, IEM_ACCESS_DATA_ATOMIC)
9473
9474#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
9475 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, \
9476 pCallEntry->idxInstr, IEM_ACCESS_DATA_RW)
9477
9478#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
9479 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, \
9480 pCallEntry->idxInstr, IEM_ACCESS_DATA_W)
9481
9482#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
9483 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, \
9484 pCallEntry->idxInstr, IEM_ACCESS_DATA_R)
9485
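/**
 * Common emitter for the IEM_MC_MEM_COMMIT_AND_UNMAP_XXX statements above.
 *
 * The unmap helper (pfnFunction) is only called when bUnmapInfo is non-zero;
 * a zero value means the mapping needs no explicit committing or unmapping
 * (see the comments further down).
 */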
9486DECL_INLINE_THROW(uint32_t)
9487iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
9488 uintptr_t pfnFunction, uint8_t idxInstr, uint32_t fAccess)
9489{
9490 /*
9491 * Assert sanity.
9492 */
9493 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
9494#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
9495 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
9496#endif
9497 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
9498 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
9499 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
9500#ifdef VBOX_STRICT
9501 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
9502 {
9503 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
9504 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
9505 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
9506 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
9507 case IEM_ACCESS_TYPE_WRITE:
9508 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
9509 case IEM_ACCESS_TYPE_READ:
9510 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
9511 default: AssertFailed();
9512 }
9513#else
9514 RT_NOREF(fAccess);
9515#endif
9516
9517 /*
9518 * To keep things simple we have to commit any pending writes first as we
9519 * may end up making calls (there shouldn't be any at this point, so this
9520 * is just for consistency).
9521 */
9522 /** @todo we could postpone this till we make the call and reload the
9523 * registers after returning from the call. Not sure if that's sensible or
9524 * not, though. */
9525 off = iemNativeRegFlushPendingWrites(pReNative, off);
9526
9527 /*
9528 * Move/spill/flush stuff out of call-volatile registers.
9529 *
9530 * We exclude any register holding the bUnmapInfo variable, as we'll be
9531 * checking it after returning from the call and will free it afterwards.
9532 */
9533 /** @todo save+restore active registers and maybe guest shadows in miss
9534 * scenario. */
9535 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
9536 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
9537
9538 /*
9539 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
9540 * to call the unmap helper function.
9541 *
9542     * The likelihood of it being zero is higher than for the TLB hit when doing
9543     * the mapping, as a TLB miss for a well-aligned and unproblematic memory
9544 * access should also end up with a mapping that won't need special unmapping.
9545 */
9546 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
9547 * should speed up things for the pure interpreter as well when TLBs
9548 * are enabled. */
9549#ifdef RT_ARCH_AMD64
9550 if (pVarUnmapInfo->idxReg == UINT8_MAX)
9551 {
9552 /* test byte [rbp - xxx], 0ffh */
9553 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
9554 pbCodeBuf[off++] = 0xf6;
9555 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
9556 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
9557 pbCodeBuf[off++] = 0xff;
9558 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9559 }
9560 else
9561#endif
9562 {
9563 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInitedWithPref(pReNative, idxVarUnmapInfo, &off,
9564 IEMNATIVE_CALL_ARG1_GREG);
9565 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
9566 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
9567 }
9568 uint32_t const offJmpFixup = off;
9569 off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices*/);
9570
9571 /*
9572 * Call the unmap helper function.
9573 */
9574#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
9575 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
9576#else
9577 RT_NOREF(idxInstr);
9578#endif
9579
9580 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
9581 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
9582 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
9583
9584 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
9585 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9586
9587 /* Done setting up parameters, make the call.
9588 Note! Since we can only end up here if we took a TLB miss, any postponed EFLAGS
9589        calculations have been done there already. Thus, a_fSkipEflChecks = true. */
9590 off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnFunction);
9591
9592    /* The bUnmapInfo variable is implicitly freed by these MCs. */
9593 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
9594
9595 /*
9596 * Done, just fixup the jump for the non-call case.
9597 */
9598 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
9599
9600 return off;
9601}
9602
9603
9604
9605/*********************************************************************************************************************************
9606* State and Exceptions *
9607*********************************************************************************************************************************/
9608
9609#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9610#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9611
9612#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9613#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9614#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9615
9616#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9617#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
9618#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
9619
9620
9621DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
9622{
9623#ifndef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
9624 RT_NOREF(pReNative, fForChange);
9625#else
9626 if ( !(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED)
9627 && fForChange)
9628 {
9629# ifdef RT_ARCH_AMD64
9630
9631 /* Need to save the host MXCSR the first time, and clear the exception flags. */
9632 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9633 {
9634 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9635
9636 /* stmxcsr */
9637 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9638 pbCodeBuf[off++] = X86_OP_REX_B;
9639 pbCodeBuf[off++] = 0x0f;
9640 pbCodeBuf[off++] = 0xae;
9641 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 3, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9642 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9643 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9644 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9645 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9646 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9647
9648 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9649 }
9650
9651 uint8_t const idxRegTmp = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
9652 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
9653 kIemNativeGstRegUse_ReadOnly);
9654
9655 /*
9656 * Mask any exceptions and clear the exception status and save into MXCSR,
9657 * taking a detour through memory here because ldmxcsr/stmxcsr don't support
9658 * a register source/target (sigh).
9659 */
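    /* As a plain C sketch (uGstMxCsr being a stand-in for the guest MXCSR value
     * held in idxRegMxCsr), the three emitter calls below compute:
     *
     *     iem.s.uRegMxcsrTmp = (uGstMxCsr | X86_MXCSR_XCPT_MASK) & ~X86_MXCSR_XCPT_FLAGS;
     *
     * which the ldmxcsr instruction then loads from the VMCPU structure. */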
9660 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr);
9661 off = iemNativeEmitOrGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, X86_MXCSR_XCPT_MASK);
9662 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, ~X86_MXCSR_XCPT_FLAGS);
9663 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9664
9665 PIEMNATIVEINSTR pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
9666
9667 /* ldmxcsr */
9668 if (IEMNATIVE_REG_FIXED_PVMCPU >= 8)
9669 pbCodeBuf[off++] = X86_OP_REX_B;
9670 pbCodeBuf[off++] = 0x0f;
9671 pbCodeBuf[off++] = 0xae;
9672 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, 2, IEMNATIVE_REG_FIXED_PVMCPU & 7);
9673 pbCodeBuf[off++] = RT_BYTE1(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9674 pbCodeBuf[off++] = RT_BYTE2(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9675 pbCodeBuf[off++] = RT_BYTE3(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9676 pbCodeBuf[off++] = RT_BYTE4(RT_UOFFSETOF(VMCPU, iem.s.uRegMxcsrTmp));
9677 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9678
9679 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9680 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9681
9682# elif defined(RT_ARCH_ARM64)
9683 uint8_t const idxRegTmp = iemNativeRegAllocTmpPreferNonVolatile(pReNative, &off);
9684
9685 /* Need to save the host floating point control register the first time, clear FPSR. */
9686 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED))
9687 {
9688 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
9689 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(ARMV8_A64_REG_XZR, ARMV8_AARCH64_SYSREG_FPSR);
9690 pu32CodeBuf[off++] = Armv8A64MkInstrMrs(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9691 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPU, iem.s.uRegFpCtrl));
9692 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SAVED;
9693 }
9694
9695 /*
9696 * Translate MXCSR to FPCR.
9697 *
9698 * Unfortunately we can't emulate the exact behavior of MXCSR as we can't take
9699 * FEAT_AFP on arm64 for granted (my M2 MacBook doesn't have it). So we can't map
9700 * MXCSR.DAZ to FPCR.FIZ and MXCSR.FZ to FPCR.FZ with FPCR.AH being set.
9701 * We can only use FPCR.FZ which will flush inputs _and_ output de-normals to zero.
9702 */
9703 /** @todo Check the host supported flags (needs additional work to get the host features from CPUM)
9704 * and implement alternate handling if FEAT_AFP is present. */
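    /* Sketch of the flush-to-zero handling implemented below (FPCR.AH/FIZ not
     * being available/used, see the note above):
     *
     *     FPCR.FZ = MXCSR.FZ | MXCSR.DAZ;
     *
     * so denormal behaviour only approximates x86 when exactly one of the two
     * MXCSR bits is set. */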
9705 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
9706 kIemNativeGstRegUse_ReadOnly);
9707
9708 PIEMNATIVEINSTR pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
9709
9710 /* First make sure that there is nothing set for the upper 16-bits (X86_MXCSR_MM, which we don't emulate right now). */
9711 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegTmp, idxRegMxCsr);
9712
9713 /* If either MXCSR.FZ or MXCSR.DAZ is set FPCR.FZ will be set. */
9714 pu32CodeBuf[off++] = Armv8A64MkInstrUbfx(IEMNATIVE_REG_FIXED_TMP0, idxRegTmp, X86_MXCSR_DAZ_BIT, 1);
9715 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(idxRegTmp, idxRegTmp, X86_MXCSR_FZ_BIT);
9716 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(idxRegTmp, idxRegTmp, IEMNATIVE_REG_FIXED_TMP0);
9717 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegTmp, idxRegTmp, ARMV8_FPCR_FZ_BIT);
9718
9719 /*
9720 * Init the rounding mode, the layout differs between MXCSR.RM[14:13] and FPCR.RMode[23:22]:
9721 *
9722 * Value MXCSR FPCR
9723 * 0 RN RN
9724 * 1 R- R+
9725 * 2 R+ R-
9726 * 3 RZ RZ
9727 *
9728 * Conversion can be achieved by switching bit positions
9729 */
9730 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT);
9731 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 23, 1);
9732 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxRegMxCsr, X86_MXCSR_RC_SHIFT + 1);
9733 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxRegTmp, IEMNATIVE_REG_FIXED_TMP0, 22, 1);
9734
9735 /* Write the value to FPCR. */
9736 pu32CodeBuf[off++] = Armv8A64MkInstrMsr(idxRegTmp, ARMV8_AARCH64_SYSREG_FPCR);
9737
9738 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
9739 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
9740 iemNativeRegFreeTmp(pReNative, idxRegTmp);
9741# else
9742# error "Port me"
9743# endif
9744 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_HOST_FP_CTRL_REG_SYNCED;
9745 }
9746#endif
9747 return off;
9748}
9749
9750
9751
9752/*********************************************************************************************************************************
9753* Emitters for FPU related operations. *
9754*********************************************************************************************************************************/
9755
9756#define IEM_MC_FETCH_FCW(a_u16Fcw) \
9757 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
9758
9759/** Emits code for IEM_MC_FETCH_FCW. */
9760DECL_INLINE_THROW(uint32_t)
9761iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9762{
9763 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9764 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9765
9766 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9767
9768 /* Allocate a temporary FCW register. */
9769 /** @todo eliminate extra register */
9770 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
9771 kIemNativeGstRegUse_ReadOnly);
9772
9773 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
9774
9775 /* Free but don't flush the FCW register. */
9776 iemNativeRegFreeTmp(pReNative, idxFcwReg);
9777 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9778
9779 return off;
9780}
9781
9782
9783#define IEM_MC_FETCH_FSW(a_u16Fsw) \
9784 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
9785
9786/** Emits code for IEM_MC_FETCH_FSW. */
9787DECL_INLINE_THROW(uint32_t)
9788iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
9789{
9790 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9791 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9792
9793 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9794 /* Allocate a temporary FSW register. */
9795 /** @todo eliminate extra register */
9796 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
9797 kIemNativeGstRegUse_ReadOnly);
9798
9799 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
9800
9801 /* Free but don't flush the FSW register. */
9802 iemNativeRegFreeTmp(pReNative, idxFswReg);
9803 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9804
9805 return off;
9806}
9807
9808
9809
9810#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9811
9812
9813/*********************************************************************************************************************************
9814* Emitters for SSE/AVX specific operations. *
9815*********************************************************************************************************************************/
9816
9817#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
9818 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
9819
9820/** Emits code for IEM_MC_COPY_XREG_U128. */
9821DECL_INLINE_THROW(uint32_t)
9822iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
9823{
9824 /* This is a nop if the source and destination register are the same. */
9825 if (iXRegDst != iXRegSrc)
9826 {
9827 /* Allocate destination and source register. */
9828 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
9829 kIemNativeGstSimdRegLdStSz_Low128,
9830 kIemNativeGstRegUse_ForFullWrite);
9831 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
9832 kIemNativeGstSimdRegLdStSz_Low128,
9833 kIemNativeGstRegUse_ReadOnly);
9834
9835 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
9836
9837 /* Free but don't flush the source and destination register. */
9838 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
9839 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9840 }
9841
9842 return off;
9843}
9844
9845
9846#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
9847 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
9848
9849/** Emits code for IEM_MC_FETCH_XREG_U128. */
9850DECL_INLINE_THROW(uint32_t)
9851iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
9852{
9853 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9854 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
9855
9856 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9857 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
9858
9859 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
9860
9861 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
9862
9863 /* Free but don't flush the source register. */
9864 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9865 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
9866
9867 return off;
9868}
9869
9870
9871#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
9872 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
9873
9874#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
9875 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
9876
9877/** Emits code for IEM_MC_FETCH_XREG_U64/IEM_MC_FETCH_XREG_R64. */
9878DECL_INLINE_THROW(uint32_t)
9879iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
9880{
9881 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9882 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
9883
9884 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9885 kIemNativeGstSimdRegLdStSz_Low128,
9886 kIemNativeGstRegUse_ReadOnly);
9887
9888 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9889 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9890
9891 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
9892
9893 /* Free but don't flush the source register. */
9894 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9895 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9896
9897 return off;
9898}
9899
9900
9901#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
9902 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
9903
9904#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
9905 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
9906
9907/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
9908DECL_INLINE_THROW(uint32_t)
9909iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
9910{
9911 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9912 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
9913
9914 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9915 kIemNativeGstSimdRegLdStSz_Low128,
9916 kIemNativeGstRegUse_ReadOnly);
9917
9918 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9919 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9920
9921 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
9922
9923 /* Free but don't flush the source register. */
9924 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9925 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9926
9927 return off;
9928}
9929
9930
9931#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
9932 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
9933
9934/** Emits code for IEM_MC_FETCH_XREG_U16. */
9935DECL_INLINE_THROW(uint32_t)
9936iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
9937{
9938 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9939 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
9940
9941 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9942 kIemNativeGstSimdRegLdStSz_Low128,
9943 kIemNativeGstRegUse_ReadOnly);
9944
9945 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9946 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9947
9948 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
9949
9950 /* Free but don't flush the source register. */
9951 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9952 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9953
9954 return off;
9955}
9956
9957
9958#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
9959 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
9960
9961/** Emits code for IEM_MC_FETCH_XREG_U8. */
9962DECL_INLINE_THROW(uint32_t)
9963iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
9964{
9965 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
9966 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
9967
9968 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
9969 kIemNativeGstSimdRegLdStSz_Low128,
9970 kIemNativeGstRegUse_ReadOnly);
9971
9972 iemNativeVarSetKindToStack(pReNative, idxDstVar);
9973 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
9974
9975 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
9976
9977 /* Free but don't flush the source register. */
9978 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
9979 iemNativeVarRegisterRelease(pReNative, idxDstVar);
9980
9981 return off;
9982}
9983
9984
9985#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
9986 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
9987
9988AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
9989#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
9990 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
9991
9992
9993/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
9994DECL_INLINE_THROW(uint32_t)
9995iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
9996{
9997 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
9998 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
9999
10000 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10001 kIemNativeGstSimdRegLdStSz_Low128,
10002 kIemNativeGstRegUse_ForFullWrite);
10003 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10004
10005 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10006
10007 /* Free but don't flush the source register. */
10008 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10009 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10010
10011 return off;
10012}
10013
10014
10015#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
10016 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
10017
10018#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
10019 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
10020
10021#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
10022 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
10023
10024#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
10025 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
10026
10027#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
10028 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
10029
10030#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
10031 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
10032
10033/** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8 and the R32/R64 variants. */
10034DECL_INLINE_THROW(uint32_t)
10035iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
10036 uint8_t cbLocal, uint8_t iElem)
10037{
10038 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10039 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
10040
10041#ifdef VBOX_STRICT
10042 switch (cbLocal)
10043 {
10044 case sizeof(uint64_t): Assert(iElem < 2); break;
10045 case sizeof(uint32_t): Assert(iElem < 4); break;
10046 case sizeof(uint16_t): Assert(iElem < 8); break;
10047 case sizeof(uint8_t): Assert(iElem < 16); break;
10048 default: AssertFailed();
10049 }
10050#endif
10051
10052 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10053 kIemNativeGstSimdRegLdStSz_Low128,
10054 kIemNativeGstRegUse_ForUpdate);
10055 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxDstVar, &off);
10056
10057 switch (cbLocal)
10058 {
10059 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10060 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10061 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10062 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
10063 default: AssertFailed();
10064 }
10065
10066 /* Free but don't flush the source register. */
10067 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10068 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10069
10070 return off;
10071}
10072
10073
10074#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
10075 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
10076
10077/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
10078DECL_INLINE_THROW(uint32_t)
10079iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10080{
10081 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10082 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10083
10084 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10085 kIemNativeGstSimdRegLdStSz_Low128,
10086 kIemNativeGstRegUse_ForUpdate);
10087 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxDstVar, &off);
10088
10089 /* Zero the vector register first, then store the 64-bit value to the lower 64 bits. */
10090 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10091 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10092
10093 /* Free but don't flush the source register. */
10094 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10095 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10096
10097 return off;
10098}
10099
10100
10101#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
10102 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
10103
10104/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
10105DECL_INLINE_THROW(uint32_t)
10106iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
10107{
10108 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10109 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10110
10111 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10112 kIemNativeGstSimdRegLdStSz_Low128,
10113 kIemNativeGstRegUse_ForUpdate);
10114 uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxDstVar, &off);
10115
10116 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
10117 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
10118 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
10119
10120 /* Free but don't flush the source register. */
10121 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10122 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10123
10124 return off;
10125}
10126
10127
10128#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
10129 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
10130
10131/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
10132DECL_INLINE_THROW(uint32_t)
10133iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
10134 uint8_t idxSrcVar, uint8_t iDwSrc)
10135{
10136 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10137 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10138
10139 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10140 kIemNativeGstSimdRegLdStSz_Low128,
10141 kIemNativeGstRegUse_ForUpdate);
10142 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10143
10144 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
10145 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
10146
10147 /* Free but don't flush the destination register. */
10148 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10149 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10150
10151 return off;
10152}
10153
10154
10155#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10156 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10157
10158/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
10159DECL_INLINE_THROW(uint32_t)
10160iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10161{
10162 /*
10163 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10164 * if iYRegDst gets allocated first for the full write it won't load the
10165 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
10166 * duplicated from the already allocated host register for iYRegDst containing
10167 * garbage. This will be caught by the guest register value checking in debug
10168 * builds.
10169 */
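    /* Resulting layout, sketched in 128-bit halves (pDst/pSrc standing in for the
     * guest YMM registers iYRegDst/iYRegSrc, not actual emitted code):
     *
     *     pDst->au128[0] = pSrc->au128[0];     - low half copied
     *     pDst->au128[1] = 0;                  - VLMAX zero extension
     */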
10170 if (iYRegDst != iYRegSrc)
10171 {
10172 /* Allocate destination and source register. */
10173 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10174 kIemNativeGstSimdRegLdStSz_256,
10175 kIemNativeGstRegUse_ForFullWrite);
10176 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10177 kIemNativeGstSimdRegLdStSz_Low128,
10178 kIemNativeGstRegUse_ReadOnly);
10179
10180 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10181 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10182
10183 /* Free but don't flush the source and destination register. */
10184 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10185 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10186 }
10187 else
10188 {
10189 /* This effectively only clears the upper 128-bits of the register. */
10190 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10191 kIemNativeGstSimdRegLdStSz_High128,
10192 kIemNativeGstRegUse_ForFullWrite);
10193
10194 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10195
10196 /* Free but don't flush the destination register. */
10197 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10198 }
10199
10200 return off;
10201}
10202
10203
10204#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
10205 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
10206
10207/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
10208DECL_INLINE_THROW(uint32_t)
10209iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
10210{
10211 /*
10212 * The iYRegSrc == iYRegDst case needs to be treated differently here, because
10213 * if iYRegDst gets allocated first for the full write it won't load the
10214 * actual value from CPUMCTX. When allocating iYRegSrc afterwards it will get
10215 * duplicated from the already allocated host register for iYRegDst containing
10216 * garbage. This will be caught by the guest register value checking in debug
10217 * builds. The iYRegSrc == iYRegDst case would effectively only clear the upper
10218 * 256 bits of a ZMM register, which we don't support yet, so it is just a nop.
10219 */
10220 if (iYRegDst != iYRegSrc)
10221 {
10222 /* Allocate destination and source register. */
10223 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10224 kIemNativeGstSimdRegLdStSz_256,
10225 kIemNativeGstRegUse_ReadOnly);
10226 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10227 kIemNativeGstSimdRegLdStSz_256,
10228 kIemNativeGstRegUse_ForFullWrite);
10229
10230 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
10231
10232 /* Free but don't flush the source and destination register. */
10233 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10234 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10235 }
10236
10237 return off;
10238}
10239
10240
10241#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
10242 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
10243
10244/** Emits code for IEM_MC_FETCH_YREG_U128. */
10245DECL_INLINE_THROW(uint32_t)
10246iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
10247{
10248 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10249 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
10250
10251 Assert(iDQWord <= 1);
10252 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10253 iDQWord == 1
10254 ? kIemNativeGstSimdRegLdStSz_High128
10255 : kIemNativeGstSimdRegLdStSz_Low128,
10256 kIemNativeGstRegUse_ReadOnly);
10257
10258 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10259 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10260
10261 if (iDQWord == 1)
10262 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10263 else
10264 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
10265
10266 /* Free but don't flush the source register. */
10267 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10268 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10269
10270 return off;
10271}
10272
10273
10274#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
10275 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
10276
10277/** Emits code for IEM_MC_FETCH_YREG_U64. */
10278DECL_INLINE_THROW(uint32_t)
10279iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
10280{
10281 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10282 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
10283
10284 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10285 iQWord >= 2
10286 ? kIemNativeGstSimdRegLdStSz_High128
10287 : kIemNativeGstSimdRegLdStSz_Low128,
10288 kIemNativeGstRegUse_ReadOnly);
10289
10290 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10291 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10292
10293 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
10294
10295 /* Free but don't flush the source register. */
10296 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10297 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10298
10299 return off;
10300}
10301
10302
10303#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
10304 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
10305
10306/** Emits code for IEM_MC_FETCH_YREG_U32. */
10307DECL_INLINE_THROW(uint32_t)
10308iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
10309{
10310 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10311 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
10312
10313 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10314 iDWord >= 4
10315 ? kIemNativeGstSimdRegLdStSz_High128
10316 : kIemNativeGstSimdRegLdStSz_Low128,
10317 kIemNativeGstRegUse_ReadOnly);
10318
10319 iemNativeVarSetKindToStack(pReNative, idxDstVar);
10320 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
10321
10322 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
10323
10324 /* Free but don't flush the source register. */
10325 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10326 iemNativeVarRegisterRelease(pReNative, idxDstVar);
10327
10328 return off;
10329}
10330
10331
10332#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
10333 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
10334
10335/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
10336DECL_INLINE_THROW(uint32_t)
10337iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
10338{
10339 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10340 kIemNativeGstSimdRegLdStSz_High128,
10341 kIemNativeGstRegUse_ForFullWrite);
10342
10343 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
10344
10345 /* Free but don't flush the register. */
10346 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
10347
10348 return off;
10349}
10350
10351
10352#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
10353 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
10354
10355/** Emits code for IEM_MC_STORE_YREG_U128. */
10356DECL_INLINE_THROW(uint32_t)
10357iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
10358{
10359 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10360 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10361
10362 Assert(iDQword <= 1);
10363 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10364 iDQword == 0
10365 ? kIemNativeGstSimdRegLdStSz_Low128
10366 : kIemNativeGstSimdRegLdStSz_High128,
10367 kIemNativeGstRegUse_ForFullWrite);
10368
10369 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10370
10371 if (iDQword == 0)
10372 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10373 else
10374 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
10375
10376 /* Free but don't flush the source register. */
10377 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10378 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10379
10380 return off;
10381}
10382
10383
10384#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10385 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10386
10387/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
10388DECL_INLINE_THROW(uint32_t)
10389iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10390{
10391 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10392 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10393
10394 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10395 kIemNativeGstSimdRegLdStSz_256,
10396 kIemNativeGstRegUse_ForFullWrite);
10397
10398 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10399
10400 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
10401 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10402
10403 /* Free but don't flush the source register. */
10404 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10405 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10406
10407 return off;
10408}
10409
10410
10411#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
10412 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
10413
10414/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
10415DECL_INLINE_THROW(uint32_t)
10416iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10417{
10418 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10419 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10420
10421 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10422 kIemNativeGstSimdRegLdStSz_256,
10423 kIemNativeGstRegUse_ForFullWrite);
10424
10425 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10426
10427 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10428 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10429
10430 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10431 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10432
10433 return off;
10434}
10435
10436
10437#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
10438 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
10439
10440/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
10441DECL_INLINE_THROW(uint32_t)
10442iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10443{
10444 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10445 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10446
10447 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10448 kIemNativeGstSimdRegLdStSz_256,
10449 kIemNativeGstRegUse_ForFullWrite);
10450
10451 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10452
10453 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10454 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10455
10456 /* Free but don't flush the source register. */
10457 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10458 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10459
10460 return off;
10461}
10462
10463
10464#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
10465 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
10466
10467/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
10468DECL_INLINE_THROW(uint32_t)
10469iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10470{
10471 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10472 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10473
10474 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10475 kIemNativeGstSimdRegLdStSz_256,
10476 kIemNativeGstRegUse_ForFullWrite);
10477
10478 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10479
10480 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10481 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10482
10483 /* Free but don't flush the source register. */
10484 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10485 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10486
10487 return off;
10488}
10489
10490
10491#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
10492 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
10493
10494/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
10495DECL_INLINE_THROW(uint32_t)
10496iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
10497{
10498 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10499 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10500
10501 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10502 kIemNativeGstSimdRegLdStSz_256,
10503 kIemNativeGstRegUse_ForFullWrite);
10504
10505 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10506
10507 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
10508 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10509
10510 /* Free but don't flush the source register. */
10511 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10512 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10513
10514 return off;
10515}
10516
10517
10518#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
10519 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
10520
10521/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
10522DECL_INLINE_THROW(uint32_t)
10523iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10524{
10525 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10526 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
10527
10528 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10529 kIemNativeGstSimdRegLdStSz_256,
10530 kIemNativeGstRegUse_ForFullWrite);
10531
10532 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10533
10534 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10535
10536 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10537 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10538
10539 return off;
10540}
10541
10542
10543#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
10544 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
10545
10546/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
10547DECL_INLINE_THROW(uint32_t)
10548iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10549{
10550 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10551 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
10552
10553 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10554 kIemNativeGstSimdRegLdStSz_256,
10555 kIemNativeGstRegUse_ForFullWrite);
10556
10557 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10558
10559 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10560
10561 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10562 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10563
10564 return off;
10565}
10566
10567
10568#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10569 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10570
10571/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
10572DECL_INLINE_THROW(uint32_t)
10573iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10574{
10575 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10576 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10577
10578 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10579 kIemNativeGstSimdRegLdStSz_256,
10580 kIemNativeGstRegUse_ForFullWrite);
10581
10582 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10583
10584 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10585
10586 /* Free but don't flush the source register. */
10587 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10588 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10589
10590 return off;
10591}
10592
10593
10594#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10595 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10596
10597/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
10598DECL_INLINE_THROW(uint32_t)
10599iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10600{
10601 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10602 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10603
10604 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10605 kIemNativeGstSimdRegLdStSz_256,
10606 kIemNativeGstRegUse_ForFullWrite);
10607
10608 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10609
10610 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
10611
10612 /* Free but don't flush the source register. */
10613 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10614 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10615
10616 return off;
10617}
10618
10619
10620#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
10621 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
10622
10623/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
10624DECL_INLINE_THROW(uint32_t)
10625iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10626{
10627 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10628 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
10629
10630 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10631 kIemNativeGstSimdRegLdStSz_256,
10632 kIemNativeGstRegUse_ForFullWrite);
10633
10634 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
10635
10636 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
10637
10638 /* Free but don't flush the source register. */
10639 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10640 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10641
10642 return off;
10643}
10644
10645
10646#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
10647 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
10648
10649/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
10650DECL_INLINE_THROW(uint32_t)
10651iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10652{
10653 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10654 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
10655
10656 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10657 kIemNativeGstSimdRegLdStSz_256,
10658 kIemNativeGstRegUse_ForFullWrite);
10659
10660 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10661
10662 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10663 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
10664
10665 /* Free but don't flush the source register. */
10666 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10667 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10668
10669 return off;
10670}
10671
10672
10673#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
10674 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
10675
10676/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
10677DECL_INLINE_THROW(uint32_t)
10678iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
10679{
10680 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10681 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10682
10683 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
10684 kIemNativeGstSimdRegLdStSz_256,
10685 kIemNativeGstRegUse_ForFullWrite);
10686
10687 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
10688
10689 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
10690 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10691
10692 /* Free but don't flush the source register. */
10693 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10694 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10695
10696 return off;
10697}
10698
10699
10700#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
10701 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
10702
10703/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
10704DECL_INLINE_THROW(uint32_t)
10705iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
10706{
10707 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10708 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10709
10710 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10711 kIemNativeGstSimdRegLdStSz_256,
10712 kIemNativeGstRegUse_ForFullWrite);
10713 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10714 kIemNativeGstSimdRegLdStSz_Low128,
10715 kIemNativeGstRegUse_ReadOnly);
10716 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
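    /* The three emits below produce, sketched per 64-bit qword (names as above):
     *     dst[0] = u64Local               - from the variable register
     *     dst[1] = iYRegSrcHx qword 1     - copied along with the low 128 bits
     *     dst[2] = dst[3] = 0             - VLMAX zero extension
     */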
10717
10718 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10719 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
10720 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10721
10722 /* Free but don't flush the source and destination registers. */
10723 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10724 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10725 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10726
10727 return off;
10728}
10729
10730
10731#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
10732 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
10733
10734/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
10735DECL_INLINE_THROW(uint32_t)
10736iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
10737{
10738 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10739 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
10740
10741 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10742 kIemNativeGstSimdRegLdStSz_256,
10743 kIemNativeGstRegUse_ForFullWrite);
10744 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
10745 kIemNativeGstSimdRegLdStSz_Low128,
10746 kIemNativeGstRegUse_ReadOnly);
10747 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
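    /* The three emits below produce, sketched per 64-bit qword (names as above):
     *     dst[0] = iYRegSrcHx qword 0     - copied along with the low 128 bits
     *     dst[1] = u64Local               - from the variable register
     *     dst[2] = dst[3] = 0             - VLMAX zero extension
     */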
10748
10749 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
10750 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
10751 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
10752
10753 /* Free but don't flush the source and destination registers. */
10754 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
10755 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10756 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
10757
10758 return off;
10759}
10760
10761
10762#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
10763 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
10764
10765
10766/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
10767DECL_INLINE_THROW(uint32_t)
10768iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
10769{
10770 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
10771 kIemNativeGstSimdRegLdStSz_Low128,
10772 kIemNativeGstRegUse_ForUpdate);
10773
10774 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
10775 if (bImm8Mask & RT_BIT(0))
10776 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
10777 if (bImm8Mask & RT_BIT(1))
10778 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
10779 if (bImm8Mask & RT_BIT(2))
10780 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
10781 if (bImm8Mask & RT_BIT(3))
10782 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
10783
10784 /* Free but don't flush the destination register. */
10785 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10786
10787 return off;
10788}
10789
10790
10791#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
10792 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
10793
10794#define IEM_MC_FETCH_YREG_YMM(a_uYmmDst, a_iYRegSrc) \
10795 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_uYmmDst, a_iYRegSrc)
10796
10797/** Emits code for IEM_MC_FETCH_YREG_U256/IEM_MC_FETCH_YREG_YMM. */
10798DECL_INLINE_THROW(uint32_t)
10799iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
10800{
10801 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
10802 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
10803
10804 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
10805 kIemNativeGstSimdRegLdStSz_256,
10806 kIemNativeGstRegUse_ReadOnly);
10807 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
10808
10809 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
10810
10811 /* Free but don't flush the source register. */
10812 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
10813 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
10814
10815 return off;
10816}
10817
10818
10819#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
10820 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
10821
10822#define IEM_MC_STORE_YREG_YMM_ZX_VLMAX(a_iYRegDst, a_uYmmSrc) \
10823 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_uYmmSrc)
10824
10825/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX/IEM_MC_STORE_YREG_YMM_ZX_VLMAX. */
10826DECL_INLINE_THROW(uint32_t)
10827iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
10828{
10829 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
10830 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
10831
10832 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
10833 kIemNativeGstSimdRegLdStSz_256,
10834 kIemNativeGstRegUse_ForFullWrite);
10835 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
10836
10837 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
10838
10839 /* Free but don't flush the source register. */
10840 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
10841 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
10842
10843 return off;
10844}


#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
    off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)


/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
                                      uint8_t idxSrcVar, uint8_t iDwSrc)
{
    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));

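    /* Dwords 0..3 live in the low 128 bits of the YMM register and dwords 4..7 in the high 128 bits, so only the
       half actually being written needs to be loaded for update. */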
    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                          iDwDst < 4
                                                                          ? kIemNativeGstSimdRegLdStSz_Low128
                                                                          : kIemNativeGstSimdRegLdStSz_High128,
                                                                          kIemNativeGstRegUse_ForUpdate);
    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
    uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);

    off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
    off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);

    /* Free but don't flush the destination register. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    iemNativeRegFreeTmp(pReNative, idxRegTmp);
    iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);

    return off;
}


#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
    off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)


/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
                                      uint8_t idxSrcVar, uint8_t iQwSrc)
{
    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));

    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                          iQwDst < 2
                                                                          ? kIemNativeGstSimdRegLdStSz_Low128
                                                                          : kIemNativeGstSimdRegLdStSz_High128,
                                                                          kIemNativeGstRegUse_ForUpdate);
    uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
    uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);

    off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
    off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);

    /* Free but don't flush the destination register. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    iemNativeRegFreeTmp(pReNative, idxRegTmp);
    iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);

    return off;
}


#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
    off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)


/** Emits code for IEM_MC_STORE_YREG_U64. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
{
    IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
    IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));

    uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
                                                                          iQwDst < 2
                                                                          ? kIemNativeGstSimdRegLdStSz_Low128
                                                                          : kIemNativeGstSimdRegLdStSz_High128,
                                                                          kIemNativeGstRegUse_ForUpdate);

    uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);

    off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);

    /* Free but don't flush the destination register. */
    iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
    iemNativeVarRegisterRelease(pReNative, idxSrcVar);

    return off;
}


#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
    off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)

/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
{
    RT_NOREF(pReNative, iYReg);
    /** @todo Needs to be implemented when support for AVX-512 is added. */
    return off;
}
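
/* Note on the stub above: IEM_MC_CLEAR_ZREG_256_UP is meant to clear bits 511:256 of the ZMM register backing
 * a_iYReg.  The SIMD register shadowing here only goes up to 256 bits (Low128/High128/256 load/store sizes), so
 * until AVX-512 state is tracked this is safely a no-op; a future implementation would presumably allocate the
 * guest register for update and zero the upper 256 bits.  No code sketch is given since the required helpers and
 * load/store size enums do not exist yet. */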



/*********************************************************************************************************************************
*   Emitters for IEM_MC_CALL_SSE_AIMPL_XXX                                                                                       *
*********************************************************************************************************************************/

/**
 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
 */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs, uint8_t idxInstr)
{
    /* Grab the MXCSR register; it must not be call volatile or we would end up freeing it when setting up the call below. */
    uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
                                                                kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
    AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));

#if 0 /* This is not required right now as the called helper will set up the SSE/AVX state if it is an assembly one. */
    /*
     * Need to do the FPU preparation.
     */
    off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
#endif

    /*
     * Do all the call setup and cleanup.
     */
    off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS,
                                  false /*fFlushPendingWrites*/);

    /*
     * Load the MXCSR register into the first argument and mask out the current exception flags.
     */
    off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
    off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);

    /*
     * Make the call.
     */
    off = iemNativeEmitCallImm<true /*a_fSkipEflChecks*/>(pReNative, off, pfnAImpl);

    /*
     * The updated MXCSR is in the return register; use it to update the exception status flags.
     *
     * The return register is marked allocated as a temporary because it is required for the
     * exception generation check below.
     */
    Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG)));
    uint8_t const idxRegTmp = iemNativeRegMarkAllocated(pReNative, IEMNATIVE_CALL_RET_GREG, kIemNativeWhat_Tmp);
    off = iemNativeEmitOrGpr32ByGpr(pReNative, off, idxRegMxCsr, idxRegTmp);

#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
    /* Write back the MXCSR register value (there is no delayed writeback for such registers at the moment). */
    off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
#endif

    /*
     * Make sure we don't have any outstanding guest register writes as we may
     * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
     */
    off = iemNativeRegFlushPendingWrites(pReNative, off);

#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
    off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
#else
    RT_NOREF(idxInstr);
#endif

    /** @todo r=aeichner ANDN from BMI1 would save us a temporary and an additional instruction here, but I don't
     *        want to assume the existence of this instruction at the moment. */
    uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);

    off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegTmp2, idxRegTmp);
    /* tmp &= X86_MXCSR_XCPT_MASK */
    off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
    /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
    off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
    /* tmp = ~tmp */
    off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
    /* tmp &= mxcsr */
    off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegTmp2);
    off = iemNativeEmitTbExitIfAnyBitsSetInGpr<kIemNativeLabelType_RaiseSseAvxFpRelated>(pReNative, off, idxRegTmp,
                                                                                         X86_MXCSR_XCPT_FLAGS);
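
    /* In C terms, the sequence just emitted computes (illustrative sketch only, not emitted code):
     *     uint32_t const fMxCsr     = <MXCSR value returned by the helper>;                    // idxRegTmp on entry
     *     uint32_t const fXcptMasks = (fMxCsr & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT;
     *     if (fMxCsr & ~fXcptMasks & X86_MXCSR_XCPT_FLAGS)
     *         // exit the TB via the RaiseSseAvxFpRelated path (unmasked exception pending).
     */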

    iemNativeRegFreeTmp(pReNative, idxRegTmp2);
    iemNativeRegFreeTmp(pReNative, idxRegTmp);
    iemNativeRegFreeTmp(pReNative, idxRegMxCsr);

    return off;
}


#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
    off = iemNativeEmitCallSseAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))

/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
}
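
/* Rough illustration of how IEM_MC_CALL_SSE_AIMPL_2 is used from an instruction's MC block (quoted from memory;
 * the exact argument types and helper names in the decoder sources may differ, so treat this purely as a sketch).
 * Note that the argument indices start at 0 here; the hidden MXCSR argument is accounted for separately via
 * IEM_SSE_AIMPL_HIDDEN_ARGS:
 *
 *     IEM_MC_ARG(PX86XMMREG,  pDst, 0);
 *     IEM_MC_ARG(PCX86XMMREG, pSrc, 1);
 *     ...
 *     IEM_MC_CALL_SSE_AIMPL_2(iemAImpl_addps_u128, pDst, pSrc);
 */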


#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
    off = iemNativeEmitCallSseAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))

/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
                           uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
}


/*********************************************************************************************************************************
*   Emitters for IEM_MC_CALL_AVX_AIMPL_XXX                                                                                       *
*********************************************************************************************************************************/

#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
    off = iemNativeEmitCallAvxAImpl2(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1))

/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2, idxInstr);
}


#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
    off = iemNativeEmitCallAvxAImpl3(pReNative, off, pCallEntry->idxInstr, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))

/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
DECL_INLINE_THROW(uint32_t)
iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uintptr_t pfnAImpl,
                           uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
{
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
    return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3, idxInstr);
}


#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */


/*********************************************************************************************************************************
*   Include instruction emitters.                                                                                                *
*********************************************************************************************************************************/
#include "target-x86/IEMAllN8veEmit-x86.h"
