VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompFuncs.h@104195

Last change on this file since 104195 was 104195, checked in by vboxsync, 12 months ago

VMM/IEM: Refactoring assembly helpers to not pass eflags by reference but instead by value and return the updated value (via eax/w0) - first chunk: ADD,ADC,SUB,SBB,CMP,TEST,AND,OR,XOR. bugref:10376
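For illustration, a minimal sketch of the helper signature change described above; the exact names, parameter order and calling-convention macros are assumptions, not the actual VirtualBox declarations:

    /* Before (assumed shape): EFLAGS passed by reference and updated in place. */
    void     iemAImpl_add_u32(uint32_t *puDst, uint32_t uSrc, uint32_t *pfEFlags);
    /* After (assumed shape): EFLAGS passed by value, the updated value returned in eax/w0. */
    uint32_t iemAImpl_add_u32(uint32_t fEFlags, uint32_t *puDst, uint32_t uSrc);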

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 427.3 KB
Line 
1/* $Id: IEMAllN8veRecompFuncs.h 104195 2024-04-05 14:45:23Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler - Inlined Bits.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#define IEMNATIVE_INCL_TABLE_FUNCTION_PROTOTYPES
37#include <VBox/vmm/iem.h>
38#include <VBox/vmm/cpum.h>
39#include <VBox/vmm/dbgf.h>
40#include "IEMInternal.h"
41#include <VBox/vmm/vmcc.h>
42#include <VBox/log.h>
43#include <VBox/err.h>
44#include <VBox/dis.h>
45#include <VBox/param.h>
46#include <iprt/assert.h>
47#include <iprt/heap.h>
48#include <iprt/mem.h>
49#include <iprt/string.h>
50#if defined(RT_ARCH_AMD64)
51# include <iprt/x86.h>
52#elif defined(RT_ARCH_ARM64)
53# include <iprt/armv8.h>
54#endif
55
56#include "IEMInline.h"
57#include "IEMThreadedFunctions.h"
58#include "IEMN8veRecompiler.h"
59#include "IEMN8veRecompilerEmit.h"
60#include "IEMN8veRecompilerTlbLookup.h"
61#include "IEMNativeFunctions.h"
62
63
64/*
65 * Narrow down configs here to avoid wasting time on unused ones.
66 * Note! Same checks in IEMAllThrdRecompiler.cpp.
67 */
68
69#ifndef IEM_WITH_CODE_TLB
70# error The code TLB must be enabled for the recompiler.
71#endif
72
73#ifndef IEM_WITH_DATA_TLB
74# error The data TLB must be enabled for the recompiler.
75#endif
76
77#ifndef IEM_WITH_SETJMP
78# error The setjmp approach must be enabled for the recompiler.
79#endif
80
81
82
83/*********************************************************************************************************************************
84* Code emitters for flushing pending guest register writes and sanity checks *
85*********************************************************************************************************************************/
86
87#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
88# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
89DECL_INLINE_THROW(uint32_t) iemNativePcAdjustCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
90{
91 /* Compare the shadow with the context value; they should match. */
92 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, IEMNATIVE_REG_FIXED_PC_DBG);
93 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, pReNative->Core.offPc);
94 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_TMP1, kIemNativeGstReg_Pc);
95 return off;
96}
97# endif
98#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
99
100/**
101 * Flushes delayed write of a specific guest register.
102 *
103 * This must be called prior to calling CImpl functions and any helpers that use
104 * the guest state (such as helpers that raise exceptions).
105 *
106 * This optimization has not yet been implemented. The first target would be
107 * RIP updates, since these are the most common ones.
108 */
109DECL_INLINE_THROW(uint32_t)
110iemNativeRegFlushPendingSpecificWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREGREF enmClass, uint8_t idxReg)
111{
112#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
113 /* If, for whatever reason, it becomes possible to reference the PC register at some point, we need to do the writeback here first. */
114#endif
115
116#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
117#if 0 /** @todo r=aeichner EFLAGS writeback delay. */
118 if ( enmClass == kIemNativeGstRegRef_EFlags
119 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags))
120 off = iemNativeRegFlushPendingWrite(pReNative, off, kIemNativeGstReg_EFlags);
121#else
122 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(kIemNativeGstReg_EFlags)));
123#endif
124
125 if ( enmClass == kIemNativeGstRegRef_Gpr
126 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxReg))
127 off = iemNativeRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTREG_GPR(idxReg));
128#endif
129
130#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
131 if ( enmClass == kIemNativeGstRegRef_XReg
132 && pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(idxReg))
133 {
134 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxReg));
135 /* Flush the shadows as the register needs to be reloaded (right now there is no guarantee that the referenced register doesn't change). */
136 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[idxReg];
137
138 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxHstSimdReg, off);
139 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(idxReg)));
140 }
141#endif
142 RT_NOREF(pReNative, enmClass, idxReg);
143 return off;
144}
145
146
147
148/*********************************************************************************************************************************
149* Emitters for IEM_MC_BEGIN_EX and IEM_MC_END. *
150*********************************************************************************************************************************/
151
152#undef IEM_MC_BEGIN /* unused */
153#define IEM_MC_BEGIN_EX(a_fMcFlags, a_fCImplFlags, a_cArgsIncludingHidden) \
154 { \
155 Assert(pReNative->Core.bmVars == 0); \
156 Assert(pReNative->Core.u64ArgVars == UINT64_MAX); \
157 Assert(pReNative->Core.bmStack == 0); \
158 pReNative->fMc = (a_fMcFlags); \
159 pReNative->fCImpl = (a_fCImplFlags); \
160 pReNative->cArgsX = (a_cArgsIncludingHidden)
161
162/** We have to get to the end in recompilation mode, as otherwise we won't
163 * generate code for all the IEM_MC_IF_XXX branches. */
164#define IEM_MC_END() \
165 iemNativeVarFreeAll(pReNative); \
166 } return off
167
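/* Illustrative (hypothetical) MC block as it appears in the instruction decoder
 * templates; during native recompilation the IEM_MC_XXX statements expand to the
 * code-emitting macros in this file. Flags, locals and the exact statements are
 * invented for the example:
 *
 *     IEM_MC_BEGIN_EX(IEM_MC_F_MIN_386, 0, 2);
 *     IEM_MC_LOCAL(uint32_t, uValue);
 *     ...
 *     IEM_MC_ADVANCE_RIP_AND_FINISH();
 *     IEM_MC_END();
 */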
168
169
170/*********************************************************************************************************************************
171* Native Emitter Support. *
172*********************************************************************************************************************************/
173
174#define IEM_MC_NATIVE_IF(a_fSupportedHosts) if (RT_ARCH_VAL & (a_fSupportedHosts)) {
175
176#define IEM_MC_NATIVE_ELSE() } else {
177
178#define IEM_MC_NATIVE_ENDIF() } ((void)0)
179
180
181#define IEM_MC_NATIVE_EMIT_0(a_fnEmitter) \
182 off = a_fnEmitter(pReNative, off)
183
184#define IEM_MC_NATIVE_EMIT_1(a_fnEmitter, a0) \
185 off = a_fnEmitter(pReNative, off, (a0))
186
187#define IEM_MC_NATIVE_EMIT_2(a_fnEmitter, a0, a1) \
188 off = a_fnEmitter(pReNative, off, (a0), (a1))
189
190#define IEM_MC_NATIVE_EMIT_3(a_fnEmitter, a0, a1, a2) \
191 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2))
192
193#define IEM_MC_NATIVE_EMIT_4(a_fnEmitter, a0, a1, a2, a3) \
194 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3))
195
196#define IEM_MC_NATIVE_EMIT_5(a_fnEmitter, a0, a1, a2, a3, a4) \
197 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4))
198
199#define IEM_MC_NATIVE_EMIT_6(a_fnEmitter, a0, a1, a2, a3, a4, a5) \
200 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5))
201
202#define IEM_MC_NATIVE_EMIT_7(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6) \
203 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6))
204
205#define IEM_MC_NATIVE_EMIT_8(a_fnEmitter, a0, a1, a2, a3, a4, a5, a6, a7) \
206 off = a_fnEmitter(pReNative, off, (a0), (a1), (a2), (a3), (a4), (a5), (a6), (a7))
207
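/* Illustrative (hypothetical) use of the IEM_MC_NATIVE_XXX macros in an instruction
 * body: emit a native fast path on the listed hosts and fall back to the generic C
 * implementation elsewhere. The emitter and fallback names are invented; only the
 * macro shapes match the definitions above:
 *
 *     IEM_MC_NATIVE_IF(RT_ARCH_VAL_AMD64 | RT_ARCH_VAL_ARM64) {
 *         IEM_MC_NATIVE_EMIT_2(iemNativeEmit_example_r_r, idxVarDst, idxVarSrc);
 *     } IEM_MC_NATIVE_ELSE() {
 *         IEM_MC_CALL_VOID_AIMPL_2(pfnFallbackImpl, pDst, uSrc);
 *     } IEM_MC_NATIVE_ENDIF();
 */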
208
209#ifndef RT_ARCH_AMD64
210# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) ((void)0)
211#else
212/** @note This is a naive approach that ASSUMES that the register isn't
213 * allocated, so it only works safely for the first allocation(s) in
214 * an MC block. */
215# define IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(a_VarNm, a_idxHostReg) \
216 off = iemNativeVarSetAmd64HostRegisterForLocal(pReNative, off, a_VarNm, a_idxHostReg)
217
218DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off);
219
220DECL_INLINE_THROW(uint32_t)
221iemNativeVarSetAmd64HostRegisterForLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t idxHstReg)
222{
223 Log12(("iemNativeVarSetAmd64HostRegisterForLocal: idxVar=%#x idxHstReg=%s (%#x) off=%#x\n", idxVar, g_apszIemNativeHstRegNames[idxHstReg], idxHstReg, off));
224 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
225 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg))); /* iemNativeVarRegisterSet does a throw/longjmp on this */
226
227# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
228 /* Must flush the register if it holds pending writes. */
229 if ( (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
230 && (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows) )
231 off = iemNativeRegFlushDirtyGuest(pReNative, off, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
232# endif
233
234 iemNativeVarRegisterSet(pReNative, idxVar, idxHstReg, off);
235 return off;
236}
237
238#endif /* RT_ARCH_AMD64 */
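/* Hypothetical usage sketch (variable name and register choice invented): pin a
 * local into RCX before invoking an emitter that expects the value in CL on AMD64
 * hosts:
 *
 *     IEM_MC_LOCAL(uint8_t, cShift);
 *     IEM_MC_NATIVE_SET_AMD64_HOST_REG_FOR_LOCAL(cShift, X86_GREG_xCX);
 */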
239
240
241
242/*********************************************************************************************************************************
243* Emitters for standalone C-implementation deferrals (IEM_MC_DEFER_TO_CIMPL_XXXX) *
244*********************************************************************************************************************************/
245
246#define IEM_MC_DEFER_TO_CIMPL_0_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl) \
247 pReNative->fMc = 0; \
248 pReNative->fCImpl = (a_fFlags); \
249 return iemNativeEmitCImplCall0(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr) /** @todo not used ... */
250
251
252#define IEM_MC_DEFER_TO_CIMPL_1_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
253 pReNative->fMc = 0; \
254 pReNative->fCImpl = (a_fFlags); \
255 return iemNativeEmitCImplCall1(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a_cbInstr, a0)
256
257DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall1(PIEMRECOMPILERSTATE pReNative, uint32_t off,
258 uint8_t idxInstr, uint64_t a_fGstShwFlush,
259 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0)
260{
261 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 1, uArg0, 0, 0);
262}
263
264
265#define IEM_MC_DEFER_TO_CIMPL_2_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
266 pReNative->fMc = 0; \
267 pReNative->fCImpl = (a_fFlags); \
268 return iemNativeEmitCImplCall2(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
269 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1)
270
271DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall2(PIEMRECOMPILERSTATE pReNative, uint32_t off,
272 uint8_t idxInstr, uint64_t a_fGstShwFlush,
273 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1)
274{
275 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 2, uArg0, uArg1, 0);
276}
277
278
279#define IEM_MC_DEFER_TO_CIMPL_3_RET_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
280 pReNative->fMc = 0; \
281 pReNative->fCImpl = (a_fFlags); \
282 return iemNativeEmitCImplCall3(pReNative, off, pCallEntry->idxInstr, a_fGstShwFlush, \
283 (uintptr_t)a_pfnCImpl, a_cbInstr, a0, a1, a2)
284
285DECL_INLINE_THROW(uint32_t) iemNativeEmitCImplCall3(PIEMRECOMPILERSTATE pReNative, uint32_t off,
286 uint8_t idxInstr, uint64_t a_fGstShwFlush,
287 uintptr_t pfnCImpl, uint8_t cbInstr, uint64_t uArg0, uint64_t uArg1,
288 uint64_t uArg2)
289{
290 return iemNativeEmitCImplCall(pReNative, off, idxInstr, a_fGstShwFlush, pfnCImpl, cbInstr, 3, uArg0, uArg1, uArg2);
291}
292
293
294
295/*********************************************************************************************************************************
296* Emitters for advancing PC/RIP/EIP/IP (IEM_MC_ADVANCE_RIP_AND_FINISH_XXX) *
297*********************************************************************************************************************************/
298
299/** Emits the flags check for IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS
300 * and the other _WITH_FLAGS MCs, see iemRegFinishClearingRF. */
301DECL_INLINE_THROW(uint32_t)
302iemNativeEmitFinishInstructionFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
303{
304 /*
305 * If it's not just X86_EFL_RF and CPUMCTX_INHIBIT_SHADOW that are set, we
306 * return with special status code and make the execution loop deal with
307 * this. If TF or CPUMCTX_DBG_HIT_DRX_MASK triggers, we have to raise an
308 * exception and won't continue execution. While CPUMCTX_DBG_DBGF_MASK
309 * could continue w/o interruption, it probably will drop into the
310 * debugger, so it is not worth the effort of trying to service it here and we
311 * just lump it in with the handling of the others.
312 *
313 * To simplify the code and the register state management even more (wrt
314 * the immediate in the AND operation), we always update the flags and skip the
315 * conditional jump associated with the extra check.
316 */
317 AssertCompile( (X86_EFL_TF | X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK)
318 <= UINT32_MAX);
319#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
320 AssertMsg( pReNative->idxCurCall == 0
321 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)),
322 ("Efl_Other - %u\n", iemNativeLivenessGetStateByGstRegEx(&pReNative->paLivenessEntries[pReNative->idxCurCall - 1], IEMLIVENESSBIT_IDX_EFL_OTHER)));
323#endif
324
325 /*
326 * As this code can break out of the execution loop when jumping to the ReturnWithFlags label,
327 * any pending register writes must be flushed.
328 */
329 off = iemNativeRegFlushPendingWrites(pReNative, off);
330
331 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
332 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/,
333 true /*fSkipLivenessAssert*/);
334 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg,
335 X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK,
336 iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnWithFlags));
337 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW));
338 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
339
340 /* Free but don't flush the EFLAGS register. */
341 iemNativeRegFreeTmp(pReNative, idxEflReg);
342
343 return off;
344}
345
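/* Roughly, the host code emitted by the function above amounts to the following
 * (sketch only; register allocation and encodings omitted):
 *
 *     eflTmp = pVCpu->cpum.GstCtx.eflags;
 *     if (eflTmp & (X86_EFL_TF | CPUMCTX_DBG_HIT_DRX_MASK | CPUMCTX_DBG_DBGF_MASK))
 *         goto ReturnWithFlags;
 *     eflTmp &= ~(uint32_t)(X86_EFL_RF | CPUMCTX_INHIBIT_SHADOW);
 *     pVCpu->cpum.GstCtx.eflags = eflTmp;
 */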
346
347/** Emits the a_rcNormal status handling; a no-op for VINF_SUCCESS. */
348template<int const a_rcNormal>
349DECL_FORCE_INLINE(uint32_t)
350iemNativeEmitFinishInstructionWithStatus(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
351{
352 AssertCompile(a_rcNormal == VINF_SUCCESS || a_rcNormal == VINF_IEM_REEXEC_BREAK);
353 if (a_rcNormal != VINF_SUCCESS)
354 {
355#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
356 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
357#else
358 RT_NOREF_PV(idxInstr);
359#endif
360
361 /* As this code returns from the TB any pending register writes must be flushed. */
362 off = iemNativeRegFlushPendingWrites(pReNative, off);
363
364 return iemNativeEmitJmpToNewLabel(pReNative, off, kIemNativeLabelType_ReturnBreak);
365 }
366 return off;
367}
368
369
370#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64(a_cbInstr, a_rcNormal) \
371 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
372 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
373
374#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_cbInstr, a_rcNormal) \
375 off = iemNativeEmitAddToRip64AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
376 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
377 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
378
379/** Same as iemRegAddToRip64AndFinishingNoFlags. */
380DECL_INLINE_THROW(uint32_t)
381iemNativeEmitAddToRip64AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
382{
383#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
384# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
385 if (!pReNative->Core.offPc)
386 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
387# endif
388
389 /* Allocate a temporary PC register. */
390 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
391
392 /* Perform the addition and store the result. */
393 off = iemNativeEmitAddGprImm8(pReNative, off, idxPcReg, cbInstr);
394 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
395
396 /* Free but don't flush the PC register. */
397 iemNativeRegFreeTmp(pReNative, idxPcReg);
398#endif
399
400#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
401 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
402
403 pReNative->Core.offPc += cbInstr;
404# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
405 off = iemNativePcAdjustCheck(pReNative, off);
406# endif
407 if (pReNative->cCondDepth)
408 off = iemNativeEmitPcWriteback(pReNative, off);
409 else
410 pReNative->Core.cInstrPcUpdateSkipped++;
411#endif
412
413 return off;
414}
415
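/* In effect, the function above emits either (sketch, without delayed PC updating):
 *
 *     pc = pVCpu->cpum.GstCtx.rip;
 *     pc += cbInstr;
 *     pVCpu->cpum.GstCtx.rip = pc;
 *
 * or, with IEMNATIVE_WITH_DELAYED_PC_UPDATING, merely accumulates cbInstr in
 * Core.offPc and leaves the store to a later iemNativeEmitPcWriteback(). */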
416
417#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32(a_cbInstr, a_rcNormal) \
418 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
419 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
420
421#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_cbInstr, a_rcNormal) \
422 off = iemNativeEmitAddToEip32AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
423 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
424 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
425
426/** Same as iemRegAddToEip32AndFinishingNoFlags. */
427DECL_INLINE_THROW(uint32_t)
428iemNativeEmitAddToEip32AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
429{
430#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
431# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
432 if (!pReNative->Core.offPc)
433 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
434# endif
435
436 /* Allocate a temporary PC register. */
437 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
438
439 /* Perform the addition and store the result. */
440 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
441 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
442
443 /* Free but don't flush the PC register. */
444 iemNativeRegFreeTmp(pReNative, idxPcReg);
445#endif
446
447#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
448 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
449
450 pReNative->Core.offPc += cbInstr;
451# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
452 off = iemNativePcAdjustCheck(pReNative, off);
453# endif
454 if (pReNative->cCondDepth)
455 off = iemNativeEmitPcWriteback(pReNative, off);
456 else
457 pReNative->Core.cInstrPcUpdateSkipped++;
458#endif
459
460 return off;
461}
462
463
464#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16(a_cbInstr, a_rcNormal) \
465 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
466 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
467
468#define IEM_MC_ADVANCE_RIP_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_cbInstr, a_rcNormal) \
469 off = iemNativeEmitAddToIp16AndFinishingNoFlags(pReNative, off, (a_cbInstr)); \
470 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
471 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
472
473/** Same as iemRegAddToIp16AndFinishingNoFlags. */
474DECL_INLINE_THROW(uint32_t)
475iemNativeEmitAddToIp16AndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
476{
477#if !defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING) || defined(IEMNATIVE_REG_FIXED_PC_DBG)
478# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
479 if (!pReNative->Core.offPc)
480 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
481# endif
482
483 /* Allocate a temporary PC register. */
484 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
485
486 /* Perform the addition and store the result. */
487 off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxPcReg, cbInstr);
488 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
489 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
490
491 /* Free but don't flush the PC register. */
492 iemNativeRegFreeTmp(pReNative, idxPcReg);
493#endif
494
495#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
496 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
497
498 pReNative->Core.offPc += cbInstr;
499# if defined(IEMNATIVE_REG_FIXED_PC_DBG)
500 off = iemNativePcAdjustCheck(pReNative, off);
501# endif
502 if (pReNative->cCondDepth)
503 off = iemNativeEmitPcWriteback(pReNative, off);
504 else
505 pReNative->Core.cInstrPcUpdateSkipped++;
506#endif
507
508 return off;
509}
510
511
512
513/*********************************************************************************************************************************
514* Emitters for changing PC/RIP/EIP/IP with a relative jump (IEM_MC_REL_JMP_XXX_AND_FINISH_XXX). *
515*********************************************************************************************************************************/
516
517#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
518 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
519 (a_enmEffOpSize), pCallEntry->idxInstr); \
520 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
521
522#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
523 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
524 (a_enmEffOpSize), pCallEntry->idxInstr); \
525 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
526 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
527
528#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64(a_i16, a_cbInstr, a_rcNormal) \
529 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
530 IEMMODE_16BIT, pCallEntry->idxInstr); \
531 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
532
533#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
534 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
535 IEMMODE_16BIT, pCallEntry->idxInstr); \
536 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
537 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
538
539#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64(a_i32, a_cbInstr, a_rcNormal) \
540 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
541 IEMMODE_64BIT, pCallEntry->idxInstr); \
542 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
543
544#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
545 off = iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
546 IEMMODE_64BIT, pCallEntry->idxInstr); \
547 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
548 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
549
550/** Same as iemRegRip64RelativeJumpS8AndFinishNoFlags,
551 * iemRegRip64RelativeJumpS16AndFinishNoFlags and
552 * iemRegRip64RelativeJumpS32AndFinishNoFlags. */
553DECL_INLINE_THROW(uint32_t)
554iemNativeEmitRip64RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
555 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
556{
557 Assert(enmEffOpSize == IEMMODE_64BIT || enmEffOpSize == IEMMODE_16BIT);
558
559 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
560 off = iemNativeRegFlushPendingWrites(pReNative, off);
561
562#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
563 Assert(pReNative->Core.offPc == 0);
564
565 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
566#endif
567
568 /* Allocate a temporary PC register. */
569 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
570
571 /* Perform the addition. */
572 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, (int64_t)offDisp + cbInstr);
573
574 if (RT_LIKELY(enmEffOpSize == IEMMODE_64BIT))
575 {
576 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
577 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
578 }
579 else
580 {
581 /* Just truncate the result to 16-bit IP. */
582 Assert(enmEffOpSize == IEMMODE_16BIT);
583 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
584 }
585 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
586
587 /* Free but don't flush the PC register. */
588 iemNativeRegFreeTmp(pReNative, idxPcReg);
589
590 return off;
591}
592
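/* Sketch of the logic emitted above for the 64-bit operand size case (the 16-bit
 * case truncates to IP instead of checking for a canonical address):
 *
 *     pc = pVCpu->cpum.GstCtx.rip + cbInstr + offDisp;
 *     if (pc is not canonical)
 *         -> raise #GP(0) and exit the TB;
 *     pVCpu->cpum.GstCtx.rip = pc;
 */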
593
594#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
595 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
596 (a_enmEffOpSize), pCallEntry->idxInstr); \
597 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
598
599#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i8, a_cbInstr, a_enmEffOpSize, a_rcNormal) \
600 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), \
601 (a_enmEffOpSize), pCallEntry->idxInstr); \
602 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
603 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
604
605#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32(a_i16, a_cbInstr, a_rcNormal) \
606 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
607 IEMMODE_16BIT, pCallEntry->idxInstr); \
608 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
609
610#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
611 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), \
612 IEMMODE_16BIT, pCallEntry->idxInstr); \
613 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
614 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
615
616#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32(a_i32, a_cbInstr, a_rcNormal) \
617 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
618 IEMMODE_32BIT, pCallEntry->idxInstr); \
619 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
620
621#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
622 off = iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), \
623 IEMMODE_32BIT, pCallEntry->idxInstr); \
624 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
625 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
626
627/** Same as iemRegEip32RelativeJumpS8AndFinishNoFlags,
628 * iemRegEip32RelativeJumpS16AndFinishNoFlags and
629 * iemRegEip32RelativeJumpS32AndFinishNoFlags. */
630DECL_INLINE_THROW(uint32_t)
631iemNativeEmitEip32RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr,
632 int32_t offDisp, IEMMODE enmEffOpSize, uint8_t idxInstr)
633{
634 Assert(enmEffOpSize == IEMMODE_32BIT || enmEffOpSize == IEMMODE_16BIT);
635
636 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
637 off = iemNativeRegFlushPendingWrites(pReNative, off);
638
639#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
640 Assert(pReNative->Core.offPc == 0);
641
642 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
643#endif
644
645 /* Allocate a temporary PC register. */
646 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
647
648 /* Perform the addition. */
649 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
650
651 /* Truncate the result to 16-bit IP if the operand size is 16-bit. */
652 if (enmEffOpSize == IEMMODE_16BIT)
653 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
654
655 /* Perform limit checking, potentially raising #GP(0) and exit the TB. */
656/** @todo we can skip this in 32-bit FLAT mode. */
657 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
658
659 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
660
661 /* Free but don't flush the PC register. */
662 iemNativeRegFreeTmp(pReNative, idxPcReg);
663
664 return off;
665}
666
667
668#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16(a_i8, a_cbInstr, a_rcNormal) \
669 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
670 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
671
672#define IEM_MC_REL_JMP_S8_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i8, a_cbInstr, a_rcNormal) \
673 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int8_t)(a_i8), pCallEntry->idxInstr); \
674 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
675 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
676
677#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16(a_i16, a_cbInstr, a_rcNormal) \
678 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
679 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
680
681#define IEM_MC_REL_JMP_S16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i16, a_cbInstr, a_rcNormal) \
682 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (int16_t)(a_i16), pCallEntry->idxInstr); \
683 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
684 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
685
686#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16(a_i32, a_cbInstr, a_rcNormal) \
687 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
688 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
689
690#define IEM_MC_REL_JMP_S32_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_i32, a_cbInstr, a_rcNormal) \
691 off = iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(pReNative, off, (a_cbInstr), (a_i32), pCallEntry->idxInstr); \
692 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off); \
693 off = iemNativeEmitFinishInstructionWithStatus<a_rcNormal>(pReNative, off, pCallEntry->idxInstr)
694
695/** Same as iemRegIp16RelativeJumpS8AndFinishNoFlags. */
696DECL_INLINE_THROW(uint32_t)
697iemNativeEmitIp16RelativeJumpAndFinishingNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off,
698 uint8_t cbInstr, int32_t offDisp, uint8_t idxInstr)
699{
700 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
701 off = iemNativeRegFlushPendingWrites(pReNative, off);
702
703#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
704 Assert(pReNative->Core.offPc == 0);
705
706 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
707#endif
708
709 /* Allocate a temporary PC register. */
710 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
711
712 /* Perform the addition, clamp the result, check limit (may #GP(0) + exit TB) and store the result. */
713 off = iemNativeEmitAddGpr32Imm(pReNative, off, idxPcReg, offDisp + cbInstr);
714 off = iemNativeEmitClear16UpGpr(pReNative, off, idxPcReg);
715 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
716 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
717
718 /* Free but don't flush the PC register. */
719 iemNativeRegFreeTmp(pReNative, idxPcReg);
720
721 return off;
722}
723
724
725
726/*********************************************************************************************************************************
727* Emitters for changing PC/RIP/EIP/IP with an indirect jump (IEM_MC_SET_RIP_UXX_AND_FINISH). *
728*********************************************************************************************************************************/
729
730/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets. */
731#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP) \
732 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
733
734/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets. */
735#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP) \
736 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
737
738/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code. */
739#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP) \
740 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u16NewIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint16_t))
741
742/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for pre-386 targets that checks and
743 * clears flags. */
744#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16_WITH_FLAGS(a_u16NewIP) \
745 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC16(a_u16NewIP); \
746 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
747
748/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for 386+ targets that checks and
749 * clears flags. */
750#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u16NewIP) \
751 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC32(a_u16NewIP); \
752 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
753
754/** Variant of IEM_MC_SET_RIP_U16_AND_FINISH for use in 64-bit code that checks and
755 * clears flags. */
756#define IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u16NewIP) \
757 IEM_MC_SET_RIP_U16_AND_FINISH_THREADED_PC64(a_u16NewIP); \
758 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
759
760#undef IEM_MC_SET_RIP_U16_AND_FINISH
761
762
763/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets. */
764#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP) \
765 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), false /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
766
767/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code. */
768#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP) \
769 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u32NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint32_t))
770
771/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for 386+ targets that checks and
772 * clears flags. */
773#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32_WITH_FLAGS(a_u32NewEIP) \
774 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC32(a_u32NewEIP); \
775 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
776
777/** Variant of IEM_MC_SET_RIP_U32_AND_FINISH for use in 64-bit code that checks
778 * and clears flags. */
779#define IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u32NewEIP) \
780 IEM_MC_SET_RIP_U32_AND_FINISH_THREADED_PC64(a_u32NewEIP); \
781 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
782
783#undef IEM_MC_SET_RIP_U32_AND_FINISH
784
785
786/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code. */
787#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP) \
788 off = iemNativeEmitRipJumpNoFlags(pReNative, off, (a_u64NewEIP), true /*f64Bit*/, pCallEntry->idxInstr, sizeof(uint64_t))
789
790/** Variant of IEM_MC_SET_RIP_U64_AND_FINISH for use in 64-bit code that checks
791 * and clears flags. */
792#define IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64_WITH_FLAGS(a_u64NewEIP) \
793 IEM_MC_SET_RIP_U64_AND_FINISH_THREADED_PC64(a_u64NewEIP); \
794 off = iemNativeEmitFinishInstructionFlagsCheck(pReNative, off)
795
796#undef IEM_MC_SET_RIP_U64_AND_FINISH
797
798
799/** Same as iemRegRipJumpU16AndFinishNoFlags,
800 * iemRegRipJumpU32AndFinishNoFlags and iemRegRipJumpU64AndFinishNoFlags. */
801DECL_INLINE_THROW(uint32_t)
802iemNativeEmitRipJumpNoFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarPc, bool f64Bit,
803 uint8_t idxInstr, uint8_t cbVar)
804{
805 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarPc);
806 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarPc, cbVar);
807
808 /* We speculatively modify PC and may raise #GP(0), so make sure the right values are in CPUMCTX. */
809 off = iemNativeRegFlushPendingWrites(pReNative, off);
810
811#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
812 Assert(pReNative->Core.offPc == 0);
813
814 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativePcUpdateTotal);
815#endif
816
817 /* Get a register with the new PC loaded from idxVarPc.
818 Note! This ASSUMES that the high bits of the GPR are zeroed. */
819 uint8_t const idxPcReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarPc, kIemNativeGstReg_Pc, &off);
820
821 /* Check limit (may #GP(0) + exit TB). */
822 if (!f64Bit)
823/** @todo we can skip this test in FLAT 32-bit mode. */
824 off = iemNativeEmitCheckGpr32AgainstCsSegLimitMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
825 /* Check that the address is canonical, raising #GP(0) + exit TB if it isn't. */
826 else if (cbVar > sizeof(uint32_t))
827 off = iemNativeEmitCheckGprCanonicalMaybeRaiseGp0(pReNative, off, idxPcReg, idxInstr);
828
829 /* Store the result. */
830 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
831
832 iemNativeVarRegisterRelease(pReNative, idxVarPc);
833 /** @todo implicitly free the variable? */
834
835 return off;
836}
837
838
839
840/*********************************************************************************************************************************
841* Emitters for raising exceptions (IEM_MC_MAYBE_RAISE_XXX) *
842*********************************************************************************************************************************/
843
844#define IEM_MC_MAYBE_RAISE_DEVICE_NOT_AVAILABLE() \
845 off = iemNativeEmitMaybeRaiseDeviceNotAvailable(pReNative, off, pCallEntry->idxInstr)
846
847/**
848 * Emits code to check if a \#NM exception should be raised.
849 *
850 * @returns New code buffer offset, UINT32_MAX on failure.
851 * @param pReNative The native recompile state.
852 * @param off The code buffer offset.
853 * @param idxInstr The current instruction.
854 */
855DECL_INLINE_THROW(uint32_t)
856iemNativeEmitMaybeRaiseDeviceNotAvailable(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
857{
858#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
859 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckPotential);
860
861 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE))
862 {
863#endif
864 /*
865 * Make sure we don't have any outstanding guest register writes as we may
866 * raise an #NM and all guest registers must be up to date in CPUMCTX.
867 */
868 /** @todo r=aeichner Can we postpone this to the RaiseNm path? */
869 off = iemNativeRegFlushPendingWrites(pReNative, off);
870
871#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
872 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
873#else
874 RT_NOREF(idxInstr);
875#endif
876
877 /* Allocate a temporary CR0 register. */
878 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0, kIemNativeGstRegUse_ReadOnly);
879 uint8_t const idxLabelRaiseNm = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseNm);
880
881 /*
882 * if (cr0 & (X86_CR0_EM | X86_CR0_TS) != 0)
883 * return raisexcpt();
884 */
885 /* Test and jump. */
886 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxCr0Reg, X86_CR0_EM | X86_CR0_TS, idxLabelRaiseNm);
887
888 /* Free but don't flush the CR0 register. */
889 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
890
891#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
892 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE;
893 }
894 else
895 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeDeviceNotAvailXcptCheckOmitted);
896#endif
897
898 return off;
899}
900
901
902#define IEM_MC_MAYBE_RAISE_FPU_XCPT() \
903 off = iemNativeEmitMaybeRaiseFpuException(pReNative, off, pCallEntry->idxInstr)
904
905/**
906 * Emits code to check if a \#MF exception should be raised.
907 *
908 * @returns New code buffer offset, UINT32_MAX on failure.
909 * @param pReNative The native recompile state.
910 * @param off The code buffer offset.
911 * @param idxInstr The current instruction.
912 */
913DECL_INLINE_THROW(uint32_t)
914iemNativeEmitMaybeRaiseFpuException(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
915{
916 /*
917 * Make sure we don't have any outstanding guest register writes as we may
918 * raise an #MF and all guest registers must be up to date in CPUMCTX.
919 */
920 /** @todo r=aeichner Can we postpone this to the RaiseMf path? */
921 off = iemNativeRegFlushPendingWrites(pReNative, off);
922
923#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
924 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
925#else
926 RT_NOREF(idxInstr);
927#endif
928
929 /* Allocate a temporary FSW register. */
930 uint8_t const idxFpuFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw, kIemNativeGstRegUse_ReadOnly);
931 uint8_t const idxLabelRaiseMf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseMf);
932
933 /*
934 * if (FSW & X86_FSW_ES != 0)
935 * return raisexcpt();
936 */
937 /* Test and jump. */
938 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxFpuFswReg, X86_FSW_ES, idxLabelRaiseMf);
939
940 /* Free but don't flush the FSW register. */
941 iemNativeRegFreeTmp(pReNative, idxFpuFswReg);
942
943 return off;
944}
945
946
947#define IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT() \
948 off = iemNativeEmitMaybeRaiseSseRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
949
950/**
951 * Emits code to check if an SSE exception (either \#UD or \#NM) should be raised.
952 *
953 * @returns New code buffer offset, UINT32_MAX on failure.
954 * @param pReNative The native recompile state.
955 * @param off The code buffer offset.
956 * @param idxInstr The current instruction.
957 */
958DECL_INLINE_THROW(uint32_t)
959iemNativeEmitMaybeRaiseSseRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
960{
961#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
962 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckPotential);
963
964 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE))
965 {
966#endif
967 /*
968 * Make sure we don't have any outstanding guest register writes as we may
969 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
970 */
971 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
972 off = iemNativeRegFlushPendingWrites(pReNative, off);
973
974#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
975 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
976#else
977 RT_NOREF(idxInstr);
978#endif
979
980 /* Allocate a temporary CR0 and CR4 register. */
981 uint8_t const idxLabelRaiseSseRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseRelated);
982 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
983 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
984 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
985
986 AssertCompile(!((X86_CR0_EM | X86_CR0_TS) & X86_CR4_OSFXSR));
987#ifdef RT_ARCH_AMD64
988 /*
989 * We do a modified test here:
990 * if (!(((cr4 & X86_CR4_OSFXSR) | cr0) ^ X86_CR4_OSFXSR)) { likely }
991 * else { goto RaiseSseRelated; }
992 * This ASSUMES that CR0[bit 9] is always zero. This is the case on
993 * all targets except the 386, which doesn't support SSE, so this should
994 * be a safe assumption.
995 */
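 /* Why the folded test is equivalent: after AND-ing with (X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR)
    only those three non-overlapping bits survive, and XOR-ing with X86_CR4_OSFXSR then yields zero
    exactly when CR0.EM=0, CR0.TS=0 and CR4.OSFXSR=1, i.e. the case where no exception is raised. */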
996 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+3+7+7+6);
997 //pCodeBuf[off++] = 0xcc;
998 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR); /* Isolate CR4.OSFXSR as CR4.TSD and */
999 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxCr4Reg); /* CR4.DE would overlap the CR0 bits. */
1000 off = iemNativeEmitOrGpr32ByGprEx(pCodeBuf, off, idxTmpReg, idxCr0Reg);
1001 off = iemNativeEmitAndGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR0_EM | X86_CR0_TS | X86_CR4_OSFXSR);
1002 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, X86_CR4_OSFXSR);
1003 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseSseRelated, kIemNativeInstrCond_ne);
1004
1005#elif defined(RT_ARCH_ARM64)
1006 /*
1007 * We do a modified test here:
1008 * if (!((cr0 & (X86_CR0_EM | X86_CR0_TS)) | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) ^ 1))) { likely }
1009 * else { goto RaiseSseRelated; }
1010 */
1011 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+5);
1012 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
1013 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - X86_CR0_EM_BIT) == (X86_CR0_EM | X86_CR0_TS));
1014 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxCr0Reg, 1, 32 - X86_CR0_EM_BIT, false /*f64Bit*/);
1015 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSFXSR_BIT, 1, false /*f64Bit*/);
1016 /* -> idxTmpReg[0]=OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
1017 Assert(Armv8A64ConvertImmRImmS2Mask32(0, 0) == 1);
1018 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 0, 0, false /*f64Bit*/);
1019 /* -> idxTmpReg[0]=~OSFXSR; idxTmpReg[2]=EM; idxTmpReg[3]=TS; (the rest is zero) */
1020 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
1021 idxLabelRaiseSseRelated);
1022
1023#else
1024# error "Port me!"
1025#endif
1026
1027 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1028 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1029 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1030 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1031
1032#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1033 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE;
1034 }
1035 else
1036 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeSseXcptCheckOmitted);
1037#endif
1038
1039 return off;
1040}
1041
1042
1043#define IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT() \
1044 off = iemNativeEmitMaybeRaiseAvxRelatedXcpt(pReNative, off, pCallEntry->idxInstr)
1045
1046/**
1047 * Emits code to check if an AVX exception (either \#UD or \#NM) should be raised.
1048 *
1049 * @returns New code buffer offset, UINT32_MAX on failure.
1050 * @param pReNative The native recompile state.
1051 * @param off The code buffer offset.
1052 * @param idxInstr The current instruction.
1053 */
1054DECL_INLINE_THROW(uint32_t)
1055iemNativeEmitMaybeRaiseAvxRelatedXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1056{
1057#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1058 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckPotential);
1059
1060 if (!(pReNative->fSimdRaiseXcptChecksEmitted & IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX))
1061 {
1062#endif
1063 /*
1064 * Make sure we don't have any outstanding guest register writes as we may
1065 * raise an \#UD or \#NM and all guest registers must be up to date in CPUMCTX.
1066 */
1067 /** @todo r=aeichner Can we postpone this to the RaiseNm/RaiseUd path? */
1068 off = iemNativeRegFlushPendingWrites(pReNative, off);
1069
1070#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1071 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1072#else
1073 RT_NOREF(idxInstr);
1074#endif
1075
1076 /* Allocate a temporary CR0, CR4 and XCR0 register. */
1077 uint8_t const idxLabelRaiseAvxRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseAvxRelated);
1078 uint8_t const idxCr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr0);
1079 uint8_t const idxCr4Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Cr4);
1080 uint8_t const idxXcr0Reg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Xcr0);
1081 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1082
1083 /*
1084 * We have the following in IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT:
1085 * if (RT_LIKELY( ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE))
1086 * | (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE)
1087 * | (pVCpu->cpum.GstCtx.cr0 & X86_CR0_TS))
1088 * == (XSAVE_C_YMM | XSAVE_C_SSE | X86_CR4_OSXSAVE)))
1089 * { likely }
1090 * else { goto RaiseAvxRelated; }
1091 */
1092#ifdef RT_ARCH_AMD64
1093 /* if (!( ( ((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) << 2)
1094 | (((cr4 >> X86_CR4_OSFXSR_BIT) & 1) << 1)
1095 | ((cr0 >> X86_CR0_TS_BIT) & 1) )
1096 ^ 0x1a) ) { likely }
1097 else { goto RaiseAvxRelated; } */
1098 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6+3+5+3+5+3+7+6);
1099 //pCodeBuf[off++] = 0xcc;
1100 off = iemNativeEmitLoadGpr32ImmEx(pCodeBuf, off, idxTmpReg, XSAVE_C_YMM | XSAVE_C_SSE);
1101 off = iemNativeEmitAndGpr32ByGpr32Ex(pCodeBuf, off, idxTmpReg, idxXcr0Reg);
1102 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr4Reg, X86_CR4_OSXSAVE_BIT);
1103 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
1104 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=0; idxTmpReg[2]=SSE; idxTmpReg[3]=YMM; (the rest is zero) */
1105 off = iemNativeEmitAmd64TestBitInGprEx(pCodeBuf, off, idxCr0Reg, X86_CR0_TS_BIT);
1106 off = iemNativeEmitAmd64RotateGpr32LeftViaCarryEx(pCodeBuf, off, idxTmpReg, 1);
1107 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=SSE; idxTmpReg[4]=YMM; */
1108 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg, ((XSAVE_C_YMM | XSAVE_C_SSE) << 2) | 2);
1109 /* -> idxTmpReg[0]=CR0.TS idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=0; idxTmpReg[3]=~SSE; idxTmpReg[4]=~YMM; */
1110 off = iemNativeEmitJccToLabelEx(pReNative, pCodeBuf, off, idxLabelRaiseAvxRelated, kIemNativeInstrCond_ne);
1111
1112#elif defined(RT_ARCH_ARM64)
1113 /* if (!( (((xcr0 & (XSAVE_C_YMM | XSAVE_C_SSE)) | ((cr4 >> X86_CR4_OSFXSR_BIT) & 1)) ^ 7) << 1)
1114 | ((cr0 >> X86_CR0_TS_BIT) & 1) ) { likely }
1115 else { goto RaiseAvxRelated; } */
1116 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+6);
1117 //pCodeBuf[off++] = Armv8A64MkInstrBrk(0x1111);
1118 Assert(Armv8A64ConvertImmRImmS2Mask32(1, 32 - XSAVE_C_SSE_BIT) == (XSAVE_C_YMM | XSAVE_C_SSE));
1119 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxXcr0Reg, 1, 32 - XSAVE_C_SSE_BIT, false /*f64Bit*/);
1120 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr4Reg, X86_CR4_OSXSAVE_BIT, 1, false /*f64Bit*/);
1121 /* -> idxTmpReg[0]=CR4.OSXSAVE; idxTmpReg[1]=SSE; idxTmpReg[2]=YMM; (the rest is zero) */
1122 Assert(Armv8A64ConvertImmRImmS2Mask32(2, 0) == 7);
1123 pCodeBuf[off++] = Armv8A64MkInstrEorImm(idxTmpReg, idxTmpReg, 2, 0, false /*f64Bit*/);
1124 /* -> idxTmpReg[0]=~CR4.OSXSAVE; idxTmpReg[1]=~SSE; idxTmpReg[2]=~YMM; (the rest is zero) */
1125 pCodeBuf[off++] = Armv8A64MkInstrLslImm(idxTmpReg, idxTmpReg, 1, false /*f64Bit*/);
1126 pCodeBuf[off++] = Armv8A64MkInstrBfxil(idxTmpReg, idxCr0Reg, X86_CR0_TS_BIT, 1, false /*f64Bit*/);
1127 /* -> idxTmpReg[0]=CR0.TS; idxTmpReg[1]=~CR4.OSXSAVE; idxTmpReg[2]=~SSE; idxTmpReg[3]=~YMM; (the rest is zero) */
1128 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabelEx(pReNative, pCodeBuf, off, idxTmpReg, false /*f64Bit*/,
1129 idxLabelRaiseAvxRelated);
1130
1131#else
1132# error "Port me!"
1133#endif
1134
1135 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1136 iemNativeRegFreeTmp(pReNative, idxCr0Reg);
1137 iemNativeRegFreeTmp(pReNative, idxCr4Reg);
1138 iemNativeRegFreeTmp(pReNative, idxXcr0Reg);
1139#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1140 pReNative->fSimdRaiseXcptChecksEmitted |= IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
1141 }
1142 else
1143 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeMaybeAvxXcptCheckOmitted);
1144#endif
1145
1146 return off;
1147}
1148
1149
1150#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1151#define IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT() \
1152 off = iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(pReNative, off, pCallEntry->idxInstr)
1153
1154/** Emits code for IEM_MC_MAYBE_RAISE_SSE_AVX_SIMD_FP_OR_UD_XCPT. */
1155DECL_INLINE_THROW(uint32_t)
1156iemNativeEmitSimdMaybeRaiseSseAvxSimdFpOrUdXcpt(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1157{
1158 /*
1159 * Make sure we don't have any outstanding guest register writes as we may
1160 * raise an \#UD or \#XF and all guest registers must be up to date in CPUMCTX.
1161 */
1162 off = iemNativeRegFlushPendingWrites(pReNative, off);
1163
1164#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1165 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1166#else
1167 RT_NOREF(idxInstr);
1168#endif
1169
1170 uint8_t const idxLabelRaiseSseAvxFpRelated = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseSseAvxFpRelated);
1171 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr, kIemNativeGstRegUse_ReadOnly);
1172 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
1173
1174 /* mov tmp, varmxcsr */
1175 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
1176 /* tmp &= X86_MXCSR_XCPT_MASK */
1177 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK);
1178 /* tmp >>= X86_MXCSR_XCPT_MASK_SHIFT */
1179 off = iemNativeEmitShiftGprRight(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_MASK_SHIFT);
1180 /* tmp = ~tmp */
1181 off = iemNativeEmitInvBitsGpr(pReNative, off, idxRegTmp, idxRegTmp, false /*f64Bit*/);
1182 /* tmp &= mxcsr */
1183 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegTmp, idxRegMxCsr);
1184 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxRegTmp, X86_MXCSR_XCPT_FLAGS,
1185 idxLabelRaiseSseAvxFpRelated);
1186
1187 /* Free but don't flush the MXCSR register. */
1188 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
1189 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1190
1191 return off;
1192}
1193#endif
1194
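/*
 * Illustrative C equivalent of the MXCSR check emitted above (a sketch, not compiled
 * code; uMxCsr stands for the guest MXCSR value loaded into idxRegMxCsr):
 *      if (~((uMxCsr & X86_MXCSR_XCPT_MASK) >> X86_MXCSR_XCPT_MASK_SHIFT) & uMxCsr & X86_MXCSR_XCPT_FLAGS)
 *          goto RaiseSseAvxFpRelated;
 * I.e. the exception path is taken whenever an MXCSR exception flag is set while the
 * corresponding mask bit is clear.
 */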
1195
1196#define IEM_MC_RAISE_DIVIDE_ERROR() \
1197 off = iemNativeEmitRaiseDivideError(pReNative, off, pCallEntry->idxInstr)
1198
1199/**
1200 * Emits code to raise a \#DE.
1201 *
1202 * @returns New code buffer offset, UINT32_MAX on failure.
1203 * @param pReNative The native recompile state.
1204 * @param off The code buffer offset.
1205 * @param idxInstr The current instruction.
1206 */
1207DECL_INLINE_THROW(uint32_t)
1208iemNativeEmitRaiseDivideError(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
1209{
1210 /*
1211 * Make sure we don't have any outstanding guest register writes as we may raise a \#DE.
1212 */
1213 off = iemNativeRegFlushPendingWrites(pReNative, off);
1214
1215#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1216 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1217#else
1218 RT_NOREF(idxInstr);
1219#endif
1220
1221 uint8_t const idxLabelRaiseDe = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseDe);
1222
1223 /* raise \#DE exception unconditionally. */
1224 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelRaiseDe);
1225
1226 return off;
1227}
1228
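/*
 * The emitted code is just an unconditional jump to the shared RaiseDe tail label.
 * A hypothetical usage sketch (names and the return-code polarity are invented for
 * illustration only, not taken from the sources):
 *      IEM_MC_CALL_AIMPL_3(int32_t, rc, pfnDivWorker, puDst, puRemainder, uDivisor);
 *      IEM_MC_IF_LOCAL_IS_Z(rc) {
 *          ... commit the result ...
 *      } IEM_MC_ELSE() {
 *          IEM_MC_RAISE_DIVIDE_ERROR();
 *      } IEM_MC_ENDIF();
 */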
1229
1230#define IEM_MC_RAISE_GP0_IF_EFF_ADDR_UNALIGNED(a_EffAddr, a_cbAlign) \
1231 off = iemNativeEmitRaiseGp0IfEffAddrUnaligned(pReNative, off, pCallEntry->idxInstr, a_EffAddr, a_cbAlign)
1232
1233/**
1234 * Emits code to raise a \#GP(0) if the given variable contains an unaligned address.
1235 *
1236 * @returns New code buffer offset, UINT32_MAX on failure.
1237 * @param pReNative The native recompile state.
1238 * @param off The code buffer offset.
1239 * @param idxInstr The current instruction.
1240 * @param idxVarEffAddr Index of the variable containing the effective address to check.
1241 * @param cbAlign The alignment in bytes to check against.
1242 */
1243DECL_INLINE_THROW(uint32_t)
1244iemNativeEmitRaiseGp0IfEffAddrUnaligned(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint8_t idxVarEffAddr, uint8_t cbAlign)
1245{
1246 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
1247 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
1248
1249 /*
1250 * Make sure we don't have any outstanding guest register writes as we may throw an exception.
1251 */
1252 off = iemNativeRegFlushPendingWrites(pReNative, off);
1253
1254#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
1255 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
1256#else
1257 RT_NOREF(idxInstr);
1258#endif
1259
1260 uint8_t const idxLabelRaiseGp0 = iemNativeLabelCreate(pReNative, kIemNativeLabelType_RaiseGp0);
1261 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off);
1262
1263 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxVarReg, cbAlign - 1, idxLabelRaiseGp0);
1264
1265 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
1266 return off;
1267}
1268
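/*
 * Illustrative C equivalent of the alignment test above (a sketch, not compiled code;
 * cbAlign must be a power of two for the mask trick to work):
 *      if (GCPtrEff & (cbAlign - 1))
 *          goto RaiseGp0;
 * E.g. for a 16 byte alignment requirement this tests the low four address bits.
 */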
1269
1270/*********************************************************************************************************************************
1271* Emitters for conditionals (IEM_MC_IF_XXX, IEM_MC_ELSE, IEM_MC_ENDIF) *
1272*********************************************************************************************************************************/
1273
1274/**
1275 * Pushes an IEM_MC_IF_XXX onto the condition stack.
1276 *
1277 * @returns Pointer to the new condition stack entry.
1278 * @throws VERR_IEM_COND_TOO_DEEPLY_NESTED if the nesting is too deep.
1279 */
1280DECL_INLINE_THROW(PIEMNATIVECOND) iemNativeCondPushIf(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
1281{
1282#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1283 *poff = iemNativeRegFlushPendingWrites(pReNative, *poff);
1284#endif
1285
1286 uint32_t const idxStack = pReNative->cCondDepth;
1287 AssertStmt(idxStack < RT_ELEMENTS(pReNative->aCondStack), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_TOO_DEEPLY_NESTED));
1288
1289 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[idxStack];
1290 pReNative->cCondDepth = (uint8_t)(idxStack + 1);
1291
1292 uint16_t const uCondSeqNo = ++pReNative->uCondSeqNo;
1293 pEntry->fInElse = false;
1294 pEntry->idxLabelElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else, UINT32_MAX /*offWhere*/, uCondSeqNo);
1295 pEntry->idxLabelEndIf = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Endif, UINT32_MAX /*offWhere*/, uCondSeqNo);
1296
1297 return pEntry;
1298}
1299
1300
1301/**
1302 * Start of the if-block, snapshotting the register and variable state.
1303 */
1304DECL_INLINE_THROW(void)
1305iemNativeCondStartIfBlock(PIEMRECOMPILERSTATE pReNative, uint32_t offIfBlock, uint32_t idxLabelIf = UINT32_MAX)
1306{
1307 Assert(offIfBlock != UINT32_MAX);
1308 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1309 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1310 Assert(!pEntry->fInElse);
1311
1312 /* Define the start of the IF block if requested or for disassembly purposes. */
1313 if (idxLabelIf != UINT32_MAX)
1314 iemNativeLabelDefine(pReNative, idxLabelIf, offIfBlock);
1315#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
1316 else
1317 iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, offIfBlock, pReNative->paLabels[pEntry->idxLabelElse].uData);
1318#else
1319 RT_NOREF(offIfBlock);
1320#endif
1321
1322#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1323 Assert(pReNative->Core.offPc == 0);
1324#endif
1325
1326 /* Copy the initial state so we can restore it in the 'else' block. */
1327 pEntry->InitialState = pReNative->Core;
1328}
1329
1330
1331#define IEM_MC_ELSE() } while (0); \
1332 off = iemNativeEmitElse(pReNative, off); \
1333 do {
1334
1335/** Emits code related to IEM_MC_ELSE. */
1336DECL_INLINE_THROW(uint32_t) iemNativeEmitElse(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1337{
1338 /* Check sanity and get the conditional stack entry. */
1339 Assert(off != UINT32_MAX);
1340 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1341 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1342 Assert(!pEntry->fInElse);
1343
1344#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1345 /* Writeback any dirty shadow registers. */
1346 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
1347 * in one of the branches and leave guest registers already dirty before the start of the if
1348 * block alone. */
1349 off = iemNativeRegFlushDirtyGuest(pReNative, off);
1350#endif
1351
1352 /* Jump to the endif */
1353 off = iemNativeEmitJmpToLabel(pReNative, off, pEntry->idxLabelEndIf);
1354
1355 /* Define the else label and enter the else part of the condition. */
1356 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
1357 pEntry->fInElse = true;
1358
1359#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1360 Assert(pReNative->Core.offPc == 0);
1361#endif
1362
1363 /* Snapshot the core state so we can do a merge at the endif and restore
1364 the snapshot we took at the start of the if-block. */
1365 pEntry->IfFinalState = pReNative->Core;
1366 pReNative->Core = pEntry->InitialState;
1367
1368 return off;
1369}
1370
1371
1372#define IEM_MC_ENDIF() } while (0); \
1373 off = iemNativeEmitEndIf(pReNative, off)
1374
1375/** Emits code related to IEM_MC_ENDIF. */
1376DECL_INLINE_THROW(uint32_t) iemNativeEmitEndIf(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1377{
1378 /* Check sanity and get the conditional stack entry. */
1379 Assert(off != UINT32_MAX);
1380 Assert(pReNative->cCondDepth > 0 && pReNative->cCondDepth <= RT_ELEMENTS(pReNative->aCondStack));
1381 PIEMNATIVECOND const pEntry = &pReNative->aCondStack[pReNative->cCondDepth - 1];
1382
1383#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1384 Assert(pReNative->Core.offPc == 0);
1385#endif
1386#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1387 /* Writeback any dirty shadow registers (else branch). */
1388 /** @todo r=aeichner Possible optimization is to only writeback guest registers which became dirty
1389 * in one of the branches and leave guest registers already dirty before the start of the if
1390 * block alone. */
1391 off = iemNativeRegFlushDirtyGuest(pReNative, off);
1392#endif
1393
1394 /*
1395 * Now we have to find common ground with the core state at the end of the
1396 * if-block (or the initial state when there is no else). Use the smallest common
1397 * denominator and just drop anything that isn't the same in both states.
1398 */
1399 /** @todo We could, maybe, shuffle registers around if we thought it helpful,
1400 * which is why we're doing this at the end of the else-block.
1401 * But we'd need more info about future for that to be worth the effort. */
1402 PCIEMNATIVECORESTATE const pOther = pEntry->fInElse ? &pEntry->IfFinalState : &pEntry->InitialState;
1403#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1404 Assert( pOther->bmGstRegShadowDirty == 0
1405 && pReNative->Core.bmGstRegShadowDirty == 0);
1406#endif
1407
1408 if (memcmp(&pReNative->Core, pOther, sizeof(*pOther)) != 0)
1409 {
1410 /* shadow guest stuff first. */
1411 uint64_t fGstRegs = pReNative->Core.bmGstRegShadows;
1412 if (fGstRegs)
1413 {
1414 Assert(pReNative->Core.bmHstRegsWithGstShadow != 0);
1415 do
1416 {
1417 unsigned idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
1418 fGstRegs &= ~RT_BIT_64(idxGstReg);
1419
1420 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
1421 if ( !(pOther->bmGstRegShadows & RT_BIT_64(idxGstReg))
1422 || idxHstReg != pOther->aidxGstRegShadows[idxGstReg])
1423 {
1424 Log12(("iemNativeEmitEndIf: dropping gst %s from hst %s\n",
1425 g_aGstShadowInfo[idxGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg]));
1426
1427#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1428 /* Writeback any dirty shadow registers we are about to unshadow. */
1429 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxHstReg);
1430#endif
1431 iemNativeRegClearGstRegShadowing(pReNative, idxHstReg, off);
1432 }
1433 } while (fGstRegs);
1434 }
1435 else
1436 {
1437 Assert(pReNative->Core.bmHstRegsWithGstShadow == 0);
1438#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
1439 Assert(pReNative->Core.bmGstRegShadowDirty == 0);
1440#endif
1441 }
1442
1443 /* Check variables next. For now we require them to be identical or
1444 something we can recreate. */
1445 Assert(pReNative->Core.u64ArgVars == pOther->u64ArgVars);
1446 uint32_t fVars = pReNative->Core.bmVars | pOther->bmVars;
1447 if (fVars)
1448 {
1449 uint32_t const fVarsMustRemove = pReNative->Core.bmVars ^ pOther->bmVars;
1450 do
1451 {
1452 unsigned idxVar = ASMBitFirstSetU32(fVars) - 1;
1453 fVars &= ~RT_BIT_32(idxVar);
1454
1455 if (!(fVarsMustRemove & RT_BIT_32(idxVar)))
1456 {
1457 if (pReNative->Core.aVars[idxVar].idxReg == pOther->aVars[idxVar].idxReg)
1458 continue;
1459 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack)
1460 {
1461 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
1462 if (idxHstReg != UINT8_MAX)
1463 {
1464 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
1465 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
1466 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x\n",
1467 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1468 }
1469 continue;
1470 }
1471 }
1472 else if (!(pReNative->Core.bmVars & RT_BIT_32(idxVar)))
1473 continue;
1474
1475 /* Irreconcilable, so drop it. */
1476 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
1477 if (idxHstReg != UINT8_MAX)
1478 {
1479 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
1480 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
1481 Log12(("iemNativeEmitEndIf: Dropping hst reg %s for var #%u/%#x (also dropped)\n",
1482 g_apszIemNativeHstRegNames[idxHstReg], idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1483 }
1484 Log11(("iemNativeEmitEndIf: Freeing variable #%u/%#x\n", idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar)));
1485 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
1486 } while (fVars);
1487 }
1488
1489 /* Finally, check that the host register allocations matches. */
1490 AssertMsgStmt(pReNative->Core.bmHstRegs == pOther->bmHstRegs,
1491 ("Core.bmHstRegs=%#x pOther->bmHstRegs=%#x - %#x\n",
1492 pReNative->Core.bmHstRegs, pOther->bmHstRegs, pReNative->Core.bmHstRegs ^ pOther->bmHstRegs),
1493 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_COND_ENDIF_RECONCILIATION_FAILED));
1494 }
1495
1496 /*
1497 * Define the endif label and maybe the else one if we're still in the 'if' part.
1498 */
1499 if (!pEntry->fInElse)
1500 iemNativeLabelDefine(pReNative, pEntry->idxLabelElse, off);
1501 else
1502 Assert(pReNative->paLabels[pEntry->idxLabelElse].off <= off);
1503 iemNativeLabelDefine(pReNative, pEntry->idxLabelEndIf, off);
1504
1505 /* Pop the condition stack. */
1506 pReNative->cCondDepth -= 1;
1507
1508 return off;
1509}
1510
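/*
 * Rough expansion sketch of how the IEM_MC_IF_XXX / IEM_MC_ELSE / IEM_MC_ENDIF macros
 * stitch together (hypothetical MC fragment, shown only to illustrate the do/while(0)
 * pairing and the condition stack handling):
 *      IEM_MC_IF_EFL_BIT_SET(X86_EFL_ZF) {
 *          ...
 *      } IEM_MC_ELSE() {
 *          ...
 *      } IEM_MC_ENDIF();
 * becomes, roughly:
 *      off = iemNativeEmitIfEflagsBitSet(pReNative, off, X86_EFL_ZF); do { ...
 *      ... } while (0); off = iemNativeEmitElse(pReNative, off);      do { ...
 *      ... } while (0); off = iemNativeEmitEndIf(pReNative, off);
 * Each emitter pushes, updates or pops the entry created by iemNativeCondPushIf.
 */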
1511
1512#define IEM_MC_IF_EFL_ANY_BITS_SET(a_fBits) \
1513 off = iemNativeEmitIfEflagAnysBitsSet(pReNative, off, (a_fBits)); \
1514 do {
1515
1516/** Emits code for IEM_MC_IF_EFL_ANY_BITS_SET. */
1517DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagAnysBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
1518{
1519 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
1520 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1521
1522 /* Get the eflags. */
1523 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1524 kIemNativeGstRegUse_ReadOnly);
1525
1526 /* Test and jump. */
1527 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
1528
1529 /* Free but don't flush the EFlags register. */
1530 iemNativeRegFreeTmp(pReNative, idxEflReg);
1531
1532 /* Make a copy of the core state now as we start the if-block. */
1533 iemNativeCondStartIfBlock(pReNative, off);
1534
1535 return off;
1536}
1537
1538
1539#define IEM_MC_IF_EFL_NO_BITS_SET(a_fBits) \
1540 off = iemNativeEmitIfEflagNoBitsSet(pReNative, off, (a_fBits)); \
1541 do {
1542
1543/** Emits code for IEM_MC_IF_EFL_NO_BITS_SET. */
1544DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagNoBitsSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitsInEfl)
1545{
1546 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitsInEfl);
1547 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1548
1549 /* Get the eflags. */
1550 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1551 kIemNativeGstRegUse_ReadOnly);
1552
1553 /* Test and jump. */
1554 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfAnySet(pReNative, off, idxEflReg, fBitsInEfl, pEntry->idxLabelElse);
1555
1556 /* Free but don't flush the EFlags register. */
1557 iemNativeRegFreeTmp(pReNative, idxEflReg);
1558
1559 /* Make a copy of the core state now as we start the if-block. */
1560 iemNativeCondStartIfBlock(pReNative, off);
1561
1562 return off;
1563}
1564
1565
1566#define IEM_MC_IF_EFL_BIT_SET(a_fBit) \
1567 off = iemNativeEmitIfEflagsBitSet(pReNative, off, (a_fBit)); \
1568 do {
1569
1570/** Emits code for IEM_MC_IF_EFL_BIT_SET. */
1571DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
1572{
1573 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1574 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1575
1576 /* Get the eflags. */
1577 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1578 kIemNativeGstRegUse_ReadOnly);
1579
1580 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1581 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1582
1583 /* Test and jump. */
1584 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1585
1586 /* Free but don't flush the EFlags register. */
1587 iemNativeRegFreeTmp(pReNative, idxEflReg);
1588
1589 /* Make a copy of the core state now as we start the if-block. */
1590 iemNativeCondStartIfBlock(pReNative, off);
1591
1592 return off;
1593}
1594
1595
1596#define IEM_MC_IF_EFL_BIT_NOT_SET(a_fBit) \
1597 off = iemNativeEmitIfEflagsBitNotSet(pReNative, off, (a_fBit)); \
1598 do {
1599
1600/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET. */
1601DECL_INLINE_THROW(uint32_t) iemNativeEmitIfEflagsBitNotSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl)
1602{
1603 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1604 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1605
1606 /* Get the eflags. */
1607 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1608 kIemNativeGstRegUse_ReadOnly);
1609
1610 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1611 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1612
1613 /* Test and jump. */
1614 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1615
1616 /* Free but don't flush the EFlags register. */
1617 iemNativeRegFreeTmp(pReNative, idxEflReg);
1618
1619 /* Make a copy of the core state now as we start the if-block. */
1620 iemNativeCondStartIfBlock(pReNative, off);
1621
1622 return off;
1623}
1624
1625
1626#define IEM_MC_IF_EFL_BITS_EQ(a_fBit1, a_fBit2) \
1627 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, false /*fInverted*/); \
1628 do {
1629
1630#define IEM_MC_IF_EFL_BITS_NE(a_fBit1, a_fBit2) \
1631 off = iemNativeEmitIfEflagsTwoBitsEqual(pReNative, off, a_fBit1, a_fBit2, true /*fInverted*/); \
1632 do {
1633
1634/** Emits code for IEM_MC_IF_EFL_BITS_EQ and IEM_MC_IF_EFL_BITS_NE. */
1635DECL_INLINE_THROW(uint32_t)
1636iemNativeEmitIfEflagsTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1637 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
1638{
1639 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBit1InEfl | fBit2InEfl);
1640 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1641
1642 /* Get the eflags. */
1643 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1644 kIemNativeGstRegUse_ReadOnly);
1645
1646 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
1647 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
1648
1649 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
1650 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
1651 Assert(iBitNo1 != iBitNo2);
1652
1653#ifdef RT_ARCH_AMD64
1654 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl);
1655
1656 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1657 if (iBitNo1 > iBitNo2)
1658 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
1659 else
1660 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
1661 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1662
1663#elif defined(RT_ARCH_ARM64)
1664 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1665 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1666
1667 /* and tmpreg, eflreg, #1<<iBitNo1 */
1668 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
1669
1670 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
1671 if (iBitNo1 > iBitNo2)
1672 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1673 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
1674 else
1675 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1676 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
1677
1678 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1679
1680#else
1681# error "Port me"
1682#endif
1683
1684 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
1685 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
1686 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
1687
1688 /* Free but don't flush the EFlags and tmp registers. */
1689 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1690 iemNativeRegFreeTmp(pReNative, idxEflReg);
1691
1692 /* Make a copy of the core state now as we start the if-block. */
1693 iemNativeCondStartIfBlock(pReNative, off);
1694
1695 return off;
1696}
1697
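/*
 * Illustrative C equivalent of the two-flag comparison above (a sketch, not compiled):
 *      uint32_t uTmp = uEfl & fBit1InEfl;                      - isolate flag #1
 *      uTmp = iBitNo1 > iBitNo2 ? uTmp >> (iBitNo1 - iBitNo2)
 *                               : uTmp << (iBitNo2 - iBitNo1); - align it with flag #2
 *      uTmp ^= uEfl;                                           - bit iBitNo2 now holds (flag1 != flag2)
 *      if (uTmp & fBit2InEfl) the flags differ, otherwise they are equal.
 * A typical user would be a signed condition test such as
 * IEM_MC_IF_EFL_BITS_NE(X86_EFL_SF, X86_EFL_OF).
 */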
1698
1699#define IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ(a_fBit, a_fBit1, a_fBit2) \
1700 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, false /*fInverted*/); \
1701 do {
1702
1703#define IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE(a_fBit, a_fBit1, a_fBit2) \
1704 off = iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(pReNative, off, a_fBit, a_fBit1, a_fBit2, true /*fInverted*/); \
1705 do {
1706
1707/** Emits code for IEM_MC_IF_EFL_BIT_NOT_SET_AND_BITS_EQ and
1708 * IEM_MC_IF_EFL_BIT_SET_OR_BITS_NE. */
1709DECL_INLINE_THROW(uint32_t)
1710iemNativeEmitIfEflagsBitNotSetAndTwoBitsEqual(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl,
1711 uint32_t fBit1InEfl, uint32_t fBit2InEfl, bool fInverted)
1712{
1713 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl | fBit1InEfl | fBit2InEfl);
1714 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1715
1716 /* We need an if-block label for the inverted variant. */
1717 uint32_t const idxLabelIf = fInverted ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_If, UINT32_MAX,
1718 pReNative->paLabels[pEntry->idxLabelElse].uData) : UINT32_MAX;
1719
1720 /* Get the eflags. */
1721 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1722 kIemNativeGstRegUse_ReadOnly);
1723
1724 /* Translate the flag masks to bit numbers. */
1725 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1726 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1727
1728 unsigned const iBitNo1 = ASMBitFirstSetU32(fBit1InEfl) - 1;
1729 Assert(RT_BIT_32(iBitNo1) == fBit1InEfl);
1730 Assert(iBitNo1 != iBitNo);
1731
1732 unsigned const iBitNo2 = ASMBitFirstSetU32(fBit2InEfl) - 1;
1733 Assert(RT_BIT_32(iBitNo2) == fBit2InEfl);
1734 Assert(iBitNo2 != iBitNo);
1735 Assert(iBitNo2 != iBitNo1);
1736
1737#ifdef RT_ARCH_AMD64
1738 uint8_t const idxTmpReg = iemNativeRegAllocTmpImm(pReNative, &off, fBit1InEfl); /* This must come before we jump anywhere! */
1739#elif defined(RT_ARCH_ARM64)
1740 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1741#endif
1742
1743 /* Check for the lone bit first. */
1744 if (!fInverted)
1745 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse);
1746 else
1747 off = iemNativeEmitTestBitInGprAndJmpToLabelIfSet(pReNative, off, idxEflReg, iBitNo, idxLabelIf);
1748
1749 /* Then extract and compare the other two bits. */
1750#ifdef RT_ARCH_AMD64
1751 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1752 if (iBitNo1 > iBitNo2)
1753 off = iemNativeEmitShiftGpr32Right(pReNative, off, idxTmpReg, iBitNo1 - iBitNo2);
1754 else
1755 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxTmpReg, iBitNo2 - iBitNo1);
1756 off = iemNativeEmitXorGpr32ByGpr32(pReNative, off, idxTmpReg, idxEflReg);
1757
1758#elif defined(RT_ARCH_ARM64)
1759 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1760
1761 /* and tmpreg, eflreg, #1<<iBitNo1 */
1762 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxTmpReg, idxEflReg, 0 /*uImm7SizeLen -> 32*/, 32 - iBitNo1, false /*f64Bit*/);
1763
1764 /* eeyore tmpreg, eflreg, tmpreg, LSL/LSR, #abs(iBitNo2 - iBitNo1) */
1765 if (iBitNo1 > iBitNo2)
1766 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1767 iBitNo1 - iBitNo2, kArmv8A64InstrShift_Lsr);
1768 else
1769 pu32CodeBuf[off++] = Armv8A64MkInstrEor(idxTmpReg, idxEflReg, idxTmpReg, false /*64bit*/,
1770 iBitNo2 - iBitNo1, kArmv8A64InstrShift_Lsl);
1771
1772 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1773
1774#else
1775# error "Port me"
1776#endif
1777
1778 /* Test (bit #2 is set in tmpreg if not-equal) and jump. */
1779 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxTmpReg, iBitNo2,
1780 pEntry->idxLabelElse, !fInverted /*fJmpIfSet*/);
1781
1782 /* Free but don't flush the EFlags and tmp registers. */
1783 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1784 iemNativeRegFreeTmp(pReNative, idxEflReg);
1785
1786 /* Make a copy of the core state now as we start the if-block. */
1787 iemNativeCondStartIfBlock(pReNative, off, idxLabelIf);
1788
1789 return off;
1790}
1791
1792
1793#define IEM_MC_IF_CX_IS_NZ() \
1794 off = iemNativeEmitIfCxIsNotZero(pReNative, off); \
1795 do {
1796
1797/** Emits code for IEM_MC_IF_CX_IS_NZ. */
1798DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1799{
1800 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1801
1802 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1803 kIemNativeGstRegUse_ReadOnly);
1804 off = iemNativeEmitTestAnyBitsInGprAndJmpToLabelIfNoneSet(pReNative, off, idxGstRcxReg, UINT16_MAX, pEntry->idxLabelElse);
1805 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1806
1807 iemNativeCondStartIfBlock(pReNative, off);
1808 return off;
1809}
1810
1811
1812#define IEM_MC_IF_ECX_IS_NZ() \
1813 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, false /*f64Bit*/); \
1814 do {
1815
1816#define IEM_MC_IF_RCX_IS_NZ() \
1817 off = iemNativeEmitIfRcxEcxIsNotZero(pReNative, off, true /*f64Bit*/); \
1818 do {
1819
1820/** Emits code for IEM_MC_IF_ECX_IS_NZ and IEM_MC_IF_RCX_IS_NZ. */
1821DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotZero(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
1822{
1823 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1824
1825 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1826 kIemNativeGstRegUse_ReadOnly);
1827 off = iemNativeEmitTestIfGprIsZeroAndJmpToLabel(pReNative, off, idxGstRcxReg, f64Bit, pEntry->idxLabelElse);
1828 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1829
1830 iemNativeCondStartIfBlock(pReNative, off);
1831 return off;
1832}
1833
1834
1835#define IEM_MC_IF_CX_IS_NOT_ONE() \
1836 off = iemNativeEmitIfCxIsNotOne(pReNative, off); \
1837 do {
1838
1839/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE. */
1840DECL_INLINE_THROW(uint32_t) iemNativeEmitIfCxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off)
1841{
1842 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1843
1844 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1845 kIemNativeGstRegUse_ReadOnly);
1846#ifdef RT_ARCH_AMD64
1847 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1848#else
1849 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1850 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
1851 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1852#endif
1853 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1854
1855 iemNativeCondStartIfBlock(pReNative, off);
1856 return off;
1857}
1858
1859
1860#define IEM_MC_IF_ECX_IS_NOT_ONE() \
1861 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, false /*f64Bit*/); \
1862 do {
1863
1864#define IEM_MC_IF_RCX_IS_NOT_ONE() \
1865 off = iemNativeEmitIfRcxEcxIsNotOne(pReNative, off, true /*f64Bit*/); \
1866 do {
1867
1868/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE and IEM_MC_IF_RCX_IS_NOT_ONE. */
1869DECL_INLINE_THROW(uint32_t) iemNativeEmitIfRcxEcxIsNotOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool f64Bit)
1870{
1871 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1872
1873 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1874 kIemNativeGstRegUse_ReadOnly);
1875 if (f64Bit)
1876 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1877 else
1878 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1879 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1880
1881 iemNativeCondStartIfBlock(pReNative, off);
1882 return off;
1883}
1884
1885
1886#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1887 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/); \
1888 do {
1889
1890#define IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1891 off = iemNativeEmitIfCxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/); \
1892 do {
1893
1894/** Emits code for IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_SET and
1895 * IEM_MC_IF_CX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
1896DECL_INLINE_THROW(uint32_t)
1897iemNativeEmitIfCxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fBitInEfl, bool fCheckIfSet)
1898{
1899 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1900 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1901
1902 /* We have to load both RCX and EFLAGS before we can start branching,
1903 otherwise we'll end up in the else-block with an inconsistent
1904 register allocator state.
1905 Doing EFLAGS first as it's more likely to be loaded, right? */
1906 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1907 kIemNativeGstRegUse_ReadOnly);
1908 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1909 kIemNativeGstRegUse_ReadOnly);
1910
1911 /** @todo we could reduce this to a single branch instruction by spending a
1912 * temporary register and some setnz stuff. Not sure if loops are
1913 * worth it. */
1914 /* Check CX. */
1915#ifdef RT_ARCH_AMD64
1916 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1917#else
1918 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
1919 off = iemNativeEmitTestIfGpr16EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse, idxTmpReg);
1920 iemNativeRegFreeTmp(pReNative, idxTmpReg);
1921#endif
1922
1923 /* Check the EFlags bit. */
1924 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1925 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1926 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
1927 !fCheckIfSet /*fJmpIfSet*/);
1928
1929 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1930 iemNativeRegFreeTmp(pReNative, idxEflReg);
1931
1932 iemNativeCondStartIfBlock(pReNative, off);
1933 return off;
1934}
1935
1936
1937#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1938 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, false /*f64Bit*/); \
1939 do {
1940
1941#define IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1942 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, false /*f64Bit*/); \
1943 do {
1944
1945#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET(a_fBit) \
1946 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, true /*fCheckIfSet*/, true /*f64Bit*/); \
1947 do {
1948
1949#define IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET(a_fBit) \
1950 off = iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(pReNative, off, a_fBit, false /*fCheckIfSet*/, true /*f64Bit*/); \
1951 do {
1952
1953/** Emits code for IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_SET,
1954 * IEM_MC_IF_ECX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET,
1955 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_SET and
1956 * IEM_MC_IF_RCX_IS_NOT_ONE_AND_EFL_BIT_NOT_SET. */
1957DECL_INLINE_THROW(uint32_t)
1958iemNativeEmitIfRcxEcxIsNotOneAndTestEflagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off,
1959 uint32_t fBitInEfl, bool fCheckIfSet, bool f64Bit)
1960{
1961 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fBitInEfl);
1962 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
1963
1964 /* We have to load both RCX and EFLAGS before we can start branching,
1965 otherwise we'll end up in the else-block with an inconsistent
1966 register allocator state.
1967 Doing EFLAGS first as it's more likely to be loaded, right? */
1968 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
1969 kIemNativeGstRegUse_ReadOnly);
1970 uint8_t const idxGstRcxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xCX),
1971 kIemNativeGstRegUse_ReadOnly);
1972
1973 /** @todo we could reduce this to a single branch instruction by spending a
1974 * temporary register and some setnz stuff. Not sure if loops are
1975 * worth it. */
1976 /* Check RCX/ECX. */
1977 if (f64Bit)
1978 off = iemNativeEmitTestIfGprEqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1979 else
1980 off = iemNativeEmitTestIfGpr32EqualsImmAndJmpToLabel(pReNative, off, idxGstRcxReg, 1, pEntry->idxLabelElse);
1981
1982 /* Check the EFlags bit. */
1983 unsigned const iBitNo = ASMBitFirstSetU32(fBitInEfl) - 1;
1984 Assert(RT_BIT_32(iBitNo) == fBitInEfl);
1985 off = iemNativeEmitTestBitInGprAndJmpToLabelIfCc(pReNative, off, idxEflReg, iBitNo, pEntry->idxLabelElse,
1986 !fCheckIfSet /*fJmpIfSet*/);
1987
1988 iemNativeRegFreeTmp(pReNative, idxGstRcxReg);
1989 iemNativeRegFreeTmp(pReNative, idxEflReg);
1990
1991 iemNativeCondStartIfBlock(pReNative, off);
1992 return off;
1993}
1994
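/*
 * Illustrative C equivalent of the condition checked above (a sketch, not compiled),
 * here for the ECX + "bit set" variant:
 *      if (uEcx != 1 && (uEfl & a_fBit)) { if-block } else { else-block }
 * These typically back the loop continuation tests of repeat-prefixed string
 * instruction microcode blocks (e.g. REPE/REPNE).
 */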
1995
1996#define IEM_MC_IF_LOCAL_IS_Z(a_Local) \
1997 off = iemNativeEmitIfLocalIsZ(pReNative, off, a_Local); \
1998 do {
1999
2000/** Emits code for IEM_MC_IF_LOCAL_IS_Z. */
2001DECL_INLINE_THROW(uint32_t)
2002iemNativeEmitIfLocalIsZ(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarLocal)
2003{
2004 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2005
2006 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarLocal);
2007 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarLocal)];
2008 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
2009 AssertStmt(pVarRc->cbVar == sizeof(int32_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
2010
2011 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarLocal, &off);
2012
2013 off = iemNativeEmitTestIfGprIsNotZeroAndJmpToLabel(pReNative, off, idxReg, false /*f64Bit*/, pEntry->idxLabelElse);
2014
2015 iemNativeVarRegisterRelease(pReNative, idxVarLocal);
2016
2017 iemNativeCondStartIfBlock(pReNative, off);
2018 return off;
2019}
2020
2021
2022#define IEM_MC_IF_GREG_BIT_SET(a_iGReg, a_iBitNo) \
2023 off = iemNativeEmitIfGregBitSet(pReNative, off, a_iGReg, a_iBitNo); \
2024 do {
2025
2026/** Emits code for IEM_MC_IF_GREG_BIT_SET. */
2027DECL_INLINE_THROW(uint32_t)
2028iemNativeEmitIfGregBitSet(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t iBitNo)
2029{
2030 PIEMNATIVECOND const pEntry = iemNativeCondPushIf(pReNative, &off);
2031 Assert(iGReg < 16);
2032
2033 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2034 kIemNativeGstRegUse_ReadOnly);
2035
2036 off = iemNativeEmitTestBitInGprAndJmpToLabelIfNotSet(pReNative, off, idxGstFullReg, iBitNo, pEntry->idxLabelElse);
2037
2038 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2039
2040 iemNativeCondStartIfBlock(pReNative, off);
2041 return off;
2042}
2043
2044
2045
2046/*********************************************************************************************************************************
2047* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
2048*********************************************************************************************************************************/
2049
2050#define IEM_MC_NOREF(a_Name) \
2051 RT_NOREF_PV(a_Name)
2052
2053#define IEM_MC_ARG(a_Type, a_Name, a_iArg) \
2054 uint8_t const a_Name = iemNativeArgAlloc(pReNative, (a_iArg), sizeof(a_Type))
2055
2056#define IEM_MC_ARG_CONST(a_Type, a_Name, a_Value, a_iArg) \
2057 uint8_t const a_Name = iemNativeArgAllocConst(pReNative, (a_iArg), sizeof(a_Type), (a_Value))
2058
2059#define IEM_MC_ARG_LOCAL_REF(a_Type, a_Name, a_Local, a_iArg) \
2060 uint8_t const a_Name = iemNativeArgAllocLocalRef(pReNative, (a_iArg), (a_Local))
2061
2062#define IEM_MC_LOCAL(a_Type, a_Name) \
2063 uint8_t const a_Name = iemNativeVarAlloc(pReNative, sizeof(a_Type))
2064
2065#define IEM_MC_LOCAL_CONST(a_Type, a_Name, a_Value) \
2066 uint8_t const a_Name = iemNativeVarAllocConst(pReNative, sizeof(a_Type), (a_Value))
2067
2068#define IEM_MC_LOCAL_ASSIGN(a_Type, a_Name, a_Value) \
2069 uint8_t const a_Name = iemNativeVarAllocAssign(pReNative, &off, sizeof(a_Type), (a_Value))
2070
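/*
 * Hypothetical usage sketch for the variable/argument allocators above (not from the
 * sources; the names are invented for illustration):
 *      IEM_MC_ARG(uint16_t *,  pu16Dst,  0);
 *      IEM_MC_ARG(uint16_t,    u16Src,   1);
 *      IEM_MC_LOCAL(uint16_t,  u16Tmp);
 * Each name becomes a uint8_t (packed) variable index referring to pReNative->Core.aVars;
 * the IEM_MC_FETCH/STORE and IEM_MC_CALL_XXX emitters then operate on these indexes
 * rather than on C variables of the declared type.
 */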
2071
2072/**
2073 * Sets the host register for @a idxVarRc to @a idxReg.
2074 *
2075 * The register must not be allocated. Any guest register shadowing will be
2076 * implicitly dropped by this call.
2077 *
2078 * The variable must not have any register associated with it (causes
2079 * VERR_IEM_VAR_IPE_10 to be raised). Conversion to a stack variable is
2080 * implied.
2081 *
2082 * @returns idxReg
2083 * @param pReNative The recompiler state.
2084 * @param idxVar The variable.
2085 * @param idxReg The host register (typically IEMNATIVE_CALL_RET_GREG).
2086 * @param off For recording in debug info.
2087 *
2088 * @throws VERR_IEM_VAR_IPE_10, VERR_IEM_VAR_IPE_11
2089 */
2090DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSet(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxReg, uint32_t off)
2091{
2092 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2093 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
2094 Assert(!pVar->fRegAcquired);
2095 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
2096 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_10));
2097 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_11));
2098
2099 iemNativeRegClearGstRegShadowing(pReNative, idxReg, off);
2100 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
2101
2102 iemNativeVarSetKindToStack(pReNative, idxVar);
2103 pVar->idxReg = idxReg;
2104
2105 return idxReg;
2106}
2107
2108
2109/**
2110 * A convenient helper function.
2111 */
2112DECL_INLINE_THROW(uint8_t) iemNativeVarRegisterSetAndAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
2113 uint8_t idxReg, uint32_t *poff)
2114{
2115 idxReg = iemNativeVarRegisterSet(pReNative, idxVar, idxReg, *poff);
2116 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fRegAcquired = true;
2117 return idxReg;
2118}
2119
2120
2121/**
2122 * This is called by IEM_MC_END() to clean up all variables.
2123 */
2124DECL_FORCE_INLINE(void) iemNativeVarFreeAll(PIEMRECOMPILERSTATE pReNative)
2125{
2126 uint32_t const bmVars = pReNative->Core.bmVars;
2127 if (bmVars != 0)
2128 iemNativeVarFreeAllSlow(pReNative, bmVars);
2129 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
2130 Assert(pReNative->Core.bmStack == 0);
2131}
2132
2133
2134#define IEM_MC_FREE_LOCAL(a_Name) iemNativeVarFreeLocal(pReNative, a_Name)
2135
2136/**
2137 * This is called by IEM_MC_FREE_LOCAL.
2138 */
2139DECLINLINE(void) iemNativeVarFreeLocal(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
2140{
2141 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2142 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo == UINT8_MAX);
2143 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
2144}
2145
2146
2147#define IEM_MC_FREE_ARG(a_Name) iemNativeVarFreeArg(pReNative, a_Name)
2148
2149/**
2150 * This is called by IEM_MC_FREE_ARG.
2151 */
2152DECLINLINE(void) iemNativeVarFreeArg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
2153{
2154 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
2155 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars));
2156 iemNativeVarFreeOneWorker(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxVar));
2157}
2158
2159
2160#define IEM_MC_ASSIGN_TO_SMALLER(a_VarDst, a_VarSrcEol) off = iemNativeVarAssignToSmaller(pReNative, off, a_VarDst, a_VarSrcEol)
2161
2162/**
2163 * This is called by IEM_MC_ASSIGN_TO_SMALLER.
2164 */
2165DECL_INLINE_THROW(uint32_t)
2166iemNativeVarAssignToSmaller(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarDst, uint8_t idxVarSrc)
2167{
2168 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarDst);
2169 PIEMNATIVEVAR const pVarDst = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarDst)];
2170 AssertStmt(pVarDst->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2171 Assert( pVarDst->cbVar == sizeof(uint16_t)
2172 || pVarDst->cbVar == sizeof(uint32_t));
2173
2174 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarSrc);
2175 PIEMNATIVEVAR const pVarSrc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarSrc)];
2176 AssertStmt( pVarSrc->enmKind == kIemNativeVarKind_Stack
2177 || pVarSrc->enmKind == kIemNativeVarKind_Immediate,
2178 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2179
2180 Assert(pVarDst->cbVar < pVarSrc->cbVar);
2181
2182 /*
2183 * Special case for immediates.
2184 */
2185 if (pVarSrc->enmKind == kIemNativeVarKind_Immediate)
2186 {
2187 switch (pVarDst->cbVar)
2188 {
2189 case sizeof(uint16_t):
2190 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint16_t)pVarSrc->u.uValue);
2191 break;
2192 case sizeof(uint32_t):
2193 iemNativeVarSetKindToConst(pReNative, idxVarDst, (uint32_t)pVarSrc->u.uValue);
2194 break;
2195 default: AssertFailed(); break;
2196 }
2197 }
2198 else
2199 {
2200 /*
2201 * The generic solution for now.
2202 */
2203 /** @todo optimize this by having the python script make sure the source
2204 * variable passed to IEM_MC_ASSIGN_TO_SMALLER is not used after the
2205 * statement. Then we could just transfer the register assignments. */
2206 uint8_t const idxRegDst = iemNativeVarRegisterAcquire(pReNative, idxVarDst, &off);
2207 uint8_t const idxRegSrc = iemNativeVarRegisterAcquire(pReNative, idxVarSrc, &off);
2208 switch (pVarDst->cbVar)
2209 {
2210 case sizeof(uint16_t):
2211 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegDst, idxRegSrc);
2212 break;
2213 case sizeof(uint32_t):
2214 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegDst, idxRegSrc);
2215 break;
2216 default: AssertFailed(); break;
2217 }
2218 iemNativeVarRegisterRelease(pReNative, idxVarSrc);
2219 iemNativeVarRegisterRelease(pReNative, idxVarDst);
2220 }
2221 return off;
2222}
2223
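/*
 * Hypothetical usage sketch (not from the sources):
 *      IEM_MC_LOCAL(uint32_t, u32Value);
 *      IEM_MC_LOCAL(uint16_t, u16Value);
 *      ...
 *      IEM_MC_ASSIGN_TO_SMALLER(u16Value, u32Value);
 * For an immediate source the destination simply becomes a narrower constant variable;
 * otherwise both variables get host registers and a 16 or 32 bit register copy is emitted.
 */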
2224
2225
2226/*********************************************************************************************************************************
2227* Emitters for IEM_MC_CALL_CIMPL_XXX *
2228*********************************************************************************************************************************/
2229
2230/** Common emit function for IEM_MC_CALL_CIMPL_XXXX. */
2231DECL_INLINE_THROW(uint32_t)
2232iemNativeEmitCallCImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr,
2233 uint64_t fGstShwFlush, uintptr_t pfnCImpl, uint8_t cArgs)
2234
2235{
2236 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
2237
2238#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2239 /* Clear the appropriate IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_XXX flags
2240 when a call clobbers any of the relevant control registers. */
2241# if 1
2242 if (!(fGstShwFlush & (RT_BIT_64(kIemNativeGstReg_Cr0) | RT_BIT_64(kIemNativeGstReg_Cr4) | RT_BIT_64(kIemNativeGstReg_Xcr0))))
2243 {
2244 /* Likely as long as call+ret are done via cimpl. */
2245 Assert( /*pfnCImpl != (uintptr_t)iemCImpl_mov_Cd_Rd && pfnCImpl != (uintptr_t)iemCImpl_xsetbv
2246 &&*/ pfnCImpl != (uintptr_t)iemCImpl_lmsw && pfnCImpl != (uintptr_t)iemCImpl_clts);
2247 }
2248 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Xcr0))
2249 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2250 else if (fGstShwFlush & RT_BIT_64(kIemNativeGstReg_Cr4))
2251 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2252 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE);
2253 else
2254 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2255 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
2256 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
2257
2258# else
2259 if (pfnCImpl == (uintptr_t)iemCImpl_xsetbv) /* Modifies xcr0 which only the AVX check uses. */
2260 pReNative->fSimdRaiseXcptChecksEmitted &= ~IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX;
2261 else if (pfnCImpl == (uintptr_t)iemCImpl_mov_Cd_Rd) /* Can modify cr4 which all checks use. */
2262 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2263 else if ( pfnCImpl == (uintptr_t)iemCImpl_FarJmp
2264 || pfnCImpl == (uintptr_t)iemCImpl_callf
2265 || pfnCImpl == (uintptr_t)iemCImpl_lmsw
2266 || pfnCImpl == (uintptr_t)iemCImpl_clts) /* Will only modify cr0 */
2267 pReNative->fSimdRaiseXcptChecksEmitted &= ~( IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_AVX
2268 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_SSE
2269 | IEMNATIVE_SIMD_RAISE_XCPT_CHECKS_EMITTED_MAYBE_DEVICE_NOT_AVAILABLE);
2270# endif
2271#endif
2272
2273 /*
2274 * Do all the call setup and cleanup.
2275 */
2276 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_CIMPL_HIDDEN_ARGS, IEM_CIMPL_HIDDEN_ARGS);
2277
2278 /*
2279 * Load the two or three hidden arguments.
2280 */
2281#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
2282 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
2283 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2284 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr);
2285#else
2286 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
2287 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr);
2288#endif
2289
2290 /*
2291 * Make the call and check the return code.
2292 *
2293 * Shadow PC copies are always flushed here; other stuff depends on flags.
2294 * Segment and general purpose registers are explicitly flushed via the
2295 * IEM_MC_HINT_FLUSH_GUEST_SHADOW_GREG and IEM_MC_HINT_FLUSH_GUEST_SHADOW_SREG
2296 * macros.
2297 */
2298 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);
2299#if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS) && defined(RT_ARCH_AMD64)
2300 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
2301#endif
2302 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl, fGstShwFlush | RT_BIT_64(kIemNativeGstReg_Pc));
2303 if (!(pReNative->fMc & IEM_MC_F_WITHOUT_FLAGS)) /** @todo We don't emit with-flags/without-flags variations for CIMPL calls. */
2304 fGstShwFlush |= RT_BIT_64(kIemNativeGstReg_EFlags);
2305 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
2306
2307 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
2308}
2309
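/*
 * Sketch of the call the above sets up, in C terms (standard path, i.e. without the
 * Windows/AMD64 VBOXSTRICTRC_STRICT_ENABLED indirection):
 *      rcStrict = pfnCImpl(pVCpu, cbInstr, a0, a1, ...);
 * The explicit arguments come from the IEM_MC_ARG_XXX variables marshalled by
 * iemNativeEmitCallCommon, and the returned status is checked by
 * iemNativeEmitCheckCallRetAndPassUp.
 */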
2310
2311#define IEM_MC_CALL_CIMPL_1_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0) \
2312 off = iemNativeEmitCallCImpl1(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0)
2313
2314/** Emits code for IEM_MC_CALL_CIMPL_1. */
2315DECL_INLINE_THROW(uint32_t)
2316iemNativeEmitCallCImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2317 uintptr_t pfnCImpl, uint8_t idxArg0)
2318{
2319 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2320 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 1);
2321}
2322
2323
2324#define IEM_MC_CALL_CIMPL_2_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1) \
2325 off = iemNativeEmitCallCImpl2(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, (uintptr_t)a_pfnCImpl, a0, a1)
2326
2327/** Emits code for IEM_MC_CALL_CIMPL_2. */
2328DECL_INLINE_THROW(uint32_t)
2329iemNativeEmitCallCImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2330 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1)
2331{
2332 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2333 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2334 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 2);
2335}
2336
2337
2338#define IEM_MC_CALL_CIMPL_3_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2) \
2339 off = iemNativeEmitCallCImpl3(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2340 (uintptr_t)a_pfnCImpl, a0, a1, a2)
2341
2342/** Emits code for IEM_MC_CALL_CIMPL_3. */
2343DECL_INLINE_THROW(uint32_t)
2344iemNativeEmitCallCImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2345 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
2346{
2347 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2348 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2349 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2350 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 3);
2351}
2352
2353
2354#define IEM_MC_CALL_CIMPL_4_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3) \
2355 off = iemNativeEmitCallCImpl4(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2356 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3)
2357
2358/** Emits code for IEM_MC_CALL_CIMPL_4. */
2359DECL_INLINE_THROW(uint32_t)
2360iemNativeEmitCallCImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2361 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
2362{
2363 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2364 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2365 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2366 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
2367 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 4);
2368}
2369
2370
2371#define IEM_MC_CALL_CIMPL_5_THREADED(a_cbInstr, a_fFlags, a_fGstShwFlush, a_pfnCImpl, a0, a1, a2, a3, a4) \
2372 off = iemNativeEmitCallCImpl5(pReNative, off, a_cbInstr, pCallEntry->idxInstr, a_fGstShwFlush, \
2373 (uintptr_t)a_pfnCImpl, a0, a1, a2, a3, a4)
2374
2375/** Emits code for IEM_MC_CALL_CIMPL_5. */
2376DECL_INLINE_THROW(uint32_t)
2377iemNativeEmitCallCImpl5(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr, uint8_t idxInstr, uint64_t fGstShwFlush,
2378 uintptr_t pfnCImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3, uint8_t idxArg4)
2379{
2380 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_CIMPL_HIDDEN_ARGS);
2381 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_CIMPL_HIDDEN_ARGS);
2382 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_CIMPL_HIDDEN_ARGS);
2383 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3 + IEM_CIMPL_HIDDEN_ARGS);
2384 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg4, 4 + IEM_CIMPL_HIDDEN_ARGS);
2385 return iemNativeEmitCallCImplCommon(pReNative, off, cbInstr, idxInstr, fGstShwFlush, pfnCImpl, 5);
2386}
2387
2388
2389/** Recompiler debugging: Flush guest register shadow copies. */
2390#define IEM_MC_HINT_FLUSH_GUEST_SHADOW(g_fGstShwFlush) iemNativeRegFlushGuestShadows(pReNative, g_fGstShwFlush)
2391
2392
2393
2394/*********************************************************************************************************************************
2395* Emitters for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX *
2396*********************************************************************************************************************************/
2397
2398/**
2399 * Common worker for IEM_MC_CALL_VOID_AIMPL_XXX and IEM_MC_CALL_AIMPL_XXX.
2400 */
2401DECL_INLINE_THROW(uint32_t)
2402iemNativeEmitCallAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2403 uintptr_t pfnAImpl, uint8_t cArgs)
2404{
2405 if (idxVarRc != UINT8_MAX)
2406 {
2407 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRc);
2408 PIEMNATIVEVAR const pVarRc = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarRc)];
2409 AssertStmt(pVarRc->uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_8));
2410 AssertStmt(pVarRc->cbVar <= sizeof(uint64_t), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_9));
2411 }
2412
2413 /*
2414 * Do all the call setup and cleanup.
2415 *
2416 * It is only required to flush pending guest register writes in call volatile registers as
2417 * assembly helpers can't throw and don't access anything living in CPUMCTX; they only
2418 * access their parameters. The flushing of call volatile registers is always done in iemNativeEmitCallCommon()
2419 * no matter the fFlushPendingWrites parameter.
2420 */
2421 off = iemNativeEmitCallCommon(pReNative, off, cArgs, 0 /*cHiddenArgs*/, false /*fFlushPendingWrites*/);
2422
2423 /*
2424 * Make the call and update the return code variable if we've got one.
2425 */
2426 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
2427 if (idxVarRc != UINT8_MAX)
2428 iemNativeVarRegisterSet(pReNative, idxVarRc, IEMNATIVE_CALL_RET_GREG, off);
2429
2430 return off;
2431}
2432
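/*
 * Sketch in C terms of what e.g. IEM_MC_CALL_AIMPL_3 amounts to after the above:
 *      a_rc = a_pfn(a0, a1, a2);
 * while the IEM_MC_CALL_VOID_AIMPL_XXX variants simply discard the return value. As
 * noted above, the assembly helpers neither throw nor touch CPUMCTX, so only the
 * call volatile registers need flushing.
 */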
2433
2434
2435#define IEM_MC_CALL_VOID_AIMPL_0(a_pfn) \
2436 off = iemNativeEmitCallAImpl0(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn))
2437
2438#define IEM_MC_CALL_AIMPL_0(a_rc, a_pfn) \
2439 off = iemNativeEmitCallAImpl0(pReNative, off, a_rc, (uintptr_t)(a_pfn))
2440
2441/** Emits code for IEM_MC_CALL_VOID_AIMPL_0 and IEM_MC_CALL_AIMPL_0. */
2442DECL_INLINE_THROW(uint32_t)
2443iemNativeEmitCallAImpl0(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl)
2444{
2445 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 0);
2446}
2447
2448
2449#define IEM_MC_CALL_VOID_AIMPL_1(a_pfn, a0) \
2450 off = iemNativeEmitCallAImpl1(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0)
2451
2452#define IEM_MC_CALL_AIMPL_1(a_rc, a_pfn, a0) \
2453 off = iemNativeEmitCallAImpl1(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0)
2454
2455/** Emits code for IEM_MC_CALL_VOID_AIMPL_1 and IEM_MC_CALL_AIMPL_1. */
2456DECL_INLINE_THROW(uint32_t)
2457iemNativeEmitCallAImpl1(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc, uintptr_t pfnAImpl, uint8_t idxArg0)
2458{
2459 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2460 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 1);
2461}
2462
2463
2464#define IEM_MC_CALL_VOID_AIMPL_2(a_pfn, a0, a1) \
2465 off = iemNativeEmitCallAImpl2(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1)
2466
2467#define IEM_MC_CALL_AIMPL_2(a_rc, a_pfn, a0, a1) \
2468 off = iemNativeEmitCallAImpl2(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1)
2469
2470/** Emits code for IEM_MC_CALL_VOID_AIMPL_2 and IEM_MC_CALL_AIMPL_2. */
2471DECL_INLINE_THROW(uint32_t)
2472iemNativeEmitCallAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2473 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
2474{
2475 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2476 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2477 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 2);
2478}
2479
2480
2481#define IEM_MC_CALL_VOID_AIMPL_3(a_pfn, a0, a1, a2) \
2482 off = iemNativeEmitCallAImpl3(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2)
2483
2484#define IEM_MC_CALL_AIMPL_3(a_rcType, a_rc, a_pfn, a0, a1, a2) \
2485 IEM_MC_LOCAL(a_rcType, a_rc); \
2486 off = iemNativeEmitCallAImpl3(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2)
2487
2488/** Emits code for IEM_MC_CALL_VOID_AIMPL_3 and IEM_MC_CALL_AIMPL_3. */
2489DECL_INLINE_THROW(uint32_t)
2490iemNativeEmitCallAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2491 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
2492{
2493 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2494 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2495 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
2496 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 3);
2497}
2498
2499
2500#define IEM_MC_CALL_VOID_AIMPL_4(a_pfn, a0, a1, a2, a3) \
2501 off = iemNativeEmitCallAImpl4(pReNative, off, UINT8_MAX /*idxVarRc*/, (uintptr_t)(a_pfn), a0, a1, a2, a3)
2502
2503#define IEM_MC_CALL_AIMPL_4(a_rcType, a_rc, a_pfn, a0, a1, a2, a3) \
2504 IEM_MC_LOCAL(a_rcType, a_rc); \
2505 off = iemNativeEmitCallAImpl4(pReNative, off, a_rc, (uintptr_t)(a_pfn), a0, a1, a2, a3)
2506
2507/** Emits code for IEM_MC_CALL_VOID_AIMPL_4 and IEM_MC_CALL_AIMPL_4. */
2508DECL_INLINE_THROW(uint32_t)
2509iemNativeEmitCallAImpl4(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRc,
2510 uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2, uint8_t idxArg3)
2511{
2512 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0);
2513 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1);
2514 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2);
2515 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg3, 3);
2516 return iemNativeEmitCallAImplCommon(pReNative, off, idxVarRc, pfnAImpl, 4);
2517}
2518
2519
2520
2521/*********************************************************************************************************************************
2522* Emitters for general purpose register fetches (IEM_MC_FETCH_GREG_XXX). *
2523*********************************************************************************************************************************/
2524
2525#define IEM_MC_FETCH_GREG_U8_THREADED(a_u8Dst, a_iGRegEx) \
2526 off = iemNativeEmitFetchGregU8(pReNative, off, a_u8Dst, a_iGRegEx, sizeof(uint8_t) /*cbZeroExtended*/)
2527
2528#define IEM_MC_FETCH_GREG_U8_ZX_U16_THREADED(a_u16Dst, a_iGRegEx) \
2529 off = iemNativeEmitFetchGregU8(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t) /*cbZeroExtended*/)
2530
2531#define IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED(a_u32Dst, a_iGRegEx) \
2532 off = iemNativeEmitFetchGregU8(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t) /*cbZeroExtended*/)
2533
2534#define IEM_MC_FETCH_GREG_U8_ZX_U64_THREADED(a_u64Dst, a_iGRegEx) \
2535 off = iemNativeEmitFetchGregU8(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t) /*cbZeroExtended*/)
2536
2537
2538/** Emits code for IEM_MC_FETCH_GREG_U8_THREADED and
2539 * IEM_MC_FETCH_GREG_U8_ZX_U16/32/64_THREADED. */
2540DECL_INLINE_THROW(uint32_t)
2541iemNativeEmitFetchGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, int8_t cbZeroExtended)
2542{
2543 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2544 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2545 Assert(iGRegEx < 20);
2546
2547 /* Same discussion as in iemNativeEmitFetchGregU16 */
2548 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2549 kIemNativeGstRegUse_ReadOnly);
2550
2551 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2552 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2553
2554 /* The value is zero-extended to the full 64-bit host register width. */
2555 if (iGRegEx < 16)
2556 off = iemNativeEmitLoadGprFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2557 else
2558 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
2559
2560 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2561 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2562 return off;
2563}
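/* Worked example (illustrative values): IEM_MC_FETCH_GREG_U8_ZX_U32_THREADED with
   a_iGRegEx = 16 + X86_GREG_xCX refers to CH, so the Gpr8Hi path above extracts bits 15:8
   of the RCX shadow register and zero-extends them into the variable's host register. */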
2564
2565
2566#define IEM_MC_FETCH_GREG_U8_SX_U16_THREADED(a_u16Dst, a_iGRegEx) \
2567 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u16Dst, a_iGRegEx, sizeof(uint16_t))
2568
2569#define IEM_MC_FETCH_GREG_U8_SX_U32_THREADED(a_u32Dst, a_iGRegEx) \
2570 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u32Dst, a_iGRegEx, sizeof(uint32_t))
2571
2572#define IEM_MC_FETCH_GREG_U8_SX_U64_THREADED(a_u64Dst, a_iGRegEx) \
2573 off = iemNativeEmitFetchGregU8Sx(pReNative, off, a_u64Dst, a_iGRegEx, sizeof(uint64_t))
2574
2575/** Emits code for IEM_MC_FETCH_GREG_U8_SX_U16/32/64_THREADED. */
2576DECL_INLINE_THROW(uint32_t)
2577iemNativeEmitFetchGregU8Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegEx, uint8_t cbSignExtended)
2578{
2579 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2580 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
2581 Assert(iGRegEx < 20);
2582
2583 /* Same discussion as in iemNativeEmitFetchGregU16 */
2584 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2585 kIemNativeGstRegUse_ReadOnly);
2586
2587 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2588 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2589
2590 if (iGRegEx < 16)
2591 {
2592 switch (cbSignExtended)
2593 {
2594 case sizeof(uint16_t):
2595 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2596 break;
2597 case sizeof(uint32_t):
2598 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2599 break;
2600 case sizeof(uint64_t):
2601 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxGstFullReg);
2602 break;
2603 default: AssertFailed(); break;
2604 }
2605 }
2606 else
2607 {
2608 off = iemNativeEmitLoadGprFromGpr8Hi(pReNative, off, idxVarReg, idxGstFullReg);
2609 switch (cbSignExtended)
2610 {
2611 case sizeof(uint16_t):
2612 off = iemNativeEmitLoadGpr16SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2613 break;
2614 case sizeof(uint32_t):
2615 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2616 break;
2617 case sizeof(uint64_t):
2618 off = iemNativeEmitLoadGprSignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
2619 break;
2620 default: AssertFailed(); break;
2621 }
2622 }
2623
2624 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2625 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2626 return off;
2627}
2628
2629
2630
2631#define IEM_MC_FETCH_GREG_U16(a_u16Dst, a_iGReg) \
2632 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint16_t))
2633
2634#define IEM_MC_FETCH_GREG_U16_ZX_U32(a_u16Dst, a_iGReg) \
2635 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
2636
2637#define IEM_MC_FETCH_GREG_U16_ZX_U64(a_u16Dst, a_iGReg) \
2638 off = iemNativeEmitFetchGregU16(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
2639
2640/** Emits code for IEM_MC_FETCH_GREG_U16 and IEM_MC_FETCH_GREG_U16_ZX_U32/64. */
2641DECL_INLINE_THROW(uint32_t)
2642iemNativeEmitFetchGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
2643{
2644 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2645 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2646 Assert(iGReg < 16);
2647
2648 /*
2649     * We can either just load the low 16 bits of the GPR into a host register
2650     * for the variable, or we can do so via a shadow copy host register. The
2651     * latter avoids having to reload it if it's being stored later, but
2652     * wastes a host register if it isn't touched again. Since we don't
2653     * know what's going to happen, we choose the latter for now.
2654 */
2655 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2656 kIemNativeGstRegUse_ReadOnly);
2657
2658 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2659 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2660 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2661 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2662
2663 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2664 return off;
2665}
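/* In practice this means that fetching e.g. AX while RAX is already shadowed by a host
   register costs only a single 16-bit zero-extending reg-to-reg copy; the downside noted
   above is that the shadow register may then sit unused for the rest of the block. */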
2666
2667
2668#define IEM_MC_FETCH_GREG_U16_SX_U32(a_u16Dst, a_iGReg) \
2669 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint32_t))
2670
2671#define IEM_MC_FETCH_GREG_U16_SX_U64(a_u16Dst, a_iGReg) \
2672 off = iemNativeEmitFetchGregU16Sx(pReNative, off, a_u16Dst, a_iGReg, sizeof(uint64_t))
2673
2674/** Emits code for IEM_MC_FETCH_GREG_U16_SX_U32/64. */
2675DECL_INLINE_THROW(uint32_t)
2676iemNativeEmitFetchGregU16Sx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbSignExtended)
2677{
2678 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2679 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbSignExtended);
2680 Assert(iGReg < 16);
2681
2682 /*
2683     * We can either just load the low 16 bits of the GPR into a host register
2684     * for the variable, or we can do so via a shadow copy host register. The
2685     * latter avoids having to reload it if it's being stored later, but
2686     * wastes a host register if it isn't touched again. Since we don't
2687     * know what's going to happen, we choose the latter for now.
2688 */
2689 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2690 kIemNativeGstRegUse_ReadOnly);
2691
2692 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2693 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2694 if (cbSignExtended == sizeof(uint32_t))
2695 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2696 else
2697 {
2698 Assert(cbSignExtended == sizeof(uint64_t));
2699 off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxVarReg, idxGstFullReg);
2700 }
2701 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2702
2703 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2704 return off;
2705}
2706
2707
2708#define IEM_MC_FETCH_GREG_U32(a_u32Dst, a_iGReg) \
2709 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint32_t))
2710
2711#define IEM_MC_FETCH_GREG_U32_ZX_U64(a_u32Dst, a_iGReg) \
2712 off = iemNativeEmitFetchGregU32(pReNative, off, a_u32Dst, a_iGReg, sizeof(uint64_t))
2713
2714/** Emits code for IEM_MC_FETCH_GREG_U32 and IEM_MC_FETCH_GREG_U32_ZX_U64. */
2715DECL_INLINE_THROW(uint32_t)
2716iemNativeEmitFetchGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg, uint8_t cbZeroExtended)
2717{
2718 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2719 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbZeroExtended); RT_NOREF(cbZeroExtended);
2720 Assert(iGReg < 16);
2721
2722 /*
2723     * We can either just load the low 32 bits of the GPR into a host register
2724     * for the variable, or we can do so via a shadow copy host register. The
2725     * latter avoids having to reload it if it's being stored later, but
2726     * wastes a host register if it isn't touched again. Since we don't
2727     * know what's going to happen, we choose the latter for now.
2728 */
2729 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2730 kIemNativeGstRegUse_ReadOnly);
2731
2732 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2733 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2734 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
2735 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2736
2737 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2738 return off;
2739}
2740
2741
2742#define IEM_MC_FETCH_GREG_U32_SX_U64(a_u32Dst, a_iGReg) \
2743 off = iemNativeEmitFetchGregU32SxU64(pReNative, off, a_u32Dst, a_iGReg)
2744
2745/** Emits code for IEM_MC_FETCH_GREG_U32_SX_U64. */
2746DECL_INLINE_THROW(uint32_t)
2747iemNativeEmitFetchGregU32SxU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
2748{
2749 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2750 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
2751 Assert(iGReg < 16);
2752
2753 /*
2754     * We can either just load the low 32 bits of the GPR into a host register
2755     * for the variable, or we can do so via a shadow copy host register. The
2756     * latter avoids having to reload it if it's being stored later, but
2757     * wastes a host register if it isn't touched again. Since we don't
2758     * know what's going to happen, we choose the latter for now.
2759 */
2760 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2761 kIemNativeGstRegUse_ReadOnly);
2762
2763 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2764 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2765 off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxVarReg, idxGstFullReg);
2766 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2767
2768 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2769 return off;
2770}
2771
2772
2773#define IEM_MC_FETCH_GREG_U64(a_u64Dst, a_iGReg) \
2774 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
2775
2776#define IEM_MC_FETCH_GREG_U64_ZX_U64(a_u64Dst, a_iGReg) \
2777 off = iemNativeEmitFetchGregU64(pReNative, off, a_u64Dst, a_iGReg)
2778
2779/** Emits code for IEM_MC_FETCH_GREG_U64 (and the
2780 * IEM_MC_FETCH_GREG_U64_ZX_U64 alias). */
2781DECL_INLINE_THROW(uint32_t)
2782iemNativeEmitFetchGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGReg)
2783{
2784 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2785 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
2786 Assert(iGReg < 16);
2787
2788 uint8_t const idxGstFullReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
2789 kIemNativeGstRegUse_ReadOnly);
2790
2791 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2792 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
2793 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxVarReg, idxGstFullReg);
2794 /** @todo name the register a shadow one already? */
2795 iemNativeVarRegisterRelease(pReNative, idxDstVar);
2796
2797 iemNativeRegFreeTmp(pReNative, idxGstFullReg);
2798 return off;
2799}
2800
2801
2802#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2803#define IEM_MC_FETCH_GREG_PAIR_U64(a_u128Dst, a_iGRegLo, a_iGRegHi) \
2804 off = iemNativeEmitFetchGregPairU64(pReNative, off, a_u128Dst, a_iGRegLo, a_iGRegHi)
2805
2806/** Emits code for IEM_MC_FETCH_GREG_PAIR_U64. */
2807DECL_INLINE_THROW(uint32_t)
2808iemNativeEmitFetchGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iGRegLo, uint8_t iGRegHi)
2809{
2810 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
2811 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
2812 Assert(iGRegLo < 16 && iGRegHi < 16);
2813
2814 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
2815 kIemNativeGstRegUse_ReadOnly);
2816 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
2817 kIemNativeGstRegUse_ReadOnly);
2818
2819 iemNativeVarSetKindToStack(pReNative, idxDstVar);
2820 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
2821 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegLo, 0);
2822 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxVarReg, idxGstFullRegHi, 1);
2823
2824 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
2825 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
2826 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
2827 return off;
2828}
2829#endif
2830
2831
2832/*********************************************************************************************************************************
2833* Emitters for general purpose register stores (IEM_MC_STORE_GREG_XXX). *
2834*********************************************************************************************************************************/
2835
2836#define IEM_MC_STORE_GREG_U8_CONST_THREADED(a_iGRegEx, a_u8Value) \
2837 off = iemNativeEmitStoreGregU8Const(pReNative, off, a_iGRegEx, a_u8Value)
2838
2839/** Emits code for IEM_MC_STORE_GREG_U8_CONST_THREADED. */
2840DECL_INLINE_THROW(uint32_t)
2841iemNativeEmitStoreGregU8Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t u8Value)
2842{
2843 Assert(iGRegEx < 20);
2844 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2845 kIemNativeGstRegUse_ForUpdate);
2846#ifdef RT_ARCH_AMD64
2847 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
2848
2849 /* To the lowest byte of the register: mov r8, imm8 */
2850 if (iGRegEx < 16)
2851 {
2852 if (idxGstTmpReg >= 8)
2853 pbCodeBuf[off++] = X86_OP_REX_B;
2854 else if (idxGstTmpReg >= 4)
2855 pbCodeBuf[off++] = X86_OP_REX;
2856 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
2857 pbCodeBuf[off++] = u8Value;
2858 }
2859 /* Otherwise it's to ah, ch, dh or bh: use mov r8, imm8 if we can, otherwise, we rotate. */
2860 else if (idxGstTmpReg < 4)
2861 {
2862 pbCodeBuf[off++] = 0xb4 + idxGstTmpReg;
2863 pbCodeBuf[off++] = u8Value;
2864 }
2865 else
2866 {
2867 /* ror reg64, 8 */
2868 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2869 pbCodeBuf[off++] = 0xc1;
2870 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
2871 pbCodeBuf[off++] = 8;
2872
2873 /* mov reg8, imm8 */
2874 if (idxGstTmpReg >= 8)
2875 pbCodeBuf[off++] = X86_OP_REX_B;
2876 else if (idxGstTmpReg >= 4)
2877 pbCodeBuf[off++] = X86_OP_REX;
2878 pbCodeBuf[off++] = 0xb0 + (idxGstTmpReg & 7);
2879 pbCodeBuf[off++] = u8Value;
2880
2881 /* rol reg64, 8 */
2882 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2883 pbCodeBuf[off++] = 0xc1;
2884 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
2885 pbCodeBuf[off++] = 8;
2886 }
2887
2888#elif defined(RT_ARCH_ARM64)
2889 uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, u8Value);
2890 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
2891 if (iGRegEx < 16)
2892 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 7:0. */
2893 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 0, 8);
2894 else
2895 /* bfi w1, w2, 8, 8 - moves bits 7:0 from idxImmReg to idxGstTmpReg bits 15:8. */
2896 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxImmReg, 8, 8);
2897 iemNativeRegFreeTmp(pReNative, idxImmReg);
2898
2899#else
2900# error "Port me!"
2901#endif
2902
2903 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
2904
2905#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
2906 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
2907#endif
2908
2909 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
2910 return off;
2911}
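/* Worked example (illustrative register and value): storing 0x42 into CH while guest RCX
   lives in host register r10 takes the rotate path above and corresponds to:
        ror r10, 8          ; bits 15:8 (CH) move down to bits 7:0
        mov r10b, 0x42      ; overwrite the low byte
        rol r10, 8          ; rotate the register back into place
   The direct 'mov ch, imm8' form is only available while the host copy is one of
   rax/rcx/rdx/rbx, since the legacy high-byte encodings cannot be combined with a REX prefix. */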
2912
2913
2914#define IEM_MC_STORE_GREG_U8_THREADED(a_iGRegEx, a_u8Value) \
2915 off = iemNativeEmitStoreGregU8(pReNative, off, a_iGRegEx, a_u8Value)
2916
2917/** Emits code for IEM_MC_STORE_GREG_U8_THREADED. */
2918DECL_INLINE_THROW(uint32_t)
2919iemNativeEmitStoreGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegEx, uint8_t idxValueVar)
2920{
2921 Assert(iGRegEx < 20);
2922 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
2923
2924 /*
2925 * If it's a constant value (unlikely) we treat this as a
2926 * IEM_MC_STORE_GREG_U8_CONST statement.
2927 */
2928 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
2929 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
2930 { /* likely */ }
2931 else
2932 {
2933 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
2934 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
2935 return iemNativeEmitStoreGregU8Const(pReNative, off, iGRegEx, (uint8_t)pValueVar->u.uValue);
2936 }
2937
2938 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegEx & 15),
2939 kIemNativeGstRegUse_ForUpdate);
2940 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
2941
2942#ifdef RT_ARCH_AMD64
2943 /* To the lowest byte of the register: mov reg8, reg8(r/m) */
2944 if (iGRegEx < 16)
2945 {
2946 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
2947 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
2948 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
2949 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
2950 pbCodeBuf[off++] = X86_OP_REX;
2951 pbCodeBuf[off++] = 0x8a;
2952 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
2953 }
2954 /* Otherwise it's to ah, ch, dh or bh from al, cl, dl or bl: use mov r8, r8 if we can, otherwise, we rotate. */
2955 else if (idxGstTmpReg < 4 && idxVarReg < 4)
2956 {
2957 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2+1);
2958 pbCodeBuf[off++] = 0x8a;
2959 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg + 4, idxVarReg);
2960 }
2961 else
2962 {
2963 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 15);
2964
2965 /* ror reg64, 8 */
2966 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2967 pbCodeBuf[off++] = 0xc1;
2968 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
2969 pbCodeBuf[off++] = 8;
2970
2971 /* mov reg8, reg8(r/m) */
2972 if (idxGstTmpReg >= 8 || idxVarReg >= 8)
2973 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0) | (idxVarReg >= 8 ? X86_OP_REX_B : 0);
2974 else if (idxGstTmpReg >= 4 || idxVarReg >= 4)
2975 pbCodeBuf[off++] = X86_OP_REX;
2976 pbCodeBuf[off++] = 0x8a;
2977 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, idxVarReg & 7);
2978
2979 /* rol reg64, 8 */
2980 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg < 8 ? 0 : X86_OP_REX_B);
2981 pbCodeBuf[off++] = 0xc1;
2982 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
2983 pbCodeBuf[off++] = 8;
2984 }
2985
2986#elif defined(RT_ARCH_ARM64)
2987 /* bfi w1, w2, 0, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 7:0.
2988 or
2989 bfi w1, w2, 8, 8 - moves bits 7:0 from idxVarReg to idxGstTmpReg bits 15:8. */
2990 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
2991 if (iGRegEx < 16)
2992 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 8);
2993 else
2994 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 8, 8);
2995
2996#else
2997# error "Port me!"
2998#endif
2999 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3000
3001 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3002
3003#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3004 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGRegEx & 15]));
3005#endif
3006 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3007 return off;
3008}
3009
3010
3011
3012#define IEM_MC_STORE_GREG_U16_CONST(a_iGReg, a_u16Const) \
3013 off = iemNativeEmitStoreGregU16Const(pReNative, off, a_iGReg, a_u16Const)
3014
3015/** Emits code for IEM_MC_STORE_GREG_U16_CONST. */
3016DECL_INLINE_THROW(uint32_t)
3017iemNativeEmitStoreGregU16Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint16_t uValue)
3018{
3019 Assert(iGReg < 16);
3020 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3021 kIemNativeGstRegUse_ForUpdate);
3022#ifdef RT_ARCH_AMD64
3023 /* mov reg16, imm16 */
3024 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
3025 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3026 if (idxGstTmpReg >= 8)
3027 pbCodeBuf[off++] = X86_OP_REX_B;
3028 pbCodeBuf[off++] = 0xb8 + (idxGstTmpReg & 7);
3029 pbCodeBuf[off++] = RT_BYTE1(uValue);
3030 pbCodeBuf[off++] = RT_BYTE2(uValue);
3031
3032#elif defined(RT_ARCH_ARM64)
3033 /* movk xdst, #uValue, lsl #0 */
3034 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3035 pu32CodeBuf[off++] = Armv8A64MkInstrMovK(idxGstTmpReg, uValue);
3036
3037#else
3038# error "Port me!"
3039#endif
3040
3041 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3042
3043#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3044 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3045#endif
3046 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3047 return off;
3048}
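/* Both forms preserve bits 63:16 of the host copy: the operand-size prefixed 'mov r16, imm16'
   on AMD64 and 'movk xN, #imm16, lsl #0' on ARM64 each replace only the low halfword, which
   matches the x86 semantics of a 16-bit GPR store. */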
3049
3050
3051#define IEM_MC_STORE_GREG_U16(a_iGReg, a_u16Value) \
3052 off = iemNativeEmitStoreGregU16(pReNative, off, a_iGReg, a_u16Value)
3053
3054/** Emits code for IEM_MC_STORE_GREG_U16. */
3055DECL_INLINE_THROW(uint32_t)
3056iemNativeEmitStoreGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3057{
3058 Assert(iGReg < 16);
3059 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3060
3061 /*
3062 * If it's a constant value (unlikely) we treat this as a
3063 * IEM_MC_STORE_GREG_U16_CONST statement.
3064 */
3065 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3066 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3067 { /* likely */ }
3068 else
3069 {
3070 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3071 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3072 return iemNativeEmitStoreGregU16Const(pReNative, off, iGReg, (uint16_t)pValueVar->u.uValue);
3073 }
3074
3075 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3076 kIemNativeGstRegUse_ForUpdate);
3077
3078#ifdef RT_ARCH_AMD64
3079 /* mov reg16, reg16 or [mem16] */
3080 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 12);
3081 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3082 if (pValueVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
3083 {
3084 if (idxGstTmpReg >= 8 || pValueVar->idxReg >= 8)
3085 pbCodeBuf[off++] = (idxGstTmpReg >= 8 ? X86_OP_REX_R : 0)
3086 | (pValueVar->idxReg >= 8 ? X86_OP_REX_B : 0);
3087 pbCodeBuf[off++] = 0x8b;
3088 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxGstTmpReg & 7, pValueVar->idxReg & 7);
3089 }
3090 else
3091 {
3092 uint8_t const idxStackSlot = pValueVar->idxStackSlot;
3093 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
3094 if (idxGstTmpReg >= 8)
3095 pbCodeBuf[off++] = X86_OP_REX_R;
3096 pbCodeBuf[off++] = 0x8b;
3097 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, idxGstTmpReg, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
3098 }
3099
3100#elif defined(RT_ARCH_ARM64)
3101 /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxVarReg to idxGstTmpReg bits 15:0. */
3102 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxValueVar, &off, true /*fInitialized*/);
3103 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3104 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxVarReg, 0, 16);
3105 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3106
3107#else
3108# error "Port me!"
3109#endif
3110
3111 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3112
3113#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3114 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3115#endif
3116 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3117 return off;
3118}
3119
3120
3121#define IEM_MC_STORE_GREG_U32_CONST(a_iGReg, a_u32Const) \
3122 off = iemNativeEmitStoreGregU32Const(pReNative, off, a_iGReg, a_u32Const)
3123
3124/** Emits code for IEM_MC_STORE_GREG_U32_CONST. */
3125DECL_INLINE_THROW(uint32_t)
3126iemNativeEmitStoreGregU32Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint32_t uValue)
3127{
3128 Assert(iGReg < 16);
3129 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3130 kIemNativeGstRegUse_ForFullWrite);
3131 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
3132#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3133 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3134#endif
3135 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3136 return off;
3137}
3138
3139
3140#define IEM_MC_STORE_GREG_U32(a_iGReg, a_u32Value) \
3141 off = iemNativeEmitStoreGregU32(pReNative, off, a_iGReg, a_u32Value)
3142
3143/** Emits code for IEM_MC_STORE_GREG_U32. */
3144DECL_INLINE_THROW(uint32_t)
3145iemNativeEmitStoreGregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3146{
3147 Assert(iGReg < 16);
3148 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3149
3150 /*
3151 * If it's a constant value (unlikely) we treat this as a
3152 * IEM_MC_STORE_GREG_U32_CONST statement.
3153 */
3154 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3155 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3156 { /* likely */ }
3157 else
3158 {
3159 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3160 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3161 return iemNativeEmitStoreGregU32Const(pReNative, off, iGReg, (uint32_t)pValueVar->u.uValue);
3162 }
3163
3164 /*
3165     * For the rest we allocate a guest register for the variable and write
3166     * it to the CPUMCTX structure.
3167 */
3168 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
3169#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3170 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3171#else
3172 RT_NOREF(idxVarReg);
3173#endif
3174#ifdef VBOX_STRICT
3175 off = iemNativeEmitTop32BitsClearCheck(pReNative, off, idxVarReg);
3176#endif
3177 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3178 return off;
3179}
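/* Writing a 32-bit GPR zero-extends to 64 bits on x86-64; that is why the guest register is
   allocated ForFullWrite and why the VBOX_STRICT check above asserts that bits 63:32 of the
   variable's host register are already clear before it becomes the guest register's value. */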
3180
3181
3182#define IEM_MC_STORE_GREG_U64_CONST(a_iGReg, a_u64Const) \
3183 off = iemNativeEmitStoreGregU64Const(pReNative, off, a_iGReg, a_u64Const)
3184
3185/** Emits code for IEM_MC_STORE_GREG_U64_CONST. */
3186DECL_INLINE_THROW(uint32_t)
3187iemNativeEmitStoreGregU64Const(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uValue)
3188{
3189 Assert(iGReg < 16);
3190 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3191 kIemNativeGstRegUse_ForFullWrite);
3192 off = iemNativeEmitLoadGprImm64(pReNative, off, idxGstTmpReg, uValue);
3193#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3194 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3195#endif
3196 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3197 return off;
3198}
3199
3200
3201#define IEM_MC_STORE_GREG_U64(a_iGReg, a_u64Value) \
3202 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_u64Value)
3203
3204#define IEM_MC_STORE_GREG_I64(a_iGReg, a_i64Value) \
3205 off = iemNativeEmitStoreGregU64(pReNative, off, a_iGReg, a_i64Value)
3206
3207/** Emits code for IEM_MC_STORE_GREG_U64. */
3208DECL_INLINE_THROW(uint32_t)
3209iemNativeEmitStoreGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t idxValueVar)
3210{
3211 Assert(iGReg < 16);
3212 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxValueVar);
3213
3214 /*
3215 * If it's a constant value (unlikely) we treat this as a
3216 * IEM_MC_STORE_GREG_U64_CONST statement.
3217 */
3218 PIEMNATIVEVAR const pValueVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxValueVar)];
3219 if (pValueVar->enmKind == kIemNativeVarKind_Stack)
3220 { /* likely */ }
3221 else
3222 {
3223 AssertStmt(pValueVar->enmKind == kIemNativeVarKind_Immediate,
3224 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
3225 return iemNativeEmitStoreGregU64Const(pReNative, off, iGReg, pValueVar->u.uValue);
3226 }
3227
3228 /*
3229     * For the rest we allocate a guest register for the variable and write
3230     * it to the CPUMCTX structure.
3231 */
3232 uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxValueVar, IEMNATIVEGSTREG_GPR(iGReg), &off);
3233#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3234 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3235#else
3236 RT_NOREF(idxVarReg);
3237#endif
3238 iemNativeVarRegisterRelease(pReNative, idxValueVar);
3239 return off;
3240}
3241
3242
3243#define IEM_MC_CLEAR_HIGH_GREG_U64(a_iGReg) \
3244 off = iemNativeEmitClearHighGregU64(pReNative, off, a_iGReg)
3245
3246/** Emits code for IEM_MC_CLEAR_HIGH_GREG_U64. */
3247DECL_INLINE_THROW(uint32_t)
3248iemNativeEmitClearHighGregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg)
3249{
3250 Assert(iGReg < 16);
3251 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3252 kIemNativeGstRegUse_ForUpdate);
3253 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxGstTmpReg, idxGstTmpReg);
3254#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3255 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3256#endif
3257 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3258 return off;
3259}
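/* The 32-bit copy of the register onto itself is sufficient here: iemNativeEmitLoadGprFromGpr32()
   only takes the low 32 bits and leaves the destination zero-extended (mov r32,r32 semantics),
   so bits 63:32 of the guest register end up cleared. */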
3260
3261
3262#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3263#define IEM_MC_STORE_GREG_PAIR_U64(a_iGRegLo, a_iGRegHi, a_u128Value) \
3264 off = iemNativeEmitStoreGregPairU64(pReNative, off, a_iGRegLo, a_iGRegHi, a_u128Value)
3265
3266/** Emits code for IEM_MC_STORE_GREG_PAIR_U64. */
3267DECL_INLINE_THROW(uint32_t)
3268iemNativeEmitStoreGregPairU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGRegLo, uint8_t iGRegHi, uint8_t idxDstVar)
3269{
3270 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
3271 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
3272 Assert(iGRegLo < 16 && iGRegHi < 16);
3273
3274 uint8_t const idxGstFullRegLo = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegLo),
3275 kIemNativeGstRegUse_ForFullWrite);
3276 uint8_t const idxGstFullRegHi = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGRegHi),
3277 kIemNativeGstRegUse_ForFullWrite);
3278
3279 iemNativeVarSetKindToStack(pReNative, idxDstVar);
3280 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
3281 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegLo, idxVarReg, 0);
3282 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxGstFullRegHi, idxVarReg, 1);
3283
3284 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
3285 iemNativeRegFreeTmp(pReNative, idxGstFullRegLo);
3286 iemNativeRegFreeTmp(pReNative, idxGstFullRegHi);
3287 return off;
3288}
3289#endif
3290
3291
3292/*********************************************************************************************************************************
3293* General purpose register manipulation (add, sub). *
3294*********************************************************************************************************************************/
3295
3296#define IEM_MC_ADD_GREG_U16(a_iGReg, a_u8Const) \
3297    off = iemNativeEmitAddGregU16(pReNative, off, a_iGReg, a_u8Const)
3298
3299/** Emits code for IEM_MC_ADD_GREG_U16. */
3300DECL_INLINE_THROW(uint32_t)
3301iemNativeEmitAddGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend)
3302{
3303 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3304 kIemNativeGstRegUse_ForUpdate);
3305
3306#ifdef RT_ARCH_AMD64
3307 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
3308 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3309 if (idxGstTmpReg >= 8)
3310 pbCodeBuf[off++] = X86_OP_REX_B;
3311 if (uAddend == 1)
3312 {
3313 pbCodeBuf[off++] = 0xff; /* inc */
3314 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3315 }
3316 else
3317 {
3318 pbCodeBuf[off++] = 0x81;
3319 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3320 pbCodeBuf[off++] = uAddend;
3321 pbCodeBuf[off++] = 0;
3322 }
3323
3324#else
3325 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3326 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3327
3328    /* add tmp, gstgrp, uAddend */
3329 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxTmpReg, idxGstTmpReg, uAddend, false /*f64Bit*/);
3330
3331    /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
3332 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
3333
3334 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3335#endif
3336
3337 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3338
3339#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3340 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3341#endif
3342
3343 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3344 return off;
3345}
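/* Worked example (illustrative): adding 2 to SI, e.g. for a 16-bit address-size string
   instruction, must leave bits 63:16 of RSI untouched. AMD64 gets this from the operand-size
   prefixed inc/add above; ARM64 has no 16-bit add, so the sum is computed in a temporary and
   only bits 15:0 are merged back with BFI. */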
3346
3347
3348#define IEM_MC_ADD_GREG_U32(a_iGReg, a_u8Const) \
3349 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
3350
3351#define IEM_MC_ADD_GREG_U64(a_iGReg, a_u8Const) \
3352 off = iemNativeEmitAddGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
3353
3354/** Emits code for IEM_MC_ADD_GREG_U32 and IEM_MC_ADD_GREG_U64. */
3355DECL_INLINE_THROW(uint32_t)
3356iemNativeEmitAddGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uAddend, bool f64Bit)
3357{
3358 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3359 kIemNativeGstRegUse_ForUpdate);
3360
3361#ifdef RT_ARCH_AMD64
3362 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3363 if (f64Bit)
3364 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
3365 else if (idxGstTmpReg >= 8)
3366 pbCodeBuf[off++] = X86_OP_REX_B;
3367 if (uAddend == 1)
3368 {
3369 pbCodeBuf[off++] = 0xff; /* inc */
3370 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3371 }
3372 else if (uAddend < 128)
3373 {
3374 pbCodeBuf[off++] = 0x83; /* add */
3375 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3376 pbCodeBuf[off++] = RT_BYTE1(uAddend);
3377 }
3378 else
3379 {
3380 pbCodeBuf[off++] = 0x81; /* add */
3381 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxGstTmpReg & 7);
3382 pbCodeBuf[off++] = RT_BYTE1(uAddend);
3383 pbCodeBuf[off++] = 0;
3384 pbCodeBuf[off++] = 0;
3385 pbCodeBuf[off++] = 0;
3386 }
3387
3388#else
3389    /* add gstgrp, gstgrp, uAddend */
3390 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3391 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGstTmpReg, idxGstTmpReg, uAddend, f64Bit);
3392
3393#endif
3394
3395 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3396
3397#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3398 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3399#endif
3400
3401 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3402 return off;
3403}
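/* Encoding note: 0x83 /0 takes a sign-extended 8-bit immediate, so it is only used while
   uAddend < 128 (always non-negative here); larger addends fall back to 0x81 /0 with a full
   32-bit immediate, which is three bytes longer. */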
3404
3405
3406
3407#define IEM_MC_SUB_GREG_U16(a_iGReg, a_u8SubtrahendConst) \
3408 off = iemNativeEmitSubGregU16(pReNative, off, a_iGReg, a_u8SubtrahendConst)
3409
3410/** Emits code for IEM_MC_SUB_GREG_U16. */
3411DECL_INLINE_THROW(uint32_t)
3412iemNativeEmitSubGregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend)
3413{
3414 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3415 kIemNativeGstRegUse_ForUpdate);
3416
3417#ifdef RT_ARCH_AMD64
3418 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
3419 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
3420 if (idxGstTmpReg >= 8)
3421 pbCodeBuf[off++] = X86_OP_REX_B;
3422 if (uSubtrahend == 1)
3423 {
3424 pbCodeBuf[off++] = 0xff; /* dec */
3425 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3426 }
3427 else
3428 {
3429 pbCodeBuf[off++] = 0x81;
3430 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3431 pbCodeBuf[off++] = uSubtrahend;
3432 pbCodeBuf[off++] = 0;
3433 }
3434
3435#else
3436 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
3437 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
3438
3439 /* sub tmp, gstgrp, uSubtrahend */
3440 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxTmpReg, idxGstTmpReg, uSubtrahend, false /*f64Bit*/);
3441
3442    /* bfi w1, w2, 0, 16 - moves bits 15:0 from idxTmpReg to idxGstTmpReg. */
3443 pu32CodeBuf[off++] = Armv8A64MkInstrBfi(idxGstTmpReg, idxTmpReg, 0, 16);
3444
3445 iemNativeRegFreeTmp(pReNative, idxTmpReg);
3446#endif
3447
3448 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3449
3450#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3451 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3452#endif
3453
3454 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3455 return off;
3456}
3457
3458
3459#define IEM_MC_SUB_GREG_U32(a_iGReg, a_u8Const) \
3460 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, false /*f64Bit*/)
3461
3462#define IEM_MC_SUB_GREG_U64(a_iGReg, a_u8Const) \
3463 off = iemNativeEmitSubGregU32U64(pReNative, off, a_iGReg, a_u8Const, true /*f64Bit*/)
3464
3465/** Emits code for IEM_MC_SUB_GREG_U32 and IEM_MC_SUB_GREG_U64. */
3466DECL_INLINE_THROW(uint32_t)
3467iemNativeEmitSubGregU32U64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint8_t uSubtrahend, bool f64Bit)
3468{
3469 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3470 kIemNativeGstRegUse_ForUpdate);
3471
3472#ifdef RT_ARCH_AMD64
3473 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
3474 if (f64Bit)
3475 pbCodeBuf[off++] = X86_OP_REX_W | (idxGstTmpReg >= 8 ? X86_OP_REX_B : 0);
3476 else if (idxGstTmpReg >= 8)
3477 pbCodeBuf[off++] = X86_OP_REX_B;
3478 if (uSubtrahend == 1)
3479 {
3480 pbCodeBuf[off++] = 0xff; /* dec */
3481 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 1, idxGstTmpReg & 7);
3482 }
3483 else if (uSubtrahend < 128)
3484 {
3485 pbCodeBuf[off++] = 0x83; /* sub */
3486 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3487 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
3488 }
3489 else
3490 {
3491 pbCodeBuf[off++] = 0x81; /* sub */
3492 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 5, idxGstTmpReg & 7);
3493 pbCodeBuf[off++] = RT_BYTE1(uSubtrahend);
3494 pbCodeBuf[off++] = 0;
3495 pbCodeBuf[off++] = 0;
3496 pbCodeBuf[off++] = 0;
3497 }
3498
3499#else
3500 /* sub tmp, gstgrp, uSubtrahend */
3501 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
3502 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxGstTmpReg, idxGstTmpReg, uSubtrahend, f64Bit);
3503
3504#endif
3505
3506 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3507
3508#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3509 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3510#endif
3511
3512 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3513 return off;
3514}
3515
3516
3517#define IEM_MC_AND_GREG_U8(a_iGReg, a_u8Mask) \
3518 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
3519
3520#define IEM_MC_AND_GREG_U16(a_iGReg, a_u16Mask) \
3521 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
3522
3523#define IEM_MC_AND_GREG_U32(a_iGReg, a_u32Mask) \
3524 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
3525
3526#define IEM_MC_AND_GREG_U64(a_iGReg, a_u64Mask) \
3527 off = iemNativeEmitAndGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
3528
3529/** Emits code for IEM_MC_AND_GREG_U8, IEM_MC_AND_GREG_U16, IEM_MC_AND_GREG_U32 and IEM_MC_AND_GREG_U64. */
3530DECL_INLINE_THROW(uint32_t)
3531iemNativeEmitAndGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
3532{
3533#ifdef VBOX_STRICT
3534 switch (cbMask)
3535 {
3536 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3537 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3538 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3539 case sizeof(uint64_t): break;
3540 default: AssertFailedBreak();
3541 }
3542#endif
3543
3544 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3545 kIemNativeGstRegUse_ForUpdate);
3546
3547 switch (cbMask)
3548 {
3549 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
3550 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffffff00));
3551 break;
3552 case sizeof(uint16_t): /* Leaves the higher bits untouched. */
3553 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask | UINT64_C(0xffffffffffff0000));
3554 break;
3555 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
3556 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
3557 break;
3558 case sizeof(uint64_t):
3559 off = iemNativeEmitAndGprByImm(pReNative, off, idxGstTmpReg, uMask);
3560 break;
3561 default: AssertFailedBreak();
3562 }
3563
3564 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3565
3566#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3567 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3568#endif
3569
3570 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3571 return off;
3572}
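/* Worked example (illustrative values): IEM_MC_AND_GREG_U16(X86_GREG_xDX, 0x00ff) becomes an
   AND of the RDX copy with 0xffffffffffff00ff, so bits 63:16 survive as a 16-bit operation
   requires, whereas the U32 case uses a plain 32-bit AND and thus also clears bits 63:32. */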
3573
3574
3575#define IEM_MC_OR_GREG_U8(a_iGReg, a_u8Mask) \
3576 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u8Mask, sizeof(uint8_t))
3577
3578#define IEM_MC_OR_GREG_U16(a_iGReg, a_u16Mask) \
3579 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u16Mask, sizeof(uint16_t))
3580
3581#define IEM_MC_OR_GREG_U32(a_iGReg, a_u32Mask) \
3582 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u32Mask, sizeof(uint32_t))
3583
3584#define IEM_MC_OR_GREG_U64(a_iGReg, a_u64Mask) \
3585 off = iemNativeEmitOrGReg(pReNative, off, a_iGReg, a_u64Mask, sizeof(uint64_t))
3586
3587/** Emits code for IEM_MC_OR_GREG_U8, IEM_MC_OR_GREG_U16, IEM_MC_OR_GREG_U32 and IEM_MC_OR_GREG_U64. */
3588DECL_INLINE_THROW(uint32_t)
3589iemNativeEmitOrGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iGReg, uint64_t uMask, uint8_t cbMask)
3590{
3591#ifdef VBOX_STRICT
3592 switch (cbMask)
3593 {
3594 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3595 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3596 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3597 case sizeof(uint64_t): break;
3598 default: AssertFailedBreak();
3599 }
3600#endif
3601
3602 uint8_t const idxGstTmpReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(iGReg),
3603 kIemNativeGstRegUse_ForUpdate);
3604
3605 switch (cbMask)
3606 {
3607 case sizeof(uint8_t): /* Leaves the higher bits untouched. */
3608 case sizeof(uint16_t):
3609 case sizeof(uint64_t):
3610 off = iemNativeEmitOrGprByImm(pReNative, off, idxGstTmpReg, uMask);
3611 break;
3612 case sizeof(uint32_t): /* Zeroes the high 32 bits of the guest register. */
3613 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxGstTmpReg, uMask);
3614 break;
3615 default: AssertFailedBreak();
3616 }
3617
3618 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
3619
3620#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
3621 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxGstTmpReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[iGReg]));
3622#endif
3623
3624 iemNativeRegFreeTmp(pReNative, idxGstTmpReg);
3625 return off;
3626}
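/* For the 8/16/64-bit cases a single 64-bit OR suffices: the mask has no bits set above the
   operand width, so the higher guest bits cannot change. Only the 32-bit case needs the
   32-bit form, because a 32-bit x86 operation must also clear bits 63:32. */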
3627
3628
3629/*********************************************************************************************************************************
3630* Local/Argument variable manipulation (add, sub, and, or). *
3631*********************************************************************************************************************************/
3632
3633#define IEM_MC_AND_LOCAL_U8(a_u8Local, a_u8Mask) \
3634 off = iemNativeEmitAndLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
3635
3636#define IEM_MC_AND_LOCAL_U16(a_u16Local, a_u16Mask) \
3637 off = iemNativeEmitAndLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
3638
3639#define IEM_MC_AND_LOCAL_U32(a_u32Local, a_u32Mask) \
3640 off = iemNativeEmitAndLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
3641
3642#define IEM_MC_AND_LOCAL_U64(a_u64Local, a_u64Mask) \
3643 off = iemNativeEmitAndLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
3644
3645
3646#define IEM_MC_AND_ARG_U16(a_u16Arg, a_u16Mask) \
3647 off = iemNativeEmitAndLocal(pReNative, off, a_u16Arg, a_u16Mask, sizeof(uint16_t))
3648
3649#define IEM_MC_AND_ARG_U32(a_u32Arg, a_u32Mask) \
3650 off = iemNativeEmitAndLocal(pReNative, off, a_u32Arg, a_u32Mask, sizeof(uint32_t))
3651
3652#define IEM_MC_AND_ARG_U64(a_u64Arg, a_u64Mask) \
3653 off = iemNativeEmitAndLocal(pReNative, off, a_u64Arg, a_u64Mask, sizeof(uint64_t))
3654
3655/** Emits code for AND'ing a local and a constant value. */
3656DECL_INLINE_THROW(uint32_t)
3657iemNativeEmitAndLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
3658{
3659#ifdef VBOX_STRICT
3660 switch (cbMask)
3661 {
3662 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3663 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3664 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3665 case sizeof(uint64_t): break;
3666 default: AssertFailedBreak();
3667 }
3668#endif
3669
3670 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3671 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
3672
3673 if (cbMask <= sizeof(uint32_t))
3674 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg, uMask);
3675 else
3676 off = iemNativeEmitAndGprByImm(pReNative, off, idxVarReg, uMask);
3677
3678 iemNativeVarRegisterRelease(pReNative, idxVar);
3679 return off;
3680}
3681
3682
3683#define IEM_MC_OR_LOCAL_U8(a_u8Local, a_u8Mask) \
3684 off = iemNativeEmitOrLocal(pReNative, off, a_u8Local, a_u8Mask, sizeof(uint8_t))
3685
3686#define IEM_MC_OR_LOCAL_U16(a_u16Local, a_u16Mask) \
3687 off = iemNativeEmitOrLocal(pReNative, off, a_u16Local, a_u16Mask, sizeof(uint16_t))
3688
3689#define IEM_MC_OR_LOCAL_U32(a_u32Local, a_u32Mask) \
3690 off = iemNativeEmitOrLocal(pReNative, off, a_u32Local, a_u32Mask, sizeof(uint32_t))
3691
3692#define IEM_MC_OR_LOCAL_U64(a_u64Local, a_u64Mask) \
3693 off = iemNativeEmitOrLocal(pReNative, off, a_u64Local, a_u64Mask, sizeof(uint64_t))
3694
3695/** Emits code for OR'ing a local and a constant value. */
3696DECL_INLINE_THROW(uint32_t)
3697iemNativeEmitOrLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint64_t uMask, uint8_t cbMask)
3698{
3699#ifdef VBOX_STRICT
3700 switch (cbMask)
3701 {
3702 case sizeof(uint8_t): Assert((uint8_t)uMask == uMask); break;
3703 case sizeof(uint16_t): Assert((uint16_t)uMask == uMask); break;
3704 case sizeof(uint32_t): Assert((uint32_t)uMask == uMask); break;
3705 case sizeof(uint64_t): break;
3706 default: AssertFailedBreak();
3707 }
3708#endif
3709
3710 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3711 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbMask);
3712
3713 if (cbMask <= sizeof(uint32_t))
3714 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxVarReg, uMask);
3715 else
3716 off = iemNativeEmitOrGprByImm(pReNative, off, idxVarReg, uMask);
3717
3718 iemNativeVarRegisterRelease(pReNative, idxVar);
3719 return off;
3720}
3721
3722
3723#define IEM_MC_BSWAP_LOCAL_U16(a_u16Local) \
3724 off = iemNativeEmitBswapLocal(pReNative, off, a_u16Local, sizeof(uint16_t))
3725
3726#define IEM_MC_BSWAP_LOCAL_U32(a_u32Local) \
3727 off = iemNativeEmitBswapLocal(pReNative, off, a_u32Local, sizeof(uint32_t))
3728
3729#define IEM_MC_BSWAP_LOCAL_U64(a_u64Local) \
3730 off = iemNativeEmitBswapLocal(pReNative, off, a_u64Local, sizeof(uint64_t))
3731
3732/** Emits code for reversing the byte order in a local value. */
3733DECL_INLINE_THROW(uint32_t)
3734iemNativeEmitBswapLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal)
3735{
3736 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3737 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3738
3739 switch (cbLocal)
3740 {
3741 case sizeof(uint16_t): off = iemNativeEmitBswapGpr16(pReNative, off, idxVarReg); break;
3742 case sizeof(uint32_t): off = iemNativeEmitBswapGpr32(pReNative, off, idxVarReg); break;
3743 case sizeof(uint64_t): off = iemNativeEmitBswapGpr(pReNative, off, idxVarReg); break;
3744 default: AssertFailedBreak();
3745 }
3746
3747 iemNativeVarRegisterRelease(pReNative, idxVar);
3748 return off;
3749}
3750
3751
3752#define IEM_MC_SHL_LOCAL_S16(a_i16Local, a_cShift) \
3753 off = iemNativeEmitShlLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
3754
3755#define IEM_MC_SHL_LOCAL_S32(a_i32Local, a_cShift) \
3756 off = iemNativeEmitShlLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
3757
3758#define IEM_MC_SHL_LOCAL_S64(a_i64Local, a_cShift) \
3759 off = iemNativeEmitShlLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
3760
3761/** Emits code for shifting a local value left. */
3762DECL_INLINE_THROW(uint32_t)
3763iemNativeEmitShlLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
3764{
3765#ifdef VBOX_STRICT
3766 switch (cbLocal)
3767 {
3768 case sizeof(uint8_t): Assert(cShift < 8); break;
3769 case sizeof(uint16_t): Assert(cShift < 16); break;
3770 case sizeof(uint32_t): Assert(cShift < 32); break;
3771 case sizeof(uint64_t): Assert(cShift < 64); break;
3772 default: AssertFailedBreak();
3773 }
3774#endif
3775
3776 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3777 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3778
3779 if (cbLocal <= sizeof(uint32_t))
3780 {
3781 off = iemNativeEmitShiftGpr32Left(pReNative, off, idxVarReg, cShift);
3782 if (cbLocal < sizeof(uint32_t))
3783 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxVarReg,
3784 cbLocal == sizeof(uint16_t)
3785 ? UINT32_C(0xffff)
3786 : UINT32_C(0xff));
3787 }
3788 else
3789 off = iemNativeEmitShiftGprLeft(pReNative, off, idxVarReg, cShift);
3790
3791 iemNativeVarRegisterRelease(pReNative, idxVar);
3792 return off;
3793}
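/* Worked example (illustrative values): a 16-bit local holding 0x8001 shifted left by 1 yields
   0x10002 in the 32-bit host register; the AND with 0xffff above truncates this back to 0x0002,
   preserving proper 16-bit wrap-around semantics. */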
3794
3795
3796#define IEM_MC_SAR_LOCAL_S16(a_i16Local, a_cShift) \
3797 off = iemNativeEmitSarLocal(pReNative, off, a_i16Local, sizeof(int16_t), a_cShift)
3798
3799#define IEM_MC_SAR_LOCAL_S32(a_i32Local, a_cShift) \
3800 off = iemNativeEmitSarLocal(pReNative, off, a_i32Local, sizeof(int32_t), a_cShift)
3801
3802#define IEM_MC_SAR_LOCAL_S64(a_i64Local, a_cShift) \
3803 off = iemNativeEmitSarLocal(pReNative, off, a_i64Local, sizeof(int64_t), a_cShift)
3804
3805/** Emits code for arithmetically shifting a local value right. */
3806DECL_INLINE_THROW(uint32_t)
3807iemNativeEmitSarLocal(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar, uint8_t cbLocal, uint8_t cShift)
3808{
3809#ifdef VBOX_STRICT
3810 switch (cbLocal)
3811 {
3812 case sizeof(int8_t): Assert(cShift < 8); break;
3813 case sizeof(int16_t): Assert(cShift < 16); break;
3814 case sizeof(int32_t): Assert(cShift < 32); break;
3815 case sizeof(int64_t): Assert(cShift < 64); break;
3816 default: AssertFailedBreak();
3817 }
3818#endif
3819
3820 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3821 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3822
3823 /* Need to sign extend the value first to make sure the sign is correct in the following arithmetic shift. */
3824 if (cbLocal == sizeof(uint8_t))
3825 off = iemNativeEmitLoadGpr32SignExtendedFromGpr8(pReNative, off, idxVarReg, idxVarReg);
3826 else if (cbLocal == sizeof(uint16_t))
3827 off = iemNativeEmitLoadGpr32SignExtendedFromGpr16(pReNative, off, idxVarReg, idxVarReg);
3828
3829 if (cbLocal <= sizeof(uint32_t))
3830 off = iemNativeEmitArithShiftGpr32Right(pReNative, off, idxVarReg, cShift);
3831 else
3832 off = iemNativeEmitArithShiftGprRight(pReNative, off, idxVarReg, cShift);
3833
3834 iemNativeVarRegisterRelease(pReNative, idxVar);
3835 return off;
3836}
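/* Note (illustrative): the sign extension above is what makes the 32-bit
   arithmetic shift correct for narrower locals, e.g. a 16-bit local holding
   0x8000 is widened to 0xffff8000 first, so shifting right by 1 yields
   0xffffc000 and the low word correctly reads 0xc000 rather than 0x4000. */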
3837
3838
3839#define IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR(a_EffAddr, a_i16) \
3840 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i16, sizeof(int16_t))
3841
3842#define IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR(a_EffAddr, a_i32) \
3843 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i32, sizeof(int32_t))
3844
3845#define IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR(a_EffAddr, a_i64) \
3846 off = iemNativeEmitAddLocalToEffAddr(pReNative, off, a_EffAddr, a_i64, sizeof(int64_t))
3847
3848/** Emits code for IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR/IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR. */
3849DECL_INLINE_THROW(uint32_t)
3850iemNativeEmitAddLocalToEffAddr(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEffAddr, uint8_t idxVar, uint8_t cbLocal)
3851{
3852 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEffAddr);
3853 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEffAddr, sizeof(RTGCPTR));
3854 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3855 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVar, cbLocal);
3856
3857 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
3858 uint8_t const idxVarRegEffAddr = iemNativeVarRegisterAcquire(pReNative, idxVarEffAddr, &off, true /*fInitialized*/);
3859
3860 /* Need to sign extend the value. */
3861 if (cbLocal <= sizeof(uint32_t))
3862 {
3863/** @todo ARM64: In case of boredom, the extended add instruction can do the
3864 * conversion directly: ADD idxVarRegEffAddr, idxVarRegEffAddr, [w]idxVarReg, SXTH/SXTW */
3865 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
3866
3867 switch (cbLocal)
3868 {
3869 case sizeof(int16_t): off = iemNativeEmitLoadGprSignExtendedFromGpr16(pReNative, off, idxRegTmp, idxVarReg); break;
3870 case sizeof(int32_t): off = iemNativeEmitLoadGprSignExtendedFromGpr32(pReNative, off, idxRegTmp, idxVarReg); break;
3871 default: AssertFailed();
3872 }
3873
3874 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxRegTmp);
3875 iemNativeRegFreeTmp(pReNative, idxRegTmp);
3876 }
3877 else
3878 off = iemNativeEmitAddTwoGprs(pReNative, off, idxVarRegEffAddr, idxVarReg);
3879
3880 iemNativeVarRegisterRelease(pReNative, idxVarEffAddr);
3881 iemNativeVarRegisterRelease(pReNative, idxVar);
3882 return off;
3883}
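/* Note: the sign-extended addend is built in a temporary register above so the
   addend variable's own register is left untouched; only the effective-address
   variable is updated by this MC. */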
3884
3885
3886
3887/*********************************************************************************************************************************
3888* EFLAGS *
3889*********************************************************************************************************************************/
3890
3891#if !defined(VBOX_WITH_STATISTICS) || !defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
3892# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) ((void)0)
3893#else
3894# define IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput) \
3895 iemNativeEFlagsOptimizationStats(pReNative, a_fEflInput, a_fEflOutput)
3896
3897DECLINLINE(void) iemNativeEFlagsOptimizationStats(PIEMRECOMPILERSTATE pReNative, uint32_t fEflInput, uint32_t fEflOutput)
3898{
3899 if (fEflOutput)
3900 {
3901 PVMCPUCC const pVCpu = pReNative->pVCpu;
3902# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3903 IEMLIVENESSBIT const LivenessBit0 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit0;
3904 IEMLIVENESSBIT const LivenessBit1 = pReNative->paLivenessEntries[pReNative->idxCurCall].Bit1;
3905 AssertCompile(IEMLIVENESS_STATE_CLOBBERED == 0);
3906# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
3907 if (fEflOutput & (a_fEfl)) \
3908 { \
3909 if (LivenessBit0.a_fLivenessMember | LivenessBit1.a_fLivenessMember) \
3910 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
3911 else \
3912 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
3913 } else do { } while (0)
3914# else
3915 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall];
3916 IEMLIVENESSBIT const LivenessClobbered =
3917 {
3918 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3919 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3920 | pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
3921 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
3922 };
3923 IEMLIVENESSBIT const LivenessDelayable =
3924 {
3925 pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3926 & pLivenessEntry->aBits[IEMLIVENESS_BIT_POT_XCPT_OR_CALL].bm64
3927 & ~( pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3928 | pLivenessEntry->aBits[IEMLIVENESS_BIT_OTHER].bm64)
3929 };
3930# define CHECK_FLAG_AND_UPDATE_STATS(a_fEfl, a_fLivenessMember, a_CoreStatName) \
3931 if (fEflOutput & (a_fEfl)) \
3932 { \
3933 if (LivenessClobbered.a_fLivenessMember) \
3934 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Skippable); \
3935 else if (LivenessDelayable.a_fLivenessMember) \
3936 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Delayable); \
3937 else \
3938 STAM_COUNTER_INC(&pVCpu->iem.s.a_CoreStatName ## Required); \
3939 } else do { } while (0)
3940# endif
3941 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_CF, fEflCf, StatNativeLivenessEflCf);
3942 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_PF, fEflPf, StatNativeLivenessEflPf);
3943 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_AF, fEflAf, StatNativeLivenessEflAf);
3944 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_ZF, fEflZf, StatNativeLivenessEflZf);
3945 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_SF, fEflSf, StatNativeLivenessEflSf);
3946 CHECK_FLAG_AND_UPDATE_STATS(X86_EFL_OF, fEflOf, StatNativeLivenessEflOf);
3947 //CHECK_FLAG_AND_UPDATE_STATS(~X86_EFL_STATUS_BITS, fEflOther, StatNativeLivenessEflOther);
3948# undef CHECK_FLAG_AND_UPDATE_STATS
3949 }
3950 RT_NOREF(fEflInput);
3951}
3952#endif /* VBOX_WITH_STATISTICS && IEMNATIVE_WITH_LIVENESS_ANALYSIS */
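/* Rough classification used by the statistics above: an EFLAGS output counts as
   "skippable" when liveness shows the next access clobbers it without reading
   it, "delayable" (extended layout only) when merely a potential exception or
   call path might consume it, and "required" otherwise. */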
3953
3954#undef IEM_MC_FETCH_EFLAGS /* should not be used */
3955#define IEM_MC_FETCH_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
3956 off = iemNativeEmitFetchEFlags(pReNative, off, a_EFlags, a_fEflInput, a_fEflOutput)
3957
3958/** Handles IEM_MC_FETCH_EFLAGS_EX. */
3959DECL_INLINE_THROW(uint32_t)
3960iemNativeEmitFetchEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags,
3961 uint32_t fEflInput, uint32_t fEflOutput)
3962{
3963 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarEFlags);
3964 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
3965 RT_NOREF(fEflInput, fEflOutput);
3966
3967#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3968# ifdef VBOX_STRICT
3969 if ( pReNative->idxCurCall != 0
3970 && (fEflInput != 0 || fEflOutput != 0) /* for NOT these are both zero for now. */)
3971 {
3972 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[pReNative->idxCurCall - 1];
3973 uint32_t const fBoth = fEflInput | fEflOutput;
3974# define ASSERT_ONE_EFL(a_fElfConst, a_idxField) \
3975 AssertMsg( !(fBoth & (a_fElfConst)) \
3976 || (!(fEflInput & (a_fElfConst)) \
3977 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
3978 : !(fEflOutput & (a_fElfConst)) \
3979 ? IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) \
3980 : IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)) ), \
3981 ("%s - %u\n", #a_fElfConst, iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, a_idxField)))
3982 ASSERT_ONE_EFL(~(uint32_t)X86_EFL_STATUS_BITS, IEMLIVENESSBIT_IDX_EFL_OTHER);
3983 ASSERT_ONE_EFL(X86_EFL_CF, IEMLIVENESSBIT_IDX_EFL_CF);
3984 ASSERT_ONE_EFL(X86_EFL_PF, IEMLIVENESSBIT_IDX_EFL_PF);
3985 ASSERT_ONE_EFL(X86_EFL_AF, IEMLIVENESSBIT_IDX_EFL_AF);
3986 ASSERT_ONE_EFL(X86_EFL_ZF, IEMLIVENESSBIT_IDX_EFL_ZF);
3987 ASSERT_ONE_EFL(X86_EFL_SF, IEMLIVENESSBIT_IDX_EFL_SF);
3988 ASSERT_ONE_EFL(X86_EFL_OF, IEMLIVENESSBIT_IDX_EFL_OF);
3989# undef ASSERT_ONE_EFL
3990 }
3991# endif
3992#endif
3993
3994 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
3995
3996 /** @todo this is suboptimal. EFLAGS is probably shadowed and we should use
3997 * the existing shadow copy. */
3998 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, false /*fInitialized*/);
3999 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
4000 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
4001 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
4002 return off;
4003}
4004
4005
4006
4007/** @todo emit strict build assertions for IEM_MC_COMMIT_EFLAGS_EX when we
4008 * start using it with custom native code emission (inlining assembly
4009 * instruction helpers). */
4010#undef IEM_MC_COMMIT_EFLAGS /* should not be used */
4011#define IEM_MC_COMMIT_EFLAGS_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
4012 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
4013 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, true /*fUpdateSkipping*/)
4014
4015#undef IEM_MC_COMMIT_EFLAGS_OPT /* should not be used */
4016#define IEM_MC_COMMIT_EFLAGS_OPT_EX(a_EFlags, a_fEflInput, a_fEflOutput) \
4017 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
4018 off = iemNativeEmitCommitEFlags(pReNative, off, a_EFlags, a_fEflOutput, false /*fUpdateSkipping*/)
4019
4020/** Handles IEM_MC_COMMIT_EFLAGS_EX. */
4021DECL_INLINE_THROW(uint32_t)
4022iemNativeEmitCommitEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarEFlags, uint32_t fEflOutput,
4023 bool fUpdateSkipping)
4024{
4025 RT_NOREF(fEflOutput);
4026 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxVarEFlags, &off, true /*fInitialized*/);
4027 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarEFlags, sizeof(uint32_t));
4028
4029#ifdef VBOX_STRICT
4030 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RA1_MASK);
4031 uint32_t offFixup = off;
4032 off = iemNativeEmitJnzToFixed(pReNative, off, off);
4033 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2001));
4034 iemNativeFixupFixedJump(pReNative, offFixup, off);
4035
4036 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxReg, X86_EFL_RAZ_MASK & CPUMX86EFLAGS_HW_MASK_32);
4037 offFixup = off;
4038 off = iemNativeEmitJzToFixed(pReNative, off, off);
4039 off = iemNativeEmitBrk(pReNative, off, UINT32_C(0x2002));
4040 iemNativeFixupFixedJump(pReNative, offFixup, off);
4041
4042 /** @todo validate that only bits in the fEflOutput mask changed. */
4043#endif
4044
4045#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
4046 if (fUpdateSkipping)
4047 {
4048 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
4049 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4050 else
4051 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
4052 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4053 }
4054#else
4055 RT_NOREF_PV(fUpdateSkipping);
4056#endif
4057
4058 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxReg, kIemNativeGstReg_EFlags, off);
4059 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxReg, RT_UOFFSETOF_DYN(VMCPUCC, cpum.GstCtx.eflags));
4060 iemNativeVarRegisterRelease(pReNative, idxVarEFlags);
4061 return off;
4062}
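/* The strict checks above trap (debug break codes 0x2001/0x2002) when the
   committed value violates the architectural constants: the reserved always-one
   bit (X86_EFL_RA1_MASK) must be set and the reserved-as-zero bits must be
   clear. */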
4063
4064
4065typedef enum IEMNATIVEMITEFLOP
4066{
4067 kIemNativeEmitEflOp_Invalid = 0,
4068 kIemNativeEmitEflOp_Set,
4069 kIemNativeEmitEflOp_Clear,
4070 kIemNativeEmitEflOp_Flip
4071} IEMNATIVEMITEFLOP;
4072
4073#define IEM_MC_SET_EFL_BIT(a_fBit) \
4074 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Set);
4075
4076#define IEM_MC_CLEAR_EFL_BIT(a_fBit) \
4077 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Clear);
4078
4079#define IEM_MC_FLIP_EFL_BIT(a_fBit) \
4080 off = iemNativeEmitModifyEFlagsBit(pReNative, off, a_fBit, kIemNativeEmitEflOp_Flip);
4081
4082/** Handles IEM_MC_SET_EFL_BIT/IEM_MC_CLEAR_EFL_BIT/IEM_MC_FLIP_EFL_BIT. */
4083DECL_INLINE_THROW(uint32_t) iemNativeEmitModifyEFlagsBit(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflBit, IEMNATIVEMITEFLOP enmOp)
4084{
4085 uint8_t const idxEflReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
4086 kIemNativeGstRegUse_ForUpdate, false /*fNoVolatileRegs*/);
4087
4088 switch (enmOp)
4089 {
4090 case kIemNativeEmitEflOp_Set:
4091 off = iemNativeEmitOrGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
4092 break;
4093 case kIemNativeEmitEflOp_Clear:
4094 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxEflReg, ~fEflBit);
4095 break;
4096 case kIemNativeEmitEflOp_Flip:
4097 off = iemNativeEmitXorGpr32ByImm(pReNative, off, idxEflReg, fEflBit);
4098 break;
4099 default:
4100 AssertFailed();
4101 break;
4102 }
4103
4104 /** @todo No delayed writeback for EFLAGS right now. */
4105 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxEflReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.eflags));
4106
4107 /* Free but don't flush the EFLAGS register. */
4108 iemNativeRegFreeTmp(pReNative, idxEflReg);
4109
4110 return off;
4111}
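/* Typical usage: IEM_MC_SET_EFL_BIT(X86_EFL_CF) ORs the carry flag into the
   shadowed EFLAGS value and, since there is no delayed write-back for EFLAGS
   yet, stores the result straight back into the guest context. */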
4112
4113
4114/*********************************************************************************************************************************
4115* Emitters for segment register fetches (IEM_MC_FETCH_SREG_XXX).
4116*********************************************************************************************************************************/
4117
4118#define IEM_MC_FETCH_SREG_U16(a_u16Dst, a_iSReg) \
4119 off = iemNativeEmitFetchSReg(pReNative, off, a_u16Dst, a_iSReg, sizeof(uint16_t))
4120
4121#define IEM_MC_FETCH_SREG_ZX_U32(a_u32Dst, a_iSReg) \
4122 off = iemNativeEmitFetchSReg(pReNative, off, a_u32Dst, a_iSReg, sizeof(uint32_t))
4123
4124#define IEM_MC_FETCH_SREG_ZX_U64(a_u64Dst, a_iSReg) \
4125 off = iemNativeEmitFetchSReg(pReNative, off, a_u64Dst, a_iSReg, sizeof(uint64_t))
4126
4127
4128/** Emits code for IEM_MC_FETCH_SREG_U16, IEM_MC_FETCH_SREG_ZX_U32 and
4129 * IEM_MC_FETCH_SREG_ZX_U64. */
4130DECL_INLINE_THROW(uint32_t)
4131iemNativeEmitFetchSReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iSReg, int8_t cbVar)
4132{
4133 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
4134 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbVar); RT_NOREF(cbVar);
4135 Assert(iSReg < X86_SREG_COUNT);
4136
4137 /*
4138 * For now, we will not create a shadow copy of a selector. The rationale
4139 * is that since we do not recompile the popping and loading of segment
4140 * registers, and the IEM_MC_FETCH_SREG_U* MCs are only used for
4141 * pushing and moving to registers, there is only a small chance that the
4142 * shadow copy will be accessed again before the register is reloaded. One
4143 * scenario would be nested calls in 16-bit code, but I doubt it's worth
4144 * the extra register pressure atm.
4145 *
4146 * What we really need first, though, is to combine iemNativeRegAllocTmpForGuestReg
4147 * and iemNativeVarRegisterAcquire for a load scenario. We only have the
4148 * store scenario covered at present (r160730).
4149 */
4150 iemNativeVarSetKindToStack(pReNative, idxDstVar);
4151 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
4152 off = iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxVarReg, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aSRegs[iSReg].Sel));
4153 iemNativeVarRegisterRelease(pReNative, idxDstVar);
4154 return off;
4155}
4156
4157
4158
4159/*********************************************************************************************************************************
4160* Register references. *
4161*********************************************************************************************************************************/
4162
4163#define IEM_MC_REF_GREG_U8_THREADED(a_pu8Dst, a_iGRegEx) \
4164 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, false /*fConst*/)
4165
4166#define IEM_MC_REF_GREG_U8_CONST_THREADED(a_pu8Dst, a_iGRegEx) \
4167 off = iemNativeEmitRefGregU8(pReNative, off, a_pu8Dst, a_iGRegEx, true /*fConst*/)
4168
4169/** Handles IEM_MC_REF_GREG_U8[_CONST]. */
4170DECL_INLINE_THROW(uint32_t)
4171iemNativeEmitRefGregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGRegEx, bool fConst)
4172{
4173 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRef);
4174 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4175 Assert(iGRegEx < 20);
4176
4177 if (iGRegEx < 16)
4178 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
4179 else
4180 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_GprHighByte, iGRegEx & 15);
4181
4182 /* If we've delayed writing back the register value, flush it now. */
4183 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGRegEx & 15);
4184
4185 /* If it's not a const reference we need to flush the shadow copy of the register now. */
4186 if (!fConst)
4187 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGRegEx & 15)));
4188
4189 return off;
4190}
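/* Note: iGRegEx values 16 thru 19 denote the high-byte registers (AH/CH/DH/BH),
   hence the kIemNativeGstRegRef_GprHighByte reference kind above, while the
   flushing still targets the underlying GPR (iGRegEx & 15). */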
4191
4192#define IEM_MC_REF_GREG_U16(a_pu16Dst, a_iGReg) \
4193 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, false /*fConst*/)
4194
4195#define IEM_MC_REF_GREG_U16_CONST(a_pu16Dst, a_iGReg) \
4196 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu16Dst, a_iGReg, true /*fConst*/)
4197
4198#define IEM_MC_REF_GREG_U32(a_pu32Dst, a_iGReg) \
4199 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, false /*fConst*/)
4200
4201#define IEM_MC_REF_GREG_U32_CONST(a_pu32Dst, a_iGReg) \
4202 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu32Dst, a_iGReg, true /*fConst*/)
4203
4204#define IEM_MC_REF_GREG_I32(a_pi32Dst, a_iGReg) \
4205 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, false /*fConst*/)
4206
4207#define IEM_MC_REF_GREG_I32_CONST(a_pi32Dst, a_iGReg) \
4208 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi32Dst, a_iGReg, true /*fConst*/)
4209
4210#define IEM_MC_REF_GREG_U64(a_pu64Dst, a_iGReg) \
4211 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, false /*fConst*/)
4212
4213#define IEM_MC_REF_GREG_U64_CONST(a_pu64Dst, a_iGReg) \
4214 off = iemNativeEmitRefGregUxx(pReNative, off, a_pu64Dst, a_iGReg, true /*fConst*/)
4215
4216#define IEM_MC_REF_GREG_I64(a_pi64Dst, a_iGReg) \
4217 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, false /*fConst*/)
4218
4219#define IEM_MC_REF_GREG_I64_CONST(a_pi64Dst, a_iGReg) \
4220 off = iemNativeEmitRefGregUxx(pReNative, off, a_pi64Dst, a_iGReg, true /*fConst*/)
4221
4222/** Handles IEM_MC_REF_GREG_Uxx[_CONST] and IEM_MC_REF_GREG_Ixx[_CONST]. */
4223DECL_INLINE_THROW(uint32_t)
4224iemNativeEmitRefGregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iGReg, bool fConst)
4225{
4226 Assert(iGReg < 16);
4227 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_Gpr, iGReg);
4228 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4229
4230 /* If we've delayed writing back the register value, flush it now. */
4231 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_Gpr, iGReg);
4232
4233 /* If it's not a const reference we need to flush the shadow copy of the register now. */
4234 if (!fConst)
4235 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTREG_GPR(iGReg)));
4236
4237 return off;
4238}
4239
4240
4241#undef IEM_MC_REF_EFLAGS /* should not be used. */
4242#define IEM_MC_REF_EFLAGS_EX(a_pEFlags, a_fEflInput, a_fEflOutput) \
4243 IEMNATIVE_EFLAGS_OPTIMIZATION_STATS(a_fEflInput, a_fEflOutput); \
4244 off = iemNativeEmitRefEFlags(pReNative, off, a_pEFlags, a_fEflInput, a_fEflOutput)
4245
4246/** Handles IEM_MC_REF_EFLAGS. */
4247DECL_INLINE_THROW(uint32_t)
4248iemNativeEmitRefEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint32_t fEflInput, uint32_t fEflOutput)
4249{
4250 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_EFlags, 0);
4251 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4252
4253#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
4254 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, fEflInput);
4255
4256 /* Updating the skipping according to the outputs is a little early, but
4257 we don't have any other hooks for references atm. */
4258 if ((fEflOutput & X86_EFL_STATUS_BITS) == X86_EFL_STATUS_BITS)
4259 off = iemNativeEmitStoreImmToVCpuU32(pReNative, off, 0, RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4260 else if (fEflOutput & X86_EFL_STATUS_BITS)
4261 off = iemNativeEmitAndImmIntoVCpuU32(pReNative, off, ~(fEflOutput & X86_EFL_STATUS_BITS),
4262 RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags));
4263#else
4264 RT_NOREF(fEflInput, fEflOutput);
4265#endif
4266
4267 /* If we've delayed writing back the register value, flush it now. */
4268 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_EFlags, 0);
4269
4270 /* If there is a shadow copy of guest EFLAGS, flush it now. */
4271 iemNativeRegFlushGuestShadows(pReNative, RT_BIT_64(kIemNativeGstReg_EFlags));
4272
4273 return off;
4274}
4275
4276
4277/** @todo Emit code for IEM_MC_ASSERT_EFLAGS in strict builds? Once we emit
4278 * different code from the threaded recompiler, maybe it would be helpful. For now
4279 * we assume the threaded recompiler catches any incorrect EFLAGS declarations. */
4280#define IEM_MC_ASSERT_EFLAGS(a_fEflInput, a_fEflOutput) ((void)0)
4281
4282
4283#define IEM_MC_REF_XREG_U128(a_pu128Dst, a_iXReg) \
4284 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, false /*fConst*/)
4285
4286#define IEM_MC_REF_XREG_U128_CONST(a_pu128Dst, a_iXReg) \
4287 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu128Dst, a_iXReg, true /*fConst*/)
4288
4289#define IEM_MC_REF_XREG_XMM_CONST(a_pXmmDst, a_iXReg) \
4290 off = iemNativeEmitRefXregXxx(pReNative, off, a_pXmmDst, a_iXReg, true /*fConst*/)
4291
4292#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4293/* Just being paranoid here. */
4294# ifndef _MSC_VER /* MSC can't compile this, doesn't like [0]. Added reduced version afterwards. */
4295AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au64[0]);
4296AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].au32[0]);
4297AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar64[0]);
4298AssertCompile2MemberOffsets(CPUMCTX, XState.x87.aXMM[0], XState.x87.aXMM[0].ar32[0]);
4299# endif
4300AssertCompileMemberOffset(X86XMMREG, au64, 0);
4301AssertCompileMemberOffset(X86XMMREG, au32, 0);
4302AssertCompileMemberOffset(X86XMMREG, ar64, 0);
4303AssertCompileMemberOffset(X86XMMREG, ar32, 0);
4304
4305# define IEM_MC_REF_XREG_U32_CONST(a_pu32Dst, a_iXReg) \
4306 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu32Dst, a_iXReg, true /*fConst*/)
4307# define IEM_MC_REF_XREG_U64_CONST(a_pu64Dst, a_iXReg) \
4308 off = iemNativeEmitRefXregXxx(pReNative, off, a_pu64Dst, a_iXReg, true /*fConst*/)
4309# define IEM_MC_REF_XREG_R32_CONST(a_pr32Dst, a_iXReg) \
4310 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr32Dst, a_iXReg, true /*fConst*/)
4311# define IEM_MC_REF_XREG_R64_CONST(a_pr64Dst, a_iXReg) \
4312 off = iemNativeEmitRefXregXxx(pReNative, off, a_pr64Dst, a_iXReg, true /*fConst*/)
4313#endif
4314
4315/** Handles IEM_MC_REF_XREG_xxx[_CONST]. */
4316DECL_INLINE_THROW(uint32_t)
4317iemNativeEmitRefXregXxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarRef, uint8_t iXReg, bool fConst)
4318{
4319 Assert(iXReg < 16);
4320 iemNativeVarSetKindToGstRegRef(pReNative, idxVarRef, kIemNativeGstRegRef_XReg, iXReg);
4321 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxVarRef, sizeof(void *));
4322
4323 /* If we've delayed writing back the register value, flush it now. */
4324 off = iemNativeRegFlushPendingSpecificWrite(pReNative, off, kIemNativeGstRegRef_XReg, iXReg);
4325
4326#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4327 /* If it's not a const reference we need to flush the shadow copy of the register now. */
4328 if (!fConst)
4329 iemNativeSimdRegFlushGuestShadows(pReNative, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(iXReg)));
4330#else
4331 RT_NOREF(fConst);
4332#endif
4333
4334 return off;
4335}
4336
4337
4338
4339/*********************************************************************************************************************************
4340* Effective Address Calculation *
4341*********************************************************************************************************************************/
4342#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_16(a_GCPtrEff, a_bRm, a_u16Disp) \
4343 off = iemNativeEmitCalcRmEffAddrThreadedAddr16(pReNative, off, a_bRm, a_u16Disp, a_GCPtrEff)
4344
4345/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_16.
4346 * @sa iemOpHlpCalcRmEffAddrThreadedAddr16 */
4347DECL_INLINE_THROW(uint32_t)
4348iemNativeEmitCalcRmEffAddrThreadedAddr16(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4349 uint8_t bRm, uint16_t u16Disp, uint8_t idxVarRet)
4350{
4351 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4352
4353 /*
4354 * Handle the disp16 form with no registers first.
4355 *
4356 * Convert to an immediate value, as that'll delay the register allocation
4357 * and assignment till the memory access / call / whatever and we can use
4358 * a more appropriate register (or none at all).
4359 */
4360 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
4361 {
4362 iemNativeVarSetKindToConst(pReNative, idxVarRet, u16Disp);
4363 return off;
4364 }
4365
4366 /* Determine the displacement. */
4367 uint16_t u16EffAddr;
4368 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4369 {
4370 case 0: u16EffAddr = 0; break;
4371 case 1: u16EffAddr = (int16_t)(int8_t)u16Disp; break;
4372 case 2: u16EffAddr = u16Disp; break;
4373 default: AssertFailedStmt(u16EffAddr = 0);
4374 }
4375
4376 /* Determine the registers involved. */
4377 uint8_t idxGstRegBase;
4378 uint8_t idxGstRegIndex;
4379 switch (bRm & X86_MODRM_RM_MASK)
4380 {
4381 case 0:
4382 idxGstRegBase = X86_GREG_xBX;
4383 idxGstRegIndex = X86_GREG_xSI;
4384 break;
4385 case 1:
4386 idxGstRegBase = X86_GREG_xBX;
4387 idxGstRegIndex = X86_GREG_xDI;
4388 break;
4389 case 2:
4390 idxGstRegBase = X86_GREG_xBP;
4391 idxGstRegIndex = X86_GREG_xSI;
4392 break;
4393 case 3:
4394 idxGstRegBase = X86_GREG_xBP;
4395 idxGstRegIndex = X86_GREG_xDI;
4396 break;
4397 case 4:
4398 idxGstRegBase = X86_GREG_xSI;
4399 idxGstRegIndex = UINT8_MAX;
4400 break;
4401 case 5:
4402 idxGstRegBase = X86_GREG_xDI;
4403 idxGstRegIndex = UINT8_MAX;
4404 break;
4405 case 6:
4406 idxGstRegBase = X86_GREG_xBP;
4407 idxGstRegIndex = UINT8_MAX;
4408 break;
4409#ifdef _MSC_VER /* lazy compiler, thinks idxGstRegBase and idxGstRegIndex may otherwise be used uninitialized. */
4410 default:
4411#endif
4412 case 7:
4413 idxGstRegBase = X86_GREG_xBX;
4414 idxGstRegIndex = UINT8_MAX;
4415 break;
4416 }
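    /* Example: a ModRM byte with mod=01 and rm=010 selects BP+SI above together
       with a sign-extended disp8, so the code below computes
       (uint16_t)(BP + SI + disp8). */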
4417
4418 /*
4419 * Now emit code that calculates: idxRegRet = (uint16_t)(u16EffAddr + idxGstRegBase [+ idxGstRegIndex])
4420 */
4421 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4422 uint8_t const idxRegBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4423 kIemNativeGstRegUse_ReadOnly);
4424 uint8_t const idxRegIndex = idxGstRegIndex != UINT8_MAX
4425 ? iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4426 kIemNativeGstRegUse_ReadOnly)
4427 : UINT8_MAX;
4428#ifdef RT_ARCH_AMD64
4429 if (idxRegIndex == UINT8_MAX)
4430 {
4431 if (u16EffAddr == 0)
4432 {
4433 /* movzx ret, base */
4434 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxRegRet, idxRegBase);
4435 }
4436 else
4437 {
4438 /* lea ret32, [base64 + disp32] */
4439 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4440 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4441 if (idxRegRet >= 8 || idxRegBase >= 8)
4442 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
4443 pbCodeBuf[off++] = 0x8d;
4444 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4445 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, idxRegBase & 7);
4446 else
4447 {
4448 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM4, idxRegRet & 7, 4 /*SIB*/);
4449 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4450 }
4451 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
4452 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
4453 pbCodeBuf[off++] = 0;
4454 pbCodeBuf[off++] = 0;
4455 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4456
4457 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
4458 }
4459 }
4460 else
4461 {
4462 /* lea ret32, [index64 + base64 (+ disp32)] */
4463 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4464 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4465 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4466 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4467 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4468 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4469 pbCodeBuf[off++] = 0x8d;
4470 uint8_t const bMod = u16EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0 : X86_MOD_MEM4;
4471 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4472 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, 0);
4473 if (bMod == X86_MOD_MEM4)
4474 {
4475 pbCodeBuf[off++] = RT_BYTE1(u16EffAddr);
4476 pbCodeBuf[off++] = RT_BYTE2(u16EffAddr);
4477 pbCodeBuf[off++] = 0;
4478 pbCodeBuf[off++] = 0;
4479 }
4480 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4481 off = iemNativeEmitClear16UpGpr(pReNative, off, idxRegRet);
4482 }
4483
4484#elif defined(RT_ARCH_ARM64)
4485 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
4486 if (u16EffAddr == 0)
4487 {
4488 if (idxRegIndex == UINT8_MAX)
4489 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegBase);
4490 else
4491 {
4492 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex, false /*f64Bit*/);
4493 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
4494 }
4495 }
4496 else
4497 {
4498 if ((int16_t)u16EffAddr < 4096 && (int16_t)u16EffAddr >= 0)
4499 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u16EffAddr, false /*f64Bit*/);
4500 else if ((int16_t)u16EffAddr > -4096 && (int16_t)u16EffAddr < 0)
4501 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
4502 (uint16_t)-(int16_t)u16EffAddr, false /*f64Bit*/);
4503 else
4504 {
4505 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegRet, u16EffAddr);
4506 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
4507 }
4508 if (idxRegIndex != UINT8_MAX)
4509 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex, false /*f64Bit*/);
4510 pu32CodeBuf[off++] = Armv8A64MkInstrUxth(idxRegRet, idxRegRet);
4511 }
4512
4513#else
4514# error "port me"
4515#endif
4516
4517 if (idxRegIndex != UINT8_MAX)
4518 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4519 iemNativeRegFreeTmp(pReNative, idxRegBase);
4520 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4521 return off;
4522}
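/* Note: both code paths above truncate the result to 16 bits (movzx/uxth), so
   the effective address wraps at 64KiB just like real 16-bit address
   arithmetic. */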
4523
4524
4525#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_32(a_GCPtrEff, a_bRm, a_uSibAndRspOffset, a_u32Disp) \
4526 off = iemNativeEmitCalcRmEffAddrThreadedAddr32(pReNative, off, a_bRm, a_uSibAndRspOffset, a_u32Disp, a_GCPtrEff)
4527
4528/** Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_32.
4529 * @see iemOpHlpCalcRmEffAddrThreadedAddr32 */
4530DECL_INLINE_THROW(uint32_t)
4531iemNativeEmitCalcRmEffAddrThreadedAddr32(PIEMRECOMPILERSTATE pReNative, uint32_t off,
4532 uint8_t bRm, uint32_t uSibAndRspOffset, uint32_t u32Disp, uint8_t idxVarRet)
4533{
4534 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4535
4536 /*
4537 * Handle the disp32 form with no registers first.
4538 *
4539 * Convert to an immediate value, as that'll delay the register allocation
4540 * and assignment till the memory access / call / whatever and we can use
4541 * a more appropriate register (or none at all).
4542 */
4543 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
4544 {
4545 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32Disp);
4546 return off;
4547 }
4548
4549 /* Calculate the fixed displacement (more on SIB.B=4 and SIB.B=5 further down). */
4550 uint32_t u32EffAddr = 0;
4551 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4552 {
4553 case 0: break;
4554 case 1: u32EffAddr = (int8_t)u32Disp; break;
4555 case 2: u32EffAddr = u32Disp; break;
4556 default: AssertFailed();
4557 }
4558
4559 /* Get the register (or SIB) value. */
4560 uint8_t idxGstRegBase = UINT8_MAX;
4561 uint8_t idxGstRegIndex = UINT8_MAX;
4562 uint8_t cShiftIndex = 0;
4563 switch (bRm & X86_MODRM_RM_MASK)
4564 {
4565 case 0: idxGstRegBase = X86_GREG_xAX; break;
4566 case 1: idxGstRegBase = X86_GREG_xCX; break;
4567 case 2: idxGstRegBase = X86_GREG_xDX; break;
4568 case 3: idxGstRegBase = X86_GREG_xBX; break;
4569 case 4: /* SIB */
4570 {
4571 /* Index with scaling. */
4572 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
4573 switch ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
4574 {
4575 case 0: idxGstRegIndex = X86_GREG_xAX; break;
4576 case 1: idxGstRegIndex = X86_GREG_xCX; break;
4577 case 2: idxGstRegIndex = X86_GREG_xDX; break;
4578 case 3: idxGstRegIndex = X86_GREG_xBX; break;
4579 case 4: cShiftIndex = 0; /*no index*/ break;
4580 case 5: idxGstRegIndex = X86_GREG_xBP; break;
4581 case 6: idxGstRegIndex = X86_GREG_xSI; break;
4582 case 7: idxGstRegIndex = X86_GREG_xDI; break;
4583 }
4584
4585 /* base */
4586 switch (uSibAndRspOffset & X86_SIB_BASE_MASK)
4587 {
4588 case 0: idxGstRegBase = X86_GREG_xAX; break;
4589 case 1: idxGstRegBase = X86_GREG_xCX; break;
4590 case 2: idxGstRegBase = X86_GREG_xDX; break;
4591 case 3: idxGstRegBase = X86_GREG_xBX; break;
4592 case 4:
4593 idxGstRegBase = X86_GREG_xSP;
4594 u32EffAddr += uSibAndRspOffset >> 8;
4595 break;
4596 case 5:
4597 if ((bRm & X86_MODRM_MOD_MASK) != 0)
4598 idxGstRegBase = X86_GREG_xBP;
4599 else
4600 {
4601 Assert(u32EffAddr == 0);
4602 u32EffAddr = u32Disp;
4603 }
4604 break;
4605 case 6: idxGstRegBase = X86_GREG_xSI; break;
4606 case 7: idxGstRegBase = X86_GREG_xDI; break;
4607 }
4608 break;
4609 }
4610 case 5: idxGstRegBase = X86_GREG_xBP; break;
4611 case 6: idxGstRegBase = X86_GREG_xSI; break;
4612 case 7: idxGstRegBase = X86_GREG_xDI; break;
4613 }
4614
4615 /*
4616 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
4617 * the start of the function.
4618 */
4619 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
4620 {
4621 iemNativeVarSetKindToConst(pReNative, idxVarRet, u32EffAddr);
4622 return off;
4623 }
4624
4625 /*
4626 * Now emit code that calculates: idxRegRet = (uint32_t)(u32EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4627 */
4628 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4629 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
4630 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4631 kIemNativeGstRegUse_ReadOnly);
4632 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
4633 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4634 kIemNativeGstRegUse_ReadOnly);
4635
4636 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
4637 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
4638 {
4639 idxRegBase = idxRegIndex;
4640 idxRegIndex = UINT8_MAX;
4641 }
4642
4643#ifdef RT_ARCH_AMD64
4644 if (idxRegIndex == UINT8_MAX)
4645 {
4646 if (u32EffAddr == 0)
4647 {
4648 /* mov ret, base */
4649 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4650 }
4651 else
4652 {
4653 /* lea ret32, [base64 + disp32] */
4654 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4655 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4656 if (idxRegRet >= 8 || idxRegBase >= 8)
4657 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0) | (idxRegBase >= 8 ? X86_OP_REX_B : 0);
4658 pbCodeBuf[off++] = 0x8d;
4659 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4660 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4661 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
4662 else
4663 {
4664 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4665 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4666 }
4667 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4668 if (bMod == X86_MOD_MEM4)
4669 {
4670 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4671 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4672 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4673 }
4674 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4675 }
4676 }
4677 else
4678 {
4679 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
4680 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4681 if (idxRegBase == UINT8_MAX)
4682 {
4683 /* lea ret32, [(index64 << cShiftIndex) + disp32] */
4684 if (idxRegRet >= 8 || idxRegIndex >= 8)
4685 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4686 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4687 pbCodeBuf[off++] = 0x8d;
4688 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
4689 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
4690 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4691 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4692 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4693 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4694 }
4695 else
4696 {
4697 /* lea ret32, [(index64 << cShiftIndex) + base64 (+ disp32)] */
4698 if (idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
4699 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4700 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4701 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0);
4702 pbCodeBuf[off++] = 0x8d;
4703 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
4704 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4705 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4706 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
4707 if (bMod != X86_MOD_MEM0)
4708 {
4709 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
4710 if (bMod == X86_MOD_MEM4)
4711 {
4712 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
4713 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
4714 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
4715 }
4716 }
4717 }
4718 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
4719 }
4720
4721#elif defined(RT_ARCH_ARM64)
4722 if (u32EffAddr == 0)
4723 {
4724 if (idxRegIndex == UINT8_MAX)
4725 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4726 else if (idxRegBase == UINT8_MAX)
4727 {
4728 if (cShiftIndex == 0)
4729 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegIndex);
4730 else
4731 {
4732 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4733 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, false /*f64Bit*/);
4734 }
4735 }
4736 else
4737 {
4738 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4739 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
4740 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
4741 }
4742 }
4743 else
4744 {
4745 if ((int32_t)u32EffAddr < 4096 && (int32_t)u32EffAddr >= 0 && idxRegBase != UINT8_MAX)
4746 {
4747 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4748 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, u32EffAddr, false /*f64Bit*/);
4749 }
4750 else if ((int32_t)u32EffAddr > -4096 && (int32_t)u32EffAddr < 0 && idxRegBase != UINT8_MAX)
4751 {
4752 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4753 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase,
4754 (uint32_t)-(int32_t)u32EffAddr, false /*f64Bit*/);
4755 }
4756 else
4757 {
4758 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, u32EffAddr);
4759 if (idxRegBase != UINT8_MAX)
4760 {
4761 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4762 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, false /*f64Bit*/);
4763 }
4764 }
4765 if (idxRegIndex != UINT8_MAX)
4766 {
4767 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
4768 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
4769 false /*f64Bit*/, false /*fSetFlags*/, cShiftIndex);
4770 }
4771 }
4772
4773#else
4774# error "port me"
4775#endif
4776
4777 if (idxRegIndex != UINT8_MAX)
4778 iemNativeRegFreeTmp(pReNative, idxRegIndex);
4779 if (idxRegBase != UINT8_MAX)
4780 iemNativeRegFreeTmp(pReNative, idxRegBase);
4781 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4782 return off;
4783}
4784
4785
4786#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4787 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4788 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
4789
4790#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_FSGS(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4791 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4792 a_u32Disp, a_cbImm, a_GCPtrEff, true /*f64Bit*/)
4793
4794#define IEM_MC_CALC_RM_EFF_ADDR_THREADED_64_ADDR32(a_GCPtrEff, a_bRmEx, a_uSibAndRspOffset, a_u32Disp, a_cbImm) \
4795 off = iemNativeEmitCalcRmEffAddrThreadedAddr64(pReNative, off, a_bRmEx, a_uSibAndRspOffset, \
4796 a_u32Disp, a_cbImm, a_GCPtrEff, false /*f64Bit*/)
4797
4798/**
4799 * Emit code for IEM_MC_CALC_RM_EFF_ADDR_THREADED_64*.
4800 *
4801 * @returns New off.
4802 * @param pReNative The native recompile state.
4803 * @param off The current code buffer offset.
4804 * @param bRmEx The ModRM byte but with bit 3 set to REX.B and
4805 * bit 4 to REX.X. The two bits are part of the
4806 * REG sub-field, which isn't needed in this
4807 * function.
4808 * @param uSibAndRspOffset Two parts:
4809 * - The first 8 bits make up the SIB byte.
4810 * - The next 8 bits are the fixed RSP/ESP offset
4811 * in case of a pop [xSP].
4812 * @param u32Disp The displacement byte/word/dword, if any.
4813 * @param cbInstr The size of the fully decoded instruction. Used
4814 * for RIP relative addressing.
4815 * @param idxVarRet The result variable number.
4816 * @param f64Bit Whether to use a 64-bit or 32-bit address size
4817 * when calculating the address.
4818 *
4819 * @see iemOpHlpCalcRmEffAddrThreadedAddr64
4820 */
4821DECL_INLINE_THROW(uint32_t)
4822iemNativeEmitCalcRmEffAddrThreadedAddr64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t bRmEx, uint32_t uSibAndRspOffset,
4823 uint32_t u32Disp, uint8_t cbInstr, uint8_t idxVarRet, bool f64Bit)
4824{
4825 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarRet);
4826
4827 /*
4828 * Special case the rip + disp32 form first.
4829 */
4830 if ((bRmEx & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
4831 {
4832#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
4833 /* Need to take the current PC offset into account for the displacement; no need to flush here
4834 * as the PC is only accessed read-only and no branching or helper calls are involved. */
4835 u32Disp += pReNative->Core.offPc;
4836#endif
4837
4838 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4839 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
4840 kIemNativeGstRegUse_ReadOnly);
4841#ifdef RT_ARCH_AMD64
4842 if (f64Bit)
4843 {
4844 int64_t const offFinalDisp = (int64_t)(int32_t)u32Disp + cbInstr;
4845 if ((int32_t)offFinalDisp == offFinalDisp)
4846 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)offFinalDisp);
4847 else
4848 {
4849 off = iemNativeEmitLoadGprFromGprWithAddend(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp);
4850 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, cbInstr);
4851 }
4852 }
4853 else
4854 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc, (int32_t)u32Disp + cbInstr);
4855
4856#elif defined(RT_ARCH_ARM64)
4857 if (f64Bit)
4858 off = iemNativeEmitLoadGprFromGprWithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
4859 (int64_t)(int32_t)u32Disp + cbInstr);
4860 else
4861 off = iemNativeEmitLoadGprFromGpr32WithAddendMaybeZero(pReNative, off, idxRegRet, idxRegPc,
4862 (int32_t)u32Disp + cbInstr);
4863
4864#else
4865# error "Port me!"
4866#endif
4867 iemNativeRegFreeTmp(pReNative, idxRegPc);
4868 iemNativeVarRegisterRelease(pReNative, idxVarRet);
4869 return off;
4870 }
4871
4872 /* Calculate the fixed displacement (more on SIB.B=4 and SIB.B=5 further down). */
4873 int64_t i64EffAddr = 0;
4874 switch ((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
4875 {
4876 case 0: break;
4877 case 1: i64EffAddr = (int8_t)u32Disp; break;
4878 case 2: i64EffAddr = (int32_t)u32Disp; break;
4879 default: AssertFailed();
4880 }
4881
4882 /* Get the register (or SIB) value. */
4883 uint8_t idxGstRegBase = UINT8_MAX;
4884 uint8_t idxGstRegIndex = UINT8_MAX;
4885 uint8_t cShiftIndex = 0;
4886 if ((bRmEx & X86_MODRM_RM_MASK) != 4)
4887 idxGstRegBase = bRmEx & (X86_MODRM_RM_MASK | 0x8); /* bRmEx[bit 3] = REX.B */
4888 else /* SIB: */
4889 {
4890 /* Index with scaling. */
4891 cShiftIndex = (uSibAndRspOffset >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
4892 idxGstRegIndex = ((uSibAndRspOffset >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
4893 | ((bRmEx & 0x10) >> 1); /* bRmEx[bit 4] = REX.X */
4894 if (idxGstRegIndex == 4)
4895 {
4896 /* no index */
4897 cShiftIndex = 0;
4898 idxGstRegIndex = UINT8_MAX;
4899 }
4900
4901 /* base */
4902 idxGstRegBase = (uSibAndRspOffset & X86_SIB_BASE_MASK) | (bRmEx & 0x8); /* bRmEx[bit 3] = REX.B */
4903 if (idxGstRegBase == 4)
4904 {
4905 /* pop [rsp] hack */
4906 i64EffAddr += uSibAndRspOffset >> 8; /* (this is why i64EffAddr must be 64-bit) */
4907 }
4908 else if ( (idxGstRegBase & X86_SIB_BASE_MASK) == 5
4909 && (bRmEx & X86_MODRM_MOD_MASK) == 0)
4910 {
4911 /* mod=0 and base=5 -> disp32, no base reg. */
4912 Assert(i64EffAddr == 0);
4913 i64EffAddr = (int32_t)u32Disp;
4914 idxGstRegBase = UINT8_MAX;
4915 }
4916 }
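    /* Note: REX.B/REX.X are already folded into bRmEx bits 3 and 4, so the base
       and index values above run 0..15 (RAX..R15). The disp32 special case only
       checks the low three SIB base bits, covering both the RBP and R13
       encodings when mod=0, as the architecture prescribes. */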
4917
4918 /*
4919 * If no registers are involved (SIB.B=5, SIB.X=4) repeat what we did at
4920 * the start of the function.
4921 */
4922 if (idxGstRegBase == UINT8_MAX && idxGstRegIndex == UINT8_MAX)
4923 {
4924 if (f64Bit)
4925 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint64_t)i64EffAddr);
4926 else
4927 iemNativeVarSetKindToConst(pReNative, idxVarRet, (uint32_t)i64EffAddr);
4928 return off;
4929 }
4930
4931 /*
4932 * Now emit code that calculates:
4933 * idxRegRet = (uint64_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4934 * or if !f64Bit:
4935 * idxRegRet = (uint32_t)(i64EffAddr [+ idxGstRegBase] [+ (idxGstRegIndex << cShiftIndex)])
4936 */
4937 uint8_t const idxRegRet = iemNativeVarRegisterAcquire(pReNative, idxVarRet, &off);
4938 uint8_t idxRegBase = idxGstRegBase == UINT8_MAX ? UINT8_MAX
4939 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegBase),
4940 kIemNativeGstRegUse_ReadOnly);
4941 uint8_t idxRegIndex = idxGstRegIndex == UINT8_MAX ? UINT8_MAX
4942 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGstRegIndex),
4943 kIemNativeGstRegUse_ReadOnly);
4944
4945 /* If base is not given and there is no shifting, swap the registers to avoid code duplication. */
4946 if (idxRegBase == UINT8_MAX && cShiftIndex == 0)
4947 {
4948 idxRegBase = idxRegIndex;
4949 idxRegIndex = UINT8_MAX;
4950 }
4951
4952#ifdef RT_ARCH_AMD64
4953 uint8_t bFinalAdj;
4954 if (!f64Bit || (int32_t)i64EffAddr == i64EffAddr)
4955 bFinalAdj = 0; /* likely */
4956 else
4957 {
4958 /* pop [rsp] with a problematic disp32 value. Split out the
4959 RSP offset and add it separately afterwards (bFinalAdj). */
4960 /** @todo testcase: pop [rsp] with problematic disp32 (mod4). */
4961 Assert(idxGstRegBase == X86_GREG_xSP);
4962 Assert(((bRmEx >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK) == X86_MOD_MEM4);
4963 bFinalAdj = (uint8_t)(uSibAndRspOffset >> 8);
4964 Assert(bFinalAdj != 0);
4965 i64EffAddr -= bFinalAdj;
4966 Assert((int32_t)i64EffAddr == i64EffAddr);
4967 }
4968 uint32_t const u32EffAddr = (uint32_t)i64EffAddr;
4969//pReNative->pInstrBuf[off++] = 0xcc;
4970
4971 if (idxRegIndex == UINT8_MAX)
4972 {
4973 if (u32EffAddr == 0)
4974 {
4975 /* mov ret, base */
4976 if (f64Bit)
4977 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRet, idxRegBase);
4978 else
4979 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRet, idxRegBase);
4980 }
4981 else
4982 {
4983 /* lea ret, [base + disp32] */
4984 Assert(idxRegBase != X86_GREG_xSP /*SIB*/);
4985 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
4986 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8)
4987 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
4988 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
4989 | (f64Bit ? X86_OP_REX_W : 0);
4990 pbCodeBuf[off++] = 0x8d;
4991 uint8_t const bMod = (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
4992 if (idxRegBase != X86_GREG_x12 /*SIB*/)
4993 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, idxRegBase & 7);
4994 else
4995 {
4996 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
4997 pbCodeBuf[off++] = X86_SIB_MAKE(X86_GREG_x12 & 7, 4 /*no index*/, 0);
4998 }
4999 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5000 if (bMod == X86_MOD_MEM4)
5001 {
5002 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5003 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5004 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5005 }
5006 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5007 }
5008 }
5009 else
5010 {
5011 Assert(idxRegIndex != X86_GREG_xSP /*no-index*/);
5012 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5013 if (idxRegBase == UINT8_MAX)
5014 {
5015 /* lea ret, [(index64 << cShiftIndex) + disp32] */
5016 if (f64Bit || idxRegRet >= 8 || idxRegIndex >= 8)
5017 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5018 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
5019 | (f64Bit ? X86_OP_REX_W : 0);
5020 pbCodeBuf[off++] = 0x8d;
5021 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM0, idxRegRet & 7, 4 /*SIB*/);
5022 pbCodeBuf[off++] = X86_SIB_MAKE(5 /*nobase/bp*/, idxRegIndex & 7, cShiftIndex);
5023 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5024 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5025 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5026 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5027 }
5028 else
5029 {
5030 /* lea ret, [(index64 << cShiftIndex) + base64 (+ disp32)] */
5031 if (f64Bit || idxRegRet >= 8 || idxRegBase >= 8 || idxRegIndex >= 8)
5032 pbCodeBuf[off++] = (idxRegRet >= 8 ? X86_OP_REX_R : 0)
5033 | (idxRegBase >= 8 ? X86_OP_REX_B : 0)
5034 | (idxRegIndex >= 8 ? X86_OP_REX_X : 0)
5035 | (f64Bit ? X86_OP_REX_W : 0);
5036 pbCodeBuf[off++] = 0x8d;
5037 uint8_t const bMod = u32EffAddr == 0 && (idxRegBase & 7) != X86_GREG_xBP ? X86_MOD_MEM0
5038 : (int8_t)u32EffAddr == (int32_t)u32EffAddr ? X86_MOD_MEM1 : X86_MOD_MEM4;
5039 pbCodeBuf[off++] = X86_MODRM_MAKE(bMod, idxRegRet & 7, 4 /*SIB*/);
5040 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegBase & 7, idxRegIndex & 7, cShiftIndex);
5041 if (bMod != X86_MOD_MEM0)
5042 {
5043 pbCodeBuf[off++] = RT_BYTE1(u32EffAddr);
5044 if (bMod == X86_MOD_MEM4)
5045 {
5046 pbCodeBuf[off++] = RT_BYTE2(u32EffAddr);
5047 pbCodeBuf[off++] = RT_BYTE3(u32EffAddr);
5048 pbCodeBuf[off++] = RT_BYTE4(u32EffAddr);
5049 }
5050 }
5051 }
5052 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5053 }
5054
5055 if (!bFinalAdj)
5056 { /* likely */ }
5057 else
5058 {
5059 Assert(f64Bit);
5060 off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRet, bFinalAdj);
5061 }
5062
5063#elif defined(RT_ARCH_ARM64)
5064 if (i64EffAddr == 0)
5065 {
5066 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5067 if (idxRegIndex == UINT8_MAX)
5068 pu32CodeBuf[off++] = Armv8A64MkInstrMov(idxRegRet, idxRegBase, f64Bit);
5069 else if (idxRegBase != UINT8_MAX)
5070 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegBase, idxRegIndex,
5071 f64Bit, false /*fSetFlags*/, cShiftIndex);
5072 else
5073 {
5074 Assert(cShiftIndex != 0); /* See base = index swap above when shift is 0 and we have no base reg. */
5075 pu32CodeBuf[off++] = Armv8A64MkInstrLslImm(idxRegRet, idxRegIndex, cShiftIndex, f64Bit);
5076 }
5077 }
5078 else
5079 {
5080 if (f64Bit)
5081 { /* likely */ }
5082 else
5083 i64EffAddr = (int32_t)i64EffAddr;
5084
5085 if (i64EffAddr < 4096 && i64EffAddr >= 0 && idxRegBase != UINT8_MAX)
5086 {
5087 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5088 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegRet, idxRegBase, i64EffAddr, f64Bit);
5089 }
5090 else if (i64EffAddr > -4096 && i64EffAddr < 0 && idxRegBase != UINT8_MAX)
5091 {
5092 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5093 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, idxRegRet, idxRegBase, (uint32_t)-i64EffAddr, f64Bit);
5094 }
5095 else
5096 {
5097 if (f64Bit)
5098 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, i64EffAddr);
5099 else
5100 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegRet, (uint32_t)i64EffAddr);
5101 if (idxRegBase != UINT8_MAX)
5102 {
5103 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5104 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegBase, f64Bit);
5105 }
5106 }
5107 if (idxRegIndex != UINT8_MAX)
5108 {
5109 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
5110 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(false /*fSub*/, idxRegRet, idxRegRet, idxRegIndex,
5111 f64Bit, false /*fSetFlags*/, cShiftIndex);
5112 }
5113 }
5114
5115#else
5116# error "port me"
5117#endif
5118
5119 if (idxRegIndex != UINT8_MAX)
5120 iemNativeRegFreeTmp(pReNative, idxRegIndex);
5121 if (idxRegBase != UINT8_MAX)
5122 iemNativeRegFreeTmp(pReNative, idxRegBase);
5123 iemNativeVarRegisterRelease(pReNative, idxVarRet);
5124 return off;
5125}
5126
5127
5128/*********************************************************************************************************************************
5129* Memory fetches and stores common *
5130*********************************************************************************************************************************/
5131
5132typedef enum IEMNATIVEMITMEMOP
5133{
5134 kIemNativeEmitMemOp_Store = 0,
5135 kIemNativeEmitMemOp_Fetch,
5136 kIemNativeEmitMemOp_Fetch_Zx_U16,
5137 kIemNativeEmitMemOp_Fetch_Zx_U32,
5138 kIemNativeEmitMemOp_Fetch_Zx_U64,
5139 kIemNativeEmitMemOp_Fetch_Sx_U16,
5140 kIemNativeEmitMemOp_Fetch_Sx_U32,
5141 kIemNativeEmitMemOp_Fetch_Sx_U64
5142} IEMNATIVEMITMEMOP;
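/* The Fetch_Zx_* operations zero-extend the loaded value to the named width and
   the Fetch_Sx_* ones sign-extend it, while plain Fetch loads at the natural
   width and Store writes the value out. */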
5143
5144/** Emits code for IEM_MC_FETCH_MEM_U8/16/32/64 and IEM_MC_STORE_MEM_U8/16/32/64,
5145 * and IEM_MC_FETCH_MEM_FLAT_U8/16/32/64 and IEM_MC_STORE_MEM_FLAT_U8/16/32/64
5146 * (with iSegReg = UINT8_MAX). */
5147DECL_INLINE_THROW(uint32_t)
5148iemNativeEmitMemFetchStoreDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue, uint8_t iSegReg,
5149 uint8_t idxVarGCPtrMem, uint8_t cbMem, uint8_t fAlignMask, IEMNATIVEMITMEMOP enmOp,
5150 uintptr_t pfnFunction, uint8_t idxInstr, uint8_t offDisp = 0)
5151{
5152 /*
5153 * Assert sanity.
5154 */
5155 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
5156 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
5157 Assert( enmOp != kIemNativeEmitMemOp_Store
5158 || pVarValue->enmKind == kIemNativeVarKind_Immediate
5159 || pVarValue->enmKind == kIemNativeVarKind_Stack);
5160 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
5161 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
5162 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
5163 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
5164 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
5165 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
5166#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5167 Assert( cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8
5168 || cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U));
5169#else
5170 Assert(cbMem == 1 || cbMem == 2 || cbMem == 4 || cbMem == 8);
5171#endif
5172 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
5173#ifdef VBOX_STRICT
5174 if (iSegReg == UINT8_MAX)
5175 {
5176 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
5177 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
5178 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
5179 switch (cbMem)
5180 {
5181 case 1:
5182 Assert( pfnFunction
5183 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU8
5184 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5185 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5186 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5187 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8
5188 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16
5189 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32
5190 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64
5191 : UINT64_C(0xc000b000a0009000) ));
5192 break;
5193 case 2:
5194 Assert( pfnFunction
5195 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU16
5196 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
5197 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
5198 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16
5199 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32
5200 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64
5201 : UINT64_C(0xc000b000a0009000) ));
5202 break;
5203 case 4:
5204 Assert( pfnFunction
5205 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU32
5206 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
5207 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32
5208 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64
5209 : UINT64_C(0xc000b000a0009000) ));
5210 break;
5211 case 8:
5212 Assert( pfnFunction
5213 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemFlatStoreDataU64
5214 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFlatFetchDataU64
5215 : UINT64_C(0xc000b000a0009000) ));
5216 break;
5217#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5218 case sizeof(RTUINT128U):
5219 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5220 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128
5221 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse
5222 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc))
5223 || ( enmOp == kIemNativeEmitMemOp_Store
5224 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse
5225 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc)));
5226 break;
5227 case sizeof(RTUINT256U):
5228 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5229 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc
5230 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx))
5231 || ( enmOp == kIemNativeEmitMemOp_Store
5232 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc
5233 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx)));
5234 break;
5235#endif
5236 }
5237 }
5238 else
5239 {
5240 Assert(iSegReg < 6);
5241 switch (cbMem)
5242 {
5243 case 1:
5244 Assert( pfnFunction
5245 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU8
5246 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU8
5247 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8
5248 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8
5249 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8
5250 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U16 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16
5251 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32
5252 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64
5253 : UINT64_C(0xc000b000a0009000) ));
5254 break;
5255 case 2:
5256 Assert( pfnFunction
5257 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU16
5258 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU16
5259 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16
5260 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16
5261 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U32 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32
5262 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64
5263 : UINT64_C(0xc000b000a0009000) ));
5264 break;
5265 case 4:
5266 Assert( pfnFunction
5267 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU32
5268 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU32
5269 : enmOp == kIemNativeEmitMemOp_Fetch_Zx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32
5270 : enmOp == kIemNativeEmitMemOp_Fetch_Sx_U64 ? (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64
5271 : UINT64_C(0xc000b000a0009000) ));
5272 break;
5273 case 8:
5274 Assert( pfnFunction
5275 == ( enmOp == kIemNativeEmitMemOp_Store ? (uintptr_t)iemNativeHlpMemStoreDataU64
5276 : enmOp == kIemNativeEmitMemOp_Fetch ? (uintptr_t)iemNativeHlpMemFetchDataU64
5277 : UINT64_C(0xc000b000a0009000) ));
5278 break;
5279#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5280 case sizeof(RTUINT128U):
5281 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5282 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128
5283 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse
5284 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU128NoAc))
5285 || ( enmOp == kIemNativeEmitMemOp_Store
5286 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse
5287 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU128NoAc)));
5288 break;
5289 case sizeof(RTUINT256U):
5290 Assert( ( enmOp == kIemNativeEmitMemOp_Fetch
5291 && ( pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256NoAc
5292 || pfnFunction == (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx))
5293 || ( enmOp == kIemNativeEmitMemOp_Store
5294 && ( pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256NoAc
5295 || pfnFunction == (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx)));
5296 break;
5297#endif
5298 }
5299 }
5300#endif
5301
5302#ifdef VBOX_STRICT
5303 /*
5304 * Check that the fExec flags we've got make sense.
5305 */
5306 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
5307#endif
5308
5309 /*
5310 * To keep things simple we have to commit any pending writes first as we
5311 * may end up making calls.
5312 */
5313 /** @todo we could postpone this till we make the call and reload the
5314 * registers after returning from the call. Not sure if that's sensible or
5315 * not, though. */
5316#ifndef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5317 off = iemNativeRegFlushPendingWrites(pReNative, off);
5318#else
5319 /* The program counter is treated differently for now. */
5320 off = iemNativeRegFlushPendingWrites(pReNative, off, RT_BIT_64(kIemNativeGstReg_Pc));
5321#endif
5322
5323#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5324 /*
5325 * Move/spill/flush stuff out of call-volatile registers.
5326 * This is the easy way out. We could contain this to the tlb-miss branch
5327 * by saving and restoring active stuff here.
5328 */
5329 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
5330#endif
5331
5332 /*
5333 * Define labels and allocate the result register (trying for the return
5334 * register if we can).
5335 */
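    /* Note: getting the fetch result into IEMNATIVE_CALL_RET_GREG means that on the TlbMiss
       path the helper's return value already sits in the variable's register, so the copy
       after the call further down becomes unnecessary. */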
5336 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
5337#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5338 uint8_t idxRegValueFetch = UINT8_MAX;
5339
5340 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5341 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5342 : iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off);
5343 else
5344 idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5345 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
5346 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
5347 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
5348#else
5349 uint8_t const idxRegValueFetch = enmOp == kIemNativeEmitMemOp_Store ? UINT8_MAX
5350 : !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
5351 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarValue, IEMNATIVE_CALL_RET_GREG, &off)
5352 : iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off);
5353#endif
5354 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem, offDisp);
5355
5356#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5357 uint8_t idxRegValueStore = UINT8_MAX;
5358
5359 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5360 idxRegValueStore = !TlbState.fSkip
5361 && enmOp == kIemNativeEmitMemOp_Store
5362 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5363 ? iemNativeVarSimdRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5364 : UINT8_MAX;
5365 else
5366 idxRegValueStore = !TlbState.fSkip
5367 && enmOp == kIemNativeEmitMemOp_Store
5368 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5369 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5370 : UINT8_MAX;
5371
5372#else
5373 uint8_t const idxRegValueStore = !TlbState.fSkip
5374 && enmOp == kIemNativeEmitMemOp_Store
5375 && pVarValue->enmKind != kIemNativeVarKind_Immediate
5376 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/)
5377 : UINT8_MAX;
5378#endif
5379 uint32_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
5380 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
5381 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
5382 : UINT32_MAX;
5383
5384 /*
5385 * Jump to the TLB lookup code.
5386 */
5387 if (!TlbState.fSkip)
5388 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
5389
5390 /*
5391 * TlbMiss:
5392 *
5393 * Call helper to do the fetching.
5394 * We flush all guest register shadow copies here.
5395 */
5396 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
5397
5398#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
5399 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
5400#else
5401 RT_NOREF(idxInstr);
5402#endif
5403
5404#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5405 if (pReNative->Core.offPc)
5406 {
5407 /*
5408 * Update the program counter but restore it at the end of the TlbMiss branch.
5409 * This should allow delaying more program counter updates for the TlbLookup and hit paths
5410 * which are hopefully much more frequent, reducing the amount of memory accesses.
5411 */
5412 /* Allocate a temporary PC register. */
5413 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5414
5415 /* Perform the addition and store the result. */
5416 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5417 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5418
5419 /* Free and flush the PC register. */
5420 iemNativeRegFreeTmp(pReNative, idxPcReg);
5421 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
5422 }
5423#endif
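
    /* Rough sketch of what this block and its counterpart after the helper call amount to on
       the TlbMiss path when offPc is non-zero (presumably so the helper and any exception it
       raises see the committed RIP):
            rip += offPc      ; materialise the delayed PC update
            ... save variables, load arguments, call pfnFunction ...
            rip -= offPc      ; undo it again, the TlbLookup/hit path keeps the delayed value */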
5424
5425#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5426 /* Save variables in volatile registers. */
5427 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
5428 | (idxRegMemResult != UINT8_MAX ? RT_BIT_32(idxRegMemResult) : 0)
5429 | (idxRegValueFetch != UINT8_MAX ? RT_BIT_32(idxRegValueFetch) : 0);
5430 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
5431#endif
5432
5433 /* IEMNATIVE_CALL_ARG2/3_GREG = uValue (idxVarValue) - if store */
5434 uint32_t fVolGregMask = IEMNATIVE_CALL_VOLATILE_GREG_MASK;
5435#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5436 if (cbMem == sizeof(RTUINT128U) || cbMem == sizeof(RTUINT256U))
5437 {
5438 /*
5439 * For SIMD based variables we pass the reference on the stack for both fetches and stores.
5440 *
5441         * @note A host register was assigned to the variable for the TlbLookup case above and it must not
5442         *       be freed here, or the value will not be synced back into that register further down the
5443         *       road because the variable would no longer know it has a register assigned.
5444 *
5445 * @note For loads it is not required to sync what is in the assigned register with the stack slot
5446 * as it will be overwritten anyway.
5447 */
5448 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
5449 off = iemNativeEmitLoadArgGregWithSimdVarAddrForMemAccess(pReNative, off, idxRegArgValue, idxVarValue,
5450 enmOp == kIemNativeEmitMemOp_Store /*fSyncRegWithStack*/);
5451 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
5452 }
5453 else
5454#endif
5455 if (enmOp == kIemNativeEmitMemOp_Store)
5456 {
5457 uint8_t const idxRegArgValue = iSegReg == UINT8_MAX ? IEMNATIVE_CALL_ARG2_GREG : IEMNATIVE_CALL_ARG3_GREG;
5458 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, idxRegArgValue, idxVarValue, 0 /*cbAppend*/,
5459#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5460 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
5461#else
5462 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
5463 fVolGregMask &= ~RT_BIT_32(idxRegArgValue);
5464#endif
5465 }
5466
5467 /* IEMNATIVE_CALL_ARG1_GREG = GCPtrMem */
5468 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarGCPtrMem, offDisp /*cbAppend*/,
5469#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5470 fVolGregMask);
5471#else
5472 fVolGregMask, true /*fSpilledVarsInvolatileRegs*/);
5473#endif
5474
5475 if (iSegReg != UINT8_MAX)
5476 {
5477 /* IEMNATIVE_CALL_ARG2_GREG = iSegReg */
5478 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
5479 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, iSegReg);
5480 }
5481
5482 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
5483 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
5484
5485 /* Done setting up parameters, make the call. */
5486 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
5487
5488 /*
5489 * Put the result in the right register if this is a fetch.
5490 */
5491 if (enmOp != kIemNativeEmitMemOp_Store)
5492 {
5493#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5494 if ( cbMem == sizeof(RTUINT128U)
5495 || cbMem == sizeof(RTUINT256U))
5496 {
5497 Assert(enmOp == kIemNativeEmitMemOp_Fetch);
5498
5499 /* Sync the value on the stack with the host register assigned to the variable. */
5500 off = iemNativeEmitSimdVarSyncStackToRegister(pReNative, off, idxVarValue);
5501 }
5502 else
5503#endif
5504 {
5505 Assert(idxRegValueFetch == pVarValue->idxReg);
5506 if (idxRegValueFetch != IEMNATIVE_CALL_RET_GREG)
5507 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegValueFetch, IEMNATIVE_CALL_RET_GREG);
5508 }
5509 }
5510
5511#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5512 /* Restore variables and guest shadow registers to volatile registers. */
5513 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
5514 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
5515#endif
5516
5517#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5518 if (pReNative->Core.offPc)
5519 {
5520 /*
5521 * Time to restore the program counter to its original value.
5522 */
5523 /* Allocate a temporary PC register. */
5524 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5525
5526 /* Restore the original value. */
5527 off = iemNativeEmitSubGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5528 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5529
5530 /* Free and flush the PC register. */
5531 iemNativeRegFreeTmp(pReNative, idxPcReg);
5532 iemNativeRegFlushGuestShadowsByHostMask(pReNative, RT_BIT_32(idxPcReg));
5533 }
5534#endif
5535
5536#ifdef IEMNATIVE_WITH_TLB_LOOKUP
5537 if (!TlbState.fSkip)
5538 {
5539 /* end of TlbMiss - Jump to the done label. */
5540 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
5541 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
5542
5543 /*
5544 * TlbLookup:
5545 */
5546 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask,
5547 enmOp == kIemNativeEmitMemOp_Store ? IEM_ACCESS_TYPE_WRITE : IEM_ACCESS_TYPE_READ,
5548 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult, offDisp);
5549
5550 /*
5551 * Emit code to do the actual storing / fetching.
5552 */
5553 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
5554# ifdef VBOX_WITH_STATISTICS
5555 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
5556 enmOp == kIemNativeEmitMemOp_Store
5557                                                   ? RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStore)
5558                                                   : RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForFetch));
5559# endif
5560 switch (enmOp)
5561 {
5562 case kIemNativeEmitMemOp_Store:
5563 if (pVarValue->enmKind != kIemNativeVarKind_Immediate)
5564 {
5565 switch (cbMem)
5566 {
5567 case 1:
5568 off = iemNativeEmitStoreGpr8ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5569 break;
5570 case 2:
5571 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5572 break;
5573 case 4:
5574 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5575 break;
5576 case 8:
5577 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5578 break;
5579#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5580 case sizeof(RTUINT128U):
5581 off = iemNativeEmitStoreVecRegByGprU128Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5582 break;
5583 case sizeof(RTUINT256U):
5584 off = iemNativeEmitStoreVecRegByGprU256Ex(pCodeBuf, off, idxRegValueStore, idxRegMemResult);
5585 break;
5586#endif
5587 default:
5588 AssertFailed();
5589 }
5590 }
5591 else
5592 {
5593 switch (cbMem)
5594 {
5595 case 1:
5596 off = iemNativeEmitStoreImm8ByGprEx(pCodeBuf, off, (uint8_t)pVarValue->u.uValue,
5597 idxRegMemResult, TlbState.idxReg1);
5598 break;
5599 case 2:
5600 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
5601 idxRegMemResult, TlbState.idxReg1);
5602 break;
5603 case 4:
5604 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
5605 idxRegMemResult, TlbState.idxReg1);
5606 break;
5607 case 8:
5608 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue,
5609 idxRegMemResult, TlbState.idxReg1);
5610 break;
5611 default:
5612 AssertFailed();
5613 }
5614 }
5615 break;
5616
5617 case kIemNativeEmitMemOp_Fetch:
5618 case kIemNativeEmitMemOp_Fetch_Zx_U16:
5619 case kIemNativeEmitMemOp_Fetch_Zx_U32:
5620 case kIemNativeEmitMemOp_Fetch_Zx_U64:
5621 switch (cbMem)
5622 {
5623 case 1:
5624 off = iemNativeEmitLoadGprByGprU8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5625 break;
5626 case 2:
5627 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5628 break;
5629 case 4:
5630 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5631 break;
5632 case 8:
5633 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5634 break;
5635#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5636 case sizeof(RTUINT128U):
5637 /*
5638 * No need to sync back the register with the stack, this is done by the generic variable handling
5639 * code if there is a register assigned to a variable and the stack must be accessed.
5640 */
5641 off = iemNativeEmitLoadVecRegByGprU128Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5642 break;
5643 case sizeof(RTUINT256U):
5644 /*
5645 * No need to sync back the register with the stack, this is done by the generic variable handling
5646 * code if there is a register assigned to a variable and the stack must be accessed.
5647 */
5648 off = iemNativeEmitLoadVecRegByGprU256Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5649 break;
5650#endif
5651 default:
5652 AssertFailed();
5653 }
5654 break;
5655
5656 case kIemNativeEmitMemOp_Fetch_Sx_U16:
5657 Assert(cbMem == 1);
5658 off = iemNativeEmitLoadGprByGprU16SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5659 break;
5660
5661 case kIemNativeEmitMemOp_Fetch_Sx_U32:
5662 Assert(cbMem == 1 || cbMem == 2);
5663 if (cbMem == 1)
5664 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5665 else
5666 off = iemNativeEmitLoadGprByGprU32SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5667 break;
5668
5669 case kIemNativeEmitMemOp_Fetch_Sx_U64:
5670 switch (cbMem)
5671 {
5672 case 1:
5673 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS8Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5674 break;
5675 case 2:
5676 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS16Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5677 break;
5678 case 4:
5679 off = iemNativeEmitLoadGprByGprU64SignExtendedFromS32Ex(pCodeBuf, off, idxRegValueFetch, idxRegMemResult);
5680 break;
5681 default:
5682 AssertFailed();
5683 }
5684 break;
5685
5686 default:
5687 AssertFailed();
5688 }
5689
5690 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
5691
5692 /*
5693 * TlbDone:
5694 */
5695 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
5696
5697 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
5698
5699# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
5700 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
5701 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
5702# endif
5703 }
5704#else
5705 RT_NOREF(fAlignMask, idxLabelTlbMiss);
5706#endif
5707
5708 if (idxRegValueFetch != UINT8_MAX || idxRegValueStore != UINT8_MAX)
5709 iemNativeVarRegisterRelease(pReNative, idxVarValue);
5710 return off;
5711}
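
/*
 * Reading aid: unless TlbState.fSkip forces the helper-call-only form, the code emitted by
 * iemNativeEmitMemFetchStoreDataCommon above is laid out roughly like this:
 *
 *          jmp     TlbLookup
 *      TlbMiss:
 *          (materialise the delayed RIP if needed)
 *          save volatile variables, load the pVCpu/GCPtrMem/segment/value arguments
 *          call    pfnFunction
 *          move the return value into the variable's register (fetches only)
 *          restore variables, (undo the delayed RIP)
 *          jmp     TlbDone
 *      TlbLookup:
 *          inline TLB probe, branching to TlbMiss on a miss
 *          inline load/store through the translated host address
 *      TlbDone:
 */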
5712
5713
5714
5715/*********************************************************************************************************************************
5716* Memory fetches (IEM_MEM_FETCH_XXX). *
5717*********************************************************************************************************************************/
5718
5719/* 8-bit segmented: */
5720#define IEM_MC_FETCH_MEM_U8(a_u8Dst, a_iSeg, a_GCPtrMem) \
5721 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, a_iSeg, a_GCPtrMem, \
5722 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
5723 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5724
5725#define IEM_MC_FETCH_MEM_U8_ZX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5726 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5727 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
5728 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5729
5730#define IEM_MC_FETCH_MEM_U8_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5731 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5732 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5733 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5734
5735#define IEM_MC_FETCH_MEM_U8_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5736 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5737 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5738 (uintptr_t)iemNativeHlpMemFetchDataU8, pCallEntry->idxInstr)
5739
5740#define IEM_MC_FETCH_MEM_U8_SX_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5741 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5742 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
5743 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U16, pCallEntry->idxInstr)
5744
5745#define IEM_MC_FETCH_MEM_U8_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5746 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5747 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5748 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U32, pCallEntry->idxInstr)
5749
5750#define IEM_MC_FETCH_MEM_U8_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5751 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5752 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5753 (uintptr_t)iemNativeHlpMemFetchDataU8_Sx_U64, pCallEntry->idxInstr)
5754
5755/* 16-bit segmented: */
5756#define IEM_MC_FETCH_MEM_U16(a_u16Dst, a_iSeg, a_GCPtrMem) \
5757 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5758 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5759 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5760
5761#define IEM_MC_FETCH_MEM_U16_DISP(a_u16Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
5762 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, a_iSeg, a_GCPtrMem, \
5763 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5764 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr, a_offDisp)
5765
5766#define IEM_MC_FETCH_MEM_U16_ZX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5767 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5768 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5769 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5770
5771#define IEM_MC_FETCH_MEM_U16_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5772 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5773 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5774 (uintptr_t)iemNativeHlpMemFetchDataU16, pCallEntry->idxInstr)
5775
5776#define IEM_MC_FETCH_MEM_U16_SX_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5777 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5778 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5779 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5780
5781#define IEM_MC_FETCH_MEM_U16_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5782 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5783 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5784 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U64, pCallEntry->idxInstr)
5785
5786
5787/* 32-bit segmented: */
5788#define IEM_MC_FETCH_MEM_U32(a_u32Dst, a_iSeg, a_GCPtrMem) \
5789 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5790 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5791 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5792
5793#define IEM_MC_FETCH_MEM_U32_DISP(a_u32Dst, a_iSeg, a_GCPtrMem, a_offDisp) \
5794 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, a_iSeg, a_GCPtrMem, \
5795 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5796 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr, a_offDisp)
5797
5798#define IEM_MC_FETCH_MEM_U32_ZX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5799 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5800 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5801 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5802
5803#define IEM_MC_FETCH_MEM_U32_SX_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5804 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5805 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5806 (uintptr_t)iemNativeHlpMemFetchDataU32_Sx_U64, pCallEntry->idxInstr)
5807
5808#define IEM_MC_FETCH_MEM_I16(a_i16Dst, a_iSeg, a_GCPtrMem) \
5809 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, a_iSeg, a_GCPtrMem, \
5810 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5811 (uintptr_t)iemNativeHlpMemFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5812
5813#define IEM_MC_FETCH_MEM_I32(a_i32Dst, a_iSeg, a_GCPtrMem) \
5814 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, a_iSeg, a_GCPtrMem, \
5815 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5816 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5817
5818#define IEM_MC_FETCH_MEM_I64(a_i64Dst, a_iSeg, a_GCPtrMem) \
5819 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, a_iSeg, a_GCPtrMem, \
5820 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5821 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5822
5823AssertCompileSize(RTFLOAT32U, sizeof(uint32_t));
5824#define IEM_MC_FETCH_MEM_R32(a_r32Dst, a_iSeg, a_GCPtrMem) \
5825 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, a_iSeg, a_GCPtrMem, \
5826 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
5827 (uintptr_t)iemNativeHlpMemFetchDataU32, pCallEntry->idxInstr)
5828
5829
5830/* 64-bit segmented: */
5831#define IEM_MC_FETCH_MEM_U64(a_u64Dst, a_iSeg, a_GCPtrMem) \
5832 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, a_iSeg, a_GCPtrMem, \
5833 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5834 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5835
5836AssertCompileSize(RTFLOAT64U, sizeof(uint64_t));
5837#define IEM_MC_FETCH_MEM_R64(a_r64Dst, a_iSeg, a_GCPtrMem) \
5838 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, a_iSeg, a_GCPtrMem, \
5839 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
5840 (uintptr_t)iemNativeHlpMemFetchDataU64, pCallEntry->idxInstr)
5841
5842
5843/* 8-bit flat: */
5844#define IEM_MC_FETCH_MEM_FLAT_U8(a_u8Dst, a_GCPtrMem) \
5845 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Dst, UINT8_MAX, a_GCPtrMem, \
5846 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch, \
5847 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5848
5849#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U16(a_u16Dst, a_GCPtrMem) \
5850 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5851 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U16, \
5852 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5853
5854#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U32(a_u32Dst, a_GCPtrMem) \
5855 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5856 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5857 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5858
5859#define IEM_MC_FETCH_MEM_FLAT_U8_ZX_U64(a_u64Dst, a_GCPtrMem) \
5860 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5861 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5862 (uintptr_t)iemNativeHlpMemFlatFetchDataU8, pCallEntry->idxInstr)
5863
5864#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U16(a_u16Dst, a_GCPtrMem) \
5865 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5866 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U16, \
5867 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U16, pCallEntry->idxInstr)
5868
5869#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U32(a_u32Dst, a_GCPtrMem) \
5870 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5871 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5872 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U32, pCallEntry->idxInstr)
5873
5874#define IEM_MC_FETCH_MEM_FLAT_U8_SX_U64(a_u64Dst, a_GCPtrMem) \
5875 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5876 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5877 (uintptr_t)iemNativeHlpMemFlatFetchDataU8_Sx_U64, pCallEntry->idxInstr)
5878
5879
5880/* 16-bit flat: */
5881#define IEM_MC_FETCH_MEM_FLAT_U16(a_u16Dst, a_GCPtrMem) \
5882 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5883 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5884 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5885
5886#define IEM_MC_FETCH_MEM_FLAT_U16_DISP(a_u16Dst, a_GCPtrMem, a_offDisp) \
5887 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Dst, UINT8_MAX, a_GCPtrMem, \
5888 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5889 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr, a_offDisp)
5890
5891#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U32(a_u32Dst, a_GCPtrMem) \
5892 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5893 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U32, \
5894 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5895
5896#define IEM_MC_FETCH_MEM_FLAT_U16_ZX_U64(a_u64Dst, a_GCPtrMem) \
5897 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5898 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5899 (uintptr_t)iemNativeHlpMemFlatFetchDataU16, pCallEntry->idxInstr)
5900
5901#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U32(a_u32Dst, a_GCPtrMem) \
5902 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5903 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U32, \
5904 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5905
5906#define IEM_MC_FETCH_MEM_FLAT_U16_SX_U64(a_u64Dst, a_GCPtrMem) \
5907 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5908 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5909 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U64, pCallEntry->idxInstr)
5910
5911/* 32-bit flat: */
5912#define IEM_MC_FETCH_MEM_FLAT_U32(a_u32Dst, a_GCPtrMem) \
5913 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5914 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5915 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5916
5917#define IEM_MC_FETCH_MEM_FLAT_U32_DISP(a_u32Dst, a_GCPtrMem, a_offDisp) \
5918 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Dst, UINT8_MAX, a_GCPtrMem, \
5919 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5920 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr, a_offDisp)
5921
5922#define IEM_MC_FETCH_MEM_FLAT_U32_ZX_U64(a_u64Dst, a_GCPtrMem) \
5923 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5924 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Zx_U64, \
5925 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5926
5927#define IEM_MC_FETCH_MEM_FLAT_U32_SX_U64(a_u64Dst, a_GCPtrMem) \
5928 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5929 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Fetch_Sx_U64, \
5930 (uintptr_t)iemNativeHlpMemFlatFetchDataU32_Sx_U64, pCallEntry->idxInstr)
5931
5932 #define IEM_MC_FETCH_MEM_FLAT_I16(a_i16Dst, a_GCPtrMem) \
5933 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i16Dst, UINT8_MAX, a_GCPtrMem, \
5934 sizeof(int16_t), sizeof(int16_t) - 1, kIemNativeEmitMemOp_Fetch, \
5935 (uintptr_t)iemNativeHlpMemFlatFetchDataU16_Sx_U32, pCallEntry->idxInstr)
5936
5937#define IEM_MC_FETCH_MEM_FLAT_I32(a_i32Dst, a_GCPtrMem) \
5938 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i32Dst, UINT8_MAX, a_GCPtrMem, \
5939 sizeof(int32_t), sizeof(int32_t) - 1, kIemNativeEmitMemOp_Fetch, \
5940 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5941
5942#define IEM_MC_FETCH_MEM_FLAT_I64(a_i64Dst, a_GCPtrMem) \
5943 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_i64Dst, UINT8_MAX, a_GCPtrMem, \
5944 sizeof(int64_t), sizeof(int64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5945 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
5946
5947#define IEM_MC_FETCH_MEM_FLAT_R32(a_r32Dst, a_GCPtrMem) \
5948 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r32Dst, UINT8_MAX, a_GCPtrMem, \
5949 sizeof(RTFLOAT32U), sizeof(RTFLOAT32U) - 1, kIemNativeEmitMemOp_Fetch, \
5950 (uintptr_t)iemNativeHlpMemFlatFetchDataU32, pCallEntry->idxInstr)
5951
5952
5953/* 64-bit flat: */
5954#define IEM_MC_FETCH_MEM_FLAT_U64(a_u64Dst, a_GCPtrMem) \
5955 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Dst, UINT8_MAX, a_GCPtrMem, \
5956 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Fetch, \
5957 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
5958
5959#define IEM_MC_FETCH_MEM_FLAT_R64(a_r64Dst, a_GCPtrMem) \
5960 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_r64Dst, UINT8_MAX, a_GCPtrMem, \
5961 sizeof(RTFLOAT64U), sizeof(RTFLOAT64U) - 1, kIemNativeEmitMemOp_Fetch, \
5962 (uintptr_t)iemNativeHlpMemFlatFetchDataU64, pCallEntry->idxInstr)
5963
5964#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5965/* 128-bit segmented: */
5966#define IEM_MC_FETCH_MEM_U128(a_u128Dst, a_iSeg, a_GCPtrMem) \
5967 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
5968 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5969 (uintptr_t)iemNativeHlpMemFetchDataU128, pCallEntry->idxInstr)
5970
5971#define IEM_MC_FETCH_MEM_U128_ALIGN_SSE(a_u128Dst, a_iSeg, a_GCPtrMem) \
5972 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
5973 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5974 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
5975
5976AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
5977#define IEM_MC_FETCH_MEM_XMM_ALIGN_SSE(a_uXmmDst, a_iSeg, a_GCPtrMem) \
5978 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, a_iSeg, a_GCPtrMem, \
5979 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
5980 (uintptr_t)iemNativeHlpMemFetchDataU128AlignedSse, pCallEntry->idxInstr)
5981
5982#define IEM_MC_FETCH_MEM_U128_NO_AC(a_u128Dst, a_iSeg, a_GCPtrMem) \
5983 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, a_iSeg, a_GCPtrMem, \
5984 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5985 (uintptr_t)iemNativeHlpMemFetchDataU128NoAc, pCallEntry->idxInstr)
5986
5987/* 128-bit flat: */
5988#define IEM_MC_FETCH_MEM_FLAT_U128(a_u128Dst, a_GCPtrMem) \
5989 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
5990 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5991 (uintptr_t)iemNativeHlpMemFlatFetchDataU128, pCallEntry->idxInstr)
5992
5993#define IEM_MC_FETCH_MEM_FLAT_U128_ALIGN_SSE(a_u128Dst, a_GCPtrMem) \
5994 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
5995 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
5996 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
5997
5998#define IEM_MC_FETCH_MEM_FLAT_XMM_ALIGN_SSE(a_uXmmDst, a_GCPtrMem) \
5999 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_uXmmDst, UINT8_MAX, a_GCPtrMem, \
6000 sizeof(X86XMMREG), sizeof(X86XMMREG) - 1, kIemNativeEmitMemOp_Fetch, \
6001 (uintptr_t)iemNativeHlpMemFlatFetchDataU128AlignedSse, pCallEntry->idxInstr)
6002
6003#define IEM_MC_FETCH_MEM_FLAT_U128_NO_AC(a_u128Dst, a_GCPtrMem) \
6004 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Dst, UINT8_MAX, a_GCPtrMem, \
6005 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Fetch, \
6006 (uintptr_t)iemNativeHlpMemFlatFetchDataU128NoAc, pCallEntry->idxInstr)
6007
6008/* 256-bit segmented: */
6009#define IEM_MC_FETCH_MEM_U256(a_u256Dst, a_iSeg, a_GCPtrMem) \
6010 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
6011 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6012 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
6013
6014#define IEM_MC_FETCH_MEM_U256_NO_AC(a_u256Dst, a_iSeg, a_GCPtrMem) \
6015 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
6016 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6017 (uintptr_t)iemNativeHlpMemFetchDataU256NoAc, pCallEntry->idxInstr)
6018
6019#define IEM_MC_FETCH_MEM_U256_ALIGN_AVX(a_u256Dst, a_iSeg, a_GCPtrMem) \
6020 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, a_iSeg, a_GCPtrMem, \
6021 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6022 (uintptr_t)iemNativeHlpMemFetchDataU256AlignedAvx, pCallEntry->idxInstr)
6023
6024
6025/* 256-bit flat: */
6026#define IEM_MC_FETCH_MEM_FLAT_U256(a_u256Dst, a_GCPtrMem) \
6027 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
6028 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6029 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
6030
6031#define IEM_MC_FETCH_MEM_FLAT_U256_NO_AC(a_u256Dst, a_GCPtrMem) \
6032 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
6033 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6034 (uintptr_t)iemNativeHlpMemFlatFetchDataU256NoAc, pCallEntry->idxInstr)
6035
6036#define IEM_MC_FETCH_MEM_FLAT_U256_ALIGN_AVX(a_u256Dst, a_GCPtrMem) \
6037 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Dst, UINT8_MAX, a_GCPtrMem, \
6038 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Fetch, \
6039 (uintptr_t)iemNativeHlpMemFlatFetchDataU256AlignedAvx, pCallEntry->idxInstr)
6040#endif
6041
6042
6043/*********************************************************************************************************************************
6044* Memory stores (IEM_MEM_STORE_XXX). *
6045*********************************************************************************************************************************/
6046
6047#define IEM_MC_STORE_MEM_U8(a_iSeg, a_GCPtrMem, a_u8Value) \
6048 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, a_iSeg, a_GCPtrMem, \
6049 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
6050 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
6051
6052#define IEM_MC_STORE_MEM_U16(a_iSeg, a_GCPtrMem, a_u16Value) \
6053 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, a_iSeg, a_GCPtrMem, \
6054 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
6055 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
6056
6057#define IEM_MC_STORE_MEM_U32(a_iSeg, a_GCPtrMem, a_u32Value) \
6058 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, a_iSeg, a_GCPtrMem, \
6059 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
6060 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
6061
6062#define IEM_MC_STORE_MEM_U64(a_iSeg, a_GCPtrMem, a_u64Value) \
6063 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, a_iSeg, a_GCPtrMem, \
6064 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
6065 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
6066
6067
6068#define IEM_MC_STORE_MEM_FLAT_U8(a_GCPtrMem, a_u8Value) \
6069 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u8Value, UINT8_MAX, a_GCPtrMem, \
6070 sizeof(uint8_t), 0 /*fAlignMask*/, kIemNativeEmitMemOp_Store, \
6071 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
6072
6073#define IEM_MC_STORE_MEM_FLAT_U16(a_GCPtrMem, a_u16Value) \
6074 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u16Value, UINT8_MAX, a_GCPtrMem, \
6075 sizeof(uint16_t), sizeof(uint16_t) - 1, kIemNativeEmitMemOp_Store, \
6076 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
6077
6078#define IEM_MC_STORE_MEM_FLAT_U32(a_GCPtrMem, a_u32Value) \
6079 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u32Value, UINT8_MAX, a_GCPtrMem, \
6080 sizeof(uint32_t), sizeof(uint32_t) - 1, kIemNativeEmitMemOp_Store, \
6081 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
6082
6083#define IEM_MC_STORE_MEM_FLAT_U64(a_GCPtrMem, a_u64Value) \
6084 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u64Value, UINT8_MAX, a_GCPtrMem, \
6085 sizeof(uint64_t), sizeof(uint64_t) - 1, kIemNativeEmitMemOp_Store, \
6086 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
6087
6088
6089#define IEM_MC_STORE_MEM_U8_CONST(a_iSeg, a_GCPtrMem, a_u8ConstValue) \
6090 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6091 (uintptr_t)iemNativeHlpMemStoreDataU8, pCallEntry->idxInstr)
6092
6093#define IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, a_u16ConstValue) \
6094 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6095 (uintptr_t)iemNativeHlpMemStoreDataU16, pCallEntry->idxInstr)
6096
6097#define IEM_MC_STORE_MEM_U32_CONST(a_iSeg, a_GCPtrMem, a_u32ConstValue) \
6098 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6099 (uintptr_t)iemNativeHlpMemStoreDataU32, pCallEntry->idxInstr)
6100
6101#define IEM_MC_STORE_MEM_U64_CONST(a_iSeg, a_GCPtrMem, a_u64ConstValue) \
6102 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
6103 (uintptr_t)iemNativeHlpMemStoreDataU64, pCallEntry->idxInstr)
6104
6105
6106#define IEM_MC_STORE_MEM_FLAT_U8_CONST(a_GCPtrMem, a_u8ConstValue) \
6107 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u8ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
6108 (uintptr_t)iemNativeHlpMemFlatStoreDataU8, pCallEntry->idxInstr)
6109
6110#define IEM_MC_STORE_MEM_FLAT_U16_CONST(a_GCPtrMem, a_u16ConstValue) \
6111 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u16ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
6112 (uintptr_t)iemNativeHlpMemFlatStoreDataU16, pCallEntry->idxInstr)
6113
6114#define IEM_MC_STORE_MEM_FLAT_U32_CONST(a_GCPtrMem, a_u32ConstValue) \
6115 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u32ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
6116 (uintptr_t)iemNativeHlpMemFlatStoreDataU32, pCallEntry->idxInstr)
6117
6118#define IEM_MC_STORE_MEM_FLAT_U64_CONST(a_GCPtrMem, a_u64ConstValue) \
6119 off = iemNativeEmitMemStoreConstDataCommon(pReNative, off, a_u64ConstValue, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
6120 (uintptr_t)iemNativeHlpMemFlatStoreDataU64, pCallEntry->idxInstr)
6121
6122/** Emits code for IEM_MC_STORE_MEM_U8/16/32/64_CONST and
6123 * IEM_MC_STORE_MEM_FLAT_U8/16/32/64_CONST (with iSegReg = UINT8_MAX). */
6124DECL_INLINE_THROW(uint32_t)
6125iemNativeEmitMemStoreConstDataCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t uValueConst, uint8_t iSegReg,
6126 uint8_t idxVarGCPtrMem, uint8_t cbMem, uintptr_t pfnFunction, uint8_t idxInstr)
6127{
6128 /*
6129 * Create a temporary const variable and call iemNativeEmitMemFetchStoreDataCommon
6130 * to do the grunt work.
6131 */
6132 uint8_t const idxVarConstValue = iemNativeVarAllocConst(pReNative, cbMem, uValueConst);
6133 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, idxVarConstValue, iSegReg, idxVarGCPtrMem,
6134 cbMem, cbMem - 1, kIemNativeEmitMemOp_Store,
6135 pfnFunction, idxInstr);
6136 iemNativeVarFreeLocal(pReNative, idxVarConstValue);
6137 return off;
6138}
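
/* Reading aid: IEM_MC_STORE_MEM_U16_CONST(a_iSeg, a_GCPtrMem, 0x1234), to pick an example,
   thus allocates a 2-byte constant variable holding 0x1234, runs it through the common
   fetch/store emitter above as a kIemNativeEmitMemOp_Store with iemNativeHlpMemStoreDataU16
   as the TlbMiss helper, and frees the temporary variable again. */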
6139
6140
6141#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6142# define IEM_MC_STORE_MEM_U128_ALIGN_SSE(a_iSeg, a_GCPtrMem, a_u128Value) \
6143 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
6144 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6145 (uintptr_t)iemNativeHlpMemStoreDataU128AlignedSse, pCallEntry->idxInstr)
6146
6147# define IEM_MC_STORE_MEM_U128_NO_AC(a_iSeg, a_GCPtrMem, a_u128Value) \
6148 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, a_iSeg, a_GCPtrMem, \
6149 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6150 (uintptr_t)iemNativeHlpMemStoreDataU128NoAc, pCallEntry->idxInstr)
6151
6152# define IEM_MC_STORE_MEM_U256_NO_AC(a_iSeg, a_GCPtrMem, a_u256Value) \
6153 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
6154 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6155 (uintptr_t)iemNativeHlpMemStoreDataU256NoAc, pCallEntry->idxInstr)
6156
6157# define IEM_MC_STORE_MEM_U256_ALIGN_AVX(a_iSeg, a_GCPtrMem, a_u256Value) \
6158 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, a_iSeg, a_GCPtrMem, \
6159 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6160 (uintptr_t)iemNativeHlpMemStoreDataU256AlignedAvx, pCallEntry->idxInstr)
6161
6162
6163# define IEM_MC_STORE_MEM_FLAT_U128_ALIGN_SSE(a_GCPtrMem, a_u128Value) \
6164 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
6165 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6166 (uintptr_t)iemNativeHlpMemFlatStoreDataU128AlignedSse, pCallEntry->idxInstr)
6167
6168# define IEM_MC_STORE_MEM_FLAT_U128_NO_AC(a_GCPtrMem, a_u128Value) \
6169 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u128Value, UINT8_MAX, a_GCPtrMem, \
6170 sizeof(RTUINT128U), sizeof(RTUINT128U) - 1, kIemNativeEmitMemOp_Store, \
6171 (uintptr_t)iemNativeHlpMemFlatStoreDataU128NoAc, pCallEntry->idxInstr)
6172
6173# define IEM_MC_STORE_MEM_FLAT_U256_NO_AC(a_GCPtrMem, a_u256Value) \
6174 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
6175 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6176 (uintptr_t)iemNativeHlpMemFlatStoreDataU256NoAc, pCallEntry->idxInstr)
6177
6178# define IEM_MC_STORE_MEM_FLAT_U256_ALIGN_AVX(a_GCPtrMem, a_u256Value) \
6179 off = iemNativeEmitMemFetchStoreDataCommon(pReNative, off, a_u256Value, UINT8_MAX, a_GCPtrMem, \
6180 sizeof(RTUINT256U), sizeof(RTUINT256U) - 1, kIemNativeEmitMemOp_Store, \
6181 (uintptr_t)iemNativeHlpMemFlatStoreDataU256AlignedAvx, pCallEntry->idxInstr)
6182#endif
6183
6184
6185
6186/*********************************************************************************************************************************
6187* Stack Accesses. *
6188*********************************************************************************************************************************/
6189/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, fSReg, 0) */
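/* A couple of compile-time checks illustrating the encoding and how iemNativeEmitStackPush
   below decodes it via RT_BYTE1/2/3; (32, 32, 1, 0) is the combination used by
   IEM_MC_FLAT32_PUSH_U32_SREG: */
AssertCompile(RT_BYTE1(RT_MAKE_U32_FROM_U8(32, 32, 1, 0)) == 32); /* variable width in bits */
AssertCompile(RT_BYTE2(RT_MAKE_U32_FROM_U8(32, 32, 1, 0)) == 32); /* flat stack width in bits, 0 = segmented */
AssertCompile(RT_BYTE3(RT_MAKE_U32_FROM_U8(32, 32, 1, 0)) == 1);  /* segment register push */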
6190#define IEM_MC_PUSH_U16(a_u16Value) \
6191 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
6192 (uintptr_t)iemNativeHlpStackStoreU16, pCallEntry->idxInstr)
6193#define IEM_MC_PUSH_U32(a_u32Value) \
6194 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
6195 (uintptr_t)iemNativeHlpStackStoreU32, pCallEntry->idxInstr)
6196#define IEM_MC_PUSH_U32_SREG(a_uSegVal) \
6197 off = iemNativeEmitStackPush(pReNative, off, a_uSegVal, RT_MAKE_U32_FROM_U8(32, 0, 1, 0), \
6198 (uintptr_t)iemNativeHlpStackStoreU32SReg, pCallEntry->idxInstr)
6199#define IEM_MC_PUSH_U64(a_u64Value) \
6200 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
6201 (uintptr_t)iemNativeHlpStackStoreU64, pCallEntry->idxInstr)
6202
6203#define IEM_MC_FLAT32_PUSH_U16(a_u16Value) \
6204 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
6205 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
6206#define IEM_MC_FLAT32_PUSH_U32(a_u32Value) \
6207 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
6208 (uintptr_t)iemNativeHlpStackFlatStoreU32, pCallEntry->idxInstr)
6209#define IEM_MC_FLAT32_PUSH_U32_SREG(a_u32Value) \
6210 off = iemNativeEmitStackPush(pReNative, off, a_u32Value, RT_MAKE_U32_FROM_U8(32, 32, 1, 0), \
6211 (uintptr_t)iemNativeHlpStackFlatStoreU32SReg, pCallEntry->idxInstr)
6212
6213#define IEM_MC_FLAT64_PUSH_U16(a_u16Value) \
6214 off = iemNativeEmitStackPush(pReNative, off, a_u16Value, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
6215 (uintptr_t)iemNativeHlpStackFlatStoreU16, pCallEntry->idxInstr)
6216#define IEM_MC_FLAT64_PUSH_U64(a_u64Value) \
6217 off = iemNativeEmitStackPush(pReNative, off, a_u64Value, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
6218 (uintptr_t)iemNativeHlpStackFlatStoreU64, pCallEntry->idxInstr)
6219
6220
6221DECL_FORCE_INLINE_THROW(uint32_t)
6222iemNativeEmitStackPushUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6223{
6224 /* Use16BitSp: */
6225#ifdef RT_ARCH_AMD64
6226 off = iemNativeEmitSubGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
6227 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6228#else
6229 /* sub regeff, regrsp, #cbMem */
6230 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegEffSp, idxRegRsp, cbMem, false /*f64Bit*/);
6231 /* and regeff, regeff, #0xffff */
6232 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6233 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegEffSp, idxRegEffSp, 15, 0, false /*f64Bit*/);
6234     /* bfi regrsp, regeff, #0, #16 - copies bits 15:0 of idxRegEffSp into bits 15:0 of idxRegRsp. */
6235 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegEffSp, 0, 16, false /*f64Bit*/);
6236#endif
6237 return off;
6238}
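
/* Worked example for the Use16BitSp path above (hypothetical values): with SP=0x0002 and a
   4-byte push, the effective stack pointer becomes 0x0002 - 4 = 0xfffe (wrapping within 16
   bits), and only bits 15:0 of the RSP register are updated - bits 63:16 stay untouched,
   which is what the AMD64 16-bit subtraction respectively the ARM64 and+bfi sequence ensure. */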
6239
6240
6241DECL_FORCE_INLINE(uint32_t)
6242iemNativeEmitStackPushUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6243{
6244 /* Use32BitSp: */
6245 off = iemNativeEmitSubGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
6246 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6247 return off;
6248}
6249
6250
6251/** IEM_MC[|_FLAT32|_FLAT64]_PUSH_U16/32/32_SREG/64 */
6252DECL_INLINE_THROW(uint32_t)
6253iemNativeEmitStackPush(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarValue,
6254 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
6255{
6256 /*
6257 * Assert sanity.
6258 */
6259 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarValue);
6260 PIEMNATIVEVAR const pVarValue = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarValue)];
6261#ifdef VBOX_STRICT
6262 if (RT_BYTE2(cBitsVarAndFlat) != 0)
6263 {
6264 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6265 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6266 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6267 Assert( pfnFunction
6268 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
6269 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32
6270 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 1, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU32SReg
6271 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU16
6272 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatStoreU64
6273 : UINT64_C(0xc000b000a0009000) ));
6274 }
6275 else
6276 Assert( pfnFunction
6277 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU16
6278 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU32
6279 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 1, 0) ? (uintptr_t)iemNativeHlpStackStoreU32SReg
6280 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackStoreU64
6281 : UINT64_C(0xc000b000a0009000) ));
6282#endif
6283
6284#ifdef VBOX_STRICT
6285 /*
6286 * Check that the fExec flags we've got make sense.
6287 */
6288 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6289#endif
6290
6291 /*
6292 * To keep things simple we have to commit any pending writes first as we
6293 * may end up making calls.
6294 */
6295 /** @todo we could postpone this till we make the call and reload the
6296 * registers after returning from the call. Not sure if that's sensible or
6297 * not, though. */
6298 off = iemNativeRegFlushPendingWrites(pReNative, off);
6299
6300 /*
6301 * First we calculate the new RSP and the effective stack pointer value.
6302 * For 64-bit mode and flat 32-bit these two are the same.
6303     * (Code structure is very similar to that of POP.)
6304 */
6305 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
6306 bool const fIsSegReg = RT_BYTE3(cBitsVarAndFlat) != 0;
6307 bool const fIsIntelSeg = fIsSegReg && IEM_IS_GUEST_CPU_INTEL(pReNative->pVCpu);
6308 uint8_t const cbMemAccess = !fIsIntelSeg || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_16BIT
6309 ? cbMem : sizeof(uint16_t);
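    /* In other words, a sketch of the decision just above (the store side of the
       Intel quirk lives in the TlbLookup code further down):
            if (fIsIntelSeg && (pReNative->fExec & IEM_F_MODE_MASK) != IEM_F_MODE_X86_16BIT)
                cbMemAccess = sizeof(uint16_t);   <- only the selector word gets stored
            else
                cbMemAccess = cbMem;              <- incl. the real-mode PUSH FS case where the
                                                     upper word gets the high half of EFLAGS
       RSP is still advanced by cbMem in either case. */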
6310 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
6311 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
6312 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
6313 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
6314 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
6315 if (cBitsFlat != 0)
6316 {
6317 Assert(idxRegEffSp == idxRegRsp);
6318 Assert(cBitsFlat == 32 || cBitsFlat == 64);
6319 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
6320 if (cBitsFlat == 64)
6321 off = iemNativeEmitSubGprImm(pReNative, off, idxRegRsp, cbMem);
6322 else
6323 off = iemNativeEmitSubGpr32Imm(pReNative, off, idxRegRsp, cbMem);
6324 }
6325 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
6326 {
6327 Assert(idxRegEffSp != idxRegRsp);
6328 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
6329 kIemNativeGstRegUse_ReadOnly);
6330#ifdef RT_ARCH_AMD64
6331 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6332#else
6333 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6334#endif
6335 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
6336 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
6337 offFixupJumpToUseOtherBitSp = off;
6338 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6339 {
6340 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
6341 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6342 }
6343 else
6344 {
6345 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
6346 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6347 }
6348 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6349 }
6350 /* SpUpdateEnd: */
6351 uint32_t const offLabelSpUpdateEnd = off;
6352
6353 /*
6354     * Okay, now prepare for the TLB lookup and jump to the lookup code (or to
6355     * TlbMiss if we're skipping the lookup).
6356 */
6357 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
6358 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMemAccess);
6359 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6360 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
6361 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6362 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6363 : UINT32_MAX;
6364 uint8_t const idxRegValue = !TlbState.fSkip
6365 && pVarValue->enmKind != kIemNativeVarKind_Immediate
6366 ? iemNativeVarRegisterAcquire(pReNative, idxVarValue, &off, true /*fInitialized*/,
6367 IEMNATIVE_CALL_ARG2_GREG /*idxRegPref*/)
6368 : UINT8_MAX;
6369 uint8_t const idxRegMemResult = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
6370
6371
6372 if (!TlbState.fSkip)
6373 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6374 else
6375 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
6376
6377 /*
6378     * Use16BitSp / Use32BitSp - whichever SP width wasn't handled above:
6379 */
6380 if (cBitsFlat == 0)
6381 {
6382#ifdef RT_ARCH_AMD64
6383 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6384#else
6385 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6386#endif
6387 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
6388 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6389 off = iemNativeEmitStackPushUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6390 else
6391 off = iemNativeEmitStackPushUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6392 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
6393 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6394 }
6395
6396 /*
6397 * TlbMiss:
6398 *
6399 * Call helper to do the pushing.
6400 */
6401 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
6402
6403#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6404 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6405#else
6406 RT_NOREF(idxInstr);
6407#endif
6408
6409 /* Save variables in volatile registers. */
6410 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6411 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
6412 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0)
6413 | (idxRegValue < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegValue) : 0);
6414 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6415
6416 if ( idxRegValue == IEMNATIVE_CALL_ARG1_GREG
6417 && idxRegEffSp == IEMNATIVE_CALL_ARG2_GREG)
6418 {
6419 /* Swap them using ARG0 as temp register: */
6420 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_CALL_ARG1_GREG);
6421 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_ARG2_GREG);
6422 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, IEMNATIVE_CALL_ARG0_GREG);
6423 }
6424 else if (idxRegEffSp != IEMNATIVE_CALL_ARG2_GREG)
6425 {
6426 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue (first!) */
6427 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue,
6428 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
6429
6430 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp */
6431 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
6432 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6433 }
6434 else
6435 {
6436 /* IEMNATIVE_CALL_ARG1_GREG = idxRegEffSp (first!) */
6437 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6438
6439 /* IEMNATIVE_CALL_ARG2_GREG = idxVarValue */
6440 off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarValue, 0 /*offAddend*/,
6441 IEMNATIVE_CALL_VOLATILE_GREG_MASK & ~IEMNATIVE_CALL_ARG1_GREG);
6442 }
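    /* Note: the three branches above are just resolving the usual argument
       shuffling hazard - ARG2 must end up holding the value and ARG1 the
       effective stack pointer, so whichever of the two currently occupies the
       other one's target register has to be moved out of the way first (or, in
       the full swap case, rotated via ARG0 as scratch). */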
6443
6444 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6445 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6446
6447 /* Done setting up parameters, make the call. */
6448 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6449
6450 /* Restore variables and guest shadow registers to volatile registers. */
6451 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6452 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6453
6454#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6455 if (!TlbState.fSkip)
6456 {
6457 /* end of TlbMiss - Jump to the done label. */
6458 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6459 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6460
6461 /*
6462 * TlbLookup:
6463 */
6464 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMemAccess, cbMemAccess - 1,
6465 IEM_ACCESS_TYPE_WRITE, idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
6466
6467 /*
6468         * Emit code to do the actual storing.
6469 */
6470 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
6471# ifdef VBOX_WITH_STATISTICS
6472 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6473 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
6474# endif
6475 if (idxRegValue != UINT8_MAX)
6476 {
6477 switch (cbMemAccess)
6478 {
6479 case 2:
6480 off = iemNativeEmitStoreGpr16ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6481 break;
6482 case 4:
6483 if (!fIsIntelSeg)
6484 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6485 else
6486 {
6487                        /* intel real mode segment push. 10890XE adds the 2nd half of EFLAGS to a
6488                           PUSH FS in real mode, so we have to try to emulate that here.
6489 We borrow the now unused idxReg1 from the TLB lookup code here. */
6490 uint8_t idxRegEfl = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
6491 kIemNativeGstReg_EFlags);
6492 if (idxRegEfl != UINT8_MAX)
6493 {
6494#ifdef RT_ARCH_AMD64
6495 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, TlbState.idxReg1, idxRegEfl);
6496 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
6497 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6498#else
6499 off = iemNativeEmitGpr32EqGprAndImmEx(iemNativeInstrBufEnsure(pReNative, off, 3),
6500 off, TlbState.idxReg1, idxRegEfl,
6501 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6502#endif
6503 iemNativeRegFreeTmp(pReNative, idxRegEfl);
6504 }
6505 else
6506 {
6507 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, TlbState.idxReg1,
6508 RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.eflags));
6509 off = iemNativeEmitAndGpr32ByImm(pReNative, off, TlbState.idxReg1,
6510 UINT32_C(0xffff0000) & ~X86_EFL_RAZ_MASK);
6511 }
6512 /* ASSUMES the upper half of idxRegValue is ZERO. */
6513 off = iemNativeEmitOrGpr32ByGpr(pReNative, off, TlbState.idxReg1, idxRegValue);
6514 off = iemNativeEmitStoreGpr32ByGprEx(pCodeBuf, off, TlbState.idxReg1, idxRegMemResult);
6515 }
6516 break;
6517 case 8:
6518 off = iemNativeEmitStoreGpr64ByGprEx(pCodeBuf, off, idxRegValue, idxRegMemResult);
6519 break;
6520 default:
6521 AssertFailed();
6522 }
6523 }
6524 else
6525 {
6526 switch (cbMemAccess)
6527 {
6528 case 2:
6529 off = iemNativeEmitStoreImm16ByGprEx(pCodeBuf, off, (uint16_t)pVarValue->u.uValue,
6530 idxRegMemResult, TlbState.idxReg1);
6531 break;
6532 case 4:
6533 Assert(!fIsSegReg);
6534 off = iemNativeEmitStoreImm32ByGprEx(pCodeBuf, off, (uint32_t)pVarValue->u.uValue,
6535 idxRegMemResult, TlbState.idxReg1);
6536 break;
6537 case 8:
6538 off = iemNativeEmitStoreImm64ByGprEx(pCodeBuf, off, pVarValue->u.uValue, idxRegMemResult, TlbState.idxReg1);
6539 break;
6540 default:
6541 AssertFailed();
6542 }
6543 }
6544
6545 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6546 TlbState.freeRegsAndReleaseVars(pReNative);
6547
6548 /*
6549 * TlbDone:
6550 *
6551 * Commit the new RSP value.
6552 */
6553 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6554 }
6555#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
6556
6557#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6558 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.rsp));
6559#endif
6560 iemNativeRegFreeTmp(pReNative, idxRegRsp);
6561 if (idxRegEffSp != idxRegRsp)
6562 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
6563
6564    /* The value variable is implicitly flushed. */
6565 if (idxRegValue != UINT8_MAX)
6566 iemNativeVarRegisterRelease(pReNative, idxVarValue);
6567 iemNativeVarFreeLocal(pReNative, idxVarValue);
6568
6569 return off;
6570}
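/*
 * Rough shape of the code iemNativeEmitStackPush produces for the non-flat,
 * TLB-lookup-enabled case (a sketch only; the label names match the comments
 * in the function above):
 *
 *          test    ss-attrib, X86DESCATTR_D        ; which SP width?
 *          jcc     Use16BitSp/Use32BitSp           ; the non-default width
 *          <SP update for the default width>
 *      SpUpdateEnd:
 *          jmp     TlbLookup                       ; TlbMiss when lookup is skipped
 *      Use16BitSp/Use32BitSp:
 *          <SP update for the other width>
 *          jmp     SpUpdateEnd
 *      TlbMiss:
 *          <save volatiles, marshal pVCpu/EffSp/value, call pfnFunction, restore>
 *          jmp     TlbDone
 *      TlbLookup:
 *          <inline TLB lookup, store the value on a hit>
 *      TlbDone:
 *          <commit the new RSP>
 */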
6571
6572
6573
6574/* RT_MAKE_U32_FROM_U8(cBitsVar, cBitsFlat, 0, 0) */
6575#define IEM_MC_POP_GREG_U16(a_iGReg) \
6576 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 0, 0, 0), \
6577 (uintptr_t)iemNativeHlpStackFetchU16, pCallEntry->idxInstr)
6578#define IEM_MC_POP_GREG_U32(a_iGReg) \
6579 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 0, 0, 0), \
6580 (uintptr_t)iemNativeHlpStackFetchU32, pCallEntry->idxInstr)
6581#define IEM_MC_POP_GREG_U64(a_iGReg) \
6582 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 0, 0, 0), \
6583 (uintptr_t)iemNativeHlpStackFetchU64, pCallEntry->idxInstr)
6584
6585#define IEM_MC_FLAT32_POP_GREG_U16(a_iGReg) \
6586 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 32, 0, 0), \
6587 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
6588#define IEM_MC_FLAT32_POP_GREG_U32(a_iGReg) \
6589 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(32, 32, 0, 0), \
6590 (uintptr_t)iemNativeHlpStackFlatFetchU32, pCallEntry->idxInstr)
6591
6592#define IEM_MC_FLAT64_POP_GREG_U16(a_iGReg) \
6593 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(16, 64, 0, 0), \
6594 (uintptr_t)iemNativeHlpStackFlatFetchU16, pCallEntry->idxInstr)
6595#define IEM_MC_FLAT64_POP_GREG_U64(a_iGReg) \
6596 off = iemNativeEmitStackPopGReg(pReNative, off, a_iGReg, RT_MAKE_U32_FROM_U8(64, 64, 0, 0), \
6597 (uintptr_t)iemNativeHlpStackFlatFetchU64, pCallEntry->idxInstr)
6598
6599
6600DECL_FORCE_INLINE_THROW(uint32_t)
6601iemNativeEmitStackPopUse16Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem,
6602 uint8_t idxRegTmp)
6603{
6604 /* Use16BitSp: */
6605#ifdef RT_ARCH_AMD64
6606 off = iemNativeEmitLoadGprFromGpr16Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6607 off = iemNativeEmitAddGpr16ImmEx(pCodeBuf, off, idxRegRsp, cbMem); /* ASSUMES this does NOT modify bits [63:16]! */
6608 RT_NOREF(idxRegTmp);
6609#else
6610 /* ubfiz regeff, regrsp, #0, #16 - copies bits 15:0 from RSP to EffSp bits 15:0, zeroing bits 63:16. */
6611 pCodeBuf[off++] = Armv8A64MkInstrUbfiz(idxRegEffSp, idxRegRsp, 0, 16, false /*f64Bit*/);
6612 /* add tmp, regrsp, #cbMem */
6613 pCodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegTmp, idxRegRsp, cbMem, false /*f64Bit*/);
6614 /* and tmp, tmp, #0xffff */
6615 Assert(Armv8A64ConvertImmRImmS2Mask32(15, 0) == 0xffff);
6616 pCodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegTmp, idxRegTmp, 15, 0, false /*f64Bit*/);
6617    /* bfi regrsp, tmp, #0, #16 - moves bits 15:0 from tmp to RSP bits 15:0, keeping the other RSP bits as is. */
6618 pCodeBuf[off++] = Armv8A64MkInstrBfi(idxRegRsp, idxRegTmp, 0, 16, false /*f64Bit*/);
6619#endif
6620 return off;
6621}
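/*
 * Guest-level equivalent of the pop Use16BitSp sequence above (a sketch only;
 * host register allocation details omitted):
 *
 *      uEffSp = (uint16_t)uRsp;                                       - address to read from
 *      uRsp   = (uRsp & UINT64_C(0xffffffffffff0000))
 *             | (uint16_t)((uint16_t)uRsp + cbMem);                   - SP += cbMem with 16-bit wrap
 */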
6622
6623
6624DECL_FORCE_INLINE(uint32_t)
6625iemNativeEmitStackPopUse32Sp(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxRegRsp, uint8_t idxRegEffSp, uint8_t cbMem)
6626{
6627 /* Use32BitSp: */
6628 off = iemNativeEmitLoadGprFromGpr32Ex(pCodeBuf, off, idxRegEffSp, idxRegRsp);
6629 off = iemNativeEmitAddGpr32ImmEx(pCodeBuf, off, idxRegRsp, cbMem);
6630 return off;
6631}
6632
6633
6634/** IEM_MC[|_FLAT32|_FLAT64]_POP_GREG_U16/32/64 */
6635DECL_INLINE_THROW(uint32_t)
6636iemNativeEmitStackPopGReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGReg,
6637 uint32_t cBitsVarAndFlat, uintptr_t pfnFunction, uint8_t idxInstr)
6638{
6639 /*
6640 * Assert sanity.
6641 */
6642 Assert(idxGReg < 16);
6643#ifdef VBOX_STRICT
6644 if (RT_BYTE2(cBitsVarAndFlat) != 0)
6645 {
6646 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
6647 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
6648 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
6649 Assert( pfnFunction
6650 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
6651 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 32, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU32
6652 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU16
6653 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 64, 0, 0) ? (uintptr_t)iemNativeHlpStackFlatFetchU64
6654 : UINT64_C(0xc000b000a0009000) ));
6655 }
6656 else
6657 Assert( pfnFunction
6658 == ( cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(16, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU16
6659 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(32, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU32
6660 : cBitsVarAndFlat == RT_MAKE_U32_FROM_U8(64, 0, 0, 0) ? (uintptr_t)iemNativeHlpStackFetchU64
6661 : UINT64_C(0xc000b000a0009000) ));
6662#endif
6663
6664#ifdef VBOX_STRICT
6665 /*
6666 * Check that the fExec flags we've got make sense.
6667 */
6668 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
6669#endif
6670
6671 /*
6672 * To keep things simple we have to commit any pending writes first as we
6673 * may end up making calls.
6674 */
6675 off = iemNativeRegFlushPendingWrites(pReNative, off);
6676
6677 /*
6678 * Determine the effective stack pointer, for non-FLAT modes we also update RSP.
6679 * For FLAT modes we'll do this in TlbDone as we'll be using the incoming RSP
6680 * directly as the effective stack pointer.
6681 * (Code structure is very similar to that of PUSH)
6682 */
6683 uint8_t const cbMem = RT_BYTE1(cBitsVarAndFlat) / 8;
6684 uint8_t const cBitsFlat = RT_BYTE2(cBitsVarAndFlat); RT_NOREF(cBitsFlat);
6685 uint8_t const idxRegRsp = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(X86_GREG_xSP),
6686 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
6687 uint8_t const idxRegEffSp = cBitsFlat != 0 ? idxRegRsp : iemNativeRegAllocTmp(pReNative, &off);
6688 /** @todo can do a better job picking the register here. For cbMem >= 4 this
6689 * will be the resulting register value. */
6690 uint8_t const idxRegMemResult = iemNativeRegAllocTmp(pReNative, &off); /* pointer then value; arm64 SP += 2/4 helper too. */
6691
6692 uint32_t offFixupJumpToUseOtherBitSp = UINT32_MAX;
6693 if (cBitsFlat != 0)
6694 {
6695 Assert(idxRegEffSp == idxRegRsp);
6696 Assert(cBitsFlat == 32 || cBitsFlat == 64);
6697 Assert(IEM_F_MODE_X86_IS_FLAT(pReNative->fExec));
6698 }
6699 else /** @todo We can skip the test if we're targeting pre-386 CPUs. */
6700 {
6701 Assert(idxRegEffSp != idxRegRsp);
6702 uint8_t const idxRegSsAttr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_ATTRIB(X86_SREG_SS),
6703 kIemNativeGstRegUse_ReadOnly);
6704#ifdef RT_ARCH_AMD64
6705 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6706#else
6707 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6708#endif
6709 off = iemNativeEmitTestAnyBitsInGpr32Ex(pCodeBuf, off, idxRegSsAttr, X86DESCATTR_D);
6710 iemNativeRegFreeTmp(pReNative, idxRegSsAttr);
6711 offFixupJumpToUseOtherBitSp = off;
6712 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6713 {
6714/** @todo can skip idxRegRsp updating when popping ESP. */
6715 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_e); /* jump if zero */
6716 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6717 }
6718 else
6719 {
6720 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off /*8-bit suffices*/, kIemNativeInstrCond_ne); /* jump if not zero */
6721 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
6722 }
6723 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6724 }
6725 /* SpUpdateEnd: */
6726 uint32_t const offLabelSpUpdateEnd = off;
6727
6728 /*
6729     * Okay, now prepare for the TLB lookup and jump to the lookup code (or to
6730     * TlbMiss if we're skipping the lookup).
6731 */
6732 uint8_t const iSegReg = cBitsFlat != 0 ? UINT8_MAX : X86_SREG_SS;
6733 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, idxRegEffSp, &off, iSegReg, cbMem);
6734 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
6735 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, UINT32_MAX, uTlbSeqNo);
6736 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
6737 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
6738 : UINT32_MAX;
6739
6740 if (!TlbState.fSkip)
6741 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
6742 else
6743 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbMiss); /** @todo short jump */
6744
6745 /*
6746     * Use16BitSp / Use32BitSp - whichever SP width wasn't handled above:
6747 */
6748 if (cBitsFlat == 0)
6749 {
6750#ifdef RT_ARCH_AMD64
6751 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6752#else
6753 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6754#endif
6755 iemNativeFixupFixedJump(pReNative, offFixupJumpToUseOtherBitSp, off);
6756 if ((pReNative->fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT)
6757 off = iemNativeEmitStackPopUse16Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem, idxRegMemResult);
6758 else
6759 off = iemNativeEmitStackPopUse32Sp(pCodeBuf, off, idxRegRsp, idxRegEffSp, cbMem);
6760 off = iemNativeEmitJmpToFixedEx(pCodeBuf, off, offLabelSpUpdateEnd);
6761 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6762 }
6763
6764 /*
6765 * TlbMiss:
6766 *
6767     * Call helper to do the popping.
6768 */
6769 iemNativeLabelDefine(pReNative, idxLabelTlbMiss, off);
6770
6771#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6772 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6773#else
6774 RT_NOREF(idxInstr);
6775#endif
6776
6777 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave()
6778 | (idxRegMemResult < RT_ELEMENTS(pReNative->Core.aHstRegs) ? RT_BIT_32(idxRegMemResult) : 0)
6779 | (idxRegEffSp != idxRegRsp ? RT_BIT_32(idxRegEffSp) : 0);
6780 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
6781
6782
6783 /* IEMNATIVE_CALL_ARG1_GREG = EffSp/RSP */
6784 if (idxRegEffSp != IEMNATIVE_CALL_ARG1_GREG)
6785 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxRegEffSp);
6786
6787 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
6788 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6789
6790 /* Done setting up parameters, make the call. */
6791 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
6792
6793 /* Move the return register content to idxRegMemResult. */
6794 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
6795 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
6796
6797 /* Restore variables and guest shadow registers to volatile registers. */
6798 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
6799 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
6800
6801#ifdef IEMNATIVE_WITH_TLB_LOOKUP
6802 if (!TlbState.fSkip)
6803 {
6804 /* end of TlbMiss - Jump to the done label. */
6805 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
6806 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
6807
6808 /*
6809 * TlbLookup:
6810 */
6811 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, cbMem - 1, IEM_ACCESS_TYPE_READ,
6812 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
6813
6814 /*
6815     * Emit code to load the value (from the address in idxRegMemResult into idxRegMemResult).
6816 */
6817 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6818# ifdef VBOX_WITH_STATISTICS
6819 off = iemNativeEmitIncStamCounterInVCpuEx(pCodeBuf, off, TlbState.idxReg1, TlbState.idxReg2,
6820 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForStack));
6821# endif
6822 switch (cbMem)
6823 {
6824 case 2:
6825 off = iemNativeEmitLoadGprByGprU16Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6826 break;
6827 case 4:
6828 off = iemNativeEmitLoadGprByGprU32Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6829 break;
6830 case 8:
6831 off = iemNativeEmitLoadGprByGprU64Ex(pCodeBuf, off, idxRegMemResult, idxRegMemResult);
6832 break;
6833 default:
6834 AssertFailed();
6835 }
6836
6837 TlbState.freeRegsAndReleaseVars(pReNative);
6838
6839 /*
6840 * TlbDone:
6841 *
6842     * Set the new RSP value (FLAT accesses need to calculate it first) and
6843 * commit the popped register value.
6844 */
6845 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
6846 }
6847#endif /* IEMNATIVE_WITH_TLB_LOOKUP */
6848
6849 if (idxGReg != X86_GREG_xSP)
6850 {
6851 /* Set the register. */
6852 if (cbMem >= sizeof(uint32_t))
6853 {
6854#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
6855 AssertMsg( pReNative->idxCurCall == 0
6856 || IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))),
6857 ("%s - %u\n", g_aGstShadowInfo[idxGReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, IEMNATIVEGSTREG_GPR(idxGReg))));
6858#endif
6859 iemNativeRegClearAndMarkAsGstRegShadow(pReNative, idxRegMemResult, IEMNATIVEGSTREG_GPR(idxGReg), off);
6860#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6861 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(idxGReg);
6862#endif
6863#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6864 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegMemResult,
6865 RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
6866#endif
6867 }
6868 else
6869 {
6870 Assert(cbMem == sizeof(uint16_t));
6871 uint8_t const idxRegDst = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_GPR(idxGReg),
6872 kIemNativeGstRegUse_ForUpdate);
6873 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegDst, idxRegMemResult);
6874#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6875 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegDst, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.aGRegs[idxGReg]));
6876#endif
6877 iemNativeRegFreeTmp(pReNative, idxRegDst);
6878 }
6879
6880 /* Complete RSP calculation for FLAT mode. */
6881 if (idxRegEffSp == idxRegRsp)
6882 {
6883        if (cBitsFlat == 64)
6884            off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
6885        else
6886            off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
6887 }
6888 }
6889 else
6890 {
6891        /* We're popping RSP, ESP or SP. Only the latter needs a bit of extra work, of course. */
6892 if (cbMem == sizeof(uint64_t))
6893 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegRsp, idxRegMemResult);
6894 else if (cbMem == sizeof(uint32_t))
6895 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegRsp, idxRegMemResult);
6896 else
6897 {
6898 if (idxRegEffSp == idxRegRsp)
6899 {
6900            if (cBitsFlat == 64)
6901                off = iemNativeEmitAddGprImm8(pReNative, off, idxRegRsp, cbMem);
6902            else
6903                off = iemNativeEmitAddGpr32Imm8(pReNative, off, idxRegRsp, cbMem);
6904 }
6905 off = iemNativeEmitGprMergeInGpr16(pReNative, off, idxRegRsp, idxRegMemResult);
6906 }
6907 }
6908
6909#if !defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6910 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxRegRsp, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rsp));
6911#endif
6912
6913 iemNativeRegFreeTmp(pReNative, idxRegRsp);
6914 if (idxRegEffSp != idxRegRsp)
6915 iemNativeRegFreeTmp(pReNative, idxRegEffSp);
6916 iemNativeRegFreeTmp(pReNative, idxRegMemResult);
6917
6918 return off;
6919}
6920
6921
6922
6923/*********************************************************************************************************************************
6924* Memory mapping (IEM_MEM_MAP_XXX, IEM_MEM_FLAT_MAP_XXX). *
6925*********************************************************************************************************************************/
6926
6927#define IEM_MC_MEM_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6928 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6929 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
6930 (uintptr_t)iemNativeHlpMemMapDataU8Atomic, pCallEntry->idxInstr)
6931
6932#define IEM_MC_MEM_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6933 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6934 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
6935 (uintptr_t)iemNativeHlpMemMapDataU8Rw, pCallEntry->idxInstr)
6936
6937#define IEM_MC_MEM_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6938 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6939 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
6940 (uintptr_t)iemNativeHlpMemMapDataU8Wo, pCallEntry->idxInstr) \
6941
6942#define IEM_MC_MEM_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6943 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint8_t), \
6944 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
6945 (uintptr_t)iemNativeHlpMemMapDataU8Ro, pCallEntry->idxInstr)
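/* Note on fAlignMask in the map macros: it is simply the natural alignment mask
   of the access, i.e. sizeof(type) - 1, with 0 for byte accesses.  For example
   the U32 maps pass 3, so the TLB lookup code checks (GCPtrMem & 3) when
   deciding whether the access can be handled inline. */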
6946
6947
6948#define IEM_MC_MEM_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6949 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6950 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6951 (uintptr_t)iemNativeHlpMemMapDataU16Atomic, pCallEntry->idxInstr)
6952
6953#define IEM_MC_MEM_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6954 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6955 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6956 (uintptr_t)iemNativeHlpMemMapDataU16Rw, pCallEntry->idxInstr)
6957
6958#define IEM_MC_MEM_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6959 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6960 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6961 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
6962
6963#define IEM_MC_MEM_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6964 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint16_t), \
6965 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6966 (uintptr_t)iemNativeHlpMemMapDataU16Ro, pCallEntry->idxInstr)
6967
6968#define IEM_MC_MEM_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6969 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int16_t), \
6970 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
6971 (uintptr_t)iemNativeHlpMemMapDataU16Wo, pCallEntry->idxInstr) \
6972
6973
6974#define IEM_MC_MEM_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6975 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6976 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6977 (uintptr_t)iemNativeHlpMemMapDataU32Atomic, pCallEntry->idxInstr)
6978
6979#define IEM_MC_MEM_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6980 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6981 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6982 (uintptr_t)iemNativeHlpMemMapDataU32Rw, pCallEntry->idxInstr)
6983
6984#define IEM_MC_MEM_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6985 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6986 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6987 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
6988
6989#define IEM_MC_MEM_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6990 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint32_t), \
6991 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6992 (uintptr_t)iemNativeHlpMemMapDataU32Ro, pCallEntry->idxInstr)
6993
6994#define IEM_MC_MEM_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
6995 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int32_t), \
6996 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
6997 (uintptr_t)iemNativeHlpMemMapDataU32Wo, pCallEntry->idxInstr) \
6998
6999
7000#define IEM_MC_MEM_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7001 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7002 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7003 (uintptr_t)iemNativeHlpMemMapDataU64Atomic, pCallEntry->idxInstr)
7004
7005#define IEM_MC_MEM_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7006 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7007 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7008 (uintptr_t)iemNativeHlpMemMapDataU64Rw, pCallEntry->idxInstr)
7009#define IEM_MC_MEM_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7010 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7011 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7012 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
7013
7014#define IEM_MC_MEM_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7015 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(uint64_t), \
7016 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7017 (uintptr_t)iemNativeHlpMemMapDataU64Ro, pCallEntry->idxInstr)
7018
7019#define IEM_MC_MEM_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7020 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(int64_t), \
7021 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7022 (uintptr_t)iemNativeHlpMemMapDataU64Wo, pCallEntry->idxInstr) \
7023
7024
7025#define IEM_MC_MEM_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7026 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
7027 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7028 (uintptr_t)iemNativeHlpMemMapDataR80Wo, pCallEntry->idxInstr) \
7029
7030#define IEM_MC_MEM_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7031 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTFLOAT80U), \
7032 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
7033 (uintptr_t)iemNativeHlpMemMapDataD80Wo, pCallEntry->idxInstr) \
7034
7035
7036#define IEM_MC_MEM_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7037 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7038 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7039 (uintptr_t)iemNativeHlpMemMapDataU128Atomic, pCallEntry->idxInstr)
7040
7041#define IEM_MC_MEM_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7042 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7043 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7044 (uintptr_t)iemNativeHlpMemMapDataU128Rw, pCallEntry->idxInstr)
7045
7046#define IEM_MC_MEM_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7047 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7048 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7049 (uintptr_t)iemNativeHlpMemMapDataU128Wo, pCallEntry->idxInstr) \
7050
7051#define IEM_MC_MEM_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem) \
7052 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, a_iSeg, a_GCPtrMem, sizeof(RTUINT128U), \
7053 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7054 (uintptr_t)iemNativeHlpMemMapDataU128Ro, pCallEntry->idxInstr)
7055
7056
7057
7058#define IEM_MC_MEM_FLAT_MAP_U8_ATOMIC(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
7059 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7060 IEM_ACCESS_DATA_ATOMIC, 0 /*fAlignMask*/, \
7061 (uintptr_t)iemNativeHlpMemFlatMapDataU8Atomic, pCallEntry->idxInstr)
7062
7063#define IEM_MC_MEM_FLAT_MAP_U8_RW(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
7064 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7065 IEM_ACCESS_DATA_RW, 0 /*fAlignMask*/, \
7066 (uintptr_t)iemNativeHlpMemFlatMapDataU8Rw, pCallEntry->idxInstr)
7067
7068#define IEM_MC_MEM_FLAT_MAP_U8_WO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
7069 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7070 IEM_ACCESS_DATA_W, 0 /*fAlignMask*/, \
7071 (uintptr_t)iemNativeHlpMemFlatMapDataU8Wo, pCallEntry->idxInstr) \
7072
7073#define IEM_MC_MEM_FLAT_MAP_U8_RO(a_pu8Mem, a_bUnmapInfo, a_GCPtrMem) \
7074 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu8Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint8_t), \
7075 IEM_ACCESS_DATA_R, 0 /*fAlignMask*/, \
7076 (uintptr_t)iemNativeHlpMemFlatMapDataU8Ro, pCallEntry->idxInstr)
7077
7078
7079#define IEM_MC_MEM_FLAT_MAP_U16_ATOMIC(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
7080 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7081 IEM_ACCESS_DATA_ATOMIC, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7082 (uintptr_t)iemNativeHlpMemFlatMapDataU16Atomic, pCallEntry->idxInstr)
7083
7084#define IEM_MC_MEM_FLAT_MAP_U16_RW(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
7085 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7086 IEM_ACCESS_DATA_RW, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7087 (uintptr_t)iemNativeHlpMemFlatMapDataU16Rw, pCallEntry->idxInstr)
7088
7089#define IEM_MC_MEM_FLAT_MAP_U16_WO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
7090 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7091 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7092 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
7093
7094#define IEM_MC_MEM_FLAT_MAP_U16_RO(a_pu16Mem, a_bUnmapInfo, a_GCPtrMem) \
7095 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint16_t), \
7096 IEM_ACCESS_DATA_R, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7097 (uintptr_t)iemNativeHlpMemFlatMapDataU16Ro, pCallEntry->idxInstr)
7098
7099#define IEM_MC_MEM_FLAT_MAP_I16_WO(a_pi16Mem, a_bUnmapInfo, a_GCPtrMem) \
7100 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi16Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int16_t), \
7101 IEM_ACCESS_DATA_W, sizeof(uint16_t) - 1 /*fAlignMask*/, \
7102 (uintptr_t)iemNativeHlpMemFlatMapDataU16Wo, pCallEntry->idxInstr) \
7103
7104
7105#define IEM_MC_MEM_FLAT_MAP_U32_ATOMIC(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7106 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7107 IEM_ACCESS_DATA_ATOMIC, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7108 (uintptr_t)iemNativeHlpMemFlatMapDataU32Atomic, pCallEntry->idxInstr)
7109
7110#define IEM_MC_MEM_FLAT_MAP_U32_RW(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7111 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7112 IEM_ACCESS_DATA_RW, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7113 (uintptr_t)iemNativeHlpMemFlatMapDataU32Rw, pCallEntry->idxInstr)
7114
7115#define IEM_MC_MEM_FLAT_MAP_U32_WO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7116 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7117 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7118 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
7119
7120#define IEM_MC_MEM_FLAT_MAP_U32_RO(a_pu32Mem, a_bUnmapInfo, a_GCPtrMem) \
7121 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint32_t), \
7122 IEM_ACCESS_DATA_R, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7123 (uintptr_t)iemNativeHlpMemFlatMapDataU32Ro, pCallEntry->idxInstr)
7124
7125#define IEM_MC_MEM_FLAT_MAP_I32_WO(a_pi32Mem, a_bUnmapInfo, a_GCPtrMem) \
7126 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi32Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int32_t), \
7127 IEM_ACCESS_DATA_W, sizeof(uint32_t) - 1 /*fAlignMask*/, \
7128 (uintptr_t)iemNativeHlpMemFlatMapDataU32Wo, pCallEntry->idxInstr) \
7129
7130
7131#define IEM_MC_MEM_FLAT_MAP_U64_ATOMIC(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7132 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7133 IEM_ACCESS_DATA_ATOMIC, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7134 (uintptr_t)iemNativeHlpMemFlatMapDataU64Atomic, pCallEntry->idxInstr)
7135
7136#define IEM_MC_MEM_FLAT_MAP_U64_RW(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7137 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7138 IEM_ACCESS_DATA_RW, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7139 (uintptr_t)iemNativeHlpMemFlatMapDataU64Rw, pCallEntry->idxInstr)
7140
7141#define IEM_MC_MEM_FLAT_MAP_U64_WO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7142 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7143 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7144 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
7145
7146#define IEM_MC_MEM_FLAT_MAP_U64_RO(a_pu64Mem, a_bUnmapInfo, a_GCPtrMem) \
7147 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(uint64_t), \
7148 IEM_ACCESS_DATA_R, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7149 (uintptr_t)iemNativeHlpMemFlatMapDataU64Ro, pCallEntry->idxInstr)
7150
7151#define IEM_MC_MEM_FLAT_MAP_I64_WO(a_pi64Mem, a_bUnmapInfo, a_GCPtrMem) \
7152 off = iemNativeEmitMemMapCommon(pReNative, off, a_pi64Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(int64_t), \
7153 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7154 (uintptr_t)iemNativeHlpMemFlatMapDataU64Wo, pCallEntry->idxInstr) \
7155
7156
7157#define IEM_MC_MEM_FLAT_MAP_R80_WO(a_pr80Mem, a_bUnmapInfo, a_GCPtrMem) \
7158 off = iemNativeEmitMemMapCommon(pReNative, off, a_pr80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
7159 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, \
7160 (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo, pCallEntry->idxInstr) \
7161
7162#define IEM_MC_MEM_FLAT_MAP_D80_WO(a_pd80Mem, a_bUnmapInfo, a_GCPtrMem) \
7163 off = iemNativeEmitMemMapCommon(pReNative, off, a_pd80Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTFLOAT80U), \
7164 IEM_ACCESS_DATA_W, sizeof(uint64_t) - 1 /*fAlignMask*/, /** @todo check BCD align */ \
7165 (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo, pCallEntry->idxInstr) \
7166
7167
7168#define IEM_MC_MEM_FLAT_MAP_U128_ATOMIC(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7169 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7170 IEM_ACCESS_DATA_ATOMIC, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7171 (uintptr_t)iemNativeHlpMemFlatMapDataU128Atomic, pCallEntry->idxInstr)
7172
7173#define IEM_MC_MEM_FLAT_MAP_U128_RW(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7174 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7175 IEM_ACCESS_DATA_RW, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7176 (uintptr_t)iemNativeHlpMemFlatMapDataU128Rw, pCallEntry->idxInstr)
7177
7178#define IEM_MC_MEM_FLAT_MAP_U128_WO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7179 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7180 IEM_ACCESS_DATA_W, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7181 (uintptr_t)iemNativeHlpMemFlatMapDataU128Wo, pCallEntry->idxInstr) \
7182
7183#define IEM_MC_MEM_FLAT_MAP_U128_RO(a_pu128Mem, a_bUnmapInfo, a_GCPtrMem) \
7184 off = iemNativeEmitMemMapCommon(pReNative, off, a_pu128Mem, a_bUnmapInfo, UINT8_MAX, a_GCPtrMem, sizeof(RTUINT128U), \
7185 IEM_ACCESS_DATA_R, sizeof(RTUINT128U) - 1 /*fAlignMask*/, \
7186 (uintptr_t)iemNativeHlpMemFlatMapDataU128Ro, pCallEntry->idxInstr)
7187
7188
7189DECL_INLINE_THROW(uint32_t)
7190iemNativeEmitMemMapCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarMem, uint8_t idxVarUnmapInfo,
7191 uint8_t iSegReg, uint8_t idxVarGCPtrMem, uint8_t cbMem, uint32_t fAccess, uint8_t fAlignMask,
7192 uintptr_t pfnFunction, uint8_t idxInstr)
7193{
7194 /*
7195 * Assert sanity.
7196 */
7197 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarMem);
7198 PIEMNATIVEVAR const pVarMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarMem)];
7199 AssertStmt( pVarMem->enmKind == kIemNativeVarKind_Invalid
7200 && pVarMem->cbVar == sizeof(void *),
7201 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7202
7203 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
7204 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
7205 AssertStmt( pVarUnmapInfo->enmKind == kIemNativeVarKind_Invalid
7206 && pVarUnmapInfo->cbVar == sizeof(uint8_t),
7207 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7208
7209 PIEMNATIVEVAR const pVarGCPtrMem = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarGCPtrMem)];
7210 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarGCPtrMem);
7211 AssertStmt( pVarGCPtrMem->enmKind == kIemNativeVarKind_Immediate
7212 || pVarGCPtrMem->enmKind == kIemNativeVarKind_Stack,
7213 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_UNEXPECTED_KIND));
7214
7215 Assert(iSegReg < 6 || iSegReg == UINT8_MAX);
7216
7217 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
7218
7219#ifdef VBOX_STRICT
7220# define IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) \
7221 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ) \
7222 ? (uintptr_t)RT_CONCAT(a_fnBase,Rw) \
7223 : ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == IEM_ACCESS_TYPE_READ \
7224 ? (uintptr_t)RT_CONCAT(a_fnBase,Ro) : (uintptr_t)RT_CONCAT(a_fnBase,Wo) )
7225# define IEM_MAP_HLP_FN(a_fAccess, a_fnBase) \
7226 ( ((a_fAccess) & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC)) == (IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_TYPE_READ | IEM_ACCESS_ATOMIC) \
7227 ? (uintptr_t)RT_CONCAT(a_fnBase,Atomic) \
7228 : IEM_MAP_HLP_FN_NO_AT(a_fAccess, a_fnBase) )
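    /* Example of how the helpers above resolve (illustrative only):
            IEM_MAP_HLP_FN(IEM_ACCESS_DATA_RW,     iemNativeHlpMemMapDataU32) -> (uintptr_t)iemNativeHlpMemMapDataU32Rw
            IEM_MAP_HLP_FN(IEM_ACCESS_DATA_ATOMIC, iemNativeHlpMemMapDataU32) -> (uintptr_t)iemNativeHlpMemMapDataU32Atomic
       i.e. the access type bits pick the Rw/Ro/Wo/Atomic suffix via RT_CONCAT. */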
7229
7230 if (iSegReg == UINT8_MAX)
7231 {
7232 Assert( (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_64BIT
7233 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_PROT_FLAT
7234 || (pReNative->fExec & IEM_F_MODE_MASK) == IEM_F_MODE_X86_32BIT_FLAT);
7235 switch (cbMem)
7236 {
7237 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU8)); break;
7238 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU16)); break;
7239 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU32)); break;
7240 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU64)); break;
7241 case 10:
7242 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataR80Wo
7243 || pfnFunction == (uintptr_t)iemNativeHlpMemFlatMapDataD80Wo);
7244 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
7245 break;
7246 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemFlatMapDataU128)); break;
7247# if 0
7248 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU256)); break;
7249 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemFlatMapDataU512)); break;
7250# endif
7251 default: AssertFailed(); break;
7252 }
7253 }
7254 else
7255 {
7256 Assert(iSegReg < 6);
7257 switch (cbMem)
7258 {
7259 case 1: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU8)); break;
7260 case 2: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU16)); break;
7261 case 4: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU32)); break;
7262 case 8: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU64)); break;
7263 case 10:
7264 Assert( pfnFunction == (uintptr_t)iemNativeHlpMemMapDataR80Wo
7265 || pfnFunction == (uintptr_t)iemNativeHlpMemMapDataD80Wo);
7266 Assert((fAccess & IEM_ACCESS_TYPE_MASK) == IEM_ACCESS_TYPE_WRITE);
7267 break;
7268 case 16: Assert(pfnFunction == IEM_MAP_HLP_FN(fAccess, iemNativeHlpMemMapDataU128)); break;
7269# if 0
7270 case 32: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU256)); break;
7271 case 64: Assert(pfnFunction == IEM_MAP_HLP_FN_NO_AT(fAccess, iemNativeHlpMemMapDataU512)); break;
7272# endif
7273 default: AssertFailed(); break;
7274 }
7275 }
7276# undef IEM_MAP_HLP_FN
7277# undef IEM_MAP_HLP_FN_NO_AT
7278#endif
7279
7280#ifdef VBOX_STRICT
7281 /*
7282 * Check that the fExec flags we've got make sense.
7283 */
7284 off = iemNativeEmitExecFlagsCheck(pReNative, off, pReNative->fExec);
7285#endif
7286
7287 /*
7288 * To keep things simple we have to commit any pending writes first as we
7289 * may end up making calls.
7290 */
7291 off = iemNativeRegFlushPendingWrites(pReNative, off);
7292
7293#ifdef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7294 /*
7295 * Move/spill/flush stuff out of call-volatile registers.
7296 * This is the easy way out. We could contain this to the tlb-miss branch
7297 * by saving and restoring active stuff here.
7298 */
7299 /** @todo save+restore active registers and maybe guest shadows in tlb-miss. */
7300 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */);
7301#endif
7302
7303 /* The bUnmapInfo variable will get a register in the tlb-hit code path,
7304 while the tlb-miss codepath will temporarily put it on the stack.
7305       Set the type to stack here so we don't need to do it twice below. */
7306 iemNativeVarSetKindToStack(pReNative, idxVarUnmapInfo);
7307 uint8_t const idxRegUnmapInfo = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off);
7308 /** @todo use a tmp register from TlbState, since they'll be free after tlb
7309 * lookup is done. */
7310
7311 /*
7312 * Define labels and allocate the result register (trying for the return
7313 * register if we can).
7314 */
7315 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
7316 uint8_t const idxRegMemResult = !(pReNative->Core.bmHstRegs & RT_BIT_32(IEMNATIVE_CALL_RET_GREG))
7317 ? iemNativeVarRegisterSetAndAcquire(pReNative, idxVarMem, IEMNATIVE_CALL_RET_GREG, &off)
7318 : iemNativeVarRegisterAcquire(pReNative, idxVarMem, &off);
7319 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, &off, idxVarGCPtrMem, iSegReg, cbMem);
7320 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
7321 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
7322 : UINT32_MAX;
7323//off=iemNativeEmitBrk(pReNative, off, 0);
7324 /*
7325 * Jump to the TLB lookup code.
7326 */
7327 if (!TlbState.fSkip)
7328 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
7329
7330 /*
7331 * TlbMiss:
7332 *
7333     * Call helper to do the mapping.
7334 * We flush all guest register shadow copies here.
7335 */
7336 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
7337
7338#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7339 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7340#else
7341 RT_NOREF(idxInstr);
7342#endif
7343
7344#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7345 /* Save variables in volatile registers. */
7346 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegMemResult) | RT_BIT_32(idxRegUnmapInfo);
7347 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
7348#endif
7349
7350 /* IEMNATIVE_CALL_ARG2_GREG = GCPtrMem - load first as it is from a variable. */
7351    off = iemNativeEmitLoadArgGregFromImmOrStackVar(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, idxVarGCPtrMem, 0 /*offAddend*/,
7352#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7353 IEMNATIVE_CALL_VOLATILE_GREG_MASK, true /*fSpilledVarsInvolatileRegs*/);
7354#else
7355 IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7356#endif
7357
7358 /* IEMNATIVE_CALL_ARG3_GREG = iSegReg */
7359 if (iSegReg != UINT8_MAX)
7360 {
7361 AssertStmt(iSegReg < 6, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_EMIT_BAD_SEG_REG_NO));
7362 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, iSegReg);
7363 }
7364
7365 /* IEMNATIVE_CALL_ARG1_GREG = &idxVarUnmapInfo; stackslot address, load any register with result after the call. */
7366 int32_t const offBpDispVarUnmapInfo = iemNativeStackCalcBpDisp(iemNativeVarGetStackSlot(pReNative, idxVarUnmapInfo));
7367 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offBpDispVarUnmapInfo);
7368
7369 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7370 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7371
7372 /* Done setting up parameters, make the call. */
7373 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7374
7375 /*
7376 * Put the output in the right registers.
7377 */
7378 Assert(idxRegMemResult == pVarMem->idxReg);
7379 if (idxRegMemResult != IEMNATIVE_CALL_RET_GREG)
7380 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegMemResult, IEMNATIVE_CALL_RET_GREG);
7381
7382#ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7383 /* Restore variables and guest shadow registers to volatile registers. */
7384 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
7385 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows());
7386#endif
7387
7388 Assert(pVarUnmapInfo->idxReg == idxRegUnmapInfo);
7389 off = iemNativeEmitLoadGprByBpU8(pReNative, off, idxRegUnmapInfo, offBpDispVarUnmapInfo);
7390
7391#ifdef IEMNATIVE_WITH_TLB_LOOKUP
7392 if (!TlbState.fSkip)
7393 {
7394        /* end of TlbMiss - Jump to the done label. */
7395 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
7396 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
7397
7398 /*
7399 * TlbLookup:
7400 */
7401 off = iemNativeEmitTlbLookup<true>(pReNative, off, &TlbState, iSegReg, cbMem, fAlignMask, fAccess,
7402 idxLabelTlbLookup, idxLabelTlbMiss, idxRegMemResult);
7403# ifdef VBOX_WITH_STATISTICS
7404 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
7405 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTlbHitsForMapped));
7406# endif
7407
7408 /* [idxVarUnmapInfo] = 0; */
7409 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegUnmapInfo, 0);
7410
7411 /*
7412 * TlbDone:
7413 */
7414 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
7415
7416 TlbState.freeRegsAndReleaseVars(pReNative, idxVarGCPtrMem);
7417
7418# ifndef IEMNATIVE_WITH_FREE_AND_FLUSH_VOLATILE_REGS_AT_TLB_LOOKUP
7419 /* Temp Hack: Flush all guest shadows in volatile registers in case of TLB miss. */
7420 iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7421# endif
7422 }
7423#else
7424 RT_NOREF(fAccess, fAlignMask, idxLabelTlbMiss);
7425#endif
7426
7427 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
7428 iemNativeVarRegisterRelease(pReNative, idxVarMem);
7429
7430 return off;
7431}
7432
7433
7434#define IEM_MC_MEM_COMMIT_AND_UNMAP_ATOMIC(a_bMapInfo) \
7435 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_ATOMIC, \
7436 (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic, pCallEntry->idxInstr)
7437
7438#define IEM_MC_MEM_COMMIT_AND_UNMAP_RW(a_bMapInfo) \
7439 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_RW, \
7440 (uintptr_t)iemNativeHlpMemCommitAndUnmapRw, pCallEntry->idxInstr)
7441
7442#define IEM_MC_MEM_COMMIT_AND_UNMAP_WO(a_bMapInfo) \
7443 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_W, \
7444 (uintptr_t)iemNativeHlpMemCommitAndUnmapWo, pCallEntry->idxInstr)
7445
7446#define IEM_MC_MEM_COMMIT_AND_UNMAP_RO(a_bMapInfo) \
7447 off = iemNativeEmitMemCommitAndUnmap(pReNative, off, (a_bMapInfo), IEM_ACCESS_DATA_R, \
7448 (uintptr_t)iemNativeHlpMemCommitAndUnmapRo, pCallEntry->idxInstr)
7449
7450DECL_INLINE_THROW(uint32_t)
7451iemNativeEmitMemCommitAndUnmap(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVarUnmapInfo,
7452 uint32_t fAccess, uintptr_t pfnFunction, uint8_t idxInstr)
7453{
7454 /*
7455 * Assert sanity.
7456 */
7457 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVarUnmapInfo);
7458#if defined(VBOX_STRICT) || defined(RT_ARCH_AMD64)
7459 PIEMNATIVEVAR const pVarUnmapInfo = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)];
7460#endif
7461 Assert(pVarUnmapInfo->enmKind == kIemNativeVarKind_Stack);
7462 Assert( pVarUnmapInfo->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
7463 || pVarUnmapInfo->idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS); /* must be initialized */
7464#ifdef VBOX_STRICT
7465 switch (fAccess & (IEM_ACCESS_TYPE_MASK | IEM_ACCESS_ATOMIC))
7466 {
7467 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE | IEM_ACCESS_ATOMIC:
7468 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapAtomic); break;
7469 case IEM_ACCESS_TYPE_READ | IEM_ACCESS_TYPE_WRITE:
7470 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRw); break;
7471 case IEM_ACCESS_TYPE_WRITE:
7472 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapWo); break;
7473 case IEM_ACCESS_TYPE_READ:
7474 Assert(pfnFunction == (uintptr_t)iemNativeHlpMemCommitAndUnmapRo); break;
7475 default: AssertFailed();
7476 }
7477#else
7478 RT_NOREF(fAccess);
7479#endif
7480
7481 /*
7482 * To keep things simple we have to commit any pending writes first as we
7483 * may end up making calls (there shouldn't be any at this point, so this
7484 * is just for consistency).
7485 */
7486 /** @todo we could postpone this till we make the call and reload the
7487 * registers after returning from the call. Not sure if that's sensible or
7488 * not, though. */
7489 off = iemNativeRegFlushPendingWrites(pReNative, off);
7490
7491 /*
7492 * Move/spill/flush stuff out of call-volatile registers.
7493 *
7494 * We exclude any register holding the bUnmapInfo variable, as we'll be
7495 * checking it after returning from the call and will free it afterwards.
7496 */
7497 /** @todo save+restore active registers and maybe guest shadows in miss
7498 * scenario. */
7499 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /* vacate all non-volatile regs */,
7500 RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVarUnmapInfo)));
7501
7502 /*
7503 * If idxVarUnmapInfo is zero, we can skip all this. Otherwise we'll have
7504 * to call the unmap helper function.
7505 *
7506     * The likelihood of it being zero is higher than for the TLB hit when doing
7507     * the mapping, as a TLB miss for a well-aligned and unproblematic memory
7508     * access should also end up with a mapping that won't need special unmapping.
7509 */
7510 /** @todo Go over iemMemMapJmp and implement the no-unmap-needed case! That
7511 * should speed up things for the pure interpreter as well when TLBs
7512 * are enabled. */
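    /* Rough shape of the code emitted below (sketch only; ARG0/ARG1 stand for the host calling convention argument registers):
     *      test    bUnmapInfo, 0ffh        ; in its stack slot or host register
     *      jz      .done
     *      mov     ARG1, bUnmapInfo
     *      mov     ARG0, pVCpu
     *      call    pfnFunction
     *  .done:
     */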
7513#ifdef RT_ARCH_AMD64
7514 if (pVarUnmapInfo->idxReg == UINT8_MAX)
7515 {
7516 /* test byte [rbp - xxx], 0ffh */
7517 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
7518 pbCodeBuf[off++] = 0xf6;
7519 uint8_t const idxStackSlot = pVarUnmapInfo->idxStackSlot;
7520 off = iemNativeEmitGprByBpDisp(pbCodeBuf, off, 0, iemNativeStackCalcBpDisp(idxStackSlot), pReNative);
7521 pbCodeBuf[off++] = 0xff;
7522 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7523 }
7524 else
7525#endif
7526 {
7527 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVarUnmapInfo, &off,
7528 true /*fInitialized*/, IEMNATIVE_CALL_ARG1_GREG /*idxRegPref*/);
7529 off = iemNativeEmitTestAnyBitsInGpr8(pReNative, off, idxVarReg, 0xff);
7530 iemNativeVarRegisterRelease(pReNative, idxVarUnmapInfo);
7531 }
7532 uint32_t const offJmpFixup = off;
7533    off = iemNativeEmitJzToFixed(pReNative, off, off /* ASSUME jz rel8 suffices */);
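    /* The jump target isn't known yet, so a placeholder is emitted here and patched via iemNativeFixupFixedJump()
     * once the code for the helper call has been emitted (see the end of this function). */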
7534
7535 /*
7536 * Call the unmap helper function.
7537 */
7538#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING /** @todo This should be unnecessary, the mapping call will already have set it! */
7539 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
7540#else
7541 RT_NOREF(idxInstr);
7542#endif
7543
7544 /* IEMNATIVE_CALL_ARG1_GREG = idxVarUnmapInfo (first!) */
7545 off = iemNativeEmitLoadArgGregFromStackVar(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, idxVarUnmapInfo,
7546 0 /*offAddend*/, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
7547
7548 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
7549 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7550
7551 /* Done setting up parameters, make the call. */
7552 off = iemNativeEmitCallImm(pReNative, off, pfnFunction);
7553
7554    /* The bUnmapInfo variable is implicitly freed by these MCs. */
7555 iemNativeVarFreeLocal(pReNative, idxVarUnmapInfo);
7556
7557 /*
7558 * Done, just fixup the jump for the non-call case.
7559 */
7560 iemNativeFixupFixedJump(pReNative, offJmpFixup, off);
7561
7562 return off;
7563}
7564
7565
7566
7567/*********************************************************************************************************************************
7568* State and Exceptions *
7569*********************************************************************************************************************************/
7570
7571#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7572#define IEM_MC_ACTUALIZE_FPU_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7573
7574#define IEM_MC_PREPARE_SSE_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7575#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7576#define IEM_MC_ACTUALIZE_SSE_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7577
7578#define IEM_MC_PREPARE_AVX_USAGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7579#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_CHANGE() off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/)
7580#define IEM_MC_ACTUALIZE_AVX_STATE_FOR_READ() off = iemNativeEmitPrepareFpuForUse(pReNative, off, false /*fForChange*/)
7581
7582
7583DECL_INLINE_THROW(uint32_t) iemNativeEmitPrepareFpuForUse(PIEMRECOMPILERSTATE pReNative, uint32_t off, bool fForChange)
7584{
7585 /** @todo this needs a lot more work later. */
7586 RT_NOREF(pReNative, fForChange);
7587 return off;
7588}
7589
7590
7591
7592/*********************************************************************************************************************************
7593* Emitters for FPU related operations. *
7594*********************************************************************************************************************************/
7595
7596#define IEM_MC_FETCH_FCW(a_u16Fcw) \
7597 off = iemNativeEmitFetchFpuFcw(pReNative, off, a_u16Fcw)
7598
7599/** Emits code for IEM_MC_FETCH_FCW. */
7600DECL_INLINE_THROW(uint32_t)
7601iemNativeEmitFetchFpuFcw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
7602{
7603 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7604 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7605
7606 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7607
7608 /* Allocate a temporary FCW register. */
7609 /** @todo eliminate extra register */
7610 uint8_t const idxFcwReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFcw,
7611 kIemNativeGstRegUse_ReadOnly);
7612
7613 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFcwReg);
7614
7615 /* Free but don't flush the FCW register. */
7616 iemNativeRegFreeTmp(pReNative, idxFcwReg);
7617 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7618
7619 return off;
7620}
7621
7622
7623#define IEM_MC_FETCH_FSW(a_u16Fsw) \
7624 off = iemNativeEmitFetchFpuFsw(pReNative, off, a_u16Fsw)
7625
7626/** Emits code for IEM_MC_FETCH_FSW. */
7627DECL_INLINE_THROW(uint32_t)
7628iemNativeEmitFetchFpuFsw(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar)
7629{
7630 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7631 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7632
7633 uint8_t const idxReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, false /*fInitialized*/);
7634 /* Allocate a temporary FSW register. */
7635 /** @todo eliminate extra register */
7636 uint8_t const idxFswReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_FpuFsw,
7637 kIemNativeGstRegUse_ReadOnly);
7638
7639 off = iemNativeEmitLoadGprFromGpr16(pReNative, off, idxReg, idxFswReg);
7640
7641 /* Free but don't flush the FSW register. */
7642 iemNativeRegFreeTmp(pReNative, idxFswReg);
7643 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7644
7645 return off;
7646}
7647
7648
7649
7650#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7651
7652
7653/*********************************************************************************************************************************
7654* Emitters for SSE/AVX specific operations. *
7655*********************************************************************************************************************************/
7656
7657#define IEM_MC_COPY_XREG_U128(a_iXRegDst, a_iXRegSrc) \
7658 off = iemNativeEmitSimdCopyXregU128(pReNative, off, a_iXRegDst, a_iXRegSrc)
7659
7660/** Emits code for IEM_MC_COPY_XREG_U128. */
7661DECL_INLINE_THROW(uint32_t)
7662iemNativeEmitSimdCopyXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXRegDst, uint8_t iXRegSrc)
7663{
7664 /* This is a nop if the source and destination register are the same. */
7665    /* This is a nop if the source and destination registers are the same. */
7666 {
7667 /* Allocate destination and source register. */
7668 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegDst),
7669 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
7670 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXRegSrc),
7671 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7672
7673 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
7674
7675 /* Free but don't flush the source and destination register. */
7676 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7677 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7678 }
7679
7680 return off;
7681}
7682
7683
7684#define IEM_MC_FETCH_XREG_U128(a_u128Value, a_iXReg) \
7685 off = iemNativeEmitSimdFetchXregU128(pReNative, off, a_u128Value, a_iXReg)
7686
7687/** Emits code for IEM_MC_FETCH_XREG_U128. */
7688DECL_INLINE_THROW(uint32_t)
7689iemNativeEmitSimdFetchXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg)
7690{
7691 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7692 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
7693
7694 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7695 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7696
7697 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
7698
7699 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
7700
7701 /* Free but don't flush the source register. */
7702 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7703 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
7704
7705 return off;
7706}
7707
7708
7709#define IEM_MC_FETCH_XREG_U64(a_u64Value, a_iXReg, a_iQWord) \
7710 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_u64Value, a_iXReg, a_iQWord)
7711
7712#define IEM_MC_FETCH_XREG_R64(a_r64Value, a_iXReg, a_iQWord) \
7713 off = iemNativeEmitSimdFetchXregU64(pReNative, off, a_r64Value, a_iXReg, a_iQWord)
7714
7715/** Emits code for IEM_MC_FETCH_XREG_U64/IEM_MC_FETCH_XREG_R64. */
7716DECL_INLINE_THROW(uint32_t)
7717iemNativeEmitSimdFetchXregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iQWord)
7718{
7719 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7720 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7721
7722 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7723 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7724
7725 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7726 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7727
7728 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
7729
7730 /* Free but don't flush the source register. */
7731 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7732 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7733
7734 return off;
7735}
7736
7737
7738#define IEM_MC_FETCH_XREG_U32(a_u32Value, a_iXReg, a_iDWord) \
7739 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_u32Value, a_iXReg, a_iDWord)
7740
7741#define IEM_MC_FETCH_XREG_R32(a_r32Value, a_iXReg, a_iDWord) \
7742 off = iemNativeEmitSimdFetchXregU32(pReNative, off, a_r32Value, a_iXReg, a_iDWord)
7743
7744/** Emits code for IEM_MC_FETCH_XREG_U32/IEM_MC_FETCH_XREG_R32. */
7745DECL_INLINE_THROW(uint32_t)
7746iemNativeEmitSimdFetchXregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iDWord)
7747{
7748 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7749 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7750
7751 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7752 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7753
7754 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7755 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7756
7757 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
7758
7759 /* Free but don't flush the source register. */
7760 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7761 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7762
7763 return off;
7764}
7765
7766
7767#define IEM_MC_FETCH_XREG_U16(a_u64Value, a_iXReg, a_iWord) \
7768 off = iemNativeEmitSimdFetchXregU16(pReNative, off, a_u64Value, a_iXReg, a_iWord)
7769
7770/** Emits code for IEM_MC_FETCH_XREG_U16. */
7771DECL_INLINE_THROW(uint32_t)
7772iemNativeEmitSimdFetchXregU16(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iWord)
7773{
7774 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7775 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint16_t));
7776
7777 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7778 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7779
7780 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7781 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7782
7783 off = iemNativeEmitSimdLoadGprFromVecRegU16(pReNative, off, idxVarReg, idxSimdRegSrc, iWord);
7784
7785 /* Free but don't flush the source register. */
7786 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7787 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7788
7789 return off;
7790}
7791
7792
7793#define IEM_MC_FETCH_XREG_U8(a_u64Value, a_iXReg, a_iByte) \
7794 off = iemNativeEmitSimdFetchXregU8(pReNative, off, a_u64Value, a_iXReg, a_iByte)
7795
7796/** Emits code for IEM_MC_FETCH_XREG_U8. */
7797DECL_INLINE_THROW(uint32_t)
7798iemNativeEmitSimdFetchXregU8(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iXReg, uint8_t iByte)
7799{
7800 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7801 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint8_t));
7802
7803 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7804 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
7805
7806 iemNativeVarSetKindToStack(pReNative, idxDstVar);
7807 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
7808
7809 off = iemNativeEmitSimdLoadGprFromVecRegU8(pReNative, off, idxVarReg, idxSimdRegSrc, iByte);
7810
7811 /* Free but don't flush the source register. */
7812 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
7813 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7814
7815 return off;
7816}
7817
7818
7819#define IEM_MC_STORE_XREG_U128(a_iXReg, a_u128Value) \
7820 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_u128Value)
7821
7822AssertCompileSize(X86XMMREG, sizeof(RTUINT128U));
7823#define IEM_MC_STORE_XREG_XMM(a_iXReg, a_XmmValue) \
7824 off = iemNativeEmitSimdStoreXregU128(pReNative, off, a_iXReg, a_XmmValue)
7825
7826
7827/** Emits code for IEM_MC_STORE_XREG_U128/IEM_MC_STORE_XREG_XMM. */
7828DECL_INLINE_THROW(uint32_t)
7829iemNativeEmitSimdStoreXregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
7830{
7831 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7832 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
7833
7834 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7835 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForFullWrite);
7836 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
7837
7838 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
7839
7840    /* Free but don't flush the destination register. */
7841 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7842 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
7843
7844 return off;
7845}
7846
7847
7848#define IEM_MC_STORE_XREG_U64(a_iXReg, a_iQWord, a_u64Value) \
7849 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u64Value, sizeof(uint64_t), a_iQWord)
7850
7851#define IEM_MC_STORE_XREG_U32(a_iXReg, a_iDWord, a_u32Value) \
7852 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint32_t), a_iDWord)
7853
7854#define IEM_MC_STORE_XREG_U16(a_iXReg, a_iWord, a_u32Value) \
7855 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint16_t), a_iWord)
7856
7857#define IEM_MC_STORE_XREG_U8(a_iXReg, a_iByte, a_u32Value) \
7858 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_u32Value, sizeof(uint8_t), a_iByte)
7859
7860#define IEM_MC_STORE_XREG_R32(a_iXReg, a_r32Value) \
7861 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r32Value, sizeof(RTFLOAT32U), 0 /*iElem*/)
7862
7863#define IEM_MC_STORE_XREG_R64(a_iXReg, a_r64Value) \
7864 off = iemNativeEmitSimdStoreXregUxx(pReNative, off, a_iXReg, a_r64Value, sizeof(RTFLOAT64U), 0 /*iElem*/)
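/* Note: the R32/R64 variants above reuse the 32-bit/64-bit element store (element 0), which works because
   sizeof(RTFLOAT32U) == sizeof(uint32_t) and sizeof(RTFLOAT64U) == sizeof(uint64_t). */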
7865
7866/** Emits code for IEM_MC_STORE_XREG_U64/IEM_MC_STORE_XREG_U32/IEM_MC_STORE_XREG_U16/IEM_MC_STORE_XREG_U8 and IEM_MC_STORE_XREG_R32/IEM_MC_STORE_XREG_R64. */
7867DECL_INLINE_THROW(uint32_t)
7868iemNativeEmitSimdStoreXregUxx(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar,
7869 uint8_t cbLocal, uint8_t iElem)
7870{
7871 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7872 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, cbLocal);
7873
7874#ifdef VBOX_STRICT
7875 switch (cbLocal)
7876 {
7877 case sizeof(uint64_t): Assert(iElem < 2); break;
7878 case sizeof(uint32_t): Assert(iElem < 4); break;
7879 case sizeof(uint16_t): Assert(iElem < 8); break;
7880 case sizeof(uint8_t): Assert(iElem < 16); break;
7881 default: AssertFailed();
7882 }
7883#endif
7884
7885 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7886 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7887 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
7888
7889 switch (cbLocal)
7890 {
7891 case sizeof(uint64_t): off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7892 case sizeof(uint32_t): off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7893 case sizeof(uint16_t): off = iemNativeEmitSimdStoreGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7894 case sizeof(uint8_t): off = iemNativeEmitSimdStoreGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, iElem); break;
7895 default: AssertFailed();
7896 }
7897
7898    /* Free but don't flush the destination register. */
7899 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7900 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7901
7902 return off;
7903}
7904
7905
7906#define IEM_MC_STORE_XREG_U64_ZX_U128(a_iXReg, a_u64Value) \
7907 off = iemNativeEmitSimdStoreXregU64ZxU128(pReNative, off, a_iXReg, a_u64Value)
7908
7909/** Emits code for IEM_MC_STORE_XREG_U64_ZX_U128. */
7910DECL_INLINE_THROW(uint32_t)
7911iemNativeEmitSimdStoreXregU64ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
7912{
7913 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7914 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
7915
7916 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7917 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7918 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
7919
7920    /* Zero the vector register first, then store the 64-bit value to the lower 64 bits. */
7921 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
7922 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0);
7923
7924    /* Free but don't flush the destination register. */
7925 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7926 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7927
7928 return off;
7929}
7930
7931
7932#define IEM_MC_STORE_XREG_U32_ZX_U128(a_iXReg, a_u32Value) \
7933 off = iemNativeEmitSimdStoreXregU32ZxU128(pReNative, off, a_iXReg, a_u32Value)
7934
7935/** Emits code for IEM_MC_STORE_XREG_U32_ZX_U128. */
7936DECL_INLINE_THROW(uint32_t)
7937iemNativeEmitSimdStoreXregU32ZxU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxDstVar)
7938{
7939 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
7940 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
7941
7942 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7943 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7944 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off, true /*fInitialized*/);
7945
7946 /* Zero the vector register first, then store the 32-bit value to the lowest 32-bit element. */
7947 off = iemNativeEmitSimdZeroVecRegLowU128(pReNative, off, idxSimdRegDst);
7948 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0);
7949
7950    /* Free but don't flush the destination register. */
7951 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7952 iemNativeVarRegisterRelease(pReNative, idxDstVar);
7953
7954 return off;
7955}
7956
7957
7958#define IEM_MC_STORE_XREG_U32_U128(a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc) \
7959 off = iemNativeEmitSimdStoreXregU32U128(pReNative, off, a_iXReg, a_iDwDst, a_u128Value, a_iDwSrc)
7960
7961/** Emits code for IEM_MC_STORE_XREG_U32_U128. */
7962DECL_INLINE_THROW(uint32_t)
7963iemNativeEmitSimdStoreXregU32U128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t iDwDst,
7964 uint8_t idxSrcVar, uint8_t iDwSrc)
7965{
7966 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
7967 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
7968
7969 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
7970 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
7971 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
7972
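    /* Copy dword a_iDwSrc of the source value into dword a_iDwDst of the destination register via the fixed temporary GPR. */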
7973 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, idxVarReg, iDwSrc);
7974 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, IEMNATIVE_REG_FIXED_TMP0, iDwDst);
7975
7976 /* Free but don't flush the destination register. */
7977 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
7978 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
7979
7980 return off;
7981}
7982
7983
7984#define IEM_MC_COPY_YREG_U128_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
7985 off = iemNativeEmitSimdCopyYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
7986
7987/** Emits code for IEM_MC_COPY_YREG_U128_ZX_VLMAX. */
7988DECL_INLINE_THROW(uint32_t)
7989iemNativeEmitSimdCopyYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
7990{
7991 /*
7992     * The iYRegSrc == iYRegDst case needs to be treated differently here, because if iYRegDst gets allocated first for the full write
7993     * it won't load the actual value from CPUMCTX.  When iYRegSrc is allocated afterwards, it would be duplicated from the already
7994     * allocated host register for iYRegDst, which contains garbage.  This would be caught by the guest register value checking in debug builds.
7995 */
7996 if (iYRegDst != iYRegSrc)
7997 {
7998 /* Allocate destination and source register. */
7999 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8000 kIemNativeGstSimdRegLdStSz_256,
8001 kIemNativeGstRegUse_ForFullWrite);
8002 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
8003 kIemNativeGstSimdRegLdStSz_Low128,
8004 kIemNativeGstRegUse_ReadOnly);
8005
8006 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
8007 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8008
8009 /* Free but don't flush the source and destination register. */
8010 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8011 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8012 }
8013 else
8014 {
8015 /* This effectively only clears the upper 128-bits of the register. */
8016 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8017 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
8018
8019 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
8020
8021 /* Free but don't flush the destination register. */
8022 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
8023 }
8024
8025 return off;
8026}
8027
8028
8029#define IEM_MC_COPY_YREG_U256_ZX_VLMAX(a_iYRegDst, a_iYRegSrc) \
8030 off = iemNativeEmitSimdCopyYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrc)
8031
8032/** Emits code for IEM_MC_COPY_YREG_U256_ZX_VLMAX. */
8033DECL_INLINE_THROW(uint32_t)
8034iemNativeEmitSimdCopyYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrc)
8035{
8036 /*
8037     * The iYRegSrc == iYRegDst case needs to be treated differently here, because if iYRegDst gets allocated first for the full write
8038     * it won't load the actual value from CPUMCTX.  When iYRegSrc is allocated afterwards, it would be duplicated from the already
8039     * allocated host register for iYRegDst, which contains garbage.  This would be caught by the guest register value checking in debug builds.
8040     * Since iYRegSrc == iYRegDst would effectively only clear the upper 256 bits of a ZMM register, which we don't support yet, this is just a nop.
8041 */
8042 if (iYRegDst != iYRegSrc)
8043 {
8044 /* Allocate destination and source register. */
8045 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
8046 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
8047 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8048 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8049
8050 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxSimdRegSrc);
8051
8052 /* Free but don't flush the source and destination register. */
8053 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8054 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8055 }
8056
8057 return off;
8058}
8059
8060
8061#define IEM_MC_FETCH_YREG_U128(a_u128Dst, a_iYRegSrc, a_iDQWord) \
8062 off = iemNativeEmitSimdFetchYregU128(pReNative, off, a_u128Dst, a_iYRegSrc, a_iDQWord)
8063
8064/** Emits code for IEM_MC_FETCH_YREG_U128. */
8065DECL_INLINE_THROW(uint32_t)
8066iemNativeEmitSimdFetchYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDQWord)
8067{
8068 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8069 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT128U));
8070
8071 Assert(iDQWord <= 1);
8072 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8073 iDQWord == 1
8074 ? kIemNativeGstSimdRegLdStSz_High128
8075 : kIemNativeGstSimdRegLdStSz_Low128,
8076 kIemNativeGstRegUse_ReadOnly);
8077
8078 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8079 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
8080
8081 if (iDQWord == 1)
8082 off = iemNativeEmitSimdLoadVecRegLowU128FromVecRegHighU128(pReNative, off, idxVarReg, idxSimdRegSrc);
8083 else
8084 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxVarReg, idxSimdRegSrc);
8085
8086 /* Free but don't flush the source register. */
8087 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8088 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
8089
8090 return off;
8091}
8092
8093
8094#define IEM_MC_FETCH_YREG_U64(a_u64Dst, a_iYRegSrc, a_iQWord) \
8095 off = iemNativeEmitSimdFetchYregU64(pReNative, off, a_u64Dst, a_iYRegSrc, a_iQWord)
8096
8097/** Emits code for IEM_MC_FETCH_YREG_U64. */
8098DECL_INLINE_THROW(uint32_t)
8099iemNativeEmitSimdFetchYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iQWord)
8100{
8101 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8102 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint64_t));
8103
8104 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8105 iQWord >= 2
8106 ? kIemNativeGstSimdRegLdStSz_High128
8107 : kIemNativeGstSimdRegLdStSz_Low128,
8108 kIemNativeGstRegUse_ReadOnly);
8109
8110 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8111 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8112
8113 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxVarReg, idxSimdRegSrc, iQWord);
8114
8115 /* Free but don't flush the source register. */
8116 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8117 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8118
8119 return off;
8120}
8121
8122
8123#define IEM_MC_FETCH_YREG_U32(a_u32Dst, a_iYRegSrc) \
8124 off = iemNativeEmitSimdFetchYregU32(pReNative, off, a_u32Dst, a_iYRegSrc, 0)
8125
8126/** Emits code for IEM_MC_FETCH_YREG_U32. */
8127DECL_INLINE_THROW(uint32_t)
8128iemNativeEmitSimdFetchYregU32(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYReg, uint8_t iDWord)
8129{
8130 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8131 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(uint32_t));
8132
8133 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8134 iDWord >= 4
8135 ? kIemNativeGstSimdRegLdStSz_High128
8136 : kIemNativeGstSimdRegLdStSz_Low128,
8137 kIemNativeGstRegUse_ReadOnly);
8138
8139 iemNativeVarSetKindToStack(pReNative, idxDstVar);
8140 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxDstVar, &off);
8141
8142 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxVarReg, idxSimdRegSrc, iDWord);
8143
8144 /* Free but don't flush the source register. */
8145 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8146 iemNativeVarRegisterRelease(pReNative, idxDstVar);
8147
8148 return off;
8149}
8150
8151
8152#define IEM_MC_CLEAR_YREG_128_UP(a_iYReg) \
8153 off = iemNativeEmitSimdClearYregHighU128(pReNative, off, a_iYReg)
8154
8155/** Emits code for IEM_MC_CLEAR_YREG_128_UP. */
8156DECL_INLINE_THROW(uint32_t)
8157iemNativeEmitSimdClearYregHighU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
8158{
8159 uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8160 kIemNativeGstSimdRegLdStSz_High128, kIemNativeGstRegUse_ForFullWrite);
8161
8162 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdReg);
8163
8164 /* Free but don't flush the register. */
8165 iemNativeSimdRegFreeTmp(pReNative, idxSimdReg);
8166
8167 return off;
8168}
8169
8170
8171#define IEM_MC_STORE_YREG_U128(a_iYRegDst, a_iDQword, a_u128Value) \
8172 off = iemNativeEmitSimdStoreYregU128(pReNative, off, a_iYRegDst, a_iDQword, a_u128Value)
8173
8174/** Emits code for IEM_MC_STORE_YREG_U128. */
8175DECL_INLINE_THROW(uint32_t)
8176iemNativeEmitSimdStoreYregU128(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t iDQword, uint8_t idxSrcVar)
8177{
8178 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8179 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8180
8181 Assert(iDQword <= 1);
8182 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8183 iDQword == 0
8184 ? kIemNativeGstSimdRegLdStSz_Low128
8185 : kIemNativeGstSimdRegLdStSz_High128,
8186 kIemNativeGstRegUse_ForFullWrite);
8187
8188 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8189
8190 if (iDQword == 0)
8191 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
8192 else
8193 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegLowU128(pReNative, off, idxSimdRegDst, idxVarReg);
8194
8195    /* Free but don't flush the destination register. */
8196 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8197 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8198
8199 return off;
8200}
8201
8202
8203#define IEM_MC_STORE_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
8204 off = iemNativeEmitSimdStoreYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
8205
8206/** Emits code for IEM_MC_STORE_YREG_U128_ZX_VLMAX. */
8207DECL_INLINE_THROW(uint32_t)
8208iemNativeEmitSimdStoreYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8209{
8210 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8211 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8212
8213 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8214 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8215
8216 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8217
8218 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxVarReg);
8219 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8220
8221    /* Free but don't flush the destination register. */
8222 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8223 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8224
8225 return off;
8226}
8227
8228
8229#define IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX(a_iXRegDst, a_u8Src) \
8230 off = iemNativeEmitSimdBroadcastXregU8ZxVlmax(pReNative, off, a_iXRegDst, a_u8Src)
8231
8232/** Emits code for IEM_MC_BROADCAST_XREG_U8_ZX_VLMAX. */
8233DECL_INLINE_THROW(uint32_t)
8234iemNativeEmitSimdBroadcastXregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8235{
8236 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8237 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
8238
8239 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8240 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8241
8242 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8243
8244 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8245 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8246
8247 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8248 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8249
8250 return off;
8251}
8252
8253
8254#define IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX(a_iXRegDst, a_u16Src) \
8255 off = iemNativeEmitSimdBroadcastXregU16ZxVlmax(pReNative, off, a_iXRegDst, a_u16Src)
8256
8257/** Emits code for IEM_MC_BROADCAST_XREG_U16_ZX_VLMAX. */
8258DECL_INLINE_THROW(uint32_t)
8259iemNativeEmitSimdBroadcastXregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8260{
8261 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8262 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
8263
8264 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8265 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8266
8267 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8268
8269 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8270 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8271
8272    /* Free but don't flush the destination register. */
8273 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8274 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8275
8276 return off;
8277}
8278
8279
8280#define IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX(a_iXRegDst, a_u32Src) \
8281 off = iemNativeEmitSimdBroadcastXregU32ZxVlmax(pReNative, off, a_iXRegDst, a_u32Src)
8282
8283/** Emits code for IEM_MC_BROADCAST_XREG_U32_ZX_VLMAX. */
8284DECL_INLINE_THROW(uint32_t)
8285iemNativeEmitSimdBroadcastXregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8286{
8287 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8288 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8289
8290 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8291 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8292
8293 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8294
8295 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8296 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8297
8298    /* Free but don't flush the destination register. */
8299 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8300 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8301
8302 return off;
8303}
8304
8305
8306#define IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX(a_iXRegDst, a_u64Src) \
8307 off = iemNativeEmitSimdBroadcastXregU64ZxVlmax(pReNative, off, a_iXRegDst, a_u64Src)
8308
8309/** Emits code for IEM_MC_BROADCAST_XREG_U64_ZX_VLMAX. */
8310DECL_INLINE_THROW(uint32_t)
8311iemNativeEmitSimdBroadcastXregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t idxSrcVar)
8312{
8313 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8314 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8315
8316 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8317 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8318
8319 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8320
8321 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, false /*f256Bit*/);
8322 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8323
8324    /* Free but don't flush the destination register. */
8325 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8326 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8327
8328 return off;
8329}
8330
8331
8332#define IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX(a_iYRegDst, a_u8Src) \
8333 off = iemNativeEmitSimdBroadcastYregU8ZxVlmax(pReNative, off, a_iYRegDst, a_u8Src)
8334
8335/** Emits code for IEM_MC_BROADCAST_YREG_U8_ZX_VLMAX. */
8336DECL_INLINE_THROW(uint32_t)
8337iemNativeEmitSimdBroadcastYregU8ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8338{
8339 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8340 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint8_t));
8341
8342 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8343 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8344
8345 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8346
8347 off = iemNativeEmitSimdBroadcastGprToVecRegU8(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8348
8349 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8350 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8351
8352 return off;
8353}
8354
8355
8356#define IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX(a_iYRegDst, a_u16Src) \
8357 off = iemNativeEmitSimdBroadcastYregU16ZxVlmax(pReNative, off, a_iYRegDst, a_u16Src)
8358
8359/** Emits code for IEM_MC_BROADCAST_YREG_U16_ZX_VLMAX. */
8360DECL_INLINE_THROW(uint32_t)
8361iemNativeEmitSimdBroadcastYregU16ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8362{
8363 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8364 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint16_t));
8365
8366 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8367 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8368
8369 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8370
8371 off = iemNativeEmitSimdBroadcastGprToVecRegU16(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8372
8373 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8374 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8375
8376 return off;
8377}
8378
8379
8380#define IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
8381 off = iemNativeEmitSimdBroadcastYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
8382
8383/** Emits code for IEM_MC_BROADCAST_YREG_U32_ZX_VLMAX. */
8384DECL_INLINE_THROW(uint32_t)
8385iemNativeEmitSimdBroadcastYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8386{
8387 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8388 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8389
8390 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8391 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8392
8393 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8394
8395 off = iemNativeEmitSimdBroadcastGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8396
8397    /* Free but don't flush the destination register. */
8398 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8399 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8400
8401 return off;
8402}
8403
8404
8405#define IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
8406 off = iemNativeEmitSimdBroadcastYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
8407
8408/** Emits code for IEM_MC_BROADCAST_YREG_U64_ZX_VLMAX. */
8409DECL_INLINE_THROW(uint32_t)
8410iemNativeEmitSimdBroadcastYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8411{
8412 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8413 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8414
8415 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8416 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8417
8418 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8419
8420 off = iemNativeEmitSimdBroadcastGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, true /*f256Bit*/);
8421
8422    /* Free but don't flush the destination register. */
8423 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8424 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8425
8426 return off;
8427}
8428
8429
8430#define IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX(a_iYRegDst, a_u128Src) \
8431 off = iemNativeEmitSimdBroadcastYregU128ZxVlmax(pReNative, off, a_iYRegDst, a_u128Src)
8432
8433/** Emits code for IEM_MC_BROADCAST_YREG_U128_ZX_VLMAX. */
8434DECL_INLINE_THROW(uint32_t)
8435iemNativeEmitSimdBroadcastYregU128ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8436{
8437 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8438 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT128U));
8439
8440 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8441 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8442
8443 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off);
8444
8445 off = iemNativeEmitSimdBroadcastVecRegU128ToVecReg(pReNative, off, idxSimdRegDst, idxVarReg);
8446
8447    /* Free but don't flush the destination register. */
8448 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8449 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8450
8451 return off;
8452}
8453
8454
8455#define IEM_MC_STORE_YREG_U32_ZX_VLMAX(a_iYRegDst, a_u32Src) \
8456 off = iemNativeEmitSimdStoreYregU32ZxVlmax(pReNative, off, a_iYRegDst, a_u32Src)
8457
8458/** Emits code for IEM_MC_STORE_YREG_U32_ZX_VLMAX. */
8459DECL_INLINE_THROW(uint32_t)
8460iemNativeEmitSimdStoreYregU32ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8461{
8462 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8463 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint32_t));
8464
8465 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8466 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8467
8468 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8469
8470 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
8471 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iDWord*/);
8472
8473    /* Free but don't flush the destination register. */
8474 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8475 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8476
8477 return off;
8478}
8479
8480
8481#define IEM_MC_STORE_YREG_U64_ZX_VLMAX(a_iYRegDst, a_u64Src) \
8482 off = iemNativeEmitSimdStoreYregU64ZxVlmax(pReNative, off, a_iYRegDst, a_u64Src)
8483
8484/** Emits code for IEM_MC_STORE_YREG_U64_ZX_VLMAX. */
8485DECL_INLINE_THROW(uint32_t)
8486iemNativeEmitSimdStoreYregU64ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg, uint8_t idxSrcVar)
8487{
8488 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8489 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8490
8491 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYReg),
8492 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8493
8494 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8495
8496 off = iemNativeEmitSimdZeroVecRegU256(pReNative, off, idxSimdRegDst);
8497 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
8498
8499    /* Free but don't flush the destination register. */
8500 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8501 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8502
8503 return off;
8504}
8505
8506
8507#define IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX(a_iYRegDst, a_u64Local, a_iYRegSrcHx) \
8508 off = iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(pReNative, off, a_iYRegDst, a_u64Local, a_iYRegSrcHx)
8509
8510/** Emits code for IEM_MC_MERGE_YREG_U64LOCAL_U64HI_ZX_VLMAX. */
8511DECL_INLINE_THROW(uint32_t)
8512iemNativeEmitSimdMergeYregU64LocalU64HiZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar, uint8_t iYRegSrcHx)
8513{
8514 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8515 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8516
8517 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8518 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8519 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
8520 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8521 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8522
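    /* Copy the low 128 bits of the source register, overwrite qword 0 with the local value, then zero the upper half. */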
8523 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
8524 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 0 /*iQWord*/);
8525 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8526
8527 /* Free but don't flush the source and destination registers. */
8528 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
8529 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8530 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8531
8532 return off;
8533}
8534
8535
8536#define IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX(a_iYRegDst, a_iYRegSrcHx, a_u64Local) \
8537 off = iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(pReNative, off, a_iYRegDst, a_iYRegSrcHx, a_u64Local)
8538
8539/** Emits code for IEM_MC_MERGE_YREG_U64LO_U64LOCAL_ZX_VLMAX. */
8540DECL_INLINE_THROW(uint32_t)
8541iemNativeEmitSimdMergeYregU64LoU64LocalZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iYRegSrcHx, uint8_t idxSrcVar)
8542{
8543 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8544 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8545
8546 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8547 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8548 uint8_t const idxSimdRegSrcHx = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrcHx),
8549 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ReadOnly);
8550 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8551
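    /* Copy the low 128 bits of the source register, overwrite qword 1 with the local value, then zero the upper half. */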
8552 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxSimdRegDst, idxSimdRegSrcHx);
8553 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, 1 /*iQWord*/);
8554 off = iemNativeEmitSimdZeroVecRegHighU128(pReNative, off, idxSimdRegDst);
8555
8556 /* Free but don't flush the source and destination registers. */
8557 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrcHx);
8558 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8559 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8560
8561 return off;
8562}
8563
8564
8565#define IEM_MC_CLEAR_XREG_U32_MASK(a_iXReg, a_bMask) \
8566 off = iemNativeEmitSimdClearXregU32Mask(pReNative, off, a_iXReg, a_bMask)
8567
8568
8569/** Emits code for IEM_MC_CLEAR_XREG_U32_MASK. */
8570DECL_INLINE_THROW(uint32_t)
8571iemNativeEmitSimdClearXregU32Mask(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iXReg, uint8_t bImm8Mask)
8572{
8573 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iXReg),
8574 kIemNativeGstSimdRegLdStSz_Low128, kIemNativeGstRegUse_ForUpdate);
8575
8576 /** @todo r=aeichner For certain bit combinations we could reduce the number of emitted instructions. */
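    /*        (E.g. masks 0x3 and 0xc each cover a whole qword and could be done with a single 64-bit element clear,
     *        and a mask of 0xf could simply zero the whole 128-bit register in one go.) */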
8577 if (bImm8Mask & RT_BIT(0))
8578 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 0 /*iDWord*/);
8579 if (bImm8Mask & RT_BIT(1))
8580 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 1 /*iDWord*/);
8581 if (bImm8Mask & RT_BIT(2))
8582 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 2 /*iDWord*/);
8583 if (bImm8Mask & RT_BIT(3))
8584 off = iemNativeEmitSimdZeroVecRegElemU32(pReNative, off, idxSimdRegDst, 3 /*iDWord*/);
8585
8586 /* Free but don't flush the destination register. */
8587 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8588
8589 return off;
8590}
8591
8592
8593#define IEM_MC_FETCH_YREG_U256(a_u256Dst, a_iYRegSrc) \
8594 off = iemNativeEmitSimdFetchYregU256(pReNative, off, a_u256Dst, a_iYRegSrc)
8595
8596
8597/** Emits code for IEM_MC_FETCH_YREG_U256. */
8598DECL_INLINE_THROW(uint32_t)
8599iemNativeEmitSimdFetchYregU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxDstVar, uint8_t iYRegSrc)
8600{
8601 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxDstVar);
8602 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxDstVar, sizeof(RTUINT256U));
8603
8604 uint8_t const idxSimdRegSrc = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegSrc),
8605 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ReadOnly);
8606 uint8_t const idxVarReg = iemNativeVarSimdRegisterAcquire(pReNative, idxDstVar, &off);
8607
8608 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxVarReg, idxSimdRegSrc);
8609
8610 /* Free but don't flush the source register. */
8611 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegSrc);
8612 iemNativeVarSimdRegisterRelease(pReNative, idxDstVar);
8613
8614 return off;
8615}
8616
8617
8618#define IEM_MC_STORE_YREG_U256_ZX_VLMAX(a_iYRegDst, a_u256Src) \
8619 off = iemNativeEmitSimdStoreYregU256ZxVlmax(pReNative, off, a_iYRegDst, a_u256Src)
8620
8621
8622/** Emits code for IEM_MC_STORE_YREG_U256_ZX_VLMAX. */
8623DECL_INLINE_THROW(uint32_t)
8624iemNativeEmitSimdStoreYregU256ZxVlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t idxSrcVar)
8625{
8626 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8627 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
8628
8629 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8630 kIemNativeGstSimdRegLdStSz_256, kIemNativeGstRegUse_ForFullWrite);
8631 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
8632
8633 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxSimdRegDst, idxVarRegSrc);
8634
8635 /* Free but don't flush the destination register. */
8636 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8637 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8638
8639 return off;
8640}
8641
8642
8643#define IEM_MC_STORE_YREG_U32_U256(a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc) \
8644 off = iemNativeEmitSimdStoreYregU32FromU256(pReNative, off, a_iYRegDst, a_iDwDst, a_u256Value, a_iDwSrc)
8645
8646
8647/** Emits code for IEM_MC_STORE_YREG_U32_U256. */
8648DECL_INLINE_THROW(uint32_t)
8649iemNativeEmitSimdStoreYregU32FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iDwDst,
8650 uint8_t idxSrcVar, uint8_t iDwSrc)
8651{
8652 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8653 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
8654
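    /* Only the 128-bit half containing the destination dword is loaded for the update: dwords 0..3 live in the low half, dwords 4..7 in the high half. */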
8655 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8656 iDwDst < 4
8657 ? kIemNativeGstSimdRegLdStSz_Low128
8658 : kIemNativeGstSimdRegLdStSz_High128,
8659 kIemNativeGstRegUse_ForUpdate);
8660 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
8661 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
8662
8663 off = iemNativeEmitSimdLoadGprFromVecRegU32(pReNative, off, idxRegTmp, idxVarRegSrc, iDwSrc);
8664 off = iemNativeEmitSimdStoreGprToVecRegU32(pReNative, off, idxSimdRegDst, idxRegTmp, iDwDst);
8665
8666 /* Free but don't flush the destination and temporary registers. */
8667 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8668 iemNativeRegFreeTmp(pReNative, idxRegTmp);
8669 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8670
8671 return off;
8672}
8673
8674
8675#define IEM_MC_STORE_YREG_U64_U256(a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc) \
8676 off = iemNativeEmitSimdStoreYregU64FromU256(pReNative, off, a_iYRegDst, a_iQwDst, a_u256Value, a_iQwSrc)
8677
8678
8679/** Emits code for IEM_MC_STORE_YREG_U64_U256. */
8680DECL_INLINE_THROW(uint32_t)
8681iemNativeEmitSimdStoreYregU64FromU256(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst,
8682 uint8_t idxSrcVar, uint8_t iQwSrc)
8683{
8684 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8685 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(RTUINT256U));
8686
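    /* Only the 128-bit half containing the destination qword is loaded: qwords 0..1 are in the low half, qwords 2..3 in the high half. */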
8687 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8688 iQwDst < 2
8689 ? kIemNativeGstSimdRegLdStSz_Low128
8690 : kIemNativeGstSimdRegLdStSz_High128,
8691 kIemNativeGstRegUse_ForUpdate);
8692 uint8_t const idxVarRegSrc = iemNativeVarSimdRegisterAcquire(pReNative, idxSrcVar, &off, true /*fInitialized*/);
8693 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
8694
8695 off = iemNativeEmitSimdLoadGprFromVecRegU64(pReNative, off, idxRegTmp, idxVarRegSrc, iQwSrc);
8696 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxRegTmp, iQwDst);
8697
8698 /* Free but don't flush the destination and temporary registers. */
8699 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8700 iemNativeRegFreeTmp(pReNative, idxRegTmp);
8701 iemNativeVarSimdRegisterRelease(pReNative, idxSrcVar);
8702
8703 return off;
8704}
8705
8706
8707#define IEM_MC_STORE_YREG_U64(a_iYRegDst, a_iQword, a_u64Value) \
8708 off = iemNativeEmitSimdStoreYregU64(pReNative, off, a_iYRegDst, a_iQword, a_u64Value)
8709
8710
8711/** Emits code for IEM_MC_STORE_YREG_U64. */
8712DECL_INLINE_THROW(uint32_t)
8713iemNativeEmitSimdStoreYregU64(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYRegDst, uint8_t iQwDst, uint8_t idxSrcVar)
8714{
8715 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxSrcVar);
8716 IEMNATIVE_ASSERT_VAR_SIZE(pReNative, idxSrcVar, sizeof(uint64_t));
8717
8718 uint8_t const idxSimdRegDst = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
8719 iQwDst < 2
8720 ? kIemNativeGstSimdRegLdStSz_Low128
8721 : kIemNativeGstSimdRegLdStSz_High128,
8722 kIemNativeGstRegUse_ForUpdate);
8723
8724 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxSrcVar, &off);
8725
8726 off = iemNativeEmitSimdStoreGprToVecRegU64(pReNative, off, idxSimdRegDst, idxVarReg, iQwDst);
8727
8728 /* Free but don't flush the destination register. */
8729 iemNativeSimdRegFreeTmp(pReNative, idxSimdRegDst);
8730 iemNativeVarRegisterRelease(pReNative, idxSrcVar);
8731
8732 return off;
8733}
8734
8735
8736#define IEM_MC_CLEAR_ZREG_256_UP(a_iYReg) \
8737 off = iemNativeEmitSimdClearZregU256Vlmax(pReNative, off, a_iYReg)
8738
8739/** Emits code for IEM_MC_CLEAR_ZREG_256_UP. */
8740DECL_INLINE_THROW(uint32_t)
8741iemNativeEmitSimdClearZregU256Vlmax(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t iYReg)
8742{
8743 RT_NOREF(pReNative, iYReg);
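    /* Nothing to do yet: without AVX-512 support there is no guest register state above bit 255 to clear. */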
8744 /** @todo Needs to be implemented when support for AVX-512 is added. */
8745 return off;
8746}
8747
8748
8749
8750/*********************************************************************************************************************************
8751* Emitters for IEM_MC_CALL_SSE_AIMPL_XXX *
8752*********************************************************************************************************************************/
8753
8754/**
8755 * Common worker for IEM_MC_CALL_SSE_AIMPL_XXX/IEM_MC_CALL_AVX_AIMPL_XXX.
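 *
 * The guest MXCSR (with the current exception flags masked out) is loaded
 * into the first call argument, and the updated MXCSR value is taken back
 * from the return register and written to the guest context afterwards.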
8756 */
8757DECL_INLINE_THROW(uint32_t)
8758iemNativeEmitCallSseAvxAImplCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t cArgs)
8759{
8760 /* Grab the MXCSR register; it must not be call-volatile, or we would end up freeing it when setting up the call below. */
8761 uint8_t const idxRegMxCsr = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_MxCsr,
8762 kIemNativeGstRegUse_ForUpdate, true /*fNoVolatileRegs*/);
8763 AssertRelease(!(RT_BIT_32(idxRegMxCsr) & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
8764
8765 /*
8766 * Need to do the FPU preparation.
8767 */
8768 off = iemNativeEmitPrepareFpuForUse(pReNative, off, true /*fForChange*/);
8769
8770 /*
8771 * Do all the call setup and cleanup.
8772 */
8773 off = iemNativeEmitCallCommon(pReNative, off, cArgs + IEM_SSE_AIMPL_HIDDEN_ARGS, IEM_SSE_AIMPL_HIDDEN_ARGS, false /*fFlushPendingWrites*/);
8774
8775 /*
8776 * Load the MXCSR register into the first argument and mask out the current exception flags.
8777 */
8778 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, idxRegMxCsr);
8779 off = iemNativeEmitAndGpr32ByImm(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, ~X86_MXCSR_XCPT_FLAGS);
8780
8781 /*
8782 * Make the call.
8783 */
8784 off = iemNativeEmitCallImm(pReNative, off, pfnAImpl);
8785
8786 /*
8787 * The updated MXCSR is in the return register.
8788 */
8789 off = iemNativeEmitLoadGprFromGpr32(pReNative, off, idxRegMxCsr, IEMNATIVE_CALL_RET_GREG);
8790
8791#ifndef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8792 /* Write back the MXCSR register value (there is no delayed writeback for such registers at the moment). */
8793 off = iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxRegMxCsr, RT_UOFFSETOF_DYN(VMCPU, cpum.GstCtx.XState.x87.MXCSR));
8794#endif
8795 iemNativeRegFreeTmp(pReNative, idxRegMxCsr);
8796
8797 return off;
8798}
8799
8800
8801#define IEM_MC_CALL_SSE_AIMPL_2(a_pfnAImpl, a0, a1) \
8802 off = iemNativeEmitCallSseAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
8803
8804/** Emits code for IEM_MC_CALL_SSE_AIMPL_2. */
8805DECL_INLINE_THROW(uint32_t)
8806iemNativeEmitCallSseAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
8807{
8808 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8809 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8810 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2);
8811}
8812
8813
8814#define IEM_MC_CALL_SSE_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
8815 off = iemNativeEmitCallSseAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
8816
8817/** Emits code for IEM_MC_CALL_SSE_AIMPL_3. */
8818DECL_INLINE_THROW(uint32_t)
8819iemNativeEmitCallSseAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8820{
8821 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8822 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8823 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_SSE_AIMPL_HIDDEN_ARGS);
8824 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3);
8825}
8826
8827
8828/*********************************************************************************************************************************
8829* Emitters for IEM_MC_CALL_AVX_AIMPL_XXX *
8830*********************************************************************************************************************************/
8831
8832#define IEM_MC_CALL_AVX_AIMPL_2(a_pfnAImpl, a0, a1) \
8833 off = iemNativeEmitCallAvxAImpl2(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1))
8834
8835/** Emits code for IEM_MC_CALL_AVX_AIMPL_2. */
8836DECL_INLINE_THROW(uint32_t)
8837iemNativeEmitCallAvxAImpl2(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1)
8838{
8839 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8840 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8841 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 2);
8842}
8843
8844
8845#define IEM_MC_CALL_AVX_AIMPL_3(a_pfnAImpl, a0, a1, a2) \
8846 off = iemNativeEmitCallAvxAImpl3(pReNative, off, (uintptr_t)(a_pfnAImpl), (a0), (a1), (a2))
8847
8848/** Emits code for IEM_MC_CALL_AVX_AIMPL_3. */
8849DECL_INLINE_THROW(uint32_t)
8850iemNativeEmitCallAvxAImpl3(PIEMRECOMPILERSTATE pReNative, uint32_t off, uintptr_t pfnAImpl, uint8_t idxArg0, uint8_t idxArg1, uint8_t idxArg2)
8851{
8852 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg0, 0 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8853 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg1, 1 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8854 IEMNATIVE_ASSERT_ARG_VAR_IDX(pReNative, idxArg2, 2 + IEM_AVX_AIMPL_HIDDEN_ARGS);
8855 return iemNativeEmitCallSseAvxAImplCommon(pReNative, off, pfnAImpl, 3);
8856}
8857#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
8858
8859
8860/*********************************************************************************************************************************
8861* Include instruction emitters. *
8862*********************************************************************************************************************************/
8863#include "target-x86/IEMAllN8veEmit-x86.h"
8864