VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@107200

Last change on this file since 107200 was 107200, checked in by vboxsync, 2 months ago

VMM/IEM: Deal with hidden pointer to VBOXSTRICTRC return struct on win.arm64. jiraref:VBP-1466

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 457.9 KB
1/* $Id: IEMAllN8veRecompiler.cpp 107200 2024-11-29 22:15:46Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : Delayed PC updating.
11 * - Level 5 (Log5) : Postponed and skipped EFLAGS calculations.
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include <VBox/vmm/tm.h>
55#include "IEMInternal.h"
56#include <VBox/vmm/vmcc.h>
57#include <VBox/log.h>
58#include <VBox/err.h>
59#include <VBox/dis.h>
60#include <VBox/param.h>
61#include <iprt/assert.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
71# include "/opt/local/include/capstone/capstone.h"
72#endif
73
74#include "IEMInline.h"
75#include "IEMThreadedFunctions.h"
76#include "IEMN8veRecompiler.h"
77#include "IEMN8veRecompilerEmit.h"
78#include "IEMN8veRecompilerTlbLookup.h"
79#include "IEMNativeFunctions.h"
80#include "target-x86/IEMAllN8veEmit-x86.h"
81
82
83/*
84 * Narrow down configs here to avoid wasting time on unused configs.
85 * Note! Same checks in IEMAllThrdRecompiler.cpp.
86 */
87
88#ifndef IEM_WITH_CODE_TLB
89# error The code TLB must be enabled for the recompiler.
90#endif
91
92#ifndef IEM_WITH_DATA_TLB
93# error The data TLB must be enabled for the recompiler.
94#endif
95
96#ifndef IEM_WITH_SETJMP
97# error The setjmp approach must be enabled for the recompiler.
98#endif
99
100/** @todo eliminate this clang build hack. */
101#if RT_CLANG_PREREQ(4, 0)
102# pragma GCC diagnostic ignored "-Wunused-function"
103#endif
104
105
106/*********************************************************************************************************************************
107* Internal Functions *
108*********************************************************************************************************************************/
109#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
110static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
111#endif
112DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
113DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
114 IEMNATIVEGSTREG enmGstReg, uint32_t off);
115DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
116static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode = false);
117
118
119
120/*********************************************************************************************************************************
121* Native Recompilation *
122*********************************************************************************************************************************/
123
124
125/**
126 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
127 */
128IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
129{
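    /* Editor's note (illustrative, not in the original source): idxInstr is the
       zero-based index of the instruction that produced the status, so adding it
       here presumably credits the instructions already completed in this TB
       before the fiddled status code is propagated. */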
130 pVCpu->iem.s.cInstructions += idxInstr;
131 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
132}
133
134
135/**
136 * Helper for iemNativeHlpReturnBreakViaLookup and iemNativeHlpReturnBreakViaLookupWithTlb, checking whether an IRQ or other relevant force flag is pending.
137 */
138DECL_FORCE_INLINE(bool) iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(PVMCPU pVCpu)
139{
140 uint64_t fCpu = pVCpu->fLocalForcedActions;
141 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
142 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
143 | VMCPU_FF_TLB_FLUSH
144 | VMCPU_FF_UNHALT );
145 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
146 if (RT_LIKELY( ( !fCpu
147 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
148 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
149 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
150 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
151 return false;
152 return true;
153}
154
155
156/**
157 * Used by TB code to find the next TB via the lookup table entry (known GCPhysPc) and continue in it directly; returns 0 if no suitable TB was found.
158 */
159template<bool const a_fWithIrqCheck>
160IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookup,(PVMCPUCC pVCpu, uint8_t idxTbLookup,
161 uint32_t fFlags, RTGCPHYS GCPhysPc))
162{
163 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
164 Assert(idxTbLookup < pTb->cTbLookupEntries);
165 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
166#if 1
167 PIEMTB const pNewTb = *ppNewTb;
168 if (pNewTb)
169 {
170# ifdef VBOX_STRICT
171 uint64_t const uFlatPcAssert = pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base;
172 AssertMsg( (uFlatPcAssert & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.uInstrBufPc
173 && (GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.GCPhysInstrBuf
174 && (GCPhysPc & GUEST_PAGE_OFFSET_MASK) == (uFlatPcAssert & GUEST_PAGE_OFFSET_MASK),
175 ("GCPhysPc=%RGp uFlatPcAssert=%#RX64 uInstrBufPc=%#RX64 GCPhysInstrBuf=%RGp\n",
176 GCPhysPc, uFlatPcAssert, pVCpu->iem.s.uInstrBufPc, pVCpu->iem.s.GCPhysInstrBuf));
177# endif
178 if (pNewTb->GCPhysPc == GCPhysPc)
179 {
180# ifdef VBOX_STRICT
181 uint32_t fAssertFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
182 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
183 fAssertFlags |= IEMTB_F_INHIBIT_SHADOW;
184 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
185 fAssertFlags |= IEMTB_F_INHIBIT_NMI;
186# if 1 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. */
187 Assert(IEM_F_MODE_X86_IS_FLAT(fFlags));
188# else
189 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
190 {
191 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
192 if (offFromLim < X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
193 fAssertFlags |= IEMTB_F_CS_LIM_CHECKS;
194 }
195# endif
196 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
197 AssertMsg(fFlags == fAssertFlags, ("fFlags=%#RX32 fAssertFlags=%#RX32 cs:rip=%04x:%#010RX64\n",
198 fFlags, fAssertFlags, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
199#endif
200
201 /*
202 * Check them + type.
203 */
204 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
205 {
206 /*
207 * Check for interrupts and stuff.
208 */
209 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithTlb.
210 * The main problems are the statistics and, to some degree, the logging. :/ */
211 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
212 {
213 /* Do polling. */
214 if ( RT_LIKELY((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
215 || iemPollTimers(pVCpu->CTX_SUFF(pVM), pVCpu) == VINF_SUCCESS)
216 {
217 /*
218 * Success. Update statistics and switch to the next TB.
219 */
220 if (a_fWithIrqCheck)
221 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1Irq);
222 else
223 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoIrq);
224
225 pNewTb->cUsed += 1;
226 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
227 pVCpu->iem.s.pCurTbR3 = pNewTb;
228 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
229 pVCpu->iem.s.cTbExecNative += 1;
230 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
231 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
232 return (uintptr_t)pNewTb->Native.paInstructions;
233 }
234 }
235 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: IRQ or FF pending\n"));
236 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1PendingIrq);
237 }
238 else
239 {
240 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
241 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
242 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchFlags);
243 }
244 }
245 else
246 {
247 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
248 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
249 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchGCPhysPc);
250 }
251 }
252 else
253 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoTb);
254#else
255 NOREF(GCPhysPc);
256#endif
257
258 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
259 return 0;
260}
261
262
263/**
264 * Used by TB code to find the next TB via the lookup table entry, computing GCPhysPc from the instruction buffer / code TLB, and continue in it directly; returns 0 if no suitable TB was found.
265 */
266template <bool const a_fWithIrqCheck>
267IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookupWithTlb,(PVMCPUCC pVCpu, uint8_t idxTbLookup))
268{
269 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
270 Assert(idxTbLookup < pTb->cTbLookupEntries);
271 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
272#if 1
273 PIEMTB const pNewTb = *ppNewTb;
274 if (pNewTb)
275 {
276 /*
277 * Calculate the flags for the next TB and check if they match.
278 */
279 uint32_t fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
280 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
281 { /* likely */ }
282 else
283 {
284 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
285 fFlags |= IEMTB_F_INHIBIT_SHADOW;
286 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
287 fFlags |= IEMTB_F_INHIBIT_NMI;
288 }
289 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
290 {
291 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
292 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
293 { /* likely */ }
294 else
295 fFlags |= IEMTB_F_CS_LIM_CHECKS;
296 }
297 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
298
299 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
300 {
301 /*
302 * Do the TLB lookup for flat RIP and compare the result with the next TB.
303 *
304 * Note! This replicates iemGetPcWithPhysAndCode and iemGetPcWithPhysAndCodeMissed.
305 */
306 /* Calc the effective PC. */
307 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
308 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
309 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
310
311 /* Advance within the current buffer (PAGE) when possible. */
312 RTGCPHYS GCPhysPc;
313 uint64_t off;
314 if ( pVCpu->iem.s.pbInstrBuf
315 && (off = uPc - pVCpu->iem.s.uInstrBufPc) < pVCpu->iem.s.cbInstrBufTotal) /*ugly*/
316 {
317 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
318 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
319 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
320 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
321 else
322 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
323 GCPhysPc = pVCpu->iem.s.GCPhysInstrBuf + off;
324 }
325 else
326 {
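                    /* Editor's note (illustrative, not in the original source): the instruction
                       buffer no longer covers the new PC, so it is reset and refilled via
                       iemOpcodeFetchBytesJmp; GCPhysPc then comes from the refreshed buffer,
                       or is NIL_RTGCPHYS if no buffer could be established. */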
327 pVCpu->iem.s.pbInstrBuf = NULL;
328 pVCpu->iem.s.offCurInstrStart = 0;
329 pVCpu->iem.s.offInstrNextByte = 0;
330 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
331 GCPhysPc = pVCpu->iem.s.pbInstrBuf ? pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart : NIL_RTGCPHYS;
332 }
333
334 if (pNewTb->GCPhysPc == GCPhysPc)
335 {
336 /*
337 * Check for interrupts and stuff.
338 */
339 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithPc.
340 * The main problems are the statistics and, to some degree, the logging. :/ */
341 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
342 {
343 /* Do polling. */
344 if ( RT_LIKELY((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
345 || iemPollTimers(pVCpu->CTX_SUFF(pVM), pVCpu) == VINF_SUCCESS)
346 {
347 /*
348 * Success. Update statistics and switch to the next TB.
349 */
350 if (a_fWithIrqCheck)
351 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2Irq);
352 else
353 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoIrq);
354
355 pNewTb->cUsed += 1;
356 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
357 pVCpu->iem.s.pCurTbR3 = pNewTb;
358 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
359 pVCpu->iem.s.cTbExecNative += 1;
360 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
361 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
362 return (uintptr_t)pNewTb->Native.paInstructions;
363 }
364 }
365 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: IRQ or FF pending\n"));
366 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2PendingIrq);
367 }
368 else
369 {
370 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
371 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
372 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchGCPhysPc);
373 }
374 }
375 else
376 {
377 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
378 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
379 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchFlags);
380 }
381 }
382 else
383 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb);
384#else
385 NOREF(fFlags);
386 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb); /* just for some stats, even if misleading */
387#endif
388
389 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
390 return 0;
391}
392
393
394/**
395 * Used by TB code when it wants to raise a \#DE.
396 */
397IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
398{
399 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseDe);
400 iemRaiseDivideErrorJmp(pVCpu);
401#ifndef _MSC_VER
402 return VINF_IEM_RAISED_XCPT; /* not reached */
403#endif
404}
405
406
407/**
408 * Used by TB code when it wants to raise a \#UD.
409 */
410IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
411{
412 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseUd);
413 iemRaiseUndefinedOpcodeJmp(pVCpu);
414#ifndef _MSC_VER
415 return VINF_IEM_RAISED_XCPT; /* not reached */
416#endif
417}
418
419
420/**
421 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
422 *
423 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
424 */
425IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
426{
427 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseRelated);
428 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
429 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
430 iemRaiseUndefinedOpcodeJmp(pVCpu);
431 else
432 iemRaiseDeviceNotAvailableJmp(pVCpu);
433#ifndef _MSC_VER
434 return VINF_IEM_RAISED_XCPT; /* not reached */
435#endif
436}
437
438
439/**
440 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
441 *
442 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
443 */
444IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
445{
446 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseAvxRelated);
447 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
448 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
449 iemRaiseUndefinedOpcodeJmp(pVCpu);
450 else
451 iemRaiseDeviceNotAvailableJmp(pVCpu);
452#ifndef _MSC_VER
453 return VINF_IEM_RAISED_XCPT; /* not reached */
454#endif
455}
456
457
458/**
459 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
460 *
461 * See IEM_MC_CALL_AVX_XXX/IEM_MC_CALL_SSE_XXX.
462 */
463IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
464{
465 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseAvxFpRelated);
466 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
467 iemRaiseSimdFpExceptionJmp(pVCpu);
468 else
469 iemRaiseUndefinedOpcodeJmp(pVCpu);
470#ifndef _MSC_VER
471 return VINF_IEM_RAISED_XCPT; /* not reached */
472#endif
473}
474
475
476/**
477 * Used by TB code when it wants to raise a \#NM.
478 */
479IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
480{
481 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseNm);
482 iemRaiseDeviceNotAvailableJmp(pVCpu);
483#ifndef _MSC_VER
484 return VINF_IEM_RAISED_XCPT; /* not reached */
485#endif
486}
487
488
489/**
490 * Used by TB code when it wants to raise a \#GP(0).
491 */
492IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
493{
494 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseGp0);
495 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
496#ifndef _MSC_VER
497 return VINF_IEM_RAISED_XCPT; /* not reached */
498#endif
499}
500
501
502/**
503 * Used by TB code when it wants to raise a \#MF.
504 */
505IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
506{
507 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseMf);
508 iemRaiseMathFaultJmp(pVCpu);
509#ifndef _MSC_VER
510 return VINF_IEM_RAISED_XCPT; /* not reached */
511#endif
512}
513
514
515/**
516 * Used by TB code when it wants to raise a \#XF.
517 */
518IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
519{
520 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseXf);
521 iemRaiseSimdFpExceptionJmp(pVCpu);
522#ifndef _MSC_VER
523 return VINF_IEM_RAISED_XCPT; /* not reached */
524#endif
525}
526
527
528/**
529 * Used by TB code when detecting opcode changes.
530 * @see iemThreadeFuncWorkerObsoleteTb
531 */
532IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
533{
534 /* We set fSafeToFree to false because we're being called in the context
535 of a TB callback function, which for native TBs means we cannot release
536 the executable memory until we've returned all the way back to iemTbExec,
537 as that return path goes via the native code generated for the TB. */
538 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
539 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitObsoleteTb);
540 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
541 return VINF_IEM_REEXEC_BREAK;
542}
543
544
545/**
546 * Used by TB code when we need to switch to a TB with CS.LIM checking.
547 */
548IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
549{
550 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
551 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
552 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
553 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
554 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
555 return VINF_IEM_REEXEC_BREAK;
556}
557
558
559/**
560 * Used by TB code when we missed a PC check after a branch.
561 */
562IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
563{
564 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
565 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
566 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
567 pVCpu->iem.s.pbInstrBuf));
568 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
569 return VINF_IEM_REEXEC_BREAK;
570}
571
572
573
574/*********************************************************************************************************************************
575* Helpers: Segmented memory fetches and stores. *
576*********************************************************************************************************************************/
577
578/**
579 * Used by TB code to load unsigned 8-bit data w/ segmentation.
580 */
581IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
582{
583#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
584 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
585#else
586 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
587#endif
588}
589
590
591/**
592 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
593 * to 16 bits.
594 */
595IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
596{
597#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
598 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
599#else
600 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
601#endif
602}
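/*
 * Editor's illustrative sketch (not part of the original source): the cast chain
 * above first sign-extends the byte to 16 bits and then zero-extends the result
 * to 64 bits, so a byte of 0x80 (-128) comes back as 0x000000000000FF80.  A
 * minimal compile-time check of that arithmetic, assuming only <iprt/assert.h>:
 */
AssertCompile((uint64_t)(uint16_t)(int16_t)(int8_t)-128 == UINT64_C(0xFF80));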
603
604
605/**
606 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
607 * to 32 bits.
608 */
609IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
610{
611#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
612 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
613#else
614 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
615#endif
616}
617
618/**
619 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
620 * to 64 bits.
621 */
622IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
623{
624#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
625 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
626#else
627 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
628#endif
629}
630
631
632/**
633 * Used by TB code to load unsigned 16-bit data w/ segmentation.
634 */
635IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
636{
637#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
638 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
639#else
640 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
641#endif
642}
643
644
645/**
646 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
647 * to 32 bits.
648 */
649IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
650{
651#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
652 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
653#else
654 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
655#endif
656}
657
658
659/**
660 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
661 * to 64 bits.
662 */
663IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
664{
665#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
666 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
667#else
668 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
669#endif
670}
671
672
673/**
674 * Used by TB code to load unsigned 32-bit data w/ segmentation.
675 */
676IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
677{
678#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
679 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
680#else
681 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
682#endif
683}
684
685
686/**
687 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
688 * to 64 bits.
689 */
690IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
691{
692#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
693 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
694#else
695 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
696#endif
697}
698
699
700/**
701 * Used by TB code to load unsigned 64-bit data w/ segmentation.
702 */
703IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
704{
705#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
706 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
707#else
708 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
709#endif
710}
711
712
713/**
714 * Used by TB code to load 128-bit data w/ segmentation.
715 */
716IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
717{
718#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
719 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
720#else
721 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
722#endif
723}
724
725
726/**
727 * Used by TB code to load 128-bit data w/ segmentation, with SSE alignment checking.
728 */
729IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
730{
731#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
732 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
733#else
734 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
735#endif
736}
737
738
739/**
740 * Used by TB code to load 128-bit data w/ segmentation, without alignment checking.
741 */
742IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
743{
744#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
745 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
746#else
747 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
748#endif
749}
750
751
752/**
753 * Used by TB code to load 256-bit data w/ segmentation, without alignment checking.
754 */
755IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
756{
757#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
758 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
759#else
760 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
761#endif
762}
763
764
765/**
766 * Used by TB code to load 256-bit data w/ segmentation, with AVX alignment checking.
767 */
768IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
769{
770#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
771 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
772#else
773 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
774#endif
775}
776
777
778/**
779 * Used by TB code to store unsigned 8-bit data w/ segmentation.
780 */
781IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
782{
783#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
784 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
785#else
786 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
787#endif
788}
789
790
791/**
792 * Used by TB code to store unsigned 16-bit data w/ segmentation.
793 */
794IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
795{
796#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
797 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
798#else
799 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
800#endif
801}
802
803
804/**
805 * Used by TB code to store unsigned 32-bit data w/ segmentation.
806 */
807IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
808{
809#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
810 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
811#else
812 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
813#endif
814}
815
816
817/**
818 * Used by TB code to store unsigned 64-bit data w/ segmentation.
819 */
820IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
821{
822#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
823 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
824#else
825 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
826#endif
827}
828
829
830/**
831 * Used by TB code to store unsigned 128-bit data w/ segmentation.
832 */
833IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
834{
835#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
836 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
837#else
838 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
839#endif
840}
841
842
843/**
844 * Used by TB code to store unsigned 128-bit data w/ segmentation.
845 */
846IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
847{
848#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
849 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
850#else
851 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
852#endif
853}
854
855
856/**
857 * Used by TB code to store unsigned 256-bit data w/ segmentation.
858 */
859IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
860{
861#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
862 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
863#else
864 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
865#endif
866}
867
868
869/**
870 * Used by TB code to store unsigned 256-bit data w/ segmentation.
871 */
872IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
873{
874#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
875 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
876#else
877 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
878#endif
879}
880
881
882/**
883 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
884 */
885IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
886{
887#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
888 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
889#else
890 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
891#endif
892}
893
894
895/**
896 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
897 */
898IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
899{
900#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
901 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
902#else
903 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
904#endif
905}
906
907
908/**
909 * Used by TB code to store a 32-bit selector value onto a generic stack.
910 *
911 * Intel CPUs don't write a whole dword here, hence the special function.
912 */
913IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
914{
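    /* Editor's note (illustrative, not in the original source): per the Intel SDM,
       recent Intel CPUs pushing a segment register with a 32-bit operand size write
       only the low 16 bits and leave the upper half of the stack slot unchanged,
       which is why this separate SReg store helper exists. */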
915#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
916 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
917#else
918 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
919#endif
920}
921
922
923/**
924 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
925 */
926IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
927{
928#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
929 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
930#else
931 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
932#endif
933}
934
935
936/**
937 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
938 */
939IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
940{
941#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
942 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
943#else
944 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
945#endif
946}
947
948
949/**
950 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
951 */
952IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
953{
954#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
955 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
956#else
957 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
958#endif
959}
960
961
962/**
963 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
964 */
965IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
966{
967#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
968 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
969#else
970 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
971#endif
972}
973
974
975
976/*********************************************************************************************************************************
977* Helpers: Flat memory fetches and stores. *
978*********************************************************************************************************************************/
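/*
 * Editor's note (illustrative, not in the original source): the flat helpers below
 * pass UINT8_MAX as the segment register index to the *SafeJmp workers, which in
 * IEM appears to denote "no segment / flat addressing"; the non-TLB fallbacks call
 * the dedicated iemMemFlat* variants instead.
 */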
979
980/**
981 * Used by TB code to load unsigned 8-bit data w/ flat address.
982 * @note Zero extending the value to 64-bit to simplify assembly.
983 */
984IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
985{
986#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
987 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
988#else
989 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
990#endif
991}
992
993
994/**
995 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
996 * to 16 bits.
997 * @note Zero extending the value to 64-bit to simplify assembly.
998 */
999IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1000{
1001#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1002 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1003#else
1004 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1005#endif
1006}
1007
1008
1009/**
1010 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1011 * to 32 bits.
1012 * @note Zero extending the value to 64-bit to simplify assembly.
1013 */
1014IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1015{
1016#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1017 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1018#else
1019 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1020#endif
1021}
1022
1023
1024/**
1025 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1026 * to 64 bits.
1027 */
1028IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1029{
1030#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1031 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1032#else
1033 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1034#endif
1035}
1036
1037
1038/**
1039 * Used by TB code to load unsigned 16-bit data w/ flat address.
1040 * @note Zero extending the value to 64-bit to simplify assembly.
1041 */
1042IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1043{
1044#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1045 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1046#else
1047 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1048#endif
1049}
1050
1051
1052/**
1053 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1054 * to 32 bits.
1055 * @note Zero extending the value to 64-bit to simplify assembly.
1056 */
1057IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1058{
1059#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1060 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1061#else
1062 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1063#endif
1064}
1065
1066
1067/**
1068 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1069 * to 64 bits.
1070 * @note Zero extending the value to 64-bit to simplify assembly.
1071 */
1072IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1073{
1074#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1075 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1076#else
1077 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1078#endif
1079}
1080
1081
1082/**
1083 * Used by TB code to load unsigned 32-bit data w/ flat address.
1084 * @note Zero extending the value to 64-bit to simplify assembly.
1085 */
1086IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1087{
1088#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1089 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1090#else
1091 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1092#endif
1093}
1094
1095
1096/**
1097 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
1098 * to 64 bits.
1099 * @note Zero extending the value to 64-bit to simplify assembly.
1100 */
1101IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1102{
1103#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1104 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1105#else
1106 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1107#endif
1108}
1109
1110
1111/**
1112 * Used by TB code to load unsigned 64-bit data w/ flat address.
1113 */
1114IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1115{
1116#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1117 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1118#else
1119 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
1120#endif
1121}
1122
1123
1124/**
1125 * Used by TB code to load unsigned 128-bit data w/ flat address.
1126 */
1127IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1128{
1129#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1130 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1131#else
1132 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, GCPtrMem);
1133#endif
1134}
1135
1136
1137/**
1138 * Used by TB code to load unsigned 128-bit data w/ flat address.
1139 */
1140IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1141{
1142#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1143 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1144#else
1145 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, GCPtrMem);
1146#endif
1147}
1148
1149
1150/**
1151 * Used by TB code to load unsigned 128-bit data w/ flat address.
1152 */
1153IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1154{
1155#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1156 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1157#else
1158 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, GCPtrMem);
1159#endif
1160}
1161
1162
1163/**
1164 * Used by TB code to load unsigned 256-bit data w/ flat address.
1165 */
1166IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1167{
1168#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1169 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1170#else
1171 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, GCPtrMem);
1172#endif
1173}
1174
1175
1176/**
1177 * Used by TB code to load unsigned 256-bit data w/ flat address.
1178 */
1179IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1180{
1181#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1182 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1183#else
1184 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, GCPtrMem);
1185#endif
1186}
1187
1188
1189/**
1190 * Used by TB code to store unsigned 8-bit data w/ flat address.
1191 */
1192IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
1193{
1194#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1195 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
1196#else
1197 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
1198#endif
1199}
1200
1201
1202/**
1203 * Used by TB code to store unsigned 16-bit data w/ flat address.
1204 */
1205IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1206{
1207#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1208 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
1209#else
1210 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
1211#endif
1212}
1213
1214
1215/**
1216 * Used by TB code to store unsigned 32-bit data w/ flat address.
1217 */
1218IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1219{
1220#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1221 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
1222#else
1223 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
1224#endif
1225}
1226
1227
1228/**
1229 * Used by TB code to store unsigned 64-bit data w/ flat address.
1230 */
1231IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1232{
1233#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1234 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
1235#else
1236 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
1237#endif
1238}
1239
1240
1241/**
1242 * Used by TB code to store unsigned 128-bit data w/ flat address.
1243 */
1244IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1245{
1246#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1247 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1248#else
1249 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
1250#endif
1251}
1252
1253
1254/**
1255 * Used by TB code to store unsigned 128-bit data w/ flat address.
1256 */
1257IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1258{
1259#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1260 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1261#else
1262 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
1263#endif
1264}
1265
1266
1267/**
1268 * Used by TB code to store unsigned 256-bit data w/ flat address.
1269 */
1270IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1271{
1272#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1273 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1274#else
1275 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
1276#endif
1277}
1278
1279
1280/**
1281 * Used by TB code to store unsigned 256-bit data w/ flat address.
1282 */
1283IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1284{
1285#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1286 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1287#else
1288 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
1289#endif
1290}
1291
1292
1293/**
1294 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
1295 */
1296IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1297{
1298#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1299 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1300#else
1301 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1302#endif
1303}
1304
1305
1306/**
1307 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
1308 */
1309IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1310{
1311#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1312 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1313#else
1314 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1315#endif
1316}
1317
1318
1319/**
1320 * Used by TB code to store a segment selector value onto a flat stack.
1321 *
1322 * Intel CPUs don't write a whole dword here, hence the special function.
1323 */
1324IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1325{
1326#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1327 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1328#else
1329 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1330#endif
1331}
1332
1333
1334/**
1335 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
1336 */
1337IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1338{
1339#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1340 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1341#else
1342 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1343#endif
1344}
1345
1346
1347/**
1348 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
1349 */
1350IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1351{
1352#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1353 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1354#else
1355 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
1356#endif
1357}
1358
1359
1360/**
1361 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
1362 */
1363IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1364{
1365#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1366 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1367#else
1368 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
1369#endif
1370}
1371
1372
1373/**
1374 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
1375 */
1376IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1377{
1378#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1379 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1380#else
1381 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
1382#endif
1383}
1384
1385
1386
1387/*********************************************************************************************************************************
1388* Helpers: Segmented memory mapping. *
1389*********************************************************************************************************************************/
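/*
 * Editor's note (illustrative, not in the original source): each mapping helper
 * below returns a host pointer to the guest memory and records unmap bookkeeping
 * in *pbUnmapInfo, which the generated TB code later hands to the corresponding
 * unmap/commit helper; the Atomic/Rw/Wo/Ro suffixes select the access mode.
 */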
1390
1391/**
1392 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
1393 * segmentation.
1394 */
1395IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1396 RTGCPTR GCPtrMem, uint8_t iSegReg))
1397{
1398#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1399 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1400#else
1401 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1402#endif
1403}
1404
1405
1406/**
1407 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1408 */
1409IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1410 RTGCPTR GCPtrMem, uint8_t iSegReg))
1411{
1412#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1413 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1414#else
1415 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1416#endif
1417}
1418
1419
1420/**
1421 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1422 */
1423IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1424 RTGCPTR GCPtrMem, uint8_t iSegReg))
1425{
1426#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1427 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1428#else
1429 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1430#endif
1431}
1432
1433
1434/**
1435 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1436 */
1437IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1438 RTGCPTR GCPtrMem, uint8_t iSegReg))
1439{
1440#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1441 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1442#else
1443 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1444#endif
1445}
1446
1447
1448/**
1449 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
1450 * segmentation.
1451 */
1452IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1453 RTGCPTR GCPtrMem, uint8_t iSegReg))
1454{
1455#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1456 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1457#else
1458 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1459#endif
1460}
1461
1462
1463/**
1464 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1465 */
1466IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1467 RTGCPTR GCPtrMem, uint8_t iSegReg))
1468{
1469#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1470 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1471#else
1472 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1473#endif
1474}
1475
1476
1477/**
1478 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1479 */
1480IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1481 RTGCPTR GCPtrMem, uint8_t iSegReg))
1482{
1483#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1484 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1485#else
1486 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1487#endif
1488}
1489
1490
1491/**
1492 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1493 */
1494IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1495 RTGCPTR GCPtrMem, uint8_t iSegReg))
1496{
1497#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1498 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1499#else
1500 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1501#endif
1502}
1503
1504
1505/**
1506 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
1507 * segmentation.
1508 */
1509IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1510 RTGCPTR GCPtrMem, uint8_t iSegReg))
1511{
1512#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1513 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1514#else
1515 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1516#endif
1517}
1518
1519
1520/**
1521 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1522 */
1523IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1524 RTGCPTR GCPtrMem, uint8_t iSegReg))
1525{
1526#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1527 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1528#else
1529 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1530#endif
1531}
1532
1533
1534/**
1535 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1536 */
1537IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1538 RTGCPTR GCPtrMem, uint8_t iSegReg))
1539{
1540#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1541 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1542#else
1543 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1544#endif
1545}
1546
1547
1548/**
1549 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1550 */
1551IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1552 RTGCPTR GCPtrMem, uint8_t iSegReg))
1553{
1554#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1555 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1556#else
1557 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1558#endif
1559}
1560
1561
1562/**
1563 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
1564 * segmentation.
1565 */
1566IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1567 RTGCPTR GCPtrMem, uint8_t iSegReg))
1568{
1569#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1570 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1571#else
1572 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1573#endif
1574}
1575
1576
1577/**
1578 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1579 */
1580IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1581 RTGCPTR GCPtrMem, uint8_t iSegReg))
1582{
1583#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1584 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1585#else
1586 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1587#endif
1588}
1589
1590
1591/**
1592 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1593 */
1594IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1595 RTGCPTR GCPtrMem, uint8_t iSegReg))
1596{
1597#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1598 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1599#else
1600 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1601#endif
1602}
1603
1604
1605/**
1606 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1607 */
1608IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1609 RTGCPTR GCPtrMem, uint8_t iSegReg))
1610{
1611#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1612 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1613#else
1614 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1615#endif
1616}
1617
1618
1619/**
1620 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1621 */
1622IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1623 RTGCPTR GCPtrMem, uint8_t iSegReg))
1624{
1625#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1626 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1627#else
1628 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1629#endif
1630}
1631
1632
1633/**
1634 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
1635 */
1636IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1637 RTGCPTR GCPtrMem, uint8_t iSegReg))
1638{
1639#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1640 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1641#else
1642 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1643#endif
1644}
1645
1646
1647/**
1648 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
1649 * segmentation.
1650 */
1651IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1652 RTGCPTR GCPtrMem, uint8_t iSegReg))
1653{
1654#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1655 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1656#else
1657 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1658#endif
1659}
1660
1661
1662/**
1663 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
1664 */
1665IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1666 RTGCPTR GCPtrMem, uint8_t iSegReg))
1667{
1668#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1669 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1670#else
1671 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1672#endif
1673}
1674
1675
1676/**
1677 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
1678 */
1679IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1680 RTGCPTR GCPtrMem, uint8_t iSegReg))
1681{
1682#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1683 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1684#else
1685 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1686#endif
1687}
1688
1689
1690/**
1691 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
1692 */
1693IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1694 RTGCPTR GCPtrMem, uint8_t iSegReg))
1695{
1696#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1697 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1698#else
1699 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1700#endif
1701}
1702
1703
1704/*********************************************************************************************************************************
1705* Helpers: Flat memory mapping. *
1706*********************************************************************************************************************************/
1707
1708/**
1709 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
1710 * address.
1711 */
1712IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1713{
1714#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1715 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1716#else
1717 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1718#endif
1719}
1720
1721
1722/**
1723 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
1724 */
1725IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1726{
1727#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1728 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1729#else
1730 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1731#endif
1732}
1733
1734
1735/**
1736 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
1737 */
1738IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1739{
1740#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1741 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1742#else
1743 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1744#endif
1745}
1746
1747
1748/**
1749 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
1750 */
1751IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1752{
1753#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1754 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1755#else
1756 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1757#endif
1758}
1759
1760
1761/**
1762 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
1763 * address.
1764 */
1765IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1766{
1767#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1768 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1769#else
1770 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1771#endif
1772}
1773
1774
1775/**
1776 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
1777 */
1778IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1779{
1780#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1781 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1782#else
1783 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1784#endif
1785}
1786
1787
1788/**
1789 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
1790 */
1791IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1792{
1793#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1794 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1795#else
1796 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1797#endif
1798}
1799
1800
1801/**
1802 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
1803 */
1804IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1805{
1806#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1807 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1808#else
1809 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1810#endif
1811}
1812
1813
1814/**
1815 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
1816 * address.
1817 */
1818IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1819{
1820#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1821 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1822#else
1823 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1824#endif
1825}
1826
1827
1828/**
1829 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
1830 */
1831IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1832{
1833#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1834 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1835#else
1836 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1837#endif
1838}
1839
1840
1841/**
1842 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
1843 */
1844IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1845{
1846#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1847 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1848#else
1849 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1850#endif
1851}
1852
1853
1854/**
1855 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
1856 */
1857IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1858{
1859#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1860 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1861#else
1862 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1863#endif
1864}
1865
1866
1867/**
1868 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
1869 * address.
1870 */
1871IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1872{
1873#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1874 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1875#else
1876 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1877#endif
1878}
1879
1880
1881/**
1882 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
1883 */
1884IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1885{
1886#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1887 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1888#else
1889 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1890#endif
1891}
1892
1893
1894/**
1895 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
1896 */
1897IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1898{
1899#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1900 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1901#else
1902 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1903#endif
1904}
1905
1906
1907/**
1908 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
1909 */
1910IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1911{
1912#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1913 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1914#else
1915 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1916#endif
1917}
1918
1919
1920/**
1921 * Used by TB code to map 80-bit float data writeonly w/ flat address.
1922 */
1923IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1924{
1925#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1926 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1927#else
1928 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1929#endif
1930}
1931
1932
1933/**
1934 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
1935 */
1936IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1937{
1938#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1939 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1940#else
1941 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1942#endif
1943}
1944
1945
1946/**
1947 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
1948 * address.
1949 */
1950IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1951{
1952#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1953 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1954#else
1955 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1956#endif
1957}
1958
1959
1960/**
1961 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
1962 */
1963IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1964{
1965#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1966 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1967#else
1968 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1969#endif
1970}
1971
1972
1973/**
1974 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
1975 */
1976IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1977{
1978#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1979 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1980#else
1981 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1982#endif
1983}
1984
1985
1986/**
1987 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
1988 */
1989IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1990{
1991#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1992 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1993#else
1994 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1995#endif
1996}
1997
1998
1999/*********************************************************************************************************************************
2000* Helpers: Commit, rollback & unmap *
2001*********************************************************************************************************************************/
2002
2003/**
2004 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2005 */
2006IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2007{
2008 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2009}
2010
2011
2012/**
2013 * Used by TB code to commit and unmap a read-write memory mapping.
2014 */
2015IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2016{
2017 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2018}
2019
2020
2021/**
2022 * Used by TB code to commit and unmap a write-only memory mapping.
2023 */
2024IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2025{
2026 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2027}
2028
2029
2030/**
2031 * Used by TB code to commit and unmap a read-only memory mapping.
2032 */
2033IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2034{
2035 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2036}
2037
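/*
 * A minimal sketch (disabled) of how the mapping helpers earlier in this file pair
 * up with the commit/unmap helpers above. The recompiled TB invokes these as
 * native calls rather than C code, so this only illustrates the bUnmapInfo
 * hand-off; the local variable names and u32Value are made up for the example.
 */
#if 0
{
    uint8_t   bUnmapInfo = 0;
    uint32_t *pu32Dst    = iemNativeHlpMemFlatMapDataU32Wo(pVCpu, &bUnmapInfo, GCPtrMem);
    *pu32Dst = u32Value;                                /* the guest store itself */
    iemNativeHlpMemCommitAndUnmapWo(pVCpu, bUnmapInfo); /* commit the write and release the mapping */
}
#endif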
2038
2039/**
2040 * Reinitializes the native recompiler state.
2041 *
2042 * Called before starting a new recompile job.
2043 */
2044static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2045{
2046 pReNative->cLabels = 0;
2047 pReNative->bmLabelTypes = 0;
2048 pReNative->cFixups = 0;
2049 pReNative->cTbExitFixups = 0;
2050#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2051 pReNative->pDbgInfo->cEntries = 0;
2052 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
2053#endif
2054 pReNative->pTbOrg = pTb;
2055 pReNative->cCondDepth = 0;
2056 pReNative->uCondSeqNo = 0;
2057 pReNative->uCheckIrqSeqNo = 0;
2058 pReNative->uTlbSeqNo = 0;
2059#ifdef IEMNATIVE_WITH_EFLAGS_SKIPPING
2060 pReNative->fSkippingEFlags = 0;
2061#endif
2062#ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2063 pReNative->PostponedEfl.fEFlags = 0;
2064 pReNative->PostponedEfl.enmOp = kIemNativePostponedEflOp_Invalid;
2065 pReNative->PostponedEfl.cOpBits = 0;
2066 pReNative->PostponedEfl.idxReg1 = UINT8_MAX;
2067 pReNative->PostponedEfl.idxReg2 = UINT8_MAX;
2068#endif
2069
2070#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2071 pReNative->Core.offPc = 0;
2072# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(VBOX_WITH_STATISTICS)
2073 pReNative->idxInstrPlusOneOfLastPcUpdate = 0;
2074# endif
2075# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2076 pReNative->Core.fDebugPcInitialized = false;
2077# endif
2078#endif
2079 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2080 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2081#if IEMNATIVE_HST_GREG_COUNT < 32
2082 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2083#endif
2084 ;
2085 pReNative->Core.bmHstRegsWithGstShadow = 0;
2086 pReNative->Core.bmGstRegShadows = 0;
2087#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2088 pReNative->Core.bmGstRegShadowDirty = 0;
2089#endif
2090 pReNative->Core.bmVars = 0;
2091 pReNative->Core.bmStack = 0;
2092 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2093 pReNative->Core.u64ArgVars = UINT64_MAX;
2094
2095 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 23);
2096 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2097 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2098 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2099 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2100 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2101 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2102 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2103 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2104 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2105 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2106 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2107 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2108 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2109 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
2110 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
2111 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
2112 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
2113 pReNative->aidxUniqueLabels[17] = UINT32_MAX;
2114 pReNative->aidxUniqueLabels[18] = UINT32_MAX;
2115 pReNative->aidxUniqueLabels[19] = UINT32_MAX;
2116 pReNative->aidxUniqueLabels[20] = UINT32_MAX;
2117 pReNative->aidxUniqueLabels[21] = UINT32_MAX;
2118 pReNative->aidxUniqueLabels[22] = UINT32_MAX;
2119
2120 pReNative->idxLastCheckIrqCallNo = UINT32_MAX;
2121
2122 /* Full host register reinit: */
2123 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2124 {
2125 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2126 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2127 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2128 }
2129
2130 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2131 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2132#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2133 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2134#endif
2135#ifdef IEMNATIVE_REG_FIXED_TMP0
2136 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2137#endif
2138#ifdef IEMNATIVE_REG_FIXED_TMP1
2139 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2140#endif
2141#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2142 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
2143#endif
2144 );
2145 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2146 {
2147 fRegs &= ~RT_BIT_32(idxReg);
2148 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2149 }
2150
2151 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2152#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2153 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2154#endif
2155#ifdef IEMNATIVE_REG_FIXED_TMP0
2156 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2157#endif
2158#ifdef IEMNATIVE_REG_FIXED_TMP1
2159 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
2160#endif
2161#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2162 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
2163#endif
2164
2165 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
2166#if IEMNATIVE_HST_SIMD_REG_COUNT < 32
2167 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
2168#endif
2169 ;
2170 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
2171 pReNative->Core.bmGstSimdRegShadows = 0;
2172 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
2173 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
2174
2175 /* Full host register reinit: */
2176 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
2177 {
2178 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
2179 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
2180 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
2181 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
2182 }
2183
2184 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
2185 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2186 {
2187 fRegs &= ~RT_BIT_32(idxReg);
2188 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2189 }
2190
2191#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
2192 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2193#endif
2194
2195 return pReNative;
2196}
2197
2198
2199/**
2200 * Used when done emitting the per-chunk code and for iemNativeInit bailout.
2201 */
2202static void iemNativeTerm(PIEMRECOMPILERSTATE pReNative)
2203{
2204 RTMemFree(pReNative->pInstrBuf);
2205 RTMemFree(pReNative->paLabels);
2206 RTMemFree(pReNative->paFixups);
2207 RTMemFree(pReNative->paTbExitFixups);
2208#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2209 RTMemFree(pReNative->pDbgInfo);
2210#endif
2211 RTMemFree(pReNative);
2212}
2213
2214
2215/**
2216 * Allocates and initializes the native recompiler state.
2217 *
2218 * This is called the first time an EMT wants to recompile something.
2219 *
2220 * @returns Pointer to the new recompiler state.
2221 * @param pVCpu The cross context virtual CPU structure of the calling
2222 * thread.
2223 * @param pTb The TB that's about to be recompiled. When this is NULL,
2224 * the recompiler state is for emitting the common per-chunk
2225 * code from iemNativeRecompileAttachExecMemChunkCtx.
2226 * @thread EMT(pVCpu)
2227 */
2228static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2229{
2230 VMCPU_ASSERT_EMT(pVCpu);
2231
2232 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2233 AssertReturn(pReNative, NULL);
2234
2235 /*
2236 * Try allocate all the buffers and stuff we need.
2237 */
2238 uint32_t const cFactor = pTb ? 1 : 32 /* per-chunk stuff doesn't really need anything but the code buffer */;
2239 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2240 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K / cFactor);
2241 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K / cFactor);
2242 pReNative->paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemAllocZ(sizeof(IEMNATIVEEXITFIXUP) * _8K / cFactor);
2243#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2244 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K / cFactor]));
2245#endif
2246 if (RT_LIKELY( pReNative->pInstrBuf
2247 && pReNative->paLabels
2248 && pReNative->paFixups
2249 && pReNative->paTbExitFixups)
2250#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2251 && pReNative->pDbgInfo
2252#endif
2253 )
2254 {
2255 /*
2256 * Set the buffer & array sizes on success.
2257 */
2258 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2259 pReNative->cLabelsAlloc = _8K / cFactor;
2260 pReNative->cFixupsAlloc = _16K / cFactor;
2261 pReNative->cTbExitFixupsAlloc = _8K / cFactor;
2262#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2263 pReNative->cDbgInfoAlloc = _16K / cFactor;
2264#endif
2265
2266 /* Other constant stuff: */
2267 pReNative->pVCpu = pVCpu;
2268
2269 /*
2270 * Done, just reinit it.
2271 */
2272 return iemNativeReInit(pReNative, pTb);
2273 }
2274
2275 /*
2276 * Failed. Cleanup and return.
2277 */
2278 AssertFailed();
2279 iemNativeTerm(pReNative);
2280 return NULL;
2281}
2282
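/*
 * A rough lifecycle sketch (disabled): iemNativeInit is called once per EMT for
 * the first recompilation, after which the state is recycled via iemNativeReInit
 * for each new TB and torn down with iemNativeTerm. Where the pointer is cached
 * (pNativeRecompilerStateR3 below) is an assumption for the example, not taken
 * from this file.
 */
#if 0
    PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3; /* assumed cache location */
    if (!pReNative)
        pReNative = iemNativeInit(pVCpu, pTb);       /* first TB on this EMT: allocate the buffers */
    else
        pReNative = iemNativeReInit(pReNative, pTb); /* later TBs: just reset counters and bitmaps */
    AssertReturn(pReNative, NULL);
    /* ... recompile pTb ... */
    /* iemNativeTerm(pReNative) is used for cleanup / init bailout. */
#endif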
2283
2284/**
2285 * Creates a label
2286 *
2287 * If the label does not yet have a defined position,
2288 * call iemNativeLabelDefine() later to set it.
2289 *
2290 * @returns Label ID. Throws VBox status code on failure, so no need to check
2291 * the return value.
2292 * @param pReNative The native recompile state.
2293 * @param enmType The label type.
2294 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2295 * label is not yet defined (default).
2296 * @param uData Data associated with the label. Only applicable to
2297 * certain types of labels. Default is zero.
2298 */
2299DECL_HIDDEN_THROW(uint32_t)
2300iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2301 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2302{
2303 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2304#if defined(RT_ARCH_AMD64)
2305 Assert(enmType >= kIemNativeLabelType_LoopJumpTarget);
2306#endif
2307
2308 /*
2309 * Locate existing label definition.
2310 *
2311 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2312 * and uData is zero.
2313 */
2314 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2315 uint32_t const cLabels = pReNative->cLabels;
2316 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2317#ifndef VBOX_STRICT
2318 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2319 && offWhere == UINT32_MAX
2320 && uData == 0
2321#endif
2322 )
2323 {
2324#ifndef VBOX_STRICT
2325 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2326 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2327 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2328 if (idxLabel < pReNative->cLabels)
2329 return idxLabel;
2330#else
2331 for (uint32_t i = 0; i < cLabels; i++)
2332 if ( paLabels[i].enmType == enmType
2333 && paLabels[i].uData == uData)
2334 {
2335 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2336 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2337 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2338 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2339 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2340 return i;
2341 }
2342 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2343 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2344#endif
2345 }
2346
2347 /*
2348 * Make sure we've got room for another label.
2349 */
2350 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2351 { /* likely */ }
2352 else
2353 {
2354 uint32_t cNew = pReNative->cLabelsAlloc;
2355 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2356 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2357 cNew *= 2;
2358 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restrict this */
2359 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2360 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2361 pReNative->paLabels = paLabels;
2362 pReNative->cLabelsAlloc = cNew;
2363 }
2364
2365 /*
2366 * Define a new label.
2367 */
2368 paLabels[cLabels].off = offWhere;
2369 paLabels[cLabels].enmType = enmType;
2370 paLabels[cLabels].uData = uData;
2371 pReNative->cLabels = cLabels + 1;
2372
2373 Assert((unsigned)enmType < 64);
2374 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2375
2376 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2377 {
2378 Assert(uData == 0);
2379 pReNative->aidxUniqueLabels[enmType] = cLabels;
2380 }
2381
2382 if (offWhere != UINT32_MAX)
2383 {
2384#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2385 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2386 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2387#endif
2388 }
2389 return cLabels;
2390}
2391
2392
2393/**
2394 * Defines the location of an existing label.
2395 *
2396 * @param pReNative The native recompile state.
2397 * @param idxLabel The label to define.
2398 * @param offWhere The position.
2399 */
2400DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2401{
2402 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2403 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2404 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2405 pLabel->off = offWhere;
2406#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2407 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2408 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2409#endif
2410}
2411
2412
2413/**
2414 * Looks up a label.
2415 *
2416 * @returns Label ID if found, UINT32_MAX if not.
2417 */
2418DECLHIDDEN(uint32_t) iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2419 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/) RT_NOEXCEPT
2420{
2421 Assert((unsigned)enmType < 64);
2422 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2423 {
2424 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2425 return pReNative->aidxUniqueLabels[enmType];
2426
2427 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2428 uint32_t const cLabels = pReNative->cLabels;
2429 for (uint32_t i = 0; i < cLabels; i++)
2430 if ( paLabels[i].enmType == enmType
2431 && paLabels[i].uData == uData
2432 && ( paLabels[i].off == offWhere
2433 || offWhere == UINT32_MAX
2434 || paLabels[i].off == UINT32_MAX))
2435 return i;
2436 }
2437 return UINT32_MAX;
2438}
2439
2440
2441/**
2442 * Adds a fixup.
2443 *
2444 * @throws VBox status code (int) on failure.
2445 * @param pReNative The native recompile state.
2446 * @param offWhere The instruction offset of the fixup location.
2447 * @param idxLabel The target label ID for the fixup.
2448 * @param enmType The fixup type.
2449 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2450 */
2451DECL_HIDDEN_THROW(void)
2452iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2453 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2454{
2455 Assert(idxLabel <= UINT16_MAX);
2456 Assert((unsigned)enmType <= UINT8_MAX);
2457#ifdef RT_ARCH_ARM64
2458 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
2459 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
2460 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
2461#endif
2462
2463 /*
2464 * Make sure we've room.
2465 */
2466 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2467 uint32_t const cFixups = pReNative->cFixups;
2468 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2469 { /* likely */ }
2470 else
2471 {
2472 uint32_t cNew = pReNative->cFixupsAlloc;
2473 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2474 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2475 cNew *= 2;
2476 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2477 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2478 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2479 pReNative->paFixups = paFixups;
2480 pReNative->cFixupsAlloc = cNew;
2481 }
2482
2483 /*
2484 * Add the fixup.
2485 */
2486 paFixups[cFixups].off = offWhere;
2487 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2488 paFixups[cFixups].enmType = enmType;
2489 paFixups[cFixups].offAddend = offAddend;
2490 pReNative->cFixups = cFixups + 1;
2491}
2492
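/*
 * A minimal sketch (disabled) tying the label and fixup APIs together for a
 * forward branch. iemNativeEmitJccToLabelEx stands in for the real jump
 * emitters from IEMN8veRecompilerEmit.h (which record the fixup via
 * iemNativeAddFixup); the label type is picked purely for illustration.
 */
#if 0
    /* 1. Create the label without a position (offWhere left at UINT32_MAX). */
    uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_LoopJumpTarget);

    /* 2. Emit the branch; its emitter calls iemNativeAddFixup(pReNative, off, idxLabel, ...). */
    off = iemNativeEmitJccToLabelEx(pReNative, off, idxLabel /*, condition*/);

    /* ... emit the code that is skipped when the branch is taken ... */

    /* 3. Bind the label to the current offset; the final pass resolves all fixups. */
    iemNativeLabelDefine(pReNative, idxLabel, off);
#endif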
2493
2494/**
2495 * Adds a fixup to the per chunk tail code.
2496 *
2497 * @throws VBox status code (int) on failure.
2498 * @param pReNative The native recompile state.
2499 * @param offWhere The instruction offset of the fixup location.
2500 * @param enmExitReason The exit reason to jump to.
2501 */
2502DECL_HIDDEN_THROW(void)
2503iemNativeAddTbExitFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, IEMNATIVELABELTYPE enmExitReason)
2504{
2505 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
2506
2507 /*
2508 * Make sure we've room.
2509 */
2510 PIEMNATIVEEXITFIXUP paTbExitFixups = pReNative->paTbExitFixups;
2511 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
2512 if (RT_LIKELY(cTbExitFixups < pReNative->cTbExitFixupsAlloc))
2513 { /* likely */ }
2514 else
2515 {
2516 uint32_t cNew = pReNative->cTbExitFixupsAlloc;
2517 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2518 AssertStmt(cTbExitFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2519 cNew *= 2;
2520 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2521 paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemRealloc(paTbExitFixups, cNew * sizeof(paTbExitFixups[0]));
2522 AssertStmt(paTbExitFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2523 pReNative->paTbExitFixups = paTbExitFixups;
2524 pReNative->cTbExitFixupsAlloc = cNew;
2525 }
2526
2527 /*
2528 * Add the fixup.
2529 */
2530 paTbExitFixups[cTbExitFixups].off = offWhere;
2531 paTbExitFixups[cTbExitFixups].enmExitReason = enmExitReason;
2532 pReNative->cTbExitFixups = cTbExitFixups + 1;
2533}
2534
2535
2536/**
2537 * Slow code path for iemNativeInstrBufEnsure.
2538 */
2539DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2540{
2541 /* Double the buffer size till we meet the request. */
2542 uint32_t cNew = pReNative->cInstrBufAlloc;
2543 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
2544 do
2545 cNew *= 2;
2546 while (cNew < off + cInstrReq);
2547
2548 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2549#ifdef RT_ARCH_ARM64
2550 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2551#else
2552 uint32_t const cbMaxInstrBuf = _2M;
2553#endif
2554 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2555
2556 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2557 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2558
2559#ifdef VBOX_STRICT
2560 pReNative->offInstrBufChecked = off + cInstrReq;
2561#endif
2562 pReNative->cInstrBufAlloc = cNew;
2563 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2564}
2565
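/*
 * Typical emitter usage (disabled sketch): the inline iemNativeInstrBufEnsure
 * fast path hands back the (possibly regrown) buffer and only drops into the
 * slow path above when the current allocation is too small. The exact inline
 * signature is assumed here; uEncodedInstr is a placeholder value.
 */
#if 0
    PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1 /*cInstrReq*/);
    pCodeBuf[off++] = uEncodedInstr; /* append one native instruction unit */
#endif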
2566#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2567
2568/**
2569 * Grows the static debug info array used during recompilation.
2570 *
2571 * @returns Pointer to the new debug info block; throws VBox status code on
2572 * failure, so no need to check the return value.
2573 */
2574DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2575{
2576 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2577 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2578 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2579 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2580 pReNative->pDbgInfo = pDbgInfo;
2581 pReNative->cDbgInfoAlloc = cNew;
2582 return pDbgInfo;
2583}
2584
2585
2586/**
2587 * Adds a new debug info uninitialized entry, returning the pointer to it.
2588 */
2589DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2590{
2591 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2592 { /* likely */ }
2593 else
2594 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2595 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2596}
2597
2598
2599/**
2600 * Debug Info: Adds a native offset record, if necessary.
2601 */
2602DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2603{
2604 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2605
2606 /*
2607 * Do we need this one?
2608 */
2609 uint32_t const offPrev = pDbgInfo->offNativeLast;
2610 if (offPrev == off)
2611 return;
2612 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2613
2614 /*
2615 * Add it.
2616 */
2617 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2618 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2619 pEntry->NativeOffset.offNative = off;
2620 pDbgInfo->offNativeLast = off;
2621}
2622
2623
2624/**
2625 * Debug Info: Record info about a label.
2626 */
2627static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2628{
2629 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2630 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2631 pEntry->Label.uUnused = 0;
2632 pEntry->Label.enmLabel = (uint8_t)enmType;
2633 pEntry->Label.uData = uData;
2634}
2635
2636
2637/**
2638 * Debug Info: Record info about a threaded call.
2639 */
2640static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2641{
2642 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2643 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2644 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2645 pEntry->ThreadedCall.uUnused = 0;
2646 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2647}
2648
2649
2650/**
2651 * Debug Info: Record info about a new guest instruction.
2652 */
2653static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2654{
2655 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2656 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2657 pEntry->GuestInstruction.uUnused = 0;
2658 pEntry->GuestInstruction.fExec = fExec;
2659}
2660
2661
2662/**
2663 * Debug Info: Record info about guest register shadowing.
2664 */
2665DECL_HIDDEN_THROW(void)
2666iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2667 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
2668{
2669 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2670 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2671 pEntry->GuestRegShadowing.uUnused = 0;
2672 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2673 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2674 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2675# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2676 Assert( idxHstReg != UINT8_MAX
2677 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
2678# endif
2679}
2680
2681
2682/**
2683 * Debug Info: Record info about guest SIMD register shadowing.
2684 */
2685DECL_HIDDEN_THROW(void)
2686iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
2687 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
2688{
2689 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2690 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
2691 pEntry->GuestSimdRegShadowing.uUnused = 0;
2692 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
2693 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
2694 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
2695}
2696
2697
2698# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2699/**
2700 * Debug Info: Record info about delayed RIP updates.
2701 */
2702DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint64_t offPc, uint32_t cInstrSkipped)
2703{
2704 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2705 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
2706 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
2707 pEntry->DelayedPcUpdate.offPc = offPc; /** @todo support larger values */
2708}
2709# endif
2710
2711
2712/**
2713 * Debug Info: Record info about a dirty guest register.
2714 */
2715DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
2716 uint8_t idxGstReg, uint8_t idxHstReg)
2717{
2718 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2719 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
2720 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
2721 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
2722 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
2723}
2724
2725
2726/**
2727 * Debug Info: Record info about a dirty guest register writeback operation.
2728 */
2729DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
2730{
2731 unsigned const cBitsGstRegMask = 25;
2732 uint32_t const fGstRegMask = RT_BIT_32(cBitsGstRegMask) - 1U;
2733
2734 /* The first block of 25 bits: */
2735 if (fGstReg & fGstRegMask)
2736 {
2737 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2738 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2739 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2740 pEntry->GuestRegWriteback.cShift = 0;
2741 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2742 fGstReg &= ~(uint64_t)fGstRegMask;
2743 if (!fGstReg)
2744 return;
2745 }
2746
2747 /* The second block of 25 bits: */
2748 fGstReg >>= cBitsGstRegMask;
2749 if (fGstReg & fGstRegMask)
2750 {
2751 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2752 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2753 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2754 pEntry->GuestRegWriteback.cShift = 1;
2755 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2756 fGstReg &= ~(uint64_t)fGstRegMask;
2757 if (!fGstReg)
2758 return;
2759 }
2760
2761 /* The last block with 14 bits: */
2762 fGstReg >>= cBitsGstRegMask;
2763 Assert(fGstReg & fGstRegMask);
2764 Assert((fGstReg & ~(uint64_t)fGstRegMask) == 0);
2765 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2766 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2767 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2768 pEntry->GuestRegWriteback.cShift = 2;
2769 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2770}
2771
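/*
 * Decoding sketch (disabled): a debug info consumer can reassemble the 64-bit
 * dirty mask from the up to three GuestRegWriteback entries emitted above,
 * assuming cShift counts 25-bit blocks (0, 1 and 2, matching bit offsets 0, 25
 * and 50). Entry iteration is simplified for the example.
 */
# if 0
static uint64_t iemNativeExampleCollectWritebackMask(IEMTBDBGENTRY const *paEntries, uint32_t cEntries)
{
    uint64_t fGstRegs = 0;
    for (uint32_t i = 0; i < cEntries; i++)
        if (paEntries[i].GuestRegWriteback.uType == kIemTbDbgEntryType_GuestRegWriteback)
            fGstRegs |= (uint64_t)paEntries[i].GuestRegWriteback.fGstReg
                      << (25 * paEntries[i].GuestRegWriteback.cShift);
    return fGstRegs;
}
# endif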
2772
2773# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
2774/**
2775 * Debug Info: Record info about emitting a postponed EFLAGS calculation.
2776 */
2777DECL_HIDDEN_THROW(void)
2778iemNativeDbgInfoAddPostponedEFlagsCalc(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVE_POSTPONED_EFL_OP_T enmOp,
2779 uint8_t cOpBits, uint8_t idxEmit)
2780{
2781 iemNativeDbgInfoAddNativeOffset(pReNative, off);
2782 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2783 pEntry->PostponedEflCalc.uType = kIemTbDbgEntryType_PostponedEFlagsCalc;
2784 pEntry->PostponedEflCalc.enmOp = (unsigned)enmOp;
2785 pEntry->PostponedEflCalc.cOpBits = cOpBits;
2786 pEntry->PostponedEflCalc.idxEmit = idxEmit;
2787 pEntry->PostponedEflCalc.uUnused = 0;
2788}
2789# endif /* IEMNATIVE_WITH_EFLAGS_POSTPONING */
2790
2791#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2792
2793
2794/*********************************************************************************************************************************
2795* Register Allocator *
2796*********************************************************************************************************************************/
2797
2798/**
2799 * Register parameter indexes (indexed by argument number).
2800 */
2801DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2802{
2803 IEMNATIVE_CALL_ARG0_GREG,
2804 IEMNATIVE_CALL_ARG1_GREG,
2805 IEMNATIVE_CALL_ARG2_GREG,
2806 IEMNATIVE_CALL_ARG3_GREG,
2807#if defined(IEMNATIVE_CALL_ARG4_GREG)
2808 IEMNATIVE_CALL_ARG4_GREG,
2809# if defined(IEMNATIVE_CALL_ARG5_GREG)
2810 IEMNATIVE_CALL_ARG5_GREG,
2811# if defined(IEMNATIVE_CALL_ARG6_GREG)
2812 IEMNATIVE_CALL_ARG6_GREG,
2813# if defined(IEMNATIVE_CALL_ARG7_GREG)
2814 IEMNATIVE_CALL_ARG7_GREG,
2815# endif
2816# endif
2817# endif
2818#endif
2819};
2820AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2821
2822/**
2823 * Call register masks indexed by argument count.
2824 */
2825DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2826{
2827 0,
2828 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2829 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2830 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2831 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2832 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2833#if defined(IEMNATIVE_CALL_ARG4_GREG)
2834 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2835 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2836# if defined(IEMNATIVE_CALL_ARG5_GREG)
2837 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2838 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2839# if defined(IEMNATIVE_CALL_ARG6_GREG)
2840 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2841 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2842 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2843# if defined(IEMNATIVE_CALL_ARG7_GREG)
2844 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2845 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2846 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2847# endif
2848# endif
2849# endif
2850#endif
2851};
2852
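/*
 * Usage sketch (disabled): how the two tables above are typically consulted when
 * marshalling helper-call arguments. iemNativeEmitLoadGprImm64 is assumed to be
 * the immediate-load emitter from IEMN8veRecompilerEmit.h; uValue and cArgs are
 * placeholders for the example.
 */
#if 0
    uint8_t  const idxRegArg2 = g_aidxIemNativeCallRegs[2];   /* host register carrying argument #2 */
    uint32_t const fArgRegs   = g_afIemNativeCallRegs[cArgs]; /* every register a cArgs-argument call uses for inputs */
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegArg2, uValue);
#endif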
2853#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2854/**
2855 * BP offset of the stack argument slots.
2856 *
2857 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2858 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2859 */
2860DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2861{
2862 IEMNATIVE_FP_OFF_STACK_ARG0,
2863# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2864 IEMNATIVE_FP_OFF_STACK_ARG1,
2865# endif
2866# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2867 IEMNATIVE_FP_OFF_STACK_ARG2,
2868# endif
2869# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2870 IEMNATIVE_FP_OFF_STACK_ARG3,
2871# endif
2872};
2873AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2874#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2875
2876/**
2877 * Info about shadowed guest register values.
2878 * @see IEMNATIVEGSTREG
2879 */
2880DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
2881{
2882#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2883 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2884 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2885 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2886 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2887 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2888 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2889 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2890 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2891 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2892 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2893 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2894 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2895 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2896 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2897 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2898 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2899 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
2900 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
2901 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
2902 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
2903 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2904 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2905 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2906 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2907 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2908 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2909 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
2910 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
2911 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
2912 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
2913 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
2914 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
2915 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2916 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2917 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2918 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2919 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2920 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2921 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2922 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2923 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2924 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2925 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2926 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2927 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
2928 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
2929 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2930 /* [kIemNativeGstReg_EFlags.Cf] = */ { UINT32_MAX, 0, "efl.cf", },
2931 /* [kIemNativeGstReg_EFlags.Of] = */ { UINT32_MAX, 0, "efl.of", },
2932 /* [kIemNativeGstReg_EFlags.Af] = */ { UINT32_MAX, 0, "efl.af", },
2933 /* [kIemNativeGstReg_EFlags.Zf] = */ { UINT32_MAX, 0, "efl.zf", },
2934 /* [kIemNativeGstReg_EFlags.Sf] = */ { UINT32_MAX, 0, "efl.sf", },
2935 /* [kIemNativeGstReg_EFlags.Pf] = */ { UINT32_MAX, 0, "efl.pf", },
2936 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2937#undef CPUMCTX_OFF_AND_SIZE
2938};
2939AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2940
2941
2942/** Host CPU general purpose register names. */
2943DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2944{
2945#ifdef RT_ARCH_AMD64
2946 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2947#elif RT_ARCH_ARM64
2948 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2949 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2950#else
2951# error "port me"
2952#endif
2953};
2954
2955
2956#if 0 /* unused */
2957/**
2958 * Tries to locate a suitable register in the given register mask.
2959 *
2960 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2961 * failed.
2962 *
2963 * @returns Host register number on success, returns UINT8_MAX on failure.
2964 */
2965static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2966{
2967 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2968 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2969 if (fRegs)
2970 {
2971 /** @todo pick better here: */
2972 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2973
2974 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2975 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2976 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2977 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2978
2979 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2980 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2981 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2982 return idxReg;
2983 }
2984 return UINT8_MAX;
2985}
2986#endif /* unused */
2987
2988#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2989
2990/**
2991 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
2992 *
2993 * @returns New code buffer offset on success, UINT32_MAX on failure.
2994 * @param pReNative The native recompile state.
2995 * @param off The current code buffer position.
2996 * @param enmGstReg The guest register to store to.
2997 * @param idxHstReg The host register to store from.
2998 */
2999DECL_FORCE_INLINE_THROW(uint32_t)
3000iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
3001{
3002 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
3003 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3004
3005 switch (g_aGstShadowInfo[enmGstReg].cb)
3006 {
3007 case sizeof(uint64_t):
3008 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3009 case sizeof(uint32_t):
3010 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3011 case sizeof(uint16_t):
3012 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3013# if 0 /* not present in the table. */
3014 case sizeof(uint8_t):
3015 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3016# endif
3017 default:
3018 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3019 }
3020}
3021
3022
3023/**
3024 * Emits code to flush a pending write of the given guest register,
3025 * version with alternative core state.
3026 *
3027 * @returns New code buffer offset.
3028 * @param pReNative The native recompile state.
3029 * @param off Current code buffer position.
3030 * @param pCore Alternative core state.
3031 * @param enmGstReg The guest register to flush.
3032 */
3033DECL_HIDDEN_THROW(uint32_t)
3034iemNativeRegFlushPendingWriteEx(PIEMRECOMPILERSTATE pReNative, uint32_t off, PIEMNATIVECORESTATE pCore, IEMNATIVEGSTREG enmGstReg)
3035{
3036 uint8_t const idxHstReg = pCore->aidxGstRegShadows[enmGstReg];
3037
3038 AssertCompile(kIemNativeGstReg_GprFirst == 0);
3039 Assert( enmGstReg <= kIemNativeGstReg_GprLast
3040 || enmGstReg == kIemNativeGstReg_MxCsr);
3041 Assert( idxHstReg != UINT8_MAX
3042 && pCore->bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3043 Log12(("iemNativeRegFlushPendingWriteEx: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3044 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3045
3046 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3047
3048 pCore->bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3049 return off;
3050}
3051
3052
3053/**
3054 * Emits code to flush a pending write of the given guest register.
3055 *
3056 * @returns New code buffer offset.
3057 * @param pReNative The native recompile state.
3058 * @param off Current code buffer position.
3059 * @param enmGstReg The guest register to flush.
3060 */
3061DECL_HIDDEN_THROW(uint32_t)
3062iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
3063{
3064 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3065
3066 AssertCompile(kIemNativeGstReg_GprFirst == 0);
3067 Assert( enmGstReg <= kIemNativeGstReg_GprLast
3068 || enmGstReg == kIemNativeGstReg_MxCsr);
3069 Assert( idxHstReg != UINT8_MAX
3070 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3071 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3072 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3073
3074 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3075
3076 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3077 return off;
3078}
3079
3080
3081/**
3082 * Flush the given set of guest registers if marked as dirty.
3083 *
3084 * @returns New code buffer offset.
3085 * @param pReNative The native recompile state.
3086 * @param off Current code buffer position.
3087 * @param fFlushGstReg The guest register set to flush (default is flush everything).
3088 * @note Must not modify the host status flags!
3089 */
3090DECL_HIDDEN_THROW(uint32_t)
3091iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
3092{
3093 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
3094 if (bmGstRegShadowDirty)
3095 {
3096# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3097 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3098 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
3099# endif
3100 do
3101 {
3102 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3103 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3104 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3105 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3106 } while (bmGstRegShadowDirty);
3107 }
3108
3109 return off;
3110}
3111
3112
3113/**
3114 * Flush all shadowed guest registers marked as dirty for the given host register.
3115 *
3116 * @returns New code buffer offset.
3117 * @param pReNative The native recompile state.
3118 * @param off Current code buffer position.
3119 * @param idxHstReg The host register.
3120 *
3121 * @note This doesn't do any unshadowing of guest registers from the host register.
3122 *
3123 * @note Must not modify the host status flags!
3124 */
3125DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
3126{
3127 /* We need to flush any pending guest register writes this host register shadows. */
3128 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3129 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
3130 {
3131# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3132 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3133 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
3134# endif
3135 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
3136 do
3137 {
3138 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3139 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3140 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3141 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3142 } while (bmGstRegShadowDirty);
3143 }
3144
3145 return off;
3146}
3147
3148#endif /* IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK */
3149
3150
3151/**
3152 * Locate a register, possibly freeing one up.
3153 *
3154 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3155 * failed.
3156 *
3157 * @returns Host register number on success. Returns UINT8_MAX if no registers
3158 * found; the caller is supposed to deal with this and raise an
3159 * allocation type specific status code (if desired).
3160 *
3161 * @throws VBox status code if we run into trouble spilling a variable or
3162 * recording debug info. Does NOT throw anything if we're out of
3163 * registers, though.
3164 *
3165 * @note Must not modify the host status flags!
3166 */
3167static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3168 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3169{
3170 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3171 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3172 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3173
3174 /*
3175 * Try a freed register that's shadowing a guest register.
3176 */
3177 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3178 if (fRegs)
3179 {
3180 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3181
3182#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3183 /*
3184 * When we have liveness information, we use it to kick out all shadowed
3185 * guest registers that will not be needed any more in this TB. If we're
3186 * lucky, this may prevent us from ending up here again.
3187 *
3188 * Note! We must consider the previous entry here so we don't free
3189 * anything that the current threaded function requires (current
3190 * entry is produced by the next threaded function).
3191 */
3192 uint32_t const idxCurCall = pReNative->idxCurCall;
3193 if (idxCurCall > 0)
3194 {
3195 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3196 uint64_t fToFreeMask = IEMLIVENESS_STATE_GET_CAN_BE_FREED_SET(pLivenessEntry);
3197
3198 /* Merge EFLAGS. */
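            /* The liveness bitmap has separate slots for cf, pf, af, zf, sf, of and an
               'other' EFLAGS group; the three AND steps below fold them into the single
               kIemNativeGstReg_EFlags bit, so the eflags shadow is only treated as
               freeable when every individual flag can be freed. */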
3199 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3200 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3201 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3202 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3203 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3204
3205 /* If it matches any shadowed registers. */
3206 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3207 {
3208#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3209 /* Writeback any dirty shadow registers we are about to unshadow. */
3210 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
3211#endif
3212
3213 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3214 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3215 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3216
3217 /* See if we've got any unshadowed registers we can return now. */
3218 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3219 if (fUnshadowedRegs)
3220 {
3221 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3222 return (fPreferVolatile
3223 ? ASMBitFirstSetU32(fUnshadowedRegs)
3224 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3225 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3226 - 1;
3227 }
3228 }
3229 }
3230#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3231
3232 unsigned const idxReg = (fPreferVolatile
3233 ? ASMBitFirstSetU32(fRegs)
3234 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3235 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3236 - 1;
3237
3238 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3239 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3240 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3241 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3242
3243#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3244 /* We need to flush any pending guest register writes this host register shadows. */
3245 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
3246#endif
3247
3248 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3249 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3250 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3251 return idxReg;
3252 }
3253
3254 /*
3255 * Try free up a variable that's in a register.
3256 *
3257 * We do two rounds here: first we evacuate variables that don't need to be
3258 * saved on the stack, then in the second round we move things to the stack.
3259 */
3260 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3261 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3262 {
3263 uint32_t fVars = pReNative->Core.bmVars;
3264 while (fVars)
3265 {
3266 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3267 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* (this is the GPR allocator) */
3268 {
3269 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3270 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3271 && (RT_BIT_32(idxReg) & fRegMask)
3272 && ( iLoop == 0
3273 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3274 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3275 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3276 {
3277 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3278 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3279 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3280 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3281 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3282 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3283#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3284 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3285#endif
3286
3287 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3288 {
3289 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3290 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3291 }
3292
3293 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3294 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3295
3296 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3297 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3298 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3299 return idxReg;
3300 }
3301 }
3302 fVars &= ~RT_BIT_32(idxVar);
3303 }
3304 }
3305
3306 return UINT8_MAX;
3307}
3308
3309
3310/**
3311 * Reassigns a variable to a different register specified by the caller.
3312 *
3313 * @returns The new code buffer position.
3314 * @param pReNative The native recompile state.
3315 * @param off The current code buffer position.
3316 * @param idxVar The variable index.
3317 * @param idxRegOld The old host register number.
3318 * @param idxRegNew The new host register number.
3319 * @param pszCaller The caller for logging.
3320 */
3321static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3322 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3323{
3324 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3325 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3326 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3327 RT_NOREF(pszCaller);
3328
3329#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3330 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3331#endif
3332 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3333
3334 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3335#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3336 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3337#endif
3338 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3339 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3340 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3341
3342 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3343 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3344 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3345 if (fGstRegShadows)
3346 {
3347 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3348 | RT_BIT_32(idxRegNew);
3349 while (fGstRegShadows)
3350 {
3351 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3352 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3353
3354 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3355 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3356 }
3357 }
3358
3359 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3360 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3361 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3362 return off;
3363}
3364
3365
3366/**
3367 * Moves a variable to a different register or spills it onto the stack.
3368 *
3369 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3370 * kinds can easily be recreated if needed later.
3371 *
3372 * @returns The new code buffer position.
3373 * @param pReNative The native recompile state.
3374 * @param off The current code buffer position.
3375 * @param idxVar The variable index.
3376 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3377 * call-volatile registers.
3378 */
3379DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3380 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3381{
3382 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3383 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3384 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3385 Assert(!pVar->fRegAcquired);
3386
3387 uint8_t const idxRegOld = pVar->idxReg;
3388 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3389 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3390 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3391 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3392 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3393 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3394 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3395 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3396#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3397 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3398#endif
3399
3400
3401 /** @todo Add statistics on this.*/
3402 /** @todo Implement basic variable liveness analysis (python) so variables
3403 * can be freed immediately once no longer used. This has the potential to
3404 * be trashing registers and stack for dead variables.
3405 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3406
3407 /*
3408 * First try move it to a different register, as that's cheaper.
3409 */
3410 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3411 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3412 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3413 if (fRegs)
3414 {
3415 /* Avoid using shadow registers, if possible. */
3416 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3417 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3418 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3419 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3420 }
3421
3422 /*
3423 * Otherwise we must spill the register onto the stack.
3424 */
3425 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3426 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3427 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3428 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3429
3430 pVar->idxReg = UINT8_MAX;
3431 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3432 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3433 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3434 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3435 return off;
3436}
3437
3438
3439/**
3440 * Allocates a temporary host general purpose register.
3441 *
3442 * This may emit code to save register content onto the stack in order to free
3443 * up a register.
3444 *
3445 * @returns The host register number; throws VBox status code on failure,
3446 * so no need to check the return value.
3447 * @param pReNative The native recompile state.
3448 * @param poff Pointer to the variable with the code buffer
3449 * position. This will be updated if we need to move
3450 * a variable from register to stack in order to
3451 * satisfy the request.
3452 * @tparam a_fPreferVolatile Whether to prefer volatile over non-volatile
3453 * registers (@c true, default) or the other way
3454 * around (@c false, for
3455 * iemNativeRegAllocTmpForGuestReg()).
3456 *
3457 * @note Must not modify the host status flags!
3458 */
3459template<bool const a_fPreferVolatile>
3460DECL_FORCE_INLINE_THROW(uint8_t) iemNativeRegAllocTmpInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
3461{
3462 /*
3463 * Try find a completely unused register, preferably a call-volatile one.
3464 */
3465 uint8_t idxReg;
3466 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3467 & ~pReNative->Core.bmHstRegsWithGstShadow
3468 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3469 if (fRegs)
3470 {
3471 if (a_fPreferVolatile)
3472 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3473 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3474 else
3475 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3476 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3477 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3478 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3479 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3480 }
3481 else
3482 {
3483 idxReg = iemNativeRegAllocFindFree(pReNative, poff, a_fPreferVolatile);
3484 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3485 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3486 }
3487 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3488}
3489
3490
3491/** See iemNativeRegAllocTmpInt for details. */
3492DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
3493{
3494 return iemNativeRegAllocTmpInt<true>(pReNative, poff);
3495}
3496
3497
3498/** See iemNativeRegAllocTmpInt for details. */
3499DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpPreferNonVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
3500{
3501 return iemNativeRegAllocTmpInt<false>(pReNative, poff);
3502}
3503
3504
3505/**
3506 * Alternative version of iemNativeRegAllocTmp that takes a mask of acceptable
3507 * registers.
3508 *
3509 * @returns The host register number; throws VBox status code on failure,
3510 * so no need to check the return value.
3511 * @param pReNative The native recompile state.
3512 * @param poff Pointer to the variable with the code buffer
3513 * position. This will be updated if we need to move
3514 * a variable from register to stack in order to
3515 * satisfy the request.
3516 * @param fRegMask Mask of acceptable registers.
3517 * @tparam a_fPreferVolatile Whether to prefer volatile over non-volatile
3518 * registers (@c true, default) or the other way
3519 * around (@c false, for
3520 * iemNativeRegAllocTmpForGuestReg()).
3521 */
3522template<bool const a_fPreferVolatile>
3523DECL_FORCE_INLINE_THROW(uint8_t) iemNativeRegAllocTmpExInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask)
3524{
3525 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3526 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3527
3528 /*
3529 * Try find a completely unused register, preferably a call-volatile one.
3530 */
3531 uint8_t idxReg;
3532 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3533 & ~pReNative->Core.bmHstRegsWithGstShadow
3534 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3535 & fRegMask;
3536 if (fRegs)
3537 {
3538 if RT_CONSTEXPR_IF(a_fPreferVolatile)
3539 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3540 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3541 else
3542 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3543 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3544 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3545 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3546 Log12(("iemNativeRegAllocTmpExInt: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3547 }
3548 else
3549 {
3550 idxReg = iemNativeRegAllocFindFree(pReNative, poff, a_fPreferVolatile, fRegMask);
3551 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3552 Log12(("iemNativeRegAllocTmpExInt: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3553 }
3554 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3555}
3556
3557
3558/** See iemNativeRegAllocTmpExInt for details. */
3559DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask)
3560{
3561 return iemNativeRegAllocTmpExInt<true>(pReNative, poff, fRegMask);
3562}
3563
3564
3565/** See iemNativeRegAllocTmpExInt for details. */
3566DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpExPreferNonVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask)
3567{
3568 return iemNativeRegAllocTmpExInt<false>(pReNative, poff, fRegMask);
3569}
3570
3571
3572/** Internal templated variation of iemNativeRegAllocTmpEx. */
3573template<uint32_t const a_fRegMask, bool const a_fPreferVolatile>
3574DECL_FORCE_INLINE_THROW(uint8_t) iemNativeRegAllocTmpExInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff)
3575{
3576 AssertCompile(!(a_fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3577 AssertCompile(!(a_fRegMask & IEMNATIVE_REG_FIXED_MASK));
3578
3579 /*
3580 * Try find a completely unused register, preferably a call-volatile one.
3581 */
3582 uint8_t idxReg;
3583 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3584 & ~pReNative->Core.bmHstRegsWithGstShadow
3585 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3586 & a_fRegMask;
3587 if (fRegs)
3588 {
3589 if RT_CONSTEXPR_IF(a_fPreferVolatile)
3590 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3591 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3592 else
3593 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3594 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3595 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3596 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3597 Log12(("iemNativeRegAllocTmpExInt: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3598 }
3599 else
3600 {
3601 idxReg = iemNativeRegAllocFindFree(pReNative, poff, a_fPreferVolatile, a_fRegMask);
3602 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3603 Log12(("iemNativeRegAllocTmpExInt: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3604 }
3605 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3606}
3607
3608
3609/**
3610 * Allocates a temporary register for loading an immediate value into.
3611 *
3612 * This will emit code to load the immediate, unless there happens to be an
3613 * unused register with the value already loaded.
3614 *
3615 * The caller will not modify the returned register, it must be considered
3616 * read-only. Free using iemNativeRegFreeTmpImm.
3617 *
3618 * @returns The host register number; throws VBox status code on failure, so no
3619 * need to check the return value.
3620 * @param pReNative The native recompile state.
3621 * @param poff Pointer to the variable with the code buffer position.
3622 * @param uImm The immediate value that the register must hold upon
3623 * return.
3624 * @note Prefers volatile registers.
3625 * @note Reusing immediate values has not been implemented yet.
3626 */
3627DECL_HIDDEN_THROW(uint8_t)
3628iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm)
3629{
3630 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff);
3631 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3632 return idxReg;
3633}
3634
3635
3636/**
3637 * Common worker for iemNativeRegAllocTmpForGuestReg() and
3638 * iemNativeRegAllocTmpForGuestEFlags().
3639 *
3640 * See iemNativeRegAllocTmpForGuestRegInt() for details.
3641 */
3642template<IEMNATIVEGSTREGUSE const a_enmIntendedUse, uint32_t const a_fRegMask>
3643static uint8_t iemNativeRegAllocTmpForGuestRegCommon(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3644{
3645 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3646#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3647 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3648#endif
3649
3650 /*
3651 * First check if the guest register value is already in a host register.
3652 */
3653 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3654 {
3655 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3656 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3657 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3658 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3659
3660 /* It's not supposed to be allocated... */
3661 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3662 {
3663 /*
3664 * If the register will trash the guest shadow copy, try find a
3665 * completely unused register we can use instead. If that fails,
3666 * we need to disassociate the host reg from the guest reg.
3667 */
3668 /** @todo would be nice to know if preserving the register is in any way helpful. */
3669 /* If the purpose is calculations, try duplicate the register value as
3670 we'll be clobbering the shadow. */
3671 if ( a_enmIntendedUse == kIemNativeGstRegUse_Calculation
3672 && ( ~pReNative->Core.bmHstRegs
3673 & ~pReNative->Core.bmHstRegsWithGstShadow
3674 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3675 {
3676 uint8_t const idxRegNew = iemNativeRegAllocTmpExInt<a_fRegMask, true>(pReNative, poff);
3677
3678 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3679
3680 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3681 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3682 g_apszIemNativeHstRegNames[idxRegNew]));
3683 idxReg = idxRegNew;
3684 }
3685 /* If the current register matches the restrictions, go ahead and allocate
3686 it for the caller. */
3687 else if (a_fRegMask & RT_BIT_32(idxReg))
3688 {
3689 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3690 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3691 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3692 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_Calculation)
3693 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n", g_apszIemNativeHstRegNames[idxReg],
3694 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[a_enmIntendedUse]));
3695 else
3696 {
3697 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3698 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3699 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3700 }
3701 }
3702 /* Otherwise, allocate a register that satisfies the caller and transfer
3703 the shadowing if compatible with the intended use. (This basically
3704 means the caller wants a non-volatile register (RSP push/pop scenario).) */
3705 else
3706 {
3707 Assert(!(a_fRegMask & IEMNATIVE_CALL_VOLATILE_GREG_MASK));
3708 uint8_t const idxRegNew = (a_fRegMask & IEMNATIVE_CALL_VOLATILE_GREG_MASK)
3709 && a_enmIntendedUse == kIemNativeGstRegUse_Calculation
3710 ? iemNativeRegAllocTmpEx(pReNative, poff, a_fRegMask & ~RT_BIT_32(idxReg))
3711 : iemNativeRegAllocTmpExPreferNonVolatile(pReNative, poff, a_fRegMask & ~RT_BIT_32(idxReg));
3712 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3713 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_Calculation)
3714 {
3715 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3716 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
3717 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
3718 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[a_enmIntendedUse]));
3719 }
3720 else
3721 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3722 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3723 g_apszIemNativeHstRegNames[idxRegNew]));
3724 idxReg = idxRegNew;
3725 }
3726 }
3727 else
3728 {
3729 /*
3730 * Oops. Shadowed guest register already allocated!
3731 *
3732 * Allocate a new register, copy the value and, if updating, the
3733 * guest shadow copy assignment to the new register.
3734 */
3735 AssertMsg( a_enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3736 && a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3737 ("This shouldn't happen: idxReg=%d enmGstReg=%d a_enmIntendedUse=%s\n",
3738 idxReg, enmGstReg, s_pszIntendedUse[a_enmIntendedUse]));
3739
3740 /** @todo share register for readonly access. */
3741 uint8_t const idxRegNew = a_enmIntendedUse == kIemNativeGstRegUse_Calculation
3742 ? iemNativeRegAllocTmpExInt<a_fRegMask, true>(pReNative, poff)
3743 : iemNativeRegAllocTmpExInt<a_fRegMask, false>(pReNative, poff);
3744
3745 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3746 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3747
3748 if RT_CONSTEXPR_IF( a_enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3749 && a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3750 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3751 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3752 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[a_enmIntendedUse]));
3753 else
3754 {
3755 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3756 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3757 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3758 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[a_enmIntendedUse]));
3759 }
3760 idxReg = idxRegNew;
3761 }
3762 Assert(RT_BIT_32(idxReg) & a_fRegMask); /* See assumption in fNoVolatileRegs docs. */
3763
3764#ifdef VBOX_STRICT
3765 /* Strict builds: Check that the value is correct. */
3766 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3767#endif
3768
3769#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3770 /** @todo r=aeichner Implement for registers other than GPR as well. */
3771 if RT_CONSTEXPR_IF( a_enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3772 || a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3773 if ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3774 && enmGstReg <= kIemNativeGstReg_GprLast)
3775 || enmGstReg == kIemNativeGstReg_MxCsr)
3776 {
3777# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3778 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3779 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
3780# endif
3781 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3782 }
3783#endif
3784
3785 return idxReg;
3786 }
3787
3788 /*
3789 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register (unless it is only used for a destructive calculation).
3790 */
3791 uint8_t const idxRegNew = a_enmIntendedUse != kIemNativeGstRegUse_Calculation
3792 ? iemNativeRegAllocTmpExInt<a_fRegMask, false>(pReNative, poff)
3793 : iemNativeRegAllocTmpExInt<a_fRegMask, true>(pReNative, poff);
3794
3795 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3796 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3797
3798 if RT_CONSTEXPR_IF(a_enmIntendedUse != kIemNativeGstRegUse_Calculation)
3799 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3800 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3801 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[a_enmIntendedUse]));
3802
3803#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3804 /** @todo r=aeichner Implement for registers other than GPR as well. */
3805 if RT_CONSTEXPR_IF( a_enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3806 || a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3807 if ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3808 && enmGstReg <= kIemNativeGstReg_GprLast)
3809 || enmGstReg == kIemNativeGstReg_MxCsr)
3810 {
3811# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3812 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3813 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
3814# endif
3815 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3816 }
3817#endif
3818
3819 return idxRegNew;
3820}
3821
3822
3823/**
3824 * Allocates a temporary host general purpose register for keeping a guest
3825 * register value.
3826 *
3827 * We may already have a register holding the guest register value; if not,
3828 * code will be emitted to load it. Code may also be emitted if we have to
3829 * free up a register to satisfy the request.
3830 *
3831 * @returns The host register number; throws VBox status code on failure, so no
3832 * need to check the return value.
3833 * @param pReNative The native recompile state.
3834 * @param poff Pointer to the variable with the code buffer
3835 * position. This will be updated if we need to move
3836 * a variable from register to stack in order to
3837 * satisfy the request.
3838 * @param enmGstReg The guest register that is to be updated.
3839 * @tparam a_enmIntendedUse How the caller will be using the host register.
3840 * @tparam a_fNonVolatileRegs Set if no volatile register allowed, clear if
3841 * any register is okay (default).
3842 * The ASSUMPTION here is that the caller has
3843 * already flushed all volatile registers,
3844 * so this is only applied if we allocate a new
3845 * register.
3846 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3847 */
3848template<IEMNATIVEGSTREGUSE const a_enmIntendedUse, bool const a_fNonVolatileRegs>
3849DECL_FORCE_INLINE_THROW(uint8_t)
3850iemNativeRegAllocTmpForGuestRegInt(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3851{
3852#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3853 AssertMsg( pReNative->idxCurCall == 0
3854 || enmGstReg == kIemNativeGstReg_Pc
3855 || (a_enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3856 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3857 : a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate
3858 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3859 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
3860 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3861#endif
3862
3863 if RT_CONSTEXPR_IF(!a_fNonVolatileRegs)
3864 return iemNativeRegAllocTmpForGuestRegCommon<a_enmIntendedUse,
3865 IEMNATIVE_HST_GREG_MASK
3866 & ~IEMNATIVE_REG_FIXED_MASK>(pReNative, poff, enmGstReg);
3867 else /* keep else, is required by MSC */
3868 return iemNativeRegAllocTmpForGuestRegCommon<a_enmIntendedUse,
3869 IEMNATIVE_HST_GREG_MASK
3870 & ~IEMNATIVE_REG_FIXED_MASK
3871 & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK>(pReNative, poff, enmGstReg);
3872}
3873
3874/* Variants including volatile registers: */
3875
3876DECL_HIDDEN_THROW(uint8_t)
3877iemNativeRegAllocTmpForGuestRegReadOnly(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3878{
3879 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ReadOnly, false>(pReNative, poff, enmGstReg);
3880}
3881
3882DECL_HIDDEN_THROW(uint8_t)
3883iemNativeRegAllocTmpForGuestRegUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3884{
3885 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForUpdate, false>(pReNative, poff, enmGstReg);
3886}
3887
3888DECL_HIDDEN_THROW(uint8_t)
3889iemNativeRegAllocTmpForGuestRegFullWrite(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3890{
3891 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForFullWrite, false>(pReNative, poff, enmGstReg);
3892}
3893
3894DECL_HIDDEN_THROW(uint8_t)
3895iemNativeRegAllocTmpForGuestRegCalculation(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3896{
3897 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_Calculation, false>(pReNative, poff, enmGstReg);
3898}
3899
3900/* Variants excluding any volatile registers: */
3901
3902DECL_HIDDEN_THROW(uint8_t)
3903iemNativeRegAllocTmpForGuestRegReadOnlyNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3904{
3905 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ReadOnly, true>(pReNative, poff, enmGstReg);
3906}
3907
3908DECL_HIDDEN_THROW(uint8_t)
3909iemNativeRegAllocTmpForGuestRegUpdateNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3910{
3911 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForUpdate, true>(pReNative, poff, enmGstReg);
3912}
3913
3914DECL_HIDDEN_THROW(uint8_t)
3915iemNativeRegAllocTmpForGuestRegFullWriteNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3916{
3917 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_ForFullWrite, true>(pReNative, poff, enmGstReg);
3918}
3919
3920DECL_HIDDEN_THROW(uint8_t)
3921iemNativeRegAllocTmpForGuestRegCalculationNoVolatile(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3922{
3923 return iemNativeRegAllocTmpForGuestRegInt<kIemNativeGstRegUse_Calculation, true>(pReNative, poff, enmGstReg);
3924}
3925
3926
3927
3928#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && defined(VBOX_STRICT)
3929/**
3930 * Specialized version of iemNativeRegAllocTmpForGuestReg for EFLAGS.
3931 *
3932 * This takes additional arguments for covering liveness assertions in strict
3933 * builds; it's otherwise the same as iemNativeRegAllocTmpForGuestReg() with
3934 * kIemNativeGstReg_EFlags as argument.
3935 */
3936template<IEMNATIVEGSTREGUSE const a_enmIntendedUse>
3937DECL_FORCE_INLINE_THROW(uint8_t)
3938iemNativeRegAllocTmpForGuestEFlags(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t fRead,
3939 uint64_t fWrite /*= 0*/, uint64_t fPotentialCall /*= 0*/)
3940{
3941 if (pReNative->idxCurCall != 0 && (fRead || fWrite /*|| fPotentialCall*/))
3942 {
3943 Assert(!(fRead & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3944 Assert(!(fWrite & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3945 Assert(!(fPotentialCall & ~IEMLIVENESSBIT_ALL_EFL_MASK));
3946 uint64_t const fAll = fRead | fWrite /*| fPotentialCall*/;
3947 uint32_t fState;
3948# define MY_ASSERT_ONE_EFL(a_enmGstEfl) \
3949 fState = iemNativeLivenessGetPrevStateByGstRegEx(pReNative, (IEMNATIVEGSTREG)(a_enmGstEfl)); \
3950 AssertMsg( !( fAll & RT_BIT_64(a_enmGstEfl)) \
3951 || ( fRead & RT_BIT_64(a_enmGstEfl) \
3952 ? fWrite & RT_BIT_64(a_enmGstEfl) \
3953 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED(fState) \
3954 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED(fState) \
3955 : IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(fState) \
3956 ) \
3957 , ("%s - %u\n", #a_enmGstEfl, fState))
3958 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
3959 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
3960 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
3961 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
3962 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
3963 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
3964 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
3965# undef MY_ASSERT_ONE_EFL
3966 }
3967 RT_NOREF(fPotentialCall);
3968
3969 AssertCompile(a_enmIntendedUse == kIemNativeGstRegUse_ReadOnly || a_enmIntendedUse == kIemNativeGstRegUse_ForUpdate);
3970 if RT_CONSTEXPR_IF(a_enmIntendedUse == kIemNativeGstRegUse_ReadOnly)
3971 return iemNativeRegAllocTmpForGuestRegCommon<kIemNativeGstRegUse_ReadOnly,
3972 IEMNATIVE_HST_GREG_MASK
3973 & ~IEMNATIVE_REG_FIXED_MASK>(pReNative, poff, kIemNativeGstReg_EFlags);
3974 else /* keep else, is required by MSC */
3975 return iemNativeRegAllocTmpForGuestRegCommon<kIemNativeGstRegUse_ForUpdate,
3976 IEMNATIVE_HST_GREG_MASK
3977 & ~IEMNATIVE_REG_FIXED_MASK>(pReNative, poff, kIemNativeGstReg_EFlags);
3978}
3979
3980
3981DECL_HIDDEN_THROW(uint8_t)
3982iemNativeRegAllocTmpForGuestEFlagsReadOnly(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
3983 uint64_t fRead, uint64_t fWrite /*= 0*/, uint64_t fPotentialCall /*= 0*/)
3984{
3985 return iemNativeRegAllocTmpForGuestEFlags<kIemNativeGstRegUse_ReadOnly>(pReNative, poff, fRead, fWrite, fPotentialCall);
3986}
3987
3988DECL_HIDDEN_THROW(uint8_t)
3989iemNativeRegAllocTmpForGuestEFlagsForUpdate(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t fRead,
3990 uint64_t fWrite /*= 0*/, uint64_t fPotentialCall /*= 0*/)
3991{
3992 return iemNativeRegAllocTmpForGuestEFlags<kIemNativeGstRegUse_ForUpdate>(pReNative, poff, fRead, fWrite, fPotentialCall);
3993}
3994
3995#endif
3996
3997
3998
3999/**
4000 * Common worker for iemNativeRegAllocTmpForGuestRegIfAlreadyPresent and
4001 * iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent.
4002 *
4003 * See iemNativeRegAllocTmpForGuestRegIfAlreadyPresent() for details.
4004 */
4005DECL_FORCE_INLINE(uint8_t)
4006iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4007{
4008 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
4009
4010 /*
4011 * First check if the guest register value is already in a host register.
4012 */
4013 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
4014 {
4015 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
4016 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4017 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
4018 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
4019
4020 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
4021 {
4022 /*
4023 * We only do readonly use here, so easy compared to the other
4024 * variant of this code.
4025 */
4026 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
4027 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
4028 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4029 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
4030 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
4031
4032#ifdef VBOX_STRICT
4033 /* Strict builds: Check that the value is correct. */
4034 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
4035#else
4036 RT_NOREF(poff);
4037#endif
4038 return idxReg;
4039 }
4040 }
4041
4042 return UINT8_MAX;
4043}
4044
4045
4046/**
4047 * Allocates a temporary host general purpose register that already holds the
4048 * given guest register value.
4049 *
4050 * The use case for this function is places where the shadowing state cannot be
4051 * modified due to branching and such. This will fail if we don't have a
4052 * current shadow copy handy or if it's incompatible. The only code that will
4053 * be emitted here is value checking code in strict builds.
4054 *
4055 * The intended use can only be readonly!
4056 *
4057 * @returns The host register number, UINT8_MAX if not present.
4058 * @param pReNative The native recompile state.
4059 * @param poff Pointer to the instruction buffer offset.
4060 * Will be updated in strict builds if a register is
4061 * found.
4062 * @param enmGstReg The guest register that is to be read.
4063 * @note In strict builds, this may throw instruction buffer growth failures.
4064 * Non-strict builds will not throw anything.
4065 * @sa iemNativeRegAllocTmpForGuestReg
4066 */
4067DECL_HIDDEN_THROW(uint8_t)
4068iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
4069{
4070#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
4071 AssertMsg( pReNative->idxCurCall == 0
4072 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
4073 || enmGstReg == kIemNativeGstReg_Pc
4074 , ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
4075#endif
4076 return iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(pReNative, poff, enmGstReg);
4077}
4078
4079
4080#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && defined(VBOX_STRICT)
4081/**
4082 * Specialized version of iemNativeRegAllocTmpForGuestRegIfAlreadyPresent for
4083 * EFLAGS.
4084 *
4085 * This takes additional arguments for covering liveness assertions in strict
4086 * builds; it's otherwise the same as
4087 * iemNativeRegAllocTmpForGuestRegIfAlreadyPresent() with
4088 * kIemNativeGstReg_EFlags as argument.
4089 *
4090 * @note The @a fWrite parameter is necessary to complete the liveness picture,
4091 * as iemNativeEmitFetchEFlags() may fetch flags in prep for a later
4092 * commit. If the operation clobbers all the flags, @a fRead will be
4093 * zero, so better verify the whole picture while we're here.
4094 */
4095DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpForGuestEFlagsIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
4096 uint64_t fRead, uint64_t fWrite /*=0*/)
4097{
4098 if (pReNative->idxCurCall != 0)
4099 {
4100 Assert(fRead | fWrite);
4101 Assert(!(fRead & ~IEMLIVENESSBIT_ALL_EFL_MASK));
4102 Assert(!(fWrite & ~IEMLIVENESSBIT_ALL_EFL_MASK));
4103 uint64_t const fAll = fRead | fWrite;
4104 uint32_t fState;
4105# define MY_ASSERT_ONE_EFL(a_enmGstEfl) \
4106 fState = iemNativeLivenessGetPrevStateByGstRegEx(pReNative, (IEMNATIVEGSTREG)(a_enmGstEfl)); \
4107 AssertMsg( !( fAll & RT_BIT_64(a_enmGstEfl)) \
4108 || ( fRead & RT_BIT_64(a_enmGstEfl) \
4109 ? fWrite & RT_BIT_64(a_enmGstEfl) \
4110 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED(fState) \
4111 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED(fState) \
4112 : IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(fState) \
4113 ) \
4114 , ("%s - %u\n", #a_enmGstEfl, fState))
4115 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OTHER);
4116 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_CF);
4117 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_PF);
4118 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_AF);
4119 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_ZF);
4120 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_SF);
4121 MY_ASSERT_ONE_EFL(IEMLIVENESSBIT_IDX_EFL_OF);
4122# undef MY_ASSERT_ONE_EFL
4123 }
4124 RT_NOREF(fRead);
4125 return iemNativeRegAllocTmpForGuestRegIfAlreadyPresentCommon(pReNative, poff, kIemNativeGstReg_EFlags);
4126}
4127#endif
4128
4129
4130/**
4131 * Allocates argument registers for a function call.
4132 *
4133 * @returns New code buffer offset on success; throws VBox status code on failure, so no
4134 * need to check the return value.
4135 * @param pReNative The native recompile state.
4136 * @param off The current code buffer offset.
4137 * @param cArgs The number of arguments the function call takes.
4138 */
4139DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
4140{
4141 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
4142 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
4143 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4144 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
4145
4146 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4147 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4148 else if (cArgs == 0)
4149 return off;
4150
4151 /*
4152 * Are we in luck and all the registers are free and not shadowing anything?
4153 */
4154 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
4155 for (uint32_t i = 0; i < cArgs; i++)
4156 {
4157 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4158 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4159 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4160 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4161 }
4162 /*
4163 * Okay, not lucky so we have to free up the registers.
4164 */
4165 else
4166 for (uint32_t i = 0; i < cArgs; i++)
4167 {
4168 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
4169 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
4170 {
4171 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4172 {
4173 case kIemNativeWhat_Var:
4174 {
4175 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4176 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4177 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
4178 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4179 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
4180 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4181
4182 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
4183 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4184 else
4185 {
4186 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4187 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
4188 }
4189 break;
4190 }
4191
4192 case kIemNativeWhat_Tmp:
4193 case kIemNativeWhat_Arg:
4194 case kIemNativeWhat_rc:
4195 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
4196 default:
4197 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
4198 }
4199
4200 }
4201 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
4202 {
4203 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
4204 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
4205 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
4206#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4207 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
4208#endif
4209 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4210 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4211 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4212 }
4213 else
4214 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
4215 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
4216 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
4217 }
4218 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
4219 return off;
4220}
4221
4222
4223DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
4224
4225
4226#if 0
4227/**
4228 * Frees a register assignment of any type.
4229 *
4230 * @param pReNative The native recompile state.
4231 * @param idxHstReg The register to free.
4232 *
4233 * @note Does not update variables.
4234 */
4235DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4236{
4237 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
4238 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4239 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
4240 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
4241 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
4242 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
4243 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
4244 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
4245 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
4246 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
4247 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4248 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4249 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
4250 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4251
4252 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4253 /* no flushing, right:
4254 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4255 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4256 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4257 */
4258}
4259#endif
4260
4261
4262/**
4263 * Frees a temporary register.
4264 *
4265 * Any shadow copies of guest registers assigned to the host register will not
4266 * be flushed by this operation.
4267 */
4268DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4269{
4270 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4271 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4272 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4273 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4274 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4275}
4276
4277
4278/**
4279 * Frees a temporary immediate register.
4280 *
4281 * It is assumed that the caller has not modified the register, so it still holds
4282 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4283 */
4284DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4285{
4286 iemNativeRegFreeTmp(pReNative, idxHstReg);
4287}
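/*
 * Editor's usage sketch (illustrative comment, not compiled code): the expected
 * pairing for a temporary immediate register.  The allocator call and uImmValue
 * are assumptions based on the doc comment above; only read accesses may happen
 * in between, since the free assumes the value is unchanged.
 *
 *     uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, uImmValue);
 *     // ... emit code that only reads idxRegImm ...
 *     iemNativeRegFreeTmpImm(pReNative, idxRegImm);
 */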
4288
4289
4290/**
4291 * Frees a register assigned to a variable.
4292 *
4293 * The register will be disassociated from the variable.
4294 */
4295DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4296{
4297 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4298 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4299 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4300 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4301 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4302 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4303
4304 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4305 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4306 if (!fFlushShadows)
4307 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4308 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4309 else
4310 {
4311 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4312 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4313#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4314 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
4315#endif
4316 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4317 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4318 uint64_t fGstRegShadows = fGstRegShadowsOld;
4319 while (fGstRegShadows)
4320 {
4321 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4322 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4323
4324 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4325 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4326 }
4327 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4328 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4329 }
4330}
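/*
 * Editor's note on the two calling modes of iemNativeRegFreeVar (illustrative only):
 *
 *     // Keep any guest shadow copies; only the variable association is dropped.
 *     iemNativeRegFreeVar(pReNative, idxHstReg, false /*fFlushShadows*/);
 *
 *     // Additionally drop every guest shadow copy held by the host register.
 *     iemNativeRegFreeVar(pReNative, idxHstReg, true /*fFlushShadows*/);
 */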
4331
4332
4333#if defined(LOG_ENABLED) || defined(IEMNATIVE_WITH_TB_DEBUG_INFO)
4334/** Host CPU SIMD register names. */
4335DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4336{
4337# ifdef RT_ARCH_AMD64
4338 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4339# elif defined(RT_ARCH_ARM64)
4340 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4341 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4342# else
4343# error "port me"
4344# endif
4345};
4346#endif
4347
4348
4349/**
4350 * Frees a SIMD register assigned to a variable.
4351 *
4352 * The register will be disassociated from the variable.
4353 */
4354DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4355{
4356 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
4357 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4358 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
4359 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4360 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4361 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4362
4363 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4364 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
4365 if (!fFlushShadows)
4366 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4367 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
4368 else
4369 {
4370 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4371 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
4372 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4373 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4374 uint64_t fGstRegShadows = fGstRegShadowsOld;
4375 while (fGstRegShadows)
4376 {
4377 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4378 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4379
4380 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4381 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4382 }
4383 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4384 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4385 }
4386}
4387
4388
4389/**
4390 * Reassigns a variable to a different SIMD register specified by the caller.
4391 *
4392 * @returns The new code buffer position.
4393 * @param pReNative The native recompile state.
4394 * @param off The current code buffer position.
4395 * @param idxVar The variable index.
4396 * @param idxRegOld The old host register number.
4397 * @param idxRegNew The new host register number.
4398 * @param pszCaller The caller for logging.
4399 */
4400static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4401 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4402{
4403 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4404 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4405 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4406 RT_NOREF(pszCaller);
4407
4408 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4409 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
4410 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
4411
4412 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4413 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4414 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4415
4416 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4417 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
4418
4419
4420 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
4421 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
4422 else
4423 {
4424 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
4425 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
4426 }
4427
4428 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4429 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4430 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
4431 if (fGstRegShadows)
4432 {
4433 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4434 | RT_BIT_32(idxRegNew);
4435 while (fGstRegShadows)
4436 {
4437 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4438 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4439
4440 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
4441 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
4442 }
4443 }
4444
4445 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4446 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4447 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
4448 return off;
4449}
4450
4451
4452/**
4453 * Moves a variable to a different register or spills it onto the stack.
4454 *
4455 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4456 * kinds can easily be recreated if needed later.
4457 *
4458 * @returns The new code buffer position.
4459 * @param pReNative The native recompile state.
4460 * @param off The current code buffer position.
4461 * @param idxVar The variable index.
4462 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4463 * call-volatile registers.
4464 */
4465DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4466 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
4467{
4468 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4469 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4470 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4471 Assert(!pVar->fRegAcquired);
4472 Assert(pVar->fSimdReg);
4473
4474 uint8_t const idxRegOld = pVar->idxReg;
4475 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
4476 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
4477 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4478 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
4479 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
4480 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4481 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
4482 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4483 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4484 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4485
4486 /** @todo Add statistics on this.*/
4487 /** @todo Implement basic variable liveness analysis (python) so variables
4488 * can be freed immediately once no longer used. Otherwise we risk
4489 * trashing registers and stack space on dead variables.
4490 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4491
4492 /*
4493 * First try move it to a different register, as that's cheaper.
4494 */
4495 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4496 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
4497 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
4498 if (fRegs)
4499 {
4500 /* Avoid using shadow registers, if possible. */
4501 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
4502 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
4503 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4504 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
4505 }
4506
4507 /*
4508 * Otherwise we must spill the register onto the stack.
4509 */
4510 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4511 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4512 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4513
4514 if (pVar->cbVar == sizeof(RTUINT128U))
4515 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4516 else
4517 {
4518 Assert(pVar->cbVar == sizeof(RTUINT256U));
4519 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4520 }
4521
4522 pVar->idxReg = UINT8_MAX;
4523 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4524 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
4525 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4526 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4527 return off;
4528}
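/*
 * Editor's illustration (not compiled): a caller that needs every call-volatile
 * SIMD register free would pass the default mask, letting the function prefer a
 * cheap register-to-register move and only fall back to spilling when no
 * suitable register is left:
 *
 *     off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar,
 *                                               IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
 */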
4529
4530
4531/**
4532 * Called right before emitting a call instruction to move anything important
4533 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
4534 * optionally freeing argument variables.
4535 *
4536 * @returns New code buffer offset, UINT32_MAX on failure.
4537 * @param pReNative The native recompile state.
4538 * @param off The code buffer offset.
4539 * @param cArgs The number of arguments the function call takes.
4540 * It is presumed that the host register part of these has
4541 * been allocated as such already and won't need moving,
4542 * just freeing.
4543 * @param fKeepVars Mask of variables that should keep their register
4544 * assignments. Caller must take care to handle these.
4545 */
4546DECL_HIDDEN_THROW(uint32_t)
4547iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4548{
4549 Assert(!cArgs); RT_NOREF(cArgs);
4550
4551 /* fKeepVars will reduce this mask. */
4552 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
4553
4554 /*
4555 * Move anything important out of volatile registers.
4556 */
4557 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4558#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
4559 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
4560#endif
4561 ;
4562
4563 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
4564 if (!fSimdRegsToMove)
4565 { /* likely */ }
4566 else
4567 {
4568 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
4569 while (fSimdRegsToMove != 0)
4570 {
4571 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
4572 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
4573
4574 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
4575 {
4576 case kIemNativeWhat_Var:
4577 {
4578 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
4579 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4580 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4581 Assert(pVar->idxReg == idxSimdReg);
4582 Assert(pVar->fSimdReg);
4583 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4584 {
4585 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
4586 idxVar, pVar->enmKind, pVar->idxReg));
4587 if (pVar->enmKind != kIemNativeVarKind_Stack)
4588 pVar->idxReg = UINT8_MAX;
4589 else
4590 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
4591 }
4592 else
4593 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
4594 continue;
4595 }
4596
4597 case kIemNativeWhat_Arg:
4598 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
4599 continue;
4600
4601 case kIemNativeWhat_rc:
4602 case kIemNativeWhat_Tmp:
4603 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
4604 continue;
4605
4606 case kIemNativeWhat_FixedReserved:
4607#ifdef RT_ARCH_ARM64
4608 continue; /* On ARM the upper half of the virtual 256-bit register. */
4609#endif
4610
4611 case kIemNativeWhat_FixedTmp:
4612 case kIemNativeWhat_pVCpuFixed:
4613 case kIemNativeWhat_pCtxFixed:
4614 case kIemNativeWhat_PcShadow:
4615 case kIemNativeWhat_Invalid:
4616 case kIemNativeWhat_End:
4617 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4618 }
4619 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4620 }
4621 }
4622
4623 /*
4624 * Do the actual freeing.
4625 */
4626 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
4627 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
4628 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
4629 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
4630
4631 /* If there are guest register shadows in any call-volatile register, we
4632 have to clear the corresponding guest register masks for each register. */
4633 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
4634 if (fHstSimdRegsWithGstShadow)
4635 {
4636 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4637 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
4638 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
4639 do
4640 {
4641 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
4642 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
4643
4644 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
4645
4646#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4647 /*
4648 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4649 * to call volatile registers).
4650 */
4651 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4652 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
4653 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
4654#endif
4655 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4656 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
4657
4658 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
4659 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
4660 } while (fHstSimdRegsWithGstShadow != 0);
4661 }
4662
4663 return off;
4664}
4665
4666
4667/**
4668 * Called right before emitting a call instruction to move anything important
4669 * out of call-volatile registers, free and flush the call-volatile registers,
4670 * optionally freeing argument variables.
4671 *
4672 * @returns New code buffer offset, UINT32_MAX on failure.
4673 * @param pReNative The native recompile state.
4674 * @param off The code buffer offset.
4675 * @param cArgs The number of arguments the function call takes.
4676 * It is presumed that the host register part of these has
4677 * been allocated as such already and won't need moving,
4678 * just freeing.
4679 * @param fKeepVars Mask of variables that should keep their register
4680 * assignments. Caller must take care to handle these.
4681 */
4682DECL_HIDDEN_THROW(uint32_t)
4683iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4684{
4685 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4686
4687 /* fKeepVars will reduce this mask. */
4688 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK;
4689
4690#ifdef RT_ARCH_ARM64
4691AssertCompile(IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK == UINT32_C(0x37fff));
4692#endif
4693
4694 /*
4695 * Move anything important out of volatile registers.
4696 */
4697 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4698 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4699 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK
4700#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4701 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4702#endif
4703 & ~g_afIemNativeCallRegs[cArgs];
4704
4705 fRegsToMove &= pReNative->Core.bmHstRegs;
4706 if (!fRegsToMove)
4707 { /* likely */ }
4708 else
4709 {
4710 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4711 while (fRegsToMove != 0)
4712 {
4713 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4714 fRegsToMove &= ~RT_BIT_32(idxReg);
4715
4716 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4717 {
4718 case kIemNativeWhat_Var:
4719 {
4720 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4721 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4722 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4723 Assert(pVar->idxReg == idxReg);
4724 Assert(!pVar->fSimdReg);
4725 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4726 {
4727 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4728 idxVar, pVar->enmKind, pVar->idxReg));
4729 if (pVar->enmKind != kIemNativeVarKind_Stack)
4730 pVar->idxReg = UINT8_MAX;
4731 else
4732 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4733 }
4734 else
4735 fRegsToFree &= ~RT_BIT_32(idxReg);
4736 continue;
4737 }
4738
4739 case kIemNativeWhat_Arg:
4740 AssertMsgFailed(("What?!?: %u\n", idxReg));
4741 continue;
4742
4743 case kIemNativeWhat_rc:
4744 case kIemNativeWhat_Tmp:
4745 AssertMsgFailed(("Missing free: %u\n", idxReg));
4746 continue;
4747
4748 case kIemNativeWhat_FixedTmp:
4749 case kIemNativeWhat_pVCpuFixed:
4750 case kIemNativeWhat_pCtxFixed:
4751 case kIemNativeWhat_PcShadow:
4752 case kIemNativeWhat_FixedReserved:
4753 case kIemNativeWhat_Invalid:
4754 case kIemNativeWhat_End:
4755 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4756 }
4757 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4758 }
4759 }
4760
4761 /*
4762 * Do the actual freeing.
4763 */
4764 if (pReNative->Core.bmHstRegs & fRegsToFree)
4765 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4766 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4767 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4768
4769 /* If there are guest register shadows in any call-volatile register, we
4770 have to clear the corresponding guest register masks for each register. */
4771 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4772 if (fHstRegsWithGstShadow)
4773 {
4774 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4775 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK,
4776 fHstRegsWithGstShadow));
4777 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4778 do
4779 {
4780 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4781 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4782
4783 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4784
4785#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4786 /*
4787 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4788 * to call volatile registers).
4789 */
4790 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4791 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
4792 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4793#endif
4794
4795 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4796 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4797 } while (fHstRegsWithGstShadow != 0);
4798 }
4799
4800 /*
4801 * Now for the SIMD registers, no argument support for now.
4802 */
4803 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
4804
4805 return off;
4806}
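/*
 * Editor's sketch of the surrounding call-emission sequence (illustrative; the
 * argument-loading and call-emission steps live elsewhere and are elided here):
 *
 *     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cArgs);
 *     // ... load argument registers and emit the actual call ...
 *     off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
 */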
4807
4808
4809/**
4810 * Flushes a set of guest register shadow copies.
4811 *
4812 * This is usually done after calling a threaded function or a C-implementation
4813 * of an instruction.
4814 *
4815 * @param pReNative The native recompile state.
4816 * @param fGstRegs Set of guest registers to flush.
4817 */
4818DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4819{
4820 /*
4821 * Reduce the mask by what's currently shadowed
4822 */
4823 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4824 fGstRegs &= bmGstRegShadowsOld;
4825 if (fGstRegs)
4826 {
4827 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4828 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4829 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4830 if (bmGstRegShadowsNew)
4831 {
4832 /*
4833 * Partial.
4834 */
4835 do
4836 {
4837 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4838 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4839 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4840 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4841 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4842#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4843 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4844#endif
4845
4846 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4847 fGstRegs &= ~fInThisHstReg;
4848 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4849 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4850 if (!fGstRegShadowsNew)
4851 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4852 } while (fGstRegs != 0);
4853 }
4854 else
4855 {
4856 /*
4857 * Clear all.
4858 */
4859 do
4860 {
4861 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4862 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4863 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4864 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4865 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4866#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4867 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4868#endif
4869
4870 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4871 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4872 } while (fGstRegs != 0);
4873 pReNative->Core.bmHstRegsWithGstShadow = 0;
4874 }
4875 }
4876}
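/*
 * Editor's illustration: after a threaded function or C fallback that may have
 * changed guest state behind the recompiler's back, drop the affected shadows so
 * stale host copies are not reused.  The function reduces the mask to what is
 * actually shadowed, so UINT64_MAX simply means "all of them":
 *
 *     iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
 */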
4877
4878
4879/**
4880 * Flushes guest register shadow copies held by a set of host registers.
4881 *
4882 * This is used with the TLB lookup code for ensuring that we don't carry on
4883 * with any guest shadows in volatile registers, as these will get corrupted by
4884 * a TLB miss.
4885 *
4886 * @param pReNative The native recompile state.
4887 * @param fHstRegs Set of host registers to flush guest shadows for.
4888 */
4889DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4890{
4891 /*
4892 * Reduce the mask by what's currently shadowed.
4893 */
4894 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4895 fHstRegs &= bmHstRegsWithGstShadowOld;
4896 if (fHstRegs)
4897 {
4898 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4899 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4900 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4901 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4902 if (bmHstRegsWithGstShadowNew)
4903 {
4904 /*
4905 * Partial (likely).
4906 */
4907 uint64_t fGstShadows = 0;
4908 do
4909 {
4910 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4911 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4912 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4913 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4914#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4915 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4916#endif
4917
4918 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4919 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4920 fHstRegs &= ~RT_BIT_32(idxHstReg);
4921 } while (fHstRegs != 0);
4922 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4923 }
4924 else
4925 {
4926 /*
4927 * Clear all.
4928 */
4929 do
4930 {
4931 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4932 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4933 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4934 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4935#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4936 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4937#endif
4938
4939 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4940 fHstRegs &= ~RT_BIT_32(idxHstReg);
4941 } while (fHstRegs != 0);
4942 pReNative->Core.bmGstRegShadows = 0;
4943 }
4944 }
4945}
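/*
 * Editor's illustration for the TLB lookup path mentioned above: before emitting
 * code that may call a helper, drop guest shadows living in the call-volatile
 * registers so a TLB miss cannot leave stale shadowing behind:
 *
 *     iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
 */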
4946
4947
4948/**
4949 * Restores guest shadow copies in volatile registers.
4950 *
4951 * This is used after calling a helper function (think TLB miss) to restore the
4952 * register state of volatile registers.
4953 *
4954 * @param pReNative The native recompile state.
4955 * @param off The code buffer offset.
4956 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4957 * be active (allocated) w/o asserting. Hack.
4958 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4959 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4960 */
4961DECL_HIDDEN_THROW(uint32_t)
4962iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4963{
4964 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4965 if (fHstRegs)
4966 {
4967 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4968 do
4969 {
4970 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4971
4972 /* It's not fatal if a register is active holding a variable that is
4973 shadowing a guest register, ASSUMING all pending guest register
4974 writes were flushed prior to the helper call. However, we'll be
4975 emitting duplicate restores, so it wastes code space. */
4976 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4977 RT_NOREF(fHstRegsActiveShadows);
4978
4979 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4980#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4981 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
4982#endif
4983 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4984 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4985 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4986
4987 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4988 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4989
4990 fHstRegs &= ~RT_BIT_32(idxHstReg);
4991 } while (fHstRegs != 0);
4992 }
4993 return off;
4994}
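/*
 * Editor's sketch of the pattern referenced by the @see tags above; the exact
 * parameters of the two Var helpers are not shown and are assumptions:
 *
 *     // ... iemNativeVarSaveVolatileRegsPreHlpCall(...) to save live variable values ...
 *     // ... emit the helper call, e.g. a TLB miss handler ...
 *     // ... iemNativeVarRestoreVolatileRegsPostHlpCall(...) to reload them ...
 *     off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0 /*fHstRegsActiveShadows*/);
 */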
4995
4996
4997
4998
4999/*********************************************************************************************************************************
5000* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
5001*********************************************************************************************************************************/
5002
5003/**
5004 * Info about shadowed guest SIMD register values.
5005 * @see IEMNATIVEGSTSIMDREG
5006 */
5007static struct
5008{
5009 /** Offset in VMCPU of XMM (low 128-bit) registers. */
5010 uint32_t offXmm;
5011 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
5012 uint32_t offYmm;
5013 /** Name (for logging). */
5014 const char *pszName;
5015} const g_aGstSimdShadowInfo[] =
5016{
5017#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
5018 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
5019 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
5020 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
5021 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
5022 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
5023 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
5024 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
5025 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
5026 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
5027 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
5028 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
5029 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
5030 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
5031 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
5032 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
5033 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
5034 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
5035#undef CPUMCTX_OFF_AND_SIZE
5036};
5037AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
5038
5039
5040/**
5041 * Frees a temporary SIMD register.
5042 *
5043 * Any shadow copies of guest registers assigned to the host register will not
5044 * be flushed by this operation.
5045 */
5046DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
5047{
5048 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
5049 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
5050 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5051 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
5052 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5053}
5054
5055
5056/**
5057 * Emits code to flush a pending write of the given guest SIMD register, if any, and clears its dirty state afterwards.
5058 *
5059 * @returns New code buffer offset.
5060 * @param pReNative The native recompile state.
5061 * @param off Current code buffer position.
5062 * @param enmGstSimdReg The guest SIMD register to flush.
5063 */
5064DECL_HIDDEN_THROW(uint32_t)
5065iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
5066{
5067 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5068
5069 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
5070 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
5071 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
5072 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
5073
5074 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
5075 {
5076 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5077 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
5078 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5079 }
5080
5081 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
5082 {
5083 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
5084 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
5085 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5086 }
5087
5088 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
5089 return off;
5090}
5091
5092
5093/**
5094 * Flush the given set of guest SIMD registers if marked as dirty.
5095 *
5096 * @returns New code buffer offset.
5097 * @param pReNative The native recompile state.
5098 * @param off Current code buffer position.
5099 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
5100 */
5101DECL_HIDDEN_THROW(uint32_t)
5102iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
5103{
5104 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5105 & fFlushGstSimdReg;
5106 if (bmGstSimdRegShadowDirty)
5107 {
5108# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5109 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5110 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
5111# endif
5112
5113 do
5114 {
5115 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
5116 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
5117 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5118 } while (bmGstSimdRegShadowDirty);
5119 }
5120
5121 return off;
5122}
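/*
 * Editor's illustration (register index 0 is an arbitrary example): flush a
 * single dirty guest SIMD register before code that reads it straight from
 * CPUMCTX, or flush everything by relying on the default mask:
 *
 *     off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, RT_BIT_64(IEMNATIVEGSTSIMDREG_SIMD(0)));
 *     off = iemNativeSimdRegFlushDirtyGuest(pReNative, off);   // all dirty guest SIMD registers
 */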
5123
5124
5125#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5126/**
5127 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
5128 *
5129 * @returns New code buffer offset.
5130 * @param pReNative The native recompile state.
5131 * @param off Current code buffer position.
5132 * @param idxHstSimdReg The host SIMD register.
5133 *
5134 * @note This doesn't do any unshadowing of guest registers from the host register.
5135 */
5136DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
5137{
5138 /* We need to flush any pending guest register writes this host register shadows. */
5139 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5140 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5141 if (bmGstSimdRegShadowDirty)
5142 {
5143# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5144 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5145 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
5146# endif
5147
5148 do
5149 {
5150 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
5151 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
5152 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
5153 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
5154 } while (bmGstSimdRegShadowDirty);
5155 }
5156
5157 return off;
5158}
5159#endif
5160
5161
5162/**
5163 * Locate a register, possibly freeing one up.
5164 *
5165 * This ASSUMES the caller has done the minimal/optimal allocation checks and
5166 * failed.
5167 *
5168 * @returns Host register number on success. Returns UINT8_MAX if no registers
5169 * were found; the caller is supposed to deal with this and raise an
5170 * allocation type specific status code (if desired).
5171 *
5172 * @throws VBox status code if we run into trouble spilling a variable or
5173 * recording debug info. Does NOT throw anything if we're out of
5174 * registers, though.
5175 */
5176static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
5177 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
5178{
5179 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
5180 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5181 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5182
5183 /*
5184 * Try a freed register that's shadowing a guest register.
5185 */
5186 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
5187 if (fRegs)
5188 {
5189 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
5190
5191#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5192 /*
5193 * When we have liveness information, we use it to kick out all shadowed
5194 * guest registers that will not be needed any more in this TB. If we're
5195 * lucky, this may prevent us from ending up here again.
5196 *
5197 * Note! We must consider the previous entry here so we don't free
5198 * anything that the current threaded function requires (current
5199 * entry is produced by the next threaded function).
5200 */
5201 uint32_t const idxCurCall = pReNative->idxCurCall;
5202 if (idxCurCall > 0)
5203 {
5204 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
5205 uint64_t const fToFreeMask = IEMLIVENESS_STATE_GET_CAN_BE_FREED_SET(pLivenessEntry);
5206
5207 /* If it matches any shadowed registers. */
5208 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
5209 {
5210 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
5211 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
5212 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
5213
5214 /* See if we've got any unshadowed registers we can return now. */
5215 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
5216 if (fUnshadowedRegs)
5217 {
5218 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
5219 return (fPreferVolatile
5220 ? ASMBitFirstSetU32(fUnshadowedRegs)
5221 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
5222 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
5223 - 1;
5224 }
5225 }
5226 }
5227#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
5228
5229 unsigned const idxReg = (fPreferVolatile
5230 ? ASMBitFirstSetU32(fRegs)
5231 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5232 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
5233 - 1;
5234
5235 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
5236 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
5237 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5238 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
5239
5240 /* We need to flush any pending guest register writes this host SIMD register shadows. */
5241 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
5242
5243 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5244 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5245 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5246 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5247 return idxReg;
5248 }
5249
5250 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
5251
5252 /*
5253 * Try free up a variable that's in a register.
5254 *
5255 * We do two rounds here, first evacuating variables we don't need to be
5256 * saved on the stack, then in the second round move things to the stack.
5257 */
5258 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
5259 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5260 {
5261 uint32_t fVars = pReNative->Core.bmVars;
5262 while (fVars)
5263 {
5264 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5265 if (pReNative->Core.aVars[idxVar].fSimdReg) /* (this is the SIMD allocator) */
5266 {
5267 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
5268 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5269 && (RT_BIT_32(idxReg) & fRegMask)
5270 && ( iLoop == 0
5271 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
5272 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5273 && !pReNative->Core.aVars[idxVar].fRegAcquired)
5274 {
5275 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
5276 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5277 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5278 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5279 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
5280 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5281
5282 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5283 {
5284 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5285 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5286 }
5287
5288 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5289 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5290
5291 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5292 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5293 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5294 return idxReg;
5295 }
5296 }
5297 fVars &= ~RT_BIT_32(idxVar);
5298 }
5299 }
5300
5301 AssertFailed();
5302 return UINT8_MAX;
5303}
5304
5305
5306/**
5307 * Flushes a set of guest register shadow copies.
5308 *
5309 * This is usually done after calling a threaded function or a C-implementation
5310 * of an instruction.
5311 *
5312 * @param pReNative The native recompile state.
5313 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5314 */
5315DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5316{
5317 /*
5318 * Reduce the mask by what's currently shadowed
5319 */
5320 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5321 fGstSimdRegs &= bmGstSimdRegShadows;
5322 if (fGstSimdRegs)
5323 {
5324 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5325 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5326 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5327 if (bmGstSimdRegShadowsNew)
5328 {
5329 /*
5330 * Partial.
5331 */
5332 do
5333 {
5334 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5335 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5336 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5337 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5338 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5339 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5340
5341 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5342 fGstSimdRegs &= ~fInThisHstReg;
5343 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5344 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5345 if (!fGstRegShadowsNew)
5346 {
5347 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5348 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5349 }
5350 } while (fGstSimdRegs != 0);
5351 }
5352 else
5353 {
5354 /*
5355 * Clear all.
5356 */
5357 do
5358 {
5359 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5360 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5361 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5362 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5363 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5364 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5365
5366 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5367 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5368 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5369 } while (fGstSimdRegs != 0);
5370 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5371 }
5372 }
5373}
5374
5375
5376/**
5377 * Allocates a temporary host SIMD register.
5378 *
5379 * This may emit code to save register content onto the stack in order to free
5380 * up a register.
5381 *
5382 * @returns The host register number; throws VBox status code on failure,
5383 * so no need to check the return value.
5384 * @param pReNative The native recompile state.
5385 * @param poff Pointer to the variable with the code buffer position.
5386 * This will be updated if we need to move a variable from
5387 * register to stack in order to satisfy the request.
5388 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5389 * registers (@c true, default) or the other way around
5390 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5391 */
5392DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5393{
5394 /*
5395 * Try find a completely unused register, preferably a call-volatile one.
5396 */
5397 uint8_t idxSimdReg;
5398 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5399 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5400 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5401 if (fRegs)
5402 {
5403 if (fPreferVolatile)
5404 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5405 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5406 else
5407 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5408 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5409 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5410 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5411
5412 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5413 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5414 }
5415 else
5416 {
5417 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5418 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5419 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5420 }
5421
5422 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5423 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5424}
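/*
 * Editor's usage sketch (illustrative, not compiled): the expected pairing with
 * the free routine defined earlier in this file:
 *
 *     uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
 *     // ... emit vector code using idxSimdTmp as scratch ...
 *     iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp);
 */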
5425
5426
5427/**
5428 * Alternative version of iemNativeSimdRegAllocTmp that takes a mask of acceptable
5429 * registers.
5430 *
5431 * @returns The host register number; throws VBox status code on failure,
5432 * so no need to check the return value.
5433 * @param pReNative The native recompile state.
5434 * @param poff Pointer to the variable with the code buffer position.
5435 * This will be updated if we need to move a variable from
5436 * register to stack in order to satisfy the request.
5437 * @param fRegMask Mask of acceptable registers.
5438 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5439 * registers (@c true, default) or the other way around
5440 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5441 */
5442DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5443 bool fPreferVolatile /*= true*/)
5444{
5445 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5446 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5447
5448 /*
5449 * Try find a completely unused register, preferably a call-volatile one.
5450 */
5451 uint8_t idxSimdReg;
5452 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5453 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5454 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5455 & fRegMask;
5456 if (fRegs)
5457 {
5458 if (fPreferVolatile)
5459 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5460 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5461 else
5462 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5463 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5464 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5465 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5466
5467 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5468 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5469 }
5470 else
5471 {
5472 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5473 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5474 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5475 }
5476
5477 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5478 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5479}
5480
5481
5482/**
5483 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5484 *
5485 * @param pReNative The native recompile state.
5486 * @param idxHstSimdReg The host SIMD register to update the state for.
5487 * @param enmLoadSz The load size to set.
5488 */
5489DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5490 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5491{
5492 /* Everything valid already? -> nothing to do. */
5493 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5494 return;
5495
5496 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5497 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5498 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5499 {
5500 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5501 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5502 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5503 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5504 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5505 }
5506}
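/*
 * Editor's worked example of how the load indicator merges; the trailing
 * comments show enmLoaded after each call:
 *
 *     iemNativeSimdRegSetValidLoadFlag(pReNative, idxReg, kIemNativeGstSimdRegLdStSz_Low128);  // Invalid -> Low128
 *     iemNativeSimdRegSetValidLoadFlag(pReNative, idxReg, kIemNativeGstSimdRegLdStSz_High128); // Low128  -> 256
 *     iemNativeSimdRegSetValidLoadFlag(pReNative, idxReg, kIemNativeGstSimdRegLdStSz_Low128);  // already 256, unchanged
 */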
5507
5508
5509static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
5510 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5511{
5512 /* Easy case first, either the destination loads the same range as what the source has already loaded or the source has loaded everything. */
5513 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5514 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5515 {
5516#ifdef RT_ARCH_ARM64
5517 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5518 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5519#endif
5520
5521 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5522 {
5523 switch (enmLoadSzDst)
5524 {
5525 case kIemNativeGstSimdRegLdStSz_256:
5526 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5527 break;
5528 case kIemNativeGstSimdRegLdStSz_Low128:
5529 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5530 break;
5531 case kIemNativeGstSimdRegLdStSz_High128:
5532 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5533 break;
5534 default:
5535 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5536 }
5537
5538 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5539 }
5540 }
5541 else
5542 {
5543 /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
5544 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
5545 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
5546 }
5547
5548 return off;
5549}
5550
5551
5552/**
5553 * Allocates a temporary host SIMD register for keeping a guest
5554 * SIMD register value.
5555 *
5556 * Since we may already have a register holding the guest register value,
5557 * code will be emitted to do the loading if that's not the case. Code may also
5558 * be emitted if we have to free up a register to satisfy the request.
5559 *
5560 * @returns The host register number; throws VBox status code on failure, so no
5561 * need to check the return value.
5562 * @param pReNative The native recompile state.
5563 * @param poff Pointer to the variable with the code buffer
5564 * position. This will be updated if we need to move a
5565 * variable from register to stack in order to satisfy
5566 * the request.
5567 * @param enmGstSimdReg The guest SIMD register that is to be updated.
5568 * @param enmLoadSz Load/store size.
5569 * @param enmIntendedUse How the caller will be using the host register.
5570 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5571 * register is okay (default). The ASSUMPTION here is
5572 * that the caller has already flushed all volatile
5573 * registers, so this is only applied if we allocate a
5574 * new register.
5575 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5576 */
5577DECL_HIDDEN_THROW(uint8_t)
5578iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5579 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz,
5580 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5581 bool fNoVolatileRegs /*= false*/)
5582{
5583 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5584#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5585 AssertMsg( pReNative->idxCurCall == 0
5586 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5587 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5588 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5589 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5590 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5591 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5592#endif
5593#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5594 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5595#endif
5596 uint32_t const fRegMask = !fNoVolatileRegs
5597 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5598 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5599
5600 /*
5601 * First check if the guest register value is already in a host register.
5602 */
5603 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5604 {
5605 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5606 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5607 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5608 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5609
5610 /* It's not supposed to be allocated... */
5611 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5612 {
5613 /*
5614 * If the register will trash the guest shadow copy, try to find a
5615 * completely unused register we can use instead. If that fails,
5616 * we need to disassociate the host reg from the guest reg.
5617 */
5618 /** @todo would be nice to know if preserving the register is in any way helpful. */
5619 /* If the purpose is calculations, try to duplicate the register value as
5620 we'll be clobbering the shadow. */
5621 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5622 && ( ~pReNative->Core.bmHstSimdRegs
5623 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5624 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5625 {
5626 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5627
5628 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5629
5630 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5631 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5632 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5633 idxSimdReg = idxRegNew;
5634 }
5635 /* If the current register matches the restrictions, go ahead and allocate
5636 it for the caller. */
5637 else if (fRegMask & RT_BIT_32(idxSimdReg))
5638 {
5639 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5640 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5641 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5642 {
5643 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5644 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
5645 else
5646 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5647 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5648 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5649 }
5650 else
5651 {
5652 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5653 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5654 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5655 }
5656 }
5657 /* Otherwise, allocate a register that satisfies the caller and transfer
5658 the shadowing if compatible with the intended use. (This basically
5659 means the caller wants a non-volatile register (RSP push/pop scenario).) */
5660 else
5661 {
5662 Assert(fNoVolatileRegs);
5663 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5664 !fNoVolatileRegs
5665 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5666 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5667 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5668 {
5669 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5670 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5671 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5672 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5673 }
5674 else
5675 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5676 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5677 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5678 idxSimdReg = idxRegNew;
5679 }
5680 }
5681 else
5682 {
5683 /*
5684 * Oops. Shadowed guest register already allocated!
5685 *
5686 * Allocate a new register, copy the value and, if updating, the
5687 * guest shadow copy assignment to the new register.
5688 */
5689 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5690 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5691 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5692 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5693
5694 /** @todo share register for readonly access. */
5695 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5696 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5697
5698 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5699 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5700 else
5701 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5702
5703 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5704 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5705 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5706 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5707 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5708 else
5709 {
5710 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5711 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5712 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5713 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5714 }
5715 idxSimdReg = idxRegNew;
5716 }
5717 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5718
5719#ifdef VBOX_STRICT
5720 /* Strict builds: Check that the value is correct. */
5721 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5722 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5723#endif
5724
5725 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5726 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5727 {
5728#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5729 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5730 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
5731#endif
5732
5733 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5734 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5735 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5736 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5737 else
5738 {
5739 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5740 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5741 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5742 }
5743 }
5744
5745 return idxSimdReg;
5746 }
5747
5748 /*
5749 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
5750 */
5751 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5752
5753 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5754 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5755 else
5756 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5757
5758 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5759 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5760
5761 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5762 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5763 {
5764#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5765 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5766 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
5767#endif
5768
5769 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5770 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5771 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5772 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5773 else
5774 {
5775 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5776 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5777 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5778 }
5779 }
5780
5781 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5782 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5783
5784 return idxRegNew;
5785}
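/*
 * Illustrative usage sketch (comment only, not part of the build): a typical
 * emitter allocating a host SIMD register that shadows a guest SIMD register
 * for updating.  The enmGstSimdReg value is whatever the caller has at hand;
 * the cleanup/writeback is assumed to follow the usual temporary register and
 * delayed writeback discipline of this allocator.
 *
 *      uint8_t const idxHstSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off, enmGstSimdReg,
 *                                                                            kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                            kIemNativeGstRegUse_ForUpdate);
 *      // ... emit code modifying the low 128 bits of idxHstSimdReg; the guest
 *      //     shadow has been marked dirty above, so the delayed writeback
 *      //     machinery stores it back to CPUMCTX when flushed ...
 */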
5786
5787
5788/**
5789 * Flushes guest SIMD register shadow copies held by a set of host registers.
5790 *
5791 * This is used when calling an external helper to ensure that we don't carry on
5792 * with any guest shadows in volatile registers, as these may get clobbered by the call.
5793 *
5794 * @param pReNative The native recompile state.
5795 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
5796 */
5797DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
5798{
5799 /*
5800 * Reduce the mask by what's currently shadowed.
5801 */
5802 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
5803 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
5804 if (fHstSimdRegs)
5805 {
5806 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
5807 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5808 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
5809 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
5810 if (bmHstSimdRegsWithGstShadowNew)
5811 {
5812 /*
5813 * Partial (likely).
5814 */
5815 uint64_t fGstShadows = 0;
5816 do
5817 {
5818 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5819 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5820 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5821 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5822 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5823 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5824
5825 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5826 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5827 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5828 } while (fHstSimdRegs != 0);
5829 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
5830 }
5831 else
5832 {
5833 /*
5834 * Clear all.
5835 */
5836 do
5837 {
5838 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5839 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5840 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5841 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5842 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5843 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5844
5845 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5846 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5847 } while (fHstSimdRegs != 0);
5848 pReNative->Core.bmGstSimdRegShadows = 0;
5849 }
5850 }
5851}
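/*
 * Sketch of the intended call pattern (comment only, not part of the build):
 * strip the guest shadows from the volatile host SIMD registers before
 * emitting a call to an external helper, so no stale associations survive
 * the call.  pfnSomeHelper is a placeholder for whatever helper is called.
 *
 *      iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnSomeHelper);
 */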
5852
5853
5854
5855/*********************************************************************************************************************************
5856* Code emitters for flushing pending guest register writes and sanity checks *
5857*********************************************************************************************************************************/
5858
5859#ifdef VBOX_STRICT
5860/**
5861 * Does internal register allocator sanity checks.
5862 */
5863DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5864{
5865 /*
5866 * Iterate host registers building a guest shadowing set.
5867 */
5868 uint64_t bmGstRegShadows = 0;
5869 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5870 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5871 while (bmHstRegsWithGstShadow)
5872 {
5873 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5874 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5875 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5876
5877 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5878 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5879 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5880 bmGstRegShadows |= fThisGstRegShadows;
5881 while (fThisGstRegShadows)
5882 {
5883 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5884 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5885 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5886 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5887 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5888 }
5889 }
5890 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5891 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5892 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5893
5894 /*
5895 * Now the other way around, checking the guest to host index array.
5896 */
5897 bmHstRegsWithGstShadow = 0;
5898 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5899 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5900 while (bmGstRegShadows)
5901 {
5902 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5903 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5904 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5905
5906 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5907 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5908 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5909 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5910 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5911 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5912 }
5913 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5914 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5915 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5916}
5917#endif /* VBOX_STRICT */
5918
5919
5920/**
5921 * Flushes any delayed guest register writes.
5922 *
5923 * This must be called prior to calling CImpl functions and any helpers that use
5924 * the guest state (like raising exceptions) and such.
5925 *
5926 * @note This function does not flush any shadowing information for guest registers. This needs to be done by
5927 * the caller if it wishes to do so.
5928 */
5929DECL_HIDDEN_THROW(uint32_t)
5930iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
5931{
5932#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5933 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5934 off = iemNativeEmitPcWriteback(pReNative, off);
5935#else
5936 RT_NOREF(pReNative, fGstShwExcept);
5937#endif
5938
5939#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5940 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
5941#endif
5942
5943 return iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
5944}
5945
5946#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5947
5948# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
5949
5950/**
5951 * Checks if the value in @a idxPcReg matches IEMCPU::uPcUpdatingDebug.
5952 */
5953DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcDebugCheckWithReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxPcReg)
5954{
5955 Assert(idxPcReg != IEMNATIVE_REG_FIXED_TMP0);
5956 Assert(pReNative->Core.fDebugPcInitialized);
5957
5958 /* cmp [pVCpu->iem.s.uPcUpdatingDebug], pcreg */
5959# ifdef RT_ARCH_AMD64
5960 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5961 pCodeBuf[off++] = X86_OP_REX_W | (idxPcReg >= 8 ? X86_OP_REX_R : 0);
5962 pCodeBuf[off++] = 0x3b;
5963 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, idxPcReg & 7, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
5964# else
5965 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5966 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
5967 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, idxPcReg);
5968# endif
5969
5970 uint32_t offFixup = off;
5971 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 1, kIemNativeInstrCond_e);
5972 off = iemNativeEmitBrkEx(pCodeBuf, off, UINT32_C(0x2200));
5973 iemNativeFixupFixedJump(pReNative, offFixup, off);
5974
5975 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5976 return off;
5977}
5978
5979
5980/**
5981 * Checks that the current RIP+offPc matches IEMCPU::uPcUpdatingDebug.
5982 */
5983DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcDebugCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5984{
5985 if (pReNative->Core.fDebugPcInitialized)
5986 {
5987 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc);
5988 if (pReNative->Core.offPc)
5989 {
5990 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5991 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 8);
5992 off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxTmpReg, idxPcReg, pReNative->Core.offPc);
5993 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5994 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxTmpReg);
5995 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5996 }
5997 else
5998 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
5999 iemNativeRegFreeTmp(pReNative, idxPcReg);
6000 }
6001 return off;
6002}
6003
6004# endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG */
6005
6006/**
6007 * Emits code to update the guest RIP value by adding the offset accumulated since the last RIP update.
6008 */
6009DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6010{
6011 Assert(pReNative->Core.offPc);
6012# if !defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && !defined(VBOX_WITH_STATISTICS)
6013 Log4(("iemNativeEmitPcWritebackSlow: offPc=%#RX64 -> 0; off=%#x\n", pReNative->Core.offPc, off));
6014# else
6015 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
6016 uint8_t idxCurCall = pReNative->idxCurCall;
6017 uint8_t idxInstr = pReNative->pTbOrg->Thrd.paCalls[idxCurCall].idxInstr; /* unreliable */
6018 while (idxInstr == 0 && idxInstr + 1 < idxOldInstrPlusOne && idxCurCall > 0)
6019 idxInstr = pReNative->pTbOrg->Thrd.paCalls[--idxCurCall].idxInstr;
6020 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
6021 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
6022 Log4(("iemNativeEmitPcWritebackSlow: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u\n",
6023 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped));
6024
6025 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
6026
6027# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
6028 iemNativeDbgInfoAddNativeOffset(pReNative, off);
6029 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
6030# endif
6031# endif
6032
6033# ifndef IEMNATIVE_REG_FIXED_PC_DBG
6034 /* Allocate a temporary PC register. */
6035 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
6036
6037 /* Perform the addition and store the result. */
6038 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
6039 off = iemNativeEmitStoreGprToGstRegT<kIemNativeGstReg_Pc>(pReNative, off, idxPcReg);
6040# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6041 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
6042# endif
6043
6044 /* Free but don't flush the PC register. */
6045 iemNativeRegFreeTmp(pReNative, idxPcReg);
6046# else
6047 /* Compare the shadow with the context value, they should match. */
6048 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
6049 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
6050# endif
6051
6052 pReNative->Core.offPc = 0;
6053
6054 return off;
6055}
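/*
 * In guest state terms the code emitted above boils down to the following,
 * where 7 stands in for whatever pReNative->Core.offPc had accumulated
 * (comment only, not part of the build):
 *
 *      pVCpu->cpum.GstCtx.rip += 7;    // a single addition covering all the
 *                                      // instructions recompiled since the
 *                                      // last RIP writeback
 */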
6056
6057#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
6058
6059
6060/*********************************************************************************************************************************
6061* Code Emitters (larger snippets) *
6062*********************************************************************************************************************************/
6063
6064/**
6065 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6066 * extending to 64-bit width.
6067 *
6068 * @returns New code buffer offset on success, UINT32_MAX on failure.
6069 * @param pReNative The native recompile state.
6070 * @param off The current code buffer position.
6071 * @param idxHstReg The host register to load the guest register value into.
6072 * @param enmGstReg The guest register to load.
6073 *
6074 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg;
6075 * that is something the caller needs to do if applicable.
6076 */
6077DECL_HIDDEN_THROW(uint32_t)
6078iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6079{
6080 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6081 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6082
6083 switch (g_aGstShadowInfo[enmGstReg].cb)
6084 {
6085 case sizeof(uint64_t):
6086 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6087 case sizeof(uint32_t):
6088 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6089 case sizeof(uint16_t):
6090 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6091#if 0 /* not present in the table. */
6092 case sizeof(uint8_t):
6093 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6094#endif
6095 default:
6096 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6097 }
6098}
6099
6100
6101/**
6102 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
6103 * extending to 64-bit width, extended version.
6104 *
6105 * @returns New code buffer offset on success, UINT32_MAX on failure.
6106 * @param pCodeBuf The code buffer.
6107 * @param off The current code buffer position.
6108 * @param idxHstReg The host register to load the guest register value into.
6109 * @param enmGstReg The guest register to load.
6110 *
6111 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg;
6112 * that is something the caller needs to do if applicable.
6113 */
6114DECL_HIDDEN_THROW(uint32_t)
6115iemNativeEmitLoadGprWithGstShadowRegEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
6116{
6117 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
6118 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
6119
6120 switch (g_aGstShadowInfo[enmGstReg].cb)
6121 {
6122 case sizeof(uint64_t):
6123 return iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6124 case sizeof(uint32_t):
6125 return iemNativeEmitLoadGprFromVCpuU32Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6126 case sizeof(uint16_t):
6127 return iemNativeEmitLoadGprFromVCpuU16Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6128#if 0 /* not present in the table. */
6129 case sizeof(uint8_t):
6130 return iemNativeEmitLoadGprFromVCpuU8Ex(pCodeBuf, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
6131#endif
6132 default:
6133#ifdef IEM_WITH_THROW_CATCH
6134 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(NULL, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6135#else
6136 AssertReleaseFailedReturn(off);
6137#endif
6138 }
6139}
6140
6141
6142/**
6143 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
6144 *
6145 * @returns New code buffer offset on success, UINT32_MAX on failure.
6146 * @param pReNative The recompiler state.
6147 * @param off The current code buffer position.
6148 * @param idxHstSimdReg The host register to load the guest register value into.
6149 * @param enmGstSimdReg The guest register to load.
6150 * @param enmLoadSz The load size of the register.
6151 *
6152 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg;
6153 * that is something the caller needs to do if applicable.
6154 */
6155DECL_HIDDEN_THROW(uint32_t)
6156iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
6157 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6158{
6159 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
6160
6161 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
6162 switch (enmLoadSz)
6163 {
6164 case kIemNativeGstSimdRegLdStSz_256:
6165 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6166 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6167 case kIemNativeGstSimdRegLdStSz_Low128:
6168 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6169 case kIemNativeGstSimdRegLdStSz_High128:
6170 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6171 default:
6172 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
6173 }
6174}
6175
6176#ifdef VBOX_STRICT
6177
6178/**
6179 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6180 *
6181 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6182 * Trashes EFLAGS on AMD64.
6183 */
6184DECL_FORCE_INLINE(uint32_t)
6185iemNativeEmitTop32BitsClearCheckEx(PIEMNATIVEINSTR pCodeBuf, uint32_t off, uint8_t idxReg)
6186{
6187# ifdef RT_ARCH_AMD64
6188 /* rol reg64, 32 */
6189 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6190 pCodeBuf[off++] = 0xc1;
6191 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6192 pCodeBuf[off++] = 32;
6193
6194 /* test reg32, ffffffffh */
6195 if (idxReg >= 8)
6196 pCodeBuf[off++] = X86_OP_REX_B;
6197 pCodeBuf[off++] = 0xf7;
6198 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6199 pCodeBuf[off++] = 0xff;
6200 pCodeBuf[off++] = 0xff;
6201 pCodeBuf[off++] = 0xff;
6202 pCodeBuf[off++] = 0xff;
6203
6204 /* je/jz +1 */
6205 pCodeBuf[off++] = 0x74;
6206 pCodeBuf[off++] = 0x01;
6207
6208 /* int3 */
6209 pCodeBuf[off++] = 0xcc;
6210
6211 /* rol reg64, 32 */
6212 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6213 pCodeBuf[off++] = 0xc1;
6214 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6215 pCodeBuf[off++] = 32;
6216
6217# elif defined(RT_ARCH_ARM64)
6218 /* lsr tmp0, reg64, #32 */
6219 pCodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
6220 /* cbz tmp0, +1 */
6221 pCodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6222 /* brk #0x1100 */
6223 pCodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
6224
6225# else
6226# error "Port me!"
6227# endif
6228 return off;
6229}
6230
6231
6232/**
6233 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
6234 *
6235 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6236 * Trashes EFLAGS on AMD64.
6237 */
6238DECL_HIDDEN_THROW(uint32_t)
6239iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
6240{
6241# ifdef RT_ARCH_AMD64
6242 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6243# elif defined(RT_ARCH_ARM64)
6244 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6245# else
6246# error "Port me!"
6247# endif
6248 off = iemNativeEmitTop32BitsClearCheckEx(pCodeBuf, off, idxReg);
6249 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6250 return off;
6251}
6252
6253
6254/**
6255 * Emitting code that checks that the content of register @a idxReg is the same
6256 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6257 * instruction if that's not the case.
6258 *
6259 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6260 * Trashes EFLAGS on AMD64.
6261 */
6262DECL_HIDDEN_THROW(uint32_t) iemNativeEmitGuestRegValueCheckEx(PIEMRECOMPILERSTATE pReNative, PIEMNATIVEINSTR pCodeBuf,
6263 uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6264{
6265#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
6266 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6267 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
6268 return off;
6269#endif
6270
6271# ifdef RT_ARCH_AMD64
6272 /* cmp reg, [mem] */
6273 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
6274 {
6275 if (idxReg >= 8)
6276 pCodeBuf[off++] = X86_OP_REX_R;
6277 pCodeBuf[off++] = 0x38;
6278 }
6279 else
6280 {
6281 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
6282 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
6283 else
6284 {
6285 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
6286 pCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6287 else
6288 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
6289 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
6290 if (idxReg >= 8)
6291 pCodeBuf[off++] = X86_OP_REX_R;
6292 }
6293 pCodeBuf[off++] = 0x39;
6294 }
6295 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6296
6297 /* je/jz +1 */
6298 pCodeBuf[off++] = 0x74;
6299 pCodeBuf[off++] = 0x01;
6300
6301 /* int3 */
6302 pCodeBuf[off++] = 0xcc;
6303
6304 /* For values smaller than the register size, we must check that the rest
6305 of the register is all zeros. */
6306 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6307 {
6308 /* test reg64, imm32 */
6309 pCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6310 pCodeBuf[off++] = 0xf7;
6311 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6312 pCodeBuf[off++] = 0;
6313 pCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6314 pCodeBuf[off++] = 0xff;
6315 pCodeBuf[off++] = 0xff;
6316
6317 /* je/jz +1 */
6318 pCodeBuf[off++] = 0x74;
6319 pCodeBuf[off++] = 0x01;
6320
6321 /* int3 */
6322 pCodeBuf[off++] = 0xcc;
6323 }
6324 else if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6325 iemNativeEmitTop32BitsClearCheckEx(pCodeBuf, off, idxReg);
6326 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6327
6328# elif defined(RT_ARCH_ARM64)
6329 /* mov TMP0, [gstreg] */
6330 off = iemNativeEmitLoadGprWithGstShadowRegEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6331
6332 /* sub tmp0, tmp0, idxReg */
6333 pCodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6334 /* cbz tmp0, +2 */
6335 pCodeBuf[off++] = Armv8A64MkInstrCbz(2, IEMNATIVE_REG_FIXED_TMP0);
6336 /* brk #0x1000+enmGstReg */
6337 pCodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6338 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6339
6340# else
6341# error "Port me!"
6342# endif
6343 return off;
6344}
6345
6346
6347/**
6348 * Emitting code that checks that the content of register @a idxReg is the same
6349 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
6350 * instruction if that's not the case.
6351 *
6352 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6353 * Trashes EFLAGS on AMD64.
6354 */
6355DECL_HIDDEN_THROW(uint32_t)
6356iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
6357{
6358#ifdef RT_ARCH_AMD64
6359 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6360#elif defined(RT_ARCH_ARM64)
6361 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 5);
6362# else
6363# error "Port me!"
6364# endif
6365 return iemNativeEmitGuestRegValueCheckEx(pReNative, pCodeBuf, off, idxReg, enmGstReg);
6366}
6367
6368# ifdef RT_ARCH_AMD64
6369/**
6370 * Helper for AMD64 to emit code which checks the low 128 bits of the given SIMD register against the given vCPU offset.
6371 */
6372DECL_FORCE_INLINE_THROW(uint32_t)
6373iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
6374{
6375 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6376 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6377 if (idxSimdReg >= 8)
6378 pbCodeBuf[off++] = X86_OP_REX_R;
6379 pbCodeBuf[off++] = 0x0f;
6380 pbCodeBuf[off++] = 0x38;
6381 pbCodeBuf[off++] = 0x29;
6382 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
6383
6384 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6385 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6386 pbCodeBuf[off++] = X86_OP_REX_W
6387 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6388 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6389 pbCodeBuf[off++] = 0x0f;
6390 pbCodeBuf[off++] = 0x3a;
6391 pbCodeBuf[off++] = 0x16;
6392 pbCodeBuf[off++] = 0xeb;
6393 pbCodeBuf[off++] = 0x00;
6394
6395 /* cmp tmp0, 0xffffffffffffffff. */
6396 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6397 pbCodeBuf[off++] = 0x83;
6398 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6399 pbCodeBuf[off++] = 0xff;
6400
6401 /* je/jz +1 */
6402 pbCodeBuf[off++] = 0x74;
6403 pbCodeBuf[off++] = 0x01;
6404
6405 /* int3 */
6406 pbCodeBuf[off++] = 0xcc;
6407
6408 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6409 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6410 pbCodeBuf[off++] = X86_OP_REX_W
6411 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6412 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6413 pbCodeBuf[off++] = 0x0f;
6414 pbCodeBuf[off++] = 0x3a;
6415 pbCodeBuf[off++] = 0x16;
6416 pbCodeBuf[off++] = 0xeb;
6417 pbCodeBuf[off++] = 0x01;
6418
6419 /* cmp tmp0, 0xffffffffffffffff. */
6420 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6421 pbCodeBuf[off++] = 0x83;
6422 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6423 pbCodeBuf[off++] = 0xff;
6424
6425 /* je/jz +1 */
6426 pbCodeBuf[off++] = 0x74;
6427 pbCodeBuf[off++] = 0x01;
6428
6429 /* int3 */
6430 pbCodeBuf[off++] = 0xcc;
6431
6432 return off;
6433}
6434# endif /* RT_ARCH_AMD64 */
6435
6436
6437/**
6438 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6439 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6440 * instruction if that's not the case.
6441 *
6442 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6443 * Trashes EFLAGS on AMD64.
6444 */
6445DECL_HIDDEN_THROW(uint32_t)
6446iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6447 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6448{
6449 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6450 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6451 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6452 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6453 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6454 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6455 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6456 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6457 return off;
6458
6459# ifdef RT_ARCH_AMD64
6460 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6461 {
6462 /* movdqa vectmp0, idxSimdReg */
6463 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6464
6465 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6466
6467 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6468 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6469 }
6470
6471 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6472 {
6473 /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
6474 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
6475
6476 /* vextracti128 vectmp0, idxSimdReg, 1 */
6477 pbCodeBuf[off++] = X86_OP_VEX3;
6478 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
6479 | X86_OP_VEX3_BYTE1_X
6480 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
6481 | 0x03; /* Opcode map */
6482 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
6483 pbCodeBuf[off++] = 0x39;
6484 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
6485 pbCodeBuf[off++] = 0x01;
6486
6487 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6488 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6489 }
6490
6491# elif defined(RT_ARCH_ARM64)
6492 /* mov vectmp0, [gstreg] */
6493 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6494
6495 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6496 {
6497 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6498 /* eor vectmp0, vectmp0, idxSimdReg */
6499 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6500 /* uaddlv vectmp0, vectmp0.16B */
6501 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
6502 /* umov tmp0, vectmp0.H[0] */
6503 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6504 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6505 /* cbz tmp0, +1 */
6506 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6507 /* brk #0x1000+enmGstReg */
6508 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6509 }
6510
6511 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6512 {
6513 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6514 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6515 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6516 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
6517 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
6518 /* umov tmp0, (vectmp0 + 1).H[0] */
6519 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6520 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6521 /* cbz tmp0, +1 */
6522 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6523 /* brk #0x1000+enmGstReg */
6524 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6525 }
6526
6527# else
6528# error "Port me!"
6529# endif
6530
6531 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6532 return off;
6533}
6534
6535
6536/**
6537 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6538 * important bits.
6539 *
6540 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6541 * Trashes EFLAGS on AMD64.
6542 */
6543DECL_HIDDEN_THROW(uint32_t)
6544iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6545{
6546 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6547 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6548 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6549 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6550
6551# ifdef RT_ARCH_AMD64
6552 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6553
6554 /* je/jz +1 */
6555 pbCodeBuf[off++] = 0x74;
6556 pbCodeBuf[off++] = 0x01;
6557
6558 /* int3 */
6559 pbCodeBuf[off++] = 0xcc;
6560
6561# elif defined(RT_ARCH_ARM64)
6562 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6563
6564 /* b.eq +1 */
6565 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6566 /* brk #0x2000 */
6567 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6568
6569# else
6570# error "Port me!"
6571# endif
6572 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6573
6574 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6575 return off;
6576}
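/*
 * C equivalent of the check emitted above (comment only, not part of the
 * build; "breakpoint" stands for the int3 / brk #0x2000 instruction):
 *
 *      if (   (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
 *          != (fExec & IEMTB_F_KEY_MASK))
 *          breakpoint;
 */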
6577
6578#endif /* VBOX_STRICT */
6579
6580
6581#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6582/**
6583 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6584 */
6585DECL_HIDDEN_THROW(uint32_t)
6586iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6587{
6588 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6589
6590 fEflNeeded &= X86_EFL_STATUS_BITS;
6591 if (fEflNeeded)
6592 {
6593# ifdef RT_ARCH_AMD64
6594 /* test dword [pVCpu + offVCpu], imm32 */
6595 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 13);
6596 if (fEflNeeded <= 0xff)
6597 {
6598 pCodeBuf[off++] = 0xf6;
6599 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6600 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6601 }
6602 else
6603 {
6604 pCodeBuf[off++] = 0xf7;
6605 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6606 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6607 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6608 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6609 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6610 }
6611
6612 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 3, kIemNativeInstrCond_e);
6613 pCodeBuf[off++] = 0xcc;
6614
6615 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6616
6617# else
6618 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6619 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6620 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6621# ifdef RT_ARCH_ARM64
6622 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6623 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6624# else
6625# error "Port me!"
6626# endif
6627 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6628# endif
6629 }
6630 return off;
6631}
6632#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
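/*
 * C equivalent of the check emitted by the worker above (comment only, not
 * part of the build; "breakpoint" stands for the int3 / brk #0x7777):
 *
 *      if (pVCpu->iem.s.fSkippingEFlags & fEflNeeded & X86_EFL_STATUS_BITS)
 *          breakpoint;
 */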
6633
6634
6635/**
6636 * Emits code for checking the return code of a call and rcPassUp, returning
6637 * from the code if either is non-zero.
6638 */
6639DECL_HIDDEN_THROW(uint32_t)
6640iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6641{
6642#ifdef RT_ARCH_AMD64
6643 /*
6644 * AMD64: eax = call status code.
6645 */
6646
6647 /* edx = rcPassUp */
6648 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6649# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6650 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6651# endif
6652
6653 /* edx = eax | rcPassUp */
6654 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6655 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6656 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6657 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6658
6659 /* Jump to non-zero status return path. */
6660 off = iemNativeEmitTbExitJnz<kIemNativeLabelType_NonZeroRetOrPassUp>(pReNative, off);
6661
6662 /* done. */
6663
6664#elif RT_ARCH_ARM64
6665 /*
6666 * ARM64: w0 = call status code.
6667 */
6668 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1+3+3 + IEMNATIVE_MAX_POSTPONED_EFLAGS_INSTRUCTIONS);
6669
6670# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6671 AssertCompile(ARMV8_A64_REG_X2 == IEMNATIVE_CALL_ARG2_GREG);
6672 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, ARMV8_A64_REG_X2, idxInstr);
6673# endif
6674 off = iemNativeEmitLoadGprFromVCpuU32Ex(pCodeBuf, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6675
6676 pCodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6677
6678 off = iemNativeEmitTbExitIfGprIsNotZeroEx<kIemNativeLabelType_NonZeroRetOrPassUp>(pReNative, pCodeBuf, off,
6679 ARMV8_A64_REG_X4, true /*f64Bit*/);
6680
6681#else
6682# error "port me"
6683#endif
6684 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6685 RT_NOREF_PV(idxInstr);
6686 return off;
6687}
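/*
 * What the emitted code amounts to (comment only, not part of the build),
 * with rcCall denoting the value in the call status register (eax / w0):
 *
 *      if (((uint32_t)rcCall | (uint32_t)pVCpu->iem.s.rcPassUp) != 0)
 *          goto NonZeroRetOrPassUp;    // i.e. the TB exit label
 */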
6688
6689
6690/**
6691 * Emits a call to a CImpl function or something similar.
6692 */
6693DECL_HIDDEN_THROW(uint32_t)
6694iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6695 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6696{
6697 /* Writeback everything. */
6698 off = iemNativeRegFlushPendingWrites(pReNative, off);
6699
6700 /*
6701 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6702 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6703 */
6704 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6705 fGstShwFlush
6706 | RT_BIT_64(kIemNativeGstReg_Pc)
6707 | RT_BIT_64(kIemNativeGstReg_EFlags));
6708 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6709
6710 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6711
6712 /*
6713 * Load the parameters.
6714 */
6715#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64))
6716 /* Special handling for the hidden VBOXSTRICTRC return pointer. */
6717 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6718 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6719 if (cAddParams > 0)
6720 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6721 if (cAddParams > 1)
6722# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6723 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam1);
6724# else
6725 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6726# endif
6727 if (cAddParams > 2)
6728# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 6
6729 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG5_GREG, uParam2);
6730# else
6731 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6732# endif
6733 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict */
6734
6735#else
6736 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6737 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6738 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6739 if (cAddParams > 0)
6740 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6741 if (cAddParams > 1)
6742 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6743 if (cAddParams > 2)
6744# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6745 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6746# else
6747 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6748# endif
6749#endif
6750
6751 /*
6752 * Make the call.
6753 */
6754 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6755
6756#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED) && (defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64))
6757 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict (see above) */
6758#endif
6759
6760#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6761 pReNative->Core.fDebugPcInitialized = false;
6762 Log4(("fDebugPcInitialized=false cimpl off=%#x (v2)\n", off));
6763#endif
6764
6765 /*
6766 * Check the status code.
6767 */
6768 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6769}
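/*
 * Rough ABI-level shape of the call emitted above on hosts where VBOXSTRICTRC
 * is returned via a hidden pointer (Windows with VBOXSTRICTRC_STRICT_ENABLED);
 * comment only, not part of the build.  The hidden pointer targets the stack
 * slot at IEMNATIVE_FP_OFF_VBOXSTRICRC and the 32-bit status is reloaded from
 * that slot into the normal return register before the status check:
 *
 *      VBOXSTRICTRC rcStrict;                                          // frame slot
 *      pfnCImpl(&rcStrict, pVCpu, cbInstr, uParam0, uParam1, uParam2); // hidden ptr first
 *      u32Ret = *(uint32_t *)&rcStrict;                                // -> eax / w0
 */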
6770
6771
6772/**
6773 * Emits a call to a threaded worker function.
6774 */
6775DECL_HIDDEN_THROW(uint32_t)
6776iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6777{
6778 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
6779 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6780
6781 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6782 off = iemNativeRegFlushPendingWrites(pReNative, off);
6783
6784 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6785 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6786
6787#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6788 /* The threaded function may throw / long jmp, so set current instruction
6789 number if we're counting. */
6790 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6791#endif
6792
6793 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6794
6795#ifdef RT_ARCH_AMD64
6796 /* Load the parameters and emit the call. */
6797# ifdef RT_OS_WINDOWS
6798# ifndef VBOXSTRICTRC_STRICT_ENABLED
6799 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6800 if (cParams > 0)
6801 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6802 if (cParams > 1)
6803 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6804 if (cParams > 2)
6805 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6806# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6807 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6808 if (cParams > 0)
6809 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6810 if (cParams > 1)
6811 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6812 if (cParams > 2)
6813 {
6814 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6815 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6816 }
6817 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict */
6818# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6819# else
6820 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6821 if (cParams > 0)
6822 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6823 if (cParams > 1)
6824 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6825 if (cParams > 2)
6826 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6827# endif
6828
6829 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6830
6831# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6832 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict (see above) */
6833# endif
6834
6835#elif RT_ARCH_ARM64
6836 /*
6837 * ARM64:
6838 */
6839# if !defined(RT_OS_WINDOWS) || !defined(VBOXSTRICTRC_STRICT_ENABLED)
6840 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6841 if (cParams > 0)
6842 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6843 if (cParams > 1)
6844 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6845 if (cParams > 2)
6846 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6847# else
6848 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6849 if (cParams > 0)
6850 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[0]);
6851 if (cParams > 1)
6852 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[1]);
6853 if (cParams > 2)
6854 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, pCallEntry->auParams[2]);
6855 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict */
6856# endif
6857
6858 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6859
6860# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6861 off = iemNativeEmitLoadGprByBpU32(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_FP_OFF_VBOXSTRICRC); /* rcStrict (see above) */
6862# endif
6863
6864#else
6865# error "port me"
6866#endif
6867
6868#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6869 pReNative->Core.fDebugPcInitialized = false;
6870 Log4(("fDebugPcInitialized=false todo off=%#x (v2)\n", off));
6871#endif
6872
6873 /*
6874 * Check the status code.
6875 */
6876 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6877
6878 return off;
6879}
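/*
 * Illustrative sketch (not emitted verbatim): for a three-parameter threaded
 * function on AMD64 with the SysV ABI, the code generated above amounts to:
 *      mov  rdi, <pVCpu>                 ; IEMNATIVE_REG_FIXED_PVMCPU
 *      mov  rsi, <auParams[0]>
 *      mov  rdx, <auParams[1]>
 *      mov  rcx, <auParams[2]>
 *      call g_apfnIemThreadedFunctions[enmFunction]
 * followed by the status check emitted by iemNativeEmitCheckCallRetAndPassUp.
 * On win.amd64 and win.arm64 with VBOXSTRICTRC_STRICT_ENABLED the first
 * parameter register instead carries the hidden pointer to the VBOXSTRICTRC
 * return slot on the stack frame, shifting the other parameters up by one as
 * done in the branches above.
 */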
6880
6881
6882/**
6883 * The default liveness function, matching iemNativeEmitThreadedCall.
6884 */
6885IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_ThreadedCall)
6886{
6887 IEM_LIVENESS_RAW_INIT_WITH_CALL(pOutgoing, pIncoming);
6888 RT_NOREF(pCallEntry);
6889}
6890
6891#ifdef VBOX_WITH_STATISTICS
6892
6893/**
6894 * Emits code to update the thread call statistics.
6895 */
6896DECL_INLINE_THROW(uint32_t)
6897iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6898{
6899 /*
6900 * Update threaded function stats.
6901 */
6902 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6903 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6904# if defined(RT_ARCH_ARM64)
6905 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6906 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6907 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6908 iemNativeRegFreeTmp(pReNative, idxTmp1);
6909 iemNativeRegFreeTmp(pReNative, idxTmp2);
6910# else
6911 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6912# endif
6913 return off;
6914}
6915
6916
6917/**
6918 * Emits code to update the TB exit reason statistics.
6919 */
6920DECL_INLINE_THROW(uint32_t)
6921iemNativeEmitNativeTbExitStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t const offVCpu)
6922{
6923 uint8_t const idxStatsTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6924 uint8_t const idxStatsTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6925 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxStatsTmp1, idxStatsTmp2, offVCpu);
6926 iemNativeRegFreeTmp(pReNative, idxStatsTmp1);
6927 iemNativeRegFreeTmp(pReNative, idxStatsTmp2);
6928
6929 return off;
6930}
6931
6932#endif /* VBOX_WITH_STATISTICS */
6933
6934/**
6935 * Worker for iemNativeEmitViaLookupDoOne and iemNativeRecompileAttachExecMemChunkCtx.
6936 */
6937static uint32_t
6938iemNativeEmitCoreViaLookupDoOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offReturnBreak, uintptr_t pfnHelper)
6939{
6940 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6941 off = iemNativeEmitCallImm(pReNative, off, pfnHelper);
6942
6943 /* Jump to ReturnBreak if the return register is NULL. */
6944 off = iemNativeEmitTestIfGprIsZeroAndJmpToFixed(pReNative, off, IEMNATIVE_CALL_RET_GREG,
6945 true /*f64Bit*/, offReturnBreak);
6946
6947 /* Okay, continue executing the next TB. */
6948 off = iemNativeEmitJmpViaGpr(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6949 return off;
6950}
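/*
 * Rough shape of what the above emits (sketch): the helper either returns a
 * pointer to the next TB's native code or NULL:
 *      arg0 = pVCpu
 *      call pfnHelper
 *      if (return register == 0)
 *          goto offReturnBreak              ; no suitable TB, break out
 *      jump to address in return register   ; continue in the next TB
 */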
6951
6952
6953/**
6954 * Emits the code at the ReturnWithFlags label (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6955 */
6956static uint32_t iemNativeEmitCoreReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6957{
6958 /* set the return status */
6959 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6960}
6961
6962
6963/**
6964 * Emits the code at the ReturnBreakFF label (returns VINF_IEM_REEXEC_BREAK_FF).
6965 */
6966static uint32_t iemNativeEmitCoreReturnBreakFF(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6967{
6968 /* set the return status */
6969 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK_FF);
6970}
6971
6972
6973/**
6974 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6975 */
6976static uint32_t iemNativeEmitCoreReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6977{
6978 /* set the return status */
6979 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6980}
6981
6982
6983/**
6984 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6985 */
6986static uint32_t iemNativeEmitCoreRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6987{
6988 /*
6989 * Generate the rc + rcPassUp fiddling code.
6990 */
6991 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6992#ifdef RT_ARCH_AMD64
6993# ifdef RT_OS_WINDOWS
6994# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6995 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6996# endif
6997 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6998 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6999# else
7000 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
7001 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
7002# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7003 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
7004# endif
7005# endif
7006# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
7007 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
7008# endif
7009
7010#else
7011 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
7012 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
7013 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
7014#endif
7015
7016 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
7017 return off;
7018}
7019
7020
7021/**
7022 * Emits a standard epilog.
7023 */
7024static uint32_t iemNativeEmitCoreEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
7025{
7026 pReNative->Core.bmHstRegs |= RT_BIT_32(IEMNATIVE_CALL_RET_GREG); /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
7027
7028 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, X86_EFL_STATUS_BITS);
7029 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
7030
7031 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
7032 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
7033
7034 /*
7035 * Restore registers and return.
7036 */
7037#ifdef RT_ARCH_AMD64
7038 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
7039
7040 /* Reposition rsp at the r15 restore point. */
7041 pbCodeBuf[off++] = X86_OP_REX_W;
7042 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
7043 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
7044 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
7045
7046 /* Pop non-volatile registers and return */
7047 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
7048 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
7049 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
7050 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
7051 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
7052 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
7053 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
7054 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
7055# ifdef RT_OS_WINDOWS
7056 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
7057 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
7058# endif
7059 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
7060 pbCodeBuf[off++] = 0xc9; /* leave */
7061 pbCodeBuf[off++] = 0xc3; /* ret */
7062 pbCodeBuf[off++] = 0xcc; /* int3 poison */
7063
7064#elif RT_ARCH_ARM64
7065 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
7066
7067 /* ldp x19, x20, [sp, #(IEMNATIVE_FRAME_VAR_SIZE+IEMNATIVE_FRAME_ALIGN_SIZE)]! ; Deallocate the variable space and restore x19+x20. */
7068 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_ALIGN_SIZE < 64*8);
7069 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
7070 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
7071 (IEMNATIVE_FRAME_VAR_SIZE + IEMNATIVE_FRAME_ALIGN_SIZE) / 8);
7072 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
7073 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7074 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
7075 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7076 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
7077 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7078 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
7079 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7080 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
7081 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
7082 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
7083 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
7084
7085 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
7086 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
7087 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
7088 IEMNATIVE_FRAME_SAVE_REG_SIZE);
7089
7090 /* retab / ret */
7091# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
7092 if (1)
7093 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
7094 else
7095# endif
7096 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
7097
7098#else
7099# error "port me"
7100#endif
7101 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7102
7103 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK. */
7104 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
7105
7106 return off;
7107}
7108
7109
7110
7111/*********************************************************************************************************************************
7112* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7113*********************************************************************************************************************************/
7114
7115/**
7116 * Internal work that allocates a variable with kind set to
7117 * kIemNativeVarKind_Invalid and no current stack allocation.
7118 *
7119 * The kind will either be set by the caller or later when the variable is first
7120 * assigned a value.
7121 *
7122 * @returns Unpacked index.
7123 * @internal
7124 */
7125DECL_INLINE_THROW(uint8_t) iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7126{
7127 Assert(cbType > 0 && cbType <= 64);
7128 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7129 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7130
7131 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7132
7133 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[idxVar]; /* VS 2019 gets a bit weird on us otherwise. */
7134#if 0
7135 pVar->cbVar = cbType;
7136 pVar->enmKind = kIemNativeVarKind_Invalid;
7137 pVar->fRegAcquired = false;
7138 pVar->fSimdReg = false;
7139 pVar->idxReg = UINT8_MAX;
7140 pVar->uArgNo = UINT8_MAX;
7141 pVar->idxStackSlot = UINT8_MAX;
7142 pVar->idxReferrerVar = UINT8_MAX;
7143 pVar->u.uValue = 0;
7144#else
7145 /* Neither clang 15 nor VC++ 2019 is able to generate this from the above. */
7146 AssertCompileMemberOffset(IEMNATIVEVAR, cbVar, 1);
7147 AssertCompile((int)kIemNativeVarKind_Invalid == 0);
7148 pVar->u32Init0 = (uint32_t)cbType << 8;
7149 pVar->u32Init1 = UINT32_MAX;
7150 pVar->u.uValue = 0;
7151#endif
7152 return idxVar;
7153}
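/*
 * The allocation above is a plain find-first-free-bit scan of the variable
 * bitmap. Minimal standalone sketch of the pattern (hypothetical helper, not
 * part of the recompiler):
 *      static unsigned exampleAllocFromBitmap(uint32_t *pbm)
 *      {
 *          unsigned const idx = ASMBitFirstSetU32(~*pbm) - 1;  // lowest clear bit
 *          *pbm |= RT_BIT_32(idx);                             // mark it allocated
 *          return idx;  // caller must range check, like the AssertStmt above
 *      }
 */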
7154
7155
7156/**
7157 * Internal work that allocates an argument variable w/o setting enmKind.
7158 *
7159 * @returns Unpacked index.
7160 * @internal
7161 */
7162static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7163{
7164 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7165 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7166 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7167
7168 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7169 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7170 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7171 return idxVar;
7172}
7173
7174
7175/**
7176 * Gets the stack slot for a stack variable, allocating one if necessary.
7177 *
7178 * Calling this function implies that the stack slot will contain a valid
7179 * variable value. The caller deals with any register currently assigned to the
7180 * variable, typically by spilling it into the stack slot.
7181 *
7182 * @returns The stack slot number.
7183 * @param pReNative The recompiler state.
7184 * @param idxVar The variable.
7185 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7186 */
7187DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7188{
7189 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7190 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7191 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7192
7193 /* Already got a slot? */
7194 uint8_t const idxStackSlot = pVar->idxStackSlot;
7195 if (idxStackSlot != UINT8_MAX)
7196 {
7197 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7198 return idxStackSlot;
7199 }
7200
7201 /*
7202 * A single slot is easy to allocate.
7203 * Allocate them from the top end, closest to BP, to reduce the displacement.
7204 */
7205 if (pVar->cbVar <= sizeof(uint64_t))
7206 {
7207 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7208 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7209 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7210 pVar->idxStackSlot = (uint8_t)iSlot;
7211 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7212 return (uint8_t)iSlot;
7213 }
7214
7215 /*
7216 * We need more than one stack slot.
7217 *
7218 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7219 */
7220 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7221 Assert(pVar->cbVar <= 64);
7222 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7223 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7224 uint32_t bmStack = pReNative->Core.bmStack;
7225 while (bmStack != UINT32_MAX)
7226 {
7227 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7228 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7229 iSlot = (iSlot - 1) & ~fBitAlignMask;
7230 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7231 {
7232 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7233 pVar->idxStackSlot = (uint8_t)iSlot;
7234 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7235 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7236 return (uint8_t)iSlot;
7237 }
7238
7239 bmStack |= (fBitAllocMask << iSlot);
7240 }
7241 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7242}
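/*
 * Worked example for the multi-slot path above (illustrative): a 32 byte
 * variable gives fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(32) - 4) - 1 = 3
 * and fBitAllocMask = RT_BIT_32((32 + 7) >> 3) - 1 = 0xf, i.e. the loop scans
 * the stack bitmap from the top for a run of four free slots starting at a
 * slot index that is a multiple of four.
 */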
7243
7244
7245/**
7246 * Changes the variable to a stack variable.
7247 *
7248 * Currently this is only possible to do the first time the variable is used;
7249 * switching later can be implemented but hasn't been done yet.
7250 *
7251 * @param pReNative The recompiler state.
7252 * @param idxVar The variable.
7253 * @throws VERR_IEM_VAR_IPE_2
7254 */
7255DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7256{
7257 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7258 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7259 if (pVar->enmKind != kIemNativeVarKind_Stack)
7260 {
7261 /* We could in theory transition from immediate to stack as well, but it
7262 would involve the caller doing work storing the value on the stack. So,
7263 till that's required we only allow transition from invalid. */
7264 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7265 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7266 pVar->enmKind = kIemNativeVarKind_Stack;
7267
7268 /* Note! We don't allocate a stack slot here, that's only done when a
7269 slot is actually needed to hold a variable value. */
7270 }
7271}
7272
7273
7274/**
7275 * Sets the variable to a constant (immediate) value.
7276 *
7277 * This does not require stack storage as we know the value and can always
7278 * reload it, unless of course it's referenced.
7279 *
7280 * @param pReNative The recompiler state.
7281 * @param idxVar The variable.
7282 * @param uValue The immediate value.
7283 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7284 */
7285DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7286{
7287 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7288 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7289 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7290 {
7291 /* Only simple transitions for now. */
7292 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7293 pVar->enmKind = kIemNativeVarKind_Immediate;
7294 }
7295 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7296
7297 pVar->u.uValue = uValue;
7298 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7299 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7300 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7301}
7302
7303
7304/**
7305 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7306 *
7307 * This does not require stack storage as we know the value and can always
7308 * reload it. Loading is postponed till needed.
7309 *
7310 * @param pReNative The recompiler state.
7311 * @param idxVar The variable. Unpacked.
7312 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7313 *
7314 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7315 * @internal
7316 */
7317static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7318{
7319 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7320 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7321
7322 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7323 {
7324 /* Only simple transitions for now. */
7325 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7326 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7327 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7328 }
7329 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7330
7331 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7332
7333 /* Update the other variable, ensure it's a stack variable. */
7334 /** @todo handle variables with const values... that'll go boom now. */
7335 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7336 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7337}
7338
7339
7340/**
7341 * Sets the variable to a reference (pointer) to a guest register reference.
7342 *
7343 * This does not require stack storage as we know the value and can always
7344 * reload it. Loading is postponed till needed.
7345 *
7346 * @param pReNative The recompiler state.
7347 * @param idxVar The variable.
7348 * @param enmRegClass The class guest registers to reference.
7349 * @param idxReg The register within @a enmRegClass to reference.
7350 *
7351 * @throws VERR_IEM_VAR_IPE_2
7352 */
7353DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7354 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7355{
7356 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7357 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7358
7359 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7360 {
7361 /* Only simple transitions for now. */
7362 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7363 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7364 }
7365 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7366
7367 pVar->u.GstRegRef.enmClass = enmRegClass;
7368 pVar->u.GstRegRef.idx = idxReg;
7369}
7370
7371
7372DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7373{
7374 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7375}
7376
7377
7378DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7379{
7380 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7381
7382 /* Since we're using a generic uint64_t value type, we must truncate it if
7383 the variable is smaller, otherwise we may end up with a too large value
7384 when scaling up an imm8 w/ sign-extension.
7385
7386 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7387 in the bios, bx=1) when running on arm, because clang expects 16-bit
7388 register parameters to have bits 16 and up set to zero. Instead of
7389 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7390 CF value in the result. */
7391 switch (cbType)
7392 {
7393 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7394 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7395 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7396 }
7397 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7398 return idxVar;
7399}
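/*
 * Illustrative example of the truncation above: for the "add bx, 0xffff" case
 * mentioned in the comment, the immediate presumably arrives sign-extended as
 * UINT64_C(0xffffffffffffffff) with cbType == sizeof(uint16_t); the switch
 * masks it down to UINT64_C(0xffff) before recording the constant, so the
 * 16-bit argument register later gets loaded with bits 16 and up cleared.
 */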
7400
7401
7402DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7403{
7404 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7405 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7406 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7407 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7408 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7409 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7410
7411 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7412 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7413 return idxArgVar;
7414}
7415
7416
7417DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7418{
7419 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7420 /* Don't set to stack now, leave that to the first use as for instance
7421 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7422 return idxVar;
7423}
7424
7425
7426DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7427{
7428 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7429
7430 /* Since we're using a generic uint64_t value type, we must truncate it if
7431 the variable is smaller, otherwise we may end up with a too large value
7432 when scaling up an imm8 w/ sign-extension. */
7433 switch (cbType)
7434 {
7435 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7436 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7437 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7438 }
7439 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7440 return idxVar;
7441}
7442
7443
7444DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
7445 uint8_t cbType, uint8_t idxVarOther)
7446{
7447 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7448 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7449
7450 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquireInited(pReNative, idxVarOther, poff);
7451 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
7452
7453/** @todo combine MOV and AND using MOVZX/similar. */
7454 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
7455
7456 /* Truncate the value to this variables size. */
7457 switch (cbType)
7458 {
7459 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
7460 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
7461 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
7462 }
7463
7464 iemNativeVarRegisterRelease(pReNative, idxVarOther);
7465 iemNativeVarRegisterRelease(pReNative, idxVar);
7466 return idxVar;
7467}
7468
7469
7470/**
7471 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7472 * fixed till we call iemNativeVarRegisterRelease.
7473 *
7474 * @returns The host register number.
7475 * @param pReNative The recompiler state.
7476 * @param idxVar The variable.
7477 * @param poff Pointer to the instruction buffer offset.
7478 * In case a register needs to be freed up or the value
7479 * loaded off the stack.
7480 * @param idxRegPref Preferred register number or UINT8_MAX.
7481 *
7482 * @tparam a_fInitialized Set if the variable must already have been
7483 * initialized. Will throw VERR_IEM_VAR_NOT_INITIALIZED
7484 * if this is not the case.
7485 * @tparam a_fWithRegPref If idxRegPref is valid.
7486 *
7487 * @note Must not modify the host status flags!
7488 */
7489template<bool const a_fInitialized, bool const a_fWithRegPref>
7490DECL_FORCE_INLINE_THROW(uint8_t)
7491iemNativeVarRegisterAcquireSlowInt(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff, uint8_t idxRegPref)
7492{
7493 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7494 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7495 Assert(pVar->cbVar <= 8);
7496 Assert(!pVar->fRegAcquired);
7497 Assert(!a_fWithRegPref || idxRegPref < RT_ELEMENTS(pReNative->Core.aHstRegs));
7498
7499 /* This slow code path only handles the case where no register has been
7500 allocated for the variable yet. */
7501 Assert(pVar->idxReg == UINT8_MAX);
7502
7503 /*
7504 * If the kind of variable has not yet been set, default to 'stack'.
7505 */
7506 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7507 && pVar->enmKind < kIemNativeVarKind_End);
7508 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7509 iemNativeVarSetKindToStack(pReNative, idxVar);
7510
7511 /*
7512 * We have to allocate a register for the variable, even if it's a stack one,
7513 * as we don't know if there are modifications being made to it before it's
7514 * finalized (todo: analyze and insert hints about that?).
7515 *
7516 * If we can, we try to get the correct register for argument variables. This
7517 * assumes that most argument variables are fetched as close as possible
7518 * to the actual call, so that there aren't any interfering hidden calls
7519 * (memory accesses, etc.) in between.
7520 *
7521 * If we cannot, or it's a regular (non-argument) variable, we make sure no
7522 * argument registers that will be used by this MC block are allocated here,
7523 * and we always prefer non-volatile registers to avoid needing to spill
7524 * stuff for internal calls.
7525 */
7526 /** @todo Detect too early argument value fetches and warn about hidden
7527 * calls causing less optimal code to be generated in the python script. */
7528
7529 uint8_t idxReg;
7530 uint8_t const uArgNo = pVar->uArgNo;
7531 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7532 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7533 {
7534 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7535
7536#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7537 /* Writeback any dirty shadow registers we are about to unshadow. */
7538 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
7539#endif
7540
7541 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7542 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7543 }
7544 else if ( !a_fWithRegPref
7545 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7546 {
7547 /** @todo there must be a better way for this and boot cArgsX? */
7548 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7549 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7550 & ~pReNative->Core.bmHstRegsWithGstShadow
7551 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7552 & fNotArgsMask;
7553 if (fRegs)
7554 {
7555 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7556 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7557 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7558 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7559 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7560 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7561 }
7562 else
7563 {
7564 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7565 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7566 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7567 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7568 }
7569 }
7570 else
7571 {
7572 idxReg = idxRegPref;
7573 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7574 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7575 }
7576 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7577 pVar->idxReg = idxReg;
7578 pVar->fSimdReg = false;
7579
7580 /*
7581 * Load it off the stack if we've got a stack slot.
7582 */
7583 uint8_t const idxStackSlot = pVar->idxStackSlot;
7584 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7585 {
7586 Assert(a_fInitialized);
7587 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7588 switch (pVar->cbVar)
7589 {
7590 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7591 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7592 case 3: AssertFailed(); RT_FALL_THRU();
7593 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7594 default: AssertFailed(); RT_FALL_THRU();
7595 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7596 }
7597 }
7598 else
7599 {
7600 Assert(idxStackSlot == UINT8_MAX);
7601 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7602 AssertStmt(!a_fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7603 else
7604 {
7605 /*
7606 * Convert from immediate to stack/register. This is currently only
7607 * required by IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR, IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR
7608 * and IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR in connection with BT, BTS, BTR, and BTC.
7609 */
7610 AssertStmt(a_fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7611 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u uValue=%RX64 converting from immediate to stack\n",
7612 idxVar, idxReg, pVar->u.uValue));
7613 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7614 pVar->enmKind = kIemNativeVarKind_Stack;
7615 }
7616 }
7617
7618 pVar->fRegAcquired = true;
7619 return idxReg;
7620}
7621
7622
7623/** See iemNativeVarRegisterAcquireSlowInt for details. */
7624DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquireSlow(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff)
7625{
7626 /* very likely */
7627 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.aStatAdHoc[(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)) + 0]);
7628 return iemNativeVarRegisterAcquireSlowInt<false, false>(pReNative, idxVar, poff, UINT8_MAX);
7629}
7630
7631
7632/** See iemNativeVarRegisterAcquireSlowInt for details. */
7633DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquireInitedSlow(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff)
7634{
7635 /* even more likely */
7636 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.aStatAdHoc[(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)) + 2]);
7637 return iemNativeVarRegisterAcquireSlowInt<true, false>(pReNative, idxVar, poff, UINT8_MAX);
7638}
7639
7640
7641/** See iemNativeVarRegisterAcquireSlowInt for details. */
7642DECL_HIDDEN_THROW(uint8_t)
7643iemNativeVarRegisterAcquireWithPrefSlow(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff, uint8_t idxRegPref)
7644{
7645 /* unused */
7646 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.aStatAdHoc[(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)) + 4]);
7647 return iemNativeVarRegisterAcquireSlowInt<false, true>(pReNative, idxVar, poff, idxRegPref);
7648}
7649
7650
7651/** See iemNativeVarRegisterAcquireSlowInt for details. */
7652DECL_HIDDEN_THROW(uint8_t)
7653iemNativeVarRegisterAcquireInitedWithPrefSlow(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff, uint8_t idxRegPref)
7654{
7655 /* very very likely */
7656 //STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.aStatAdHoc[(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)) + 6]);
7657 return iemNativeVarRegisterAcquireSlowInt<true, true>(pReNative, idxVar, poff, idxRegPref);
7658}
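/*
 * Typical usage sketch (illustrative names): the non-slow acquire variants
 * handle the case where the variable already has a host register (hence the
 * "slow code path" assertion above) and fall back to these workers otherwise;
 * callers simply bracket the code they emit with an acquire/release pair:
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquireInited(pReNative, idxVar, &off);
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxSomeHostReg, idxVarReg);
 *      iemNativeVarRegisterRelease(pReNative, idxVar);
 */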
7659
7660
7661/**
7662 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7663 * fixed till we call iemNativeVarRegisterRelease.
7664 *
7665 * @returns The host register number.
7666 * @param pReNative The recompiler state.
7667 * @param idxVar The variable.
7668 * @param poff Pointer to the instruction buffer offset.
7669 * In case a register needs to be freed up or the value
7670 * loaded off the stack.
7671 * @param fInitialized Set if the variable must already have been initialized.
7672 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7673 * the case.
7674 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7675 */
7676/** @todo Create variants for the last two params like we've done for the
7677 * GPR variant? */
7678DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7679 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7680{
7681 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7682 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7683 Assert( pVar->cbVar == sizeof(RTUINT128U)
7684 || pVar->cbVar == sizeof(RTUINT256U));
7685 Assert(!pVar->fRegAcquired);
7686
7687/** @todo inline this bit? */
7688 uint8_t idxReg = pVar->idxReg;
7689 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7690 {
7691 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7692 && pVar->enmKind < kIemNativeVarKind_End);
7693 pVar->fRegAcquired = true;
7694 return idxReg;
7695 }
7696
7697 /*
7698 * If the kind of variable has not yet been set, default to 'stack'.
7699 */
7700 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7701 && pVar->enmKind < kIemNativeVarKind_End);
7702 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7703 iemNativeVarSetKindToStack(pReNative, idxVar);
7704
7705 /*
7706 * We have to allocate a register for the variable, even if it's a stack one,
7707 * as we don't know if there are modifications being made to it before it's
7708 * finalized (todo: analyze and insert hints about that?).
7709 *
7710 * If we can, we try to get the correct register for argument variables. This
7711 * assumes that most argument variables are fetched as close as possible
7712 * to the actual call, so that there aren't any interfering hidden calls
7713 * (memory accesses, etc.) in between.
7714 *
7715 * If we cannot, or it's a regular (non-argument) variable, we make sure no
7716 * argument registers that will be used by this MC block are allocated here,
7717 * and we always prefer non-volatile registers to avoid needing to spill
7718 * stuff for internal calls.
7719 */
7720 /** @todo Detect too early argument value fetches and warn about hidden
7721 * calls causing less optimal code to be generated in the python script. */
7722
7723 uint8_t const uArgNo = pVar->uArgNo;
7724 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7725
7726 /* SIMD is a bit simpler for now because there is no support for arguments. */
7727 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7728 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7729 {
7730 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7731 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7732 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7733 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7734 & fNotArgsMask;
7735 if (fRegs)
7736 {
7737 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7738 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7739 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7740 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7741 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7742 }
7743 else
7744 {
7745 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7746 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7747 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7748 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7749 }
7750 }
7751 else
7752 {
7753 idxReg = idxRegPref;
7754 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7755 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7756 }
7757 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7758 pVar->idxReg = idxReg;
7759 pVar->fSimdReg = true;
7760
7761 /*
7762 * Load it off the stack if we've got a stack slot.
7763 */
7764 uint8_t const idxStackSlot = pVar->idxStackSlot;
7765 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7766 {
7767 Assert(fInitialized);
7768 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7769 switch (pVar->cbVar)
7770 {
7771 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7772 default: AssertFailed(); RT_FALL_THRU();
7773 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7774 }
7775 }
7776 else
7777 {
7778 Assert(idxStackSlot == UINT8_MAX);
7779 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7780 }
7781 pVar->fRegAcquired = true;
7782 return idxReg;
7783}
7784
7785
7786/**
7787 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7788 * guest register.
7789 *
7790 * This function makes sure there is a register for it and sets it to be the
7791 * current shadow copy of @a enmGstReg.
7792 *
7793 * @returns The host register number.
7794 * @param pReNative The recompiler state.
7795 * @param idxVar The variable.
7796 * @param enmGstReg The guest register this variable will be written to
7797 * after this call.
7798 * @param poff Pointer to the instruction buffer offset.
7799 * In case a register needs to be freed up or if the
7800 * variable content needs to be loaded off the stack.
7801 *
7802 * @note We DO NOT expect @a idxVar to be an argument variable, because
7803 * this function can only be used in the commit stage of an
7804 * instruction.
7805 */
7806DECL_HIDDEN_THROW(uint8_t)
7807iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7808{
7809 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7810 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7811 Assert(!pVar->fRegAcquired);
7812 AssertMsgStmt( pVar->cbVar <= 8
7813 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7814 || pVar->enmKind == kIemNativeVarKind_Stack),
7815 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7816 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7817 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7818
7819 /*
7820 * This shouldn't ever be used for arguments, unless it's in a weird else
7821 * branch that doesn't do any calling and even then it's questionable.
7822 *
7823 * However, in case someone writes crazy wrong MC code and does register
7824 * updates before making calls, just use the regular register allocator to
7825 * ensure we get a register suitable for the intended argument number.
7826 */
7827 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7828
7829 /*
7830 * If there is already a register for the variable, we transfer/set the
7831 * guest shadow copy assignment to it.
7832 */
7833 uint8_t idxReg = pVar->idxReg;
7834 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7835 {
7836#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7837 AssertCompile(kIemNativeGstReg_GprFirst == 0);
7838 if (enmGstReg <= kIemNativeGstReg_GprLast)
7839 {
7840# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7841 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
7842 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
7843# endif
7844 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
7845 }
7846#endif
7847
7848 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7849 {
7850 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7851 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7852 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7853 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7854 }
7855 else
7856 {
7857 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7858 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7859 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7860 }
7861 pVar->fRegAcquired = true;
7862 return idxReg;
7863 }
7864 Assert(pVar->uArgNo == UINT8_MAX);
7865
7866 /*
7867 * Because this is supposed to be the commit stage, we just tag along with the
7868 * temporary register allocator and upgrade it to a variable register.
7869 */
7870 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7871 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7872 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7873 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7874 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7875 pVar->idxReg = idxReg;
7876
7877 /*
7878 * Now we need to load the register value.
7879 */
7880 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7881 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7882 else
7883 {
7884 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7885 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7886 switch (pVar->cbVar)
7887 {
7888 case sizeof(uint64_t):
7889 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7890 break;
7891 case sizeof(uint32_t):
7892 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7893 break;
7894 case sizeof(uint16_t):
7895 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7896 break;
7897 case sizeof(uint8_t):
7898 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7899 break;
7900 default:
7901 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7902 }
7903 }
7904
7905 pVar->fRegAcquired = true;
7906 return idxReg;
7907}
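/*
 * Usage sketch (commit stage of an instruction, names illustrative): acquire a
 * host register that both holds the variable value and becomes the shadow of
 * the guest register about to be written in full, use it, then release:
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVarValue,
 *                                                                       kIemNativeGstReg_GprFirst, &off);
 *      // ... emit the actual guest register update using idxVarReg ...
 *      iemNativeVarRegisterRelease(pReNative, idxVarValue);
 */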
7908
7909
7910/**
7911 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7912 *
7913 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7914 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7915 * requirement of flushing anything in volatile host registers when making a
7916 * call.
7917 *
7918 * @returns New @a off value.
7919 * @param pReNative The recompiler state.
7920 * @param off The code buffer position.
7921 * @param fHstGprNotToSave Set of GPRs not to save & restore.
7922 */
7923DECL_HIDDEN_THROW(uint32_t)
7924iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstGprNotToSave)
7925{
7926 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK & ~fHstGprNotToSave;
7927 if (fHstRegs)
7928 {
7929 do
7930 {
7931 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7932 fHstRegs &= ~RT_BIT_32(idxHstReg);
7933
7934 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7935 {
7936 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7937 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7938 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7939 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7940 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7941 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7942 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7943 {
7944 case kIemNativeVarKind_Stack:
7945 {
7946 /* Temporarily spill the variable register. */
7947 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7948 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7949 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7950 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7951 continue;
7952 }
7953
7954 case kIemNativeVarKind_Immediate:
7955 case kIemNativeVarKind_VarRef:
7956 case kIemNativeVarKind_GstRegRef:
7957 /* It is weird to have any of these loaded at this point. */
7958 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7959 continue;
7960
7961 case kIemNativeVarKind_End:
7962 case kIemNativeVarKind_Invalid:
7963 break;
7964 }
7965 AssertFailed();
7966 }
7967 else
7968 {
7969 /*
7970 * Allocate a temporary stack slot and spill the register to it.
7971 */
7972 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7973 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7974 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7975 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7976 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7977 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7978 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7979 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7980 }
7981 } while (fHstRegs);
7982 }
7983
7984 /*
7985 * Guest register shadows are flushed to CPUMCTX at the moment and don't need a stack slot
7986 * allocated, which would be more difficult anyway due to spanning multiple stack slots and
7987 * different sizes (besides, we only have a limited amount of slots at the moment).
7988 *
7989 * However, the shadows need to be flushed out as the guest SIMD registers might get corrupted
7990 * by the callee. This asserts that the registers were written back earlier and are not in the dirty state.
7991 */
7992 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
7993
7994 fHstRegs = pReNative->Core.bmHstSimdRegs & (IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK);
7995 if (fHstRegs)
7996 {
7997 do
7998 {
7999 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8000 fHstRegs &= ~RT_BIT_32(idxHstReg);
8001
8002 /* Fixed reserved and temporary registers don't need saving. */
8003 /*Assert( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedReserved
8004 && pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedTmp); included below */
8005 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8006
8007 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8008 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8009 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8010 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8011 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8012 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8013 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8014 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8015 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8016 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8017 {
8018 case kIemNativeVarKind_Stack:
8019 {
8020 /* Temporarily spill the variable register. */
8021 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8022 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8023 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
8024 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8025 if (cbVar == sizeof(RTUINT128U))
8026 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8027 else
8028 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
8029 continue;
8030 }
8031
8032 case kIemNativeVarKind_Immediate:
8033 case kIemNativeVarKind_VarRef:
8034 case kIemNativeVarKind_GstRegRef:
8035 /* It is weird to have any of these loaded at this point. */
8036 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8037 continue;
8038
8039 case kIemNativeVarKind_End:
8040 case kIemNativeVarKind_Invalid:
8041 break;
8042 }
8043 AssertFailed();
8044 } while (fHstRegs);
8045 }
8046 return off;
8047}
8048
8049
8050/**
8051 * Emit code to restore volatile registers after a call to a helper.
8052 *
8053 * @returns New @a off value.
8054 * @param pReNative The recompiler state.
8055 * @param off The code buffer position.
8056 * @param fHstGprNotToSave Set of registers not to save & restore.
8057 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
8058 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
8059 */
8060DECL_HIDDEN_THROW(uint32_t)
8061iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstGprNotToSave)
8062{
8063 /*
8064 * GPRs
8065 */
8066 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK & ~fHstGprNotToSave;
8067 if (fHstRegs)
8068 {
8069 do
8070 {
8071 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8072 fHstRegs &= ~RT_BIT_32(idxHstReg);
8073
8074 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
8075 {
8076 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
8077 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8078 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8079 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8080 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
8081 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8082 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8083 {
8084 case kIemNativeVarKind_Stack:
8085 {
8086 /* Unspill the variable register. */
8087 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8088 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8089 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8090 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8091 continue;
8092 }
8093
8094 case kIemNativeVarKind_Immediate:
8095 case kIemNativeVarKind_VarRef:
8096 case kIemNativeVarKind_GstRegRef:
8097 /* It is weird to have any of these loaded at this point. */
8098 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8099 continue;
8100
8101 case kIemNativeVarKind_End:
8102 case kIemNativeVarKind_Invalid:
8103 break;
8104 }
8105 AssertFailed();
8106 }
8107 else
8108 {
8109 /*
8110 * Restore from temporary stack slot.
8111 */
8112 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
8113 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
8114 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
8115 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
8116
8117 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8118 }
8119 } while (fHstRegs);
8120 }
8121
8122 /*
8123 * SIMD registers.
8124 */
8125 fHstRegs = pReNative->Core.bmHstSimdRegs & (IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK);
8126 if (fHstRegs)
8127 {
8128 do
8129 {
8130 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
8131 fHstRegs &= ~RT_BIT_32(idxHstReg);
8132
8133 /*Assert( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedTmp
8134 && pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat != kIemNativeWhat_FixedReserved); - included below. */
8135 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8136
8137 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8138 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8139 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8140 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8141 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8142 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8143 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8144 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8145 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8146 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8147 {
8148 case kIemNativeVarKind_Stack:
8149 {
8150 /* Unspill the variable register. */
8151 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8152 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8153 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8154 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8155
8156 if (cbVar == sizeof(RTUINT128U))
8157 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8158 else
8159 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8160 continue;
8161 }
8162
8163 case kIemNativeVarKind_Immediate:
8164 case kIemNativeVarKind_VarRef:
8165 case kIemNativeVarKind_GstRegRef:
8166 /* It is weird to have any of these loaded at this point. */
8167 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8168 continue;
8169
8170 case kIemNativeVarKind_End:
8171 case kIemNativeVarKind_Invalid:
8172 break;
8173 }
8174 AssertFailed();
8175 } while (fHstRegs);
8176 }
8177 return off;
8178}
8179
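/*
 * A minimal usage sketch for the save/restore pair, assuming the save
 * counterpart (iemNativeVarSaveVolatileRegsPreHlpCall) mirrors the signature
 * of the restore function above and that a plain immediate call emitter is
 * used for the helper; the emitter and helper names below are assumptions,
 * not taken from this file:
 *
 *      uint32_t const fKeepGprs = RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG);
 *      off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fKeepGprs);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnSomeHelper);       // assumed emitter + helper
 *      off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fKeepGprs);
 */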
8180
8181/**
8182 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8183 *
8184 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8185 *
8186 * ASSUMES that @a idxVar is valid and unpacked.
8187 */
8188DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8189{
8190 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8191 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8192 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8193 {
8194 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8195 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8196 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
8197 Assert(cSlots > 0);
8198 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8199 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8200 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8201 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8202 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8203 }
8204 else
8205 Assert(idxStackSlot == UINT8_MAX);
8206}
8207
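/*
 * A worked example of the mask arithmetic above: for a 256-bit variable
 * (cbVar == sizeof(RTUINT256U) == 32) we get cSlots = (32 + 7) / 8 = 4 and
 * fAllocMask = RT_BIT_32(4) - 1 = 0xf, so with idxStackSlot = 6 the
 * bmStack &= ~(fAllocMask << idxStackSlot) statement clears slot bits 6 thru 9.
 */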
8208
8209/**
8210 * Worker that frees a single variable.
8211 *
8212 * ASSUMES that @a idxVar is valid and unpacked.
8213 */
8214DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8215{
8216 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8217 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8218 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8219
8220 /* Free the host register first if any assigned. */
8221 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8222 if (idxHstReg != UINT8_MAX)
8223 {
8224 if (!pReNative->Core.aVars[idxVar].fSimdReg)
8225 {
8226 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
8227 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8228 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8229 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8230 }
8231 else
8232 {
8233 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8234 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8235 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8236 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8237 }
8238 }
8239
8240 /* Free argument mapping. */
8241 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8242 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8243 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8244
8245 /* Free the stack slots. */
8246 iemNativeVarFreeStackSlots(pReNative, idxVar);
8247
8248 /* Free the actual variable. */
8249 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8250 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8251}
8252
8253
8254/**
8255 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8256 */
8257DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8258{
8259 while (bmVars != 0)
8260 {
8261 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8262 bmVars &= ~RT_BIT_32(idxVar);
8263
8264#if 1 /** @todo optimize by simplifying this later... */
8265 iemNativeVarFreeOneWorker(pReNative, idxVar);
8266#else
8267 /* Only need to free the host register, the rest is done as bulk updates below. */
8268 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8269 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8270 {
8271 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8272 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8273 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8274 }
8275#endif
8276 }
8277#if 0 /** @todo optimize by simplifying this later... */
8278 pReNative->Core.bmVars = 0;
8279 pReNative->Core.bmStack = 0;
8280 pReNative->Core.u64ArgVars = UINT64_MAX;
8281#endif
8282}
8283
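/*
 * A sketch of the assumed fast path in iemNativeVarFreeAll (not part of this
 * file), which would only drop into the slow worker above when variables are
 * actually live:
 *
 *      uint32_t const bmVars = pReNative->Core.bmVars;
 *      if (bmVars != 0)
 *          iemNativeVarFreeAllSlow(pReNative, bmVars);
 */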
8284
8285
8286/*********************************************************************************************************************************
8287* Emitters for IEM_MC_CALL_CIMPL_XXX *
8288*********************************************************************************************************************************/
8289
8290/**
8291 * Emits code to load a reference to the given guest register into @a idxGprDst.
8292 */
8293DECL_HIDDEN_THROW(uint32_t)
8294iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8295 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8296{
8297#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8298    /** @todo If we are ever going to allow referencing the RIP register we need to update the guest value here. */
8299#endif
8300
8301 /*
8302 * Get the offset relative to the CPUMCTX structure.
8303 */
8304 uint32_t offCpumCtx;
8305 switch (enmClass)
8306 {
8307 case kIemNativeGstRegRef_Gpr:
8308 Assert(idxRegInClass < 16);
8309 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8310 break;
8311
8312 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH*/
8313 Assert(idxRegInClass < 4);
8314 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8315 break;
8316
8317 case kIemNativeGstRegRef_EFlags:
8318 Assert(idxRegInClass == 0);
8319 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8320 break;
8321
8322 case kIemNativeGstRegRef_MxCsr:
8323 Assert(idxRegInClass == 0);
8324 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8325 break;
8326
8327 case kIemNativeGstRegRef_FpuReg:
8328 Assert(idxRegInClass < 8);
8329 AssertFailed(); /** @todo what kind of indexing? */
8330 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8331 break;
8332
8333 case kIemNativeGstRegRef_MReg:
8334 Assert(idxRegInClass < 8);
8335 AssertFailed(); /** @todo what kind of indexing? */
8336 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8337 break;
8338
8339 case kIemNativeGstRegRef_XReg:
8340 Assert(idxRegInClass < 16);
8341 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8342 break;
8343
8344 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8345 Assert(idxRegInClass == 0);
8346 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8347 break;
8348
8349 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8350 Assert(idxRegInClass == 0);
8351 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8352 break;
8353
8354 default:
8355 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8356 }
8357
8358 /*
8359 * Load the value into the destination register.
8360 */
8361#ifdef RT_ARCH_AMD64
8362 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8363
8364#elif defined(RT_ARCH_ARM64)
8365 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8366 Assert(offCpumCtx < 4096);
8367 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8368
8369#else
8370# error "Port me!"
8371#endif
8372
8373 return off;
8374}
8375
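/*
 * A minimal usage sketch: loading the address of the guest RBX register (GPR
 * class, index 3) into a call argument register.  The choice of
 * IEMNATIVE_CALL_ARG0_GREG as the destination is purely for illustration.
 *
 *      off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
 *                                           kIemNativeGstRegRef_Gpr, 3 /*rbx*/);
 */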
8376
8377/**
8378 * Common code for CIMPL and AIMPL calls.
8379 *
8380 * These are calls that use argument variables and such. They should not be
8381 * confused with internal calls required to implement an MC operation,
8382 * like a TLB load and similar.
8383 *
8384 * Upon return all that is left to do is to load any hidden arguments and
8385 * perform the call. All argument variables are freed.
8386 *
8387 * @returns New code buffer offset; throws VBox status code on error.
8388 * @param pReNative The native recompile state.
8389 * @param off The code buffer offset.
8390 * @param   cArgs               The total number of arguments (includes hidden
8391 * count).
8392 * @param cHiddenArgs The number of hidden arguments. The hidden
8393 * arguments must not have any variable declared for
8394 * them, whereas all the regular arguments must
8395 * (tstIEMCheckMc ensures this).
8396 * @param   fFlushPendingWrites Whether to flush pending writes (default true).
8397 *                              Pending writes in call volatile registers are still flushed even when false.
8398 */
8399DECL_HIDDEN_THROW(uint32_t)
8400iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
8401 bool fFlushPendingWrites /*= true*/)
8402{
8403#ifdef VBOX_STRICT
8404 /*
8405 * Assert sanity.
8406 */
8407 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8408 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8409 for (unsigned i = 0; i < cHiddenArgs; i++)
8410 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8411 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8412 {
8413 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8414 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8415 }
8416 iemNativeRegAssertSanity(pReNative);
8417#endif
8418
8419 /* We don't know what the called function makes use of, so flush any pending register writes. */
8420 RT_NOREF(fFlushPendingWrites);
8421#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8422 if (fFlushPendingWrites)
8423#endif
8424 off = iemNativeRegFlushPendingWrites(pReNative, off);
8425
8426 /*
8427 * Before we do anything else, go over variables that are referenced and
8428 * make sure they are not in a register.
8429 */
8430 uint32_t bmVars = pReNative->Core.bmVars;
8431 if (bmVars)
8432 {
8433 do
8434 {
8435 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8436 bmVars &= ~RT_BIT_32(idxVar);
8437
8438 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8439 {
8440 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8441 if (idxRegOld != UINT8_MAX)
8442 {
8443 if (!pReNative->Core.aVars[idxVar].fSimdReg)
8444 {
8445 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
8446
8447 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8448 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8449 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8450 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8451 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8452
8453 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8454 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8455 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8456 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8457 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8458 }
8459 else
8460 {
8461 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8462 Assert( pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U)
8463 || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8464
8465 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8466 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8467 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8468 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8469 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8470 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off,
8471 iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8472 else
8473 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off,
8474 iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8475
8476 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8477 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8478
8479 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8480 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8481 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8482 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8483 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8484 }
8485 }
8486 }
8487 } while (bmVars != 0);
8488#if 0 //def VBOX_STRICT
8489 iemNativeRegAssertSanity(pReNative);
8490#endif
8491 }
8492
8493 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8494
8495#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8496 /*
8497     * As the very first step, go over the host registers that will be used for
8498     * arguments and make sure they don't shadow anything which needs writing back first.
8499 */
8500 for (uint32_t i = 0; i < cRegArgs; i++)
8501 {
8502 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8503
8504 /* Writeback any dirty guest shadows before using this register. */
8505 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
8506 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
8507 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
8508 }
8509#endif
8510
8511 /*
8512 * First, go over the host registers that will be used for arguments and make
8513 * sure they either hold the desired argument or are free.
8514 */
8515 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8516 {
8517 for (uint32_t i = 0; i < cRegArgs; i++)
8518 {
8519 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8520 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8521 {
8522 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8523 {
8524 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8525 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8526 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8527 Assert(pVar->idxReg == idxArgReg);
8528 uint8_t const uArgNo = pVar->uArgNo;
8529 if (uArgNo == i)
8530                    { /* perfect */ }
8531 /* The variable allocator logic should make sure this is impossible,
8532 except for when the return register is used as a parameter (ARM,
8533 but not x86). */
8534#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8535 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8536 {
8537# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8538# error "Implement this"
8539# endif
8540 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8541 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8542 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8543 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8544 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8545 }
8546#endif
8547 else
8548 {
8549 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8550
8551 if (pVar->enmKind == kIemNativeVarKind_Stack)
8552 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8553 else
8554 {
8555 /* just free it, can be reloaded if used again */
8556 pVar->idxReg = UINT8_MAX;
8557 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8558 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8559 }
8560 }
8561 }
8562 else
8563 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8564 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8565 }
8566 }
8567#if 0 //def VBOX_STRICT
8568 iemNativeRegAssertSanity(pReNative);
8569#endif
8570 }
8571
8572 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8573
8574#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8575 /*
8576 * If there are any stack arguments, make sure they are in their place as well.
8577 *
8578     * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8579     * the caller) will be loading it later and it must be free (see the first loop).
8580 */
8581 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8582 {
8583 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8584 {
8585 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8586 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8587 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8588 {
8589 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8590 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8591 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8592 pVar->idxReg = UINT8_MAX;
8593 }
8594 else
8595 {
8596 /* Use ARG0 as temp for stuff we need registers for. */
8597 switch (pVar->enmKind)
8598 {
8599 case kIemNativeVarKind_Stack:
8600 {
8601 uint8_t const idxStackSlot = pVar->idxStackSlot;
8602 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8603 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8604 iemNativeStackCalcBpDisp(idxStackSlot));
8605 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8606 continue;
8607 }
8608
8609 case kIemNativeVarKind_Immediate:
8610 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8611 continue;
8612
8613 case kIemNativeVarKind_VarRef:
8614 {
8615 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8616 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8617 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8618 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8619 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8620 if (idxRegOther != UINT8_MAX)
8621 {
8622 if (!pReNative->Core.aVars[idxOtherVar].fSimdReg)
8623 {
8624 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs));
8625 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8626 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8627 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8628 }
8629 else
8630 {
8631 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8632 if (pReNative->Core.aVars[idxOtherVar].cbVar == sizeof(RTUINT128U))
8633 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8634 else
8635 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8636 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8637 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8638 }
8639 }
8640 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8641 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8642 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8643 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8644 continue;
8645 }
8646
8647 case kIemNativeVarKind_GstRegRef:
8648 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8649 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8650 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8651 continue;
8652
8653 case kIemNativeVarKind_Invalid:
8654 case kIemNativeVarKind_End:
8655 break;
8656 }
8657 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8658 }
8659 }
8660# if 0 //def VBOX_STRICT
8661 iemNativeRegAssertSanity(pReNative);
8662# endif
8663 }
8664#else
8665 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8666#endif
8667
8668 /*
8669 * Make sure the argument variables are loaded into their respective registers.
8670 *
8671 * We can optimize this by ASSUMING that any register allocations are for
8672     * registers that have already been loaded and are ready. The previous step
8673 * saw to that.
8674 */
8675 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8676 {
8677 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8678 {
8679 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8680 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8681 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8682 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8683 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8684 else
8685 {
8686 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8687 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8688 {
8689 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8690 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8691 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8692 | RT_BIT_32(idxArgReg);
8693 pVar->idxReg = idxArgReg;
8694 }
8695 else
8696 {
8697 /* Use ARG0 as temp for stuff we need registers for. */
8698 switch (pVar->enmKind)
8699 {
8700 case kIemNativeVarKind_Stack:
8701 {
8702 uint8_t const idxStackSlot = pVar->idxStackSlot;
8703 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8704 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8705 continue;
8706 }
8707
8708 case kIemNativeVarKind_Immediate:
8709 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8710 continue;
8711
8712 case kIemNativeVarKind_VarRef:
8713 {
8714 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8715 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8716 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8717 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8718 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8719 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8720 if (idxRegOther != UINT8_MAX)
8721 {
8722 if (!pReNative->Core.aVars[idxOtherVar].fSimdReg)
8723 {
8724 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs));
8725 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8726 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8727 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8728 }
8729 else
8730 {
8731 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8732 if (pReNative->Core.aVars[idxOtherVar].cbVar == sizeof(RTUINT128U))
8733 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8734 else
8735 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8736 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8737 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8738 }
8739 }
8740 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8741 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8742 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8743 continue;
8744 }
8745
8746 case kIemNativeVarKind_GstRegRef:
8747 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8748 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8749 continue;
8750
8751 case kIemNativeVarKind_Invalid:
8752 case kIemNativeVarKind_End:
8753 break;
8754 }
8755 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8756 }
8757 }
8758 }
8759#if 0 //def VBOX_STRICT
8760 iemNativeRegAssertSanity(pReNative);
8761#endif
8762 }
8763#ifdef VBOX_STRICT
8764 else
8765 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8766 {
8767 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8768 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8769 }
8770#endif
8771
8772 /*
8773 * Free all argument variables (simplified).
8774 * Their lifetime always expires with the call they are for.
8775 */
8776 /** @todo Make the python script check that arguments aren't used after
8777 * IEM_MC_CALL_XXXX. */
8778    /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends
8779     *        requiring an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call,
8780     *        typically with an argument value. There is also some FPU stuff. */
8781 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8782 {
8783 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8784 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8785
8786 /* no need to free registers: */
8787 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8788 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8789 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8790 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8791 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8792 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8793
8794 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8795 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8796 iemNativeVarFreeStackSlots(pReNative, idxVar);
8797 }
8798 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8799
8800 /*
8801 * Flush volatile registers as we make the call.
8802 */
8803 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8804
8805 return off;
8806}
8807
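/*
 * A rough usage sketch for the common call emitter above, assuming a single
 * hidden argument receiving pVCpu and an immediate call emitter; the emitter
 * name and the hidden argument layout are assumptions, not taken from this file:
 *
 *      off = iemNativeEmitCallCommon(pReNative, off, 3 /*cArgs, incl. hidden*/, 1 /*cHiddenArgs*/);
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
 *      off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnCImpl);             // assumed emitter
 */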
8808
8809
8810/*********************************************************************************************************************************
8811* TLB Lookup. *
8812*********************************************************************************************************************************/
8813
8814/**
8815 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8816 */
8817DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint64_t uSegAndSizeAndAccessAndDisp)
8818{
8819 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccessAndDisp);
8820 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccessAndDisp);
8821 uint32_t const fAccess = (uint32_t)uSegAndSizeAndAccessAndDisp >> 16;
8822 uint8_t const offDisp = RT_BYTE5(uSegAndSizeAndAccessAndDisp);
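    /* Layout of uSegAndSizeAndAccessAndDisp as decoded above: byte 0 holds the
       segment register index (UINT8_MAX when no segment base applies), byte 1
       the access size, bytes 2+3 the IEM_ACCESS_XXX flags and byte 4 an extra
       displacement applied to GCPtr. */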
8823 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64+%#x LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, offDisp, cbMem, fAccess, uResult));
8824
8825 /* Do the lookup manually. */
8826 RTGCPTR const GCPtrFlat = (iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base) + offDisp;
8827 uint64_t const uTagNoRev = IEMTLB_CALC_TAG_NO_REV(GCPtrFlat);
8828 PCIEMTLBENTRY pTlbe = IEMTLB_TAG_TO_EVEN_ENTRY(&pVCpu->iem.s.DataTlb, uTagNoRev);
8829 if (RT_LIKELY( pTlbe->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevision)
8830 || (pTlbe = pTlbe + 1)->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevisionGlobal)))
8831 {
8832 /*
8833 * Check TLB page table level access flags.
8834 */
8835 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8836 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8837 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8838 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8839 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8840 | IEMTLBE_F_PG_UNASSIGNED
8841 | IEMTLBE_F_PT_NO_ACCESSED
8842 | fNoWriteNoDirty | fNoUser);
8843 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8844 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8845 {
8846 /*
8847 * Return the address.
8848 */
8849 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8850 if ((uintptr_t)pbAddr == uResult)
8851 return;
8852 RT_NOREF(cbMem);
8853 AssertFailed();
8854 }
8855 else
8856 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8857 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8858 }
8859 else
8860 AssertFailed();
8861 RT_BREAKPOINT();
8862}
8863
8864/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8865
8866
8867
8868/*********************************************************************************************************************************
8869* Recompiler Core. *
8870*********************************************************************************************************************************/
8871
8872/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8873static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8874{
8875 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8876 pDis->cbCachedInstr += cbMaxRead;
8877 RT_NOREF(cbMinRead);
8878 return VERR_NO_DATA;
8879}
8880
8881
8882DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8883{
8884 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8885 {
8886#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8887 ENTRY(fLocalForcedActions),
8888 ENTRY(iem.s.rcPassUp),
8889 ENTRY(iem.s.fExec),
8890 ENTRY(iem.s.pbInstrBuf),
8891 ENTRY(iem.s.uInstrBufPc),
8892 ENTRY(iem.s.GCPhysInstrBuf),
8893 ENTRY(iem.s.cbInstrBufTotal),
8894 ENTRY(iem.s.idxTbCurInstr),
8895 ENTRY(iem.s.fSkippingEFlags),
8896#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
8897 ENTRY(iem.s.uPcUpdatingDebug),
8898#endif
8899#ifdef VBOX_WITH_STATISTICS
8900 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8901 ENTRY(iem.s.StatNativeTlbHitsForStore),
8902 ENTRY(iem.s.StatNativeTlbHitsForStack),
8903 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8904 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8905 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8906 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8907 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8908#endif
8909 ENTRY(iem.s.DataTlb.uTlbRevision),
8910 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8911 ENTRY(iem.s.DataTlb.cTlbCoreHits),
8912 ENTRY(iem.s.DataTlb.cTlbInlineCodeHits),
8913 ENTRY(iem.s.DataTlb.cTlbNativeMissTag),
8914 ENTRY(iem.s.DataTlb.cTlbNativeMissFlagsAndPhysRev),
8915 ENTRY(iem.s.DataTlb.cTlbNativeMissAlignment),
8916 ENTRY(iem.s.DataTlb.cTlbNativeMissCrossPage),
8917 ENTRY(iem.s.DataTlb.cTlbNativeMissNonCanonical),
8918 ENTRY(iem.s.DataTlb.aEntries),
8919 ENTRY(iem.s.CodeTlb.uTlbRevision),
8920 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8921 ENTRY(iem.s.CodeTlb.cTlbCoreHits),
8922 ENTRY(iem.s.CodeTlb.cTlbNativeMissTag),
8923 ENTRY(iem.s.CodeTlb.cTlbNativeMissFlagsAndPhysRev),
8924 ENTRY(iem.s.CodeTlb.cTlbNativeMissAlignment),
8925 ENTRY(iem.s.CodeTlb.cTlbNativeMissCrossPage),
8926 ENTRY(iem.s.CodeTlb.cTlbNativeMissNonCanonical),
8927 ENTRY(iem.s.CodeTlb.aEntries),
8928 ENTRY(pVMR3),
8929 ENTRY(cpum.GstCtx.rax),
8930 ENTRY(cpum.GstCtx.ah),
8931 ENTRY(cpum.GstCtx.rcx),
8932 ENTRY(cpum.GstCtx.ch),
8933 ENTRY(cpum.GstCtx.rdx),
8934 ENTRY(cpum.GstCtx.dh),
8935 ENTRY(cpum.GstCtx.rbx),
8936 ENTRY(cpum.GstCtx.bh),
8937 ENTRY(cpum.GstCtx.rsp),
8938 ENTRY(cpum.GstCtx.rbp),
8939 ENTRY(cpum.GstCtx.rsi),
8940 ENTRY(cpum.GstCtx.rdi),
8941 ENTRY(cpum.GstCtx.r8),
8942 ENTRY(cpum.GstCtx.r9),
8943 ENTRY(cpum.GstCtx.r10),
8944 ENTRY(cpum.GstCtx.r11),
8945 ENTRY(cpum.GstCtx.r12),
8946 ENTRY(cpum.GstCtx.r13),
8947 ENTRY(cpum.GstCtx.r14),
8948 ENTRY(cpum.GstCtx.r15),
8949 ENTRY(cpum.GstCtx.es.Sel),
8950 ENTRY(cpum.GstCtx.es.u64Base),
8951 ENTRY(cpum.GstCtx.es.u32Limit),
8952 ENTRY(cpum.GstCtx.es.Attr),
8953 ENTRY(cpum.GstCtx.cs.Sel),
8954 ENTRY(cpum.GstCtx.cs.u64Base),
8955 ENTRY(cpum.GstCtx.cs.u32Limit),
8956 ENTRY(cpum.GstCtx.cs.Attr),
8957 ENTRY(cpum.GstCtx.ss.Sel),
8958 ENTRY(cpum.GstCtx.ss.u64Base),
8959 ENTRY(cpum.GstCtx.ss.u32Limit),
8960 ENTRY(cpum.GstCtx.ss.Attr),
8961 ENTRY(cpum.GstCtx.ds.Sel),
8962 ENTRY(cpum.GstCtx.ds.u64Base),
8963 ENTRY(cpum.GstCtx.ds.u32Limit),
8964 ENTRY(cpum.GstCtx.ds.Attr),
8965 ENTRY(cpum.GstCtx.fs.Sel),
8966 ENTRY(cpum.GstCtx.fs.u64Base),
8967 ENTRY(cpum.GstCtx.fs.u32Limit),
8968 ENTRY(cpum.GstCtx.fs.Attr),
8969 ENTRY(cpum.GstCtx.gs.Sel),
8970 ENTRY(cpum.GstCtx.gs.u64Base),
8971 ENTRY(cpum.GstCtx.gs.u32Limit),
8972 ENTRY(cpum.GstCtx.gs.Attr),
8973 ENTRY(cpum.GstCtx.rip),
8974 ENTRY(cpum.GstCtx.eflags),
8975 ENTRY(cpum.GstCtx.uRipInhibitInt),
8976 ENTRY(cpum.GstCtx.cr0),
8977 ENTRY(cpum.GstCtx.cr4),
8978 ENTRY(cpum.GstCtx.aXcr[0]),
8979 ENTRY(cpum.GstCtx.aXcr[1]),
8980 ENTRY(cpum.GstCtx.XState.x87.MXCSR),
8981 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8982 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8983 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8984 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8985 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8986 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8987 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8988 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8989 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8990 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8991 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8992 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8993 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8994 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8995 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8996 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8997 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8998 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8999 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
9000 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
9001 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
9002 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
9003 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
9004 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
9005 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
9006 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
9007 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
9008 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
9009 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
9010 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
9011 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
9012 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
9013#undef ENTRY
9014 };
9015#ifdef VBOX_STRICT
9016 static bool s_fOrderChecked = false;
9017 if (!s_fOrderChecked)
9018 {
9019 s_fOrderChecked = true;
9020 uint32_t offPrev = s_aMembers[0].off;
9021 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
9022 {
9023 Assert(s_aMembers[i].off > offPrev);
9024 offPrev = s_aMembers[i].off;
9025 }
9026 }
9027#endif
9028
9029 /*
9030 * Binary lookup.
9031 */
9032 unsigned iStart = 0;
9033 unsigned iEnd = RT_ELEMENTS(s_aMembers);
9034 for (;;)
9035 {
9036 unsigned const iCur = iStart + (iEnd - iStart) / 2;
9037 uint32_t const offCur = s_aMembers[iCur].off;
9038 if (off < offCur)
9039 {
9040 if (iCur != iStart)
9041 iEnd = iCur;
9042 else
9043 break;
9044 }
9045 else if (off > offCur)
9046 {
9047 if (iCur + 1 < iEnd)
9048 iStart = iCur + 1;
9049 else
9050 break;
9051 }
9052 else
9053 return s_aMembers[iCur].pszName;
9054 }
9055#ifdef VBOX_WITH_STATISTICS
9056 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
9057 return "iem.s.acThreadedFuncStats[iFn]";
9058#endif
9059 return NULL;
9060}
9061
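/*
 * For example, given the table above, iemNativeDbgVCpuOffsetToName() maps
 * RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.rip) to "cpum.GstCtx.rip".  Offsets not in
 * the table return NULL, except that with VBOX_WITH_STATISTICS an offset into
 * iem.s.acThreadedFuncStats yields the generic "iem.s.acThreadedFuncStats[iFn]".
 */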
9062
9063/**
9064 * Translates a label to a name.
9065 */
9066static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode /*= false*/)
9067{
9068 switch (enmLabel)
9069 {
9070#define STR_CASE_CMN(a_Label) case kIemNativeLabelType_ ## a_Label: return fCommonCode ? "Chunk_" #a_Label : #a_Label;
9071 STR_CASE_CMN(Invalid);
9072 STR_CASE_CMN(RaiseDe);
9073 STR_CASE_CMN(RaiseUd);
9074 STR_CASE_CMN(RaiseSseRelated);
9075 STR_CASE_CMN(RaiseAvxRelated);
9076 STR_CASE_CMN(RaiseSseAvxFpRelated);
9077 STR_CASE_CMN(RaiseNm);
9078 STR_CASE_CMN(RaiseGp0);
9079 STR_CASE_CMN(RaiseMf);
9080 STR_CASE_CMN(RaiseXf);
9081 STR_CASE_CMN(ObsoleteTb);
9082 STR_CASE_CMN(NeedCsLimChecking);
9083 STR_CASE_CMN(CheckBranchMiss);
9084 STR_CASE_CMN(ReturnSuccess);
9085 STR_CASE_CMN(ReturnBreak);
9086 STR_CASE_CMN(ReturnBreakFF);
9087 STR_CASE_CMN(ReturnWithFlags);
9088 STR_CASE_CMN(ReturnBreakViaLookup);
9089 STR_CASE_CMN(ReturnBreakViaLookupWithIrq);
9090 STR_CASE_CMN(ReturnBreakViaLookupWithTlb);
9091 STR_CASE_CMN(ReturnBreakViaLookupWithTlbAndIrq);
9092 STR_CASE_CMN(NonZeroRetOrPassUp);
9093#undef STR_CASE_CMN
9094#define STR_CASE_LBL(a_Label) case kIemNativeLabelType_ ## a_Label: return #a_Label;
9095 STR_CASE_LBL(LoopJumpTarget);
9096 STR_CASE_LBL(If);
9097 STR_CASE_LBL(Else);
9098 STR_CASE_LBL(Endif);
9099 STR_CASE_LBL(CheckIrq);
9100 STR_CASE_LBL(TlbLookup);
9101 STR_CASE_LBL(TlbMiss);
9102 STR_CASE_LBL(TlbDone);
9103 case kIemNativeLabelType_End: break;
9104 }
9105 return NULL;
9106}
9107
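/*
 * For example, iemNativeGetLabelName(kIemNativeLabelType_TlbMiss, false) returns
 * "TlbMiss", while for a common-code label like kIemNativeLabelType_ReturnBreak
 * the result is "ReturnBreak", or "Chunk_ReturnBreak" when fCommonCode is true.
 */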
9108
9109/** Info for the symbols resolver used when disassembling. */
9110typedef struct IEMNATIVDISASMSYMCTX
9111{
9112 PVMCPU pVCpu;
9113 PCIEMTB pTb;
9114 PCIEMNATIVEPERCHUNKCTX pCtx;
9115#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9116 PCIEMTBDBG pDbgInfo;
9117#endif
9118} IEMNATIVDISASMSYMCTX;
9119typedef IEMNATIVDISASMSYMCTX *PIEMNATIVDISASMSYMCTX;
9120
9121
9122/**
9123 * Resolve address to symbol, if we can.
9124 */
9125static const char *iemNativeDisasmGetSymbol(PIEMNATIVDISASMSYMCTX pSymCtx, uintptr_t uAddress, char *pszBuf, size_t cbBuf)
9126{
9127 PCIEMTB const pTb = pSymCtx->pTb;
9128 uintptr_t const offNative = (uAddress - (uintptr_t)pTb->Native.paInstructions) / sizeof(IEMNATIVEINSTR);
9129 if (offNative <= pTb->Native.cInstructions)
9130 {
9131#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9132 /*
9133 * Scan debug info for a matching label.
9134 * Since the debug info should be 100% linear, we can do a binary search here.
9135 */
9136 PCIEMTBDBG const pDbgInfo = pSymCtx->pDbgInfo;
9137 if (pDbgInfo)
9138 {
9139 uint32_t const cEntries = pDbgInfo->cEntries;
9140 uint32_t idxEnd = cEntries;
9141 uint32_t idxStart = 0;
9142 for (;;)
9143 {
9144 /* Find a NativeOffset record close to the midpoint. */
9145 uint32_t idx = idxStart + (idxEnd - idxStart) / 2;
9146 while (idx > idxStart && pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9147 idx--;
9148 if (pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9149 {
9150 idx = idxStart + (idxEnd - idxStart) / 2 + 1;
9151 while (idx < idxEnd && pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9152 idx++;
9153 if (idx >= idxEnd)
9154 break;
9155 }
9156
9157 /* Do the binary searching thing. */
9158 if (offNative < pDbgInfo->aEntries[idx].NativeOffset.offNative)
9159 {
9160 if (idx > idxStart)
9161 idxEnd = idx;
9162 else
9163 break;
9164 }
9165 else if (offNative > pDbgInfo->aEntries[idx].NativeOffset.offNative)
9166 {
9167 idx += 1;
9168 if (idx < idxEnd)
9169 idxStart = idx;
9170 else
9171 break;
9172 }
9173 else
9174 {
9175 /* Got a matching offset, scan forward till we hit a label, but
9176 stop when the native offset changes. */
9177 while (++idx < cEntries)
9178 switch (pDbgInfo->aEntries[idx].Gen.uType)
9179 {
9180 case kIemTbDbgEntryType_Label:
9181 {
9182 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)pDbgInfo->aEntries[idx].Label.enmLabel;
9183 const char * const pszName = iemNativeGetLabelName(enmLabel);
9184 if (enmLabel < kIemNativeLabelType_FirstWithMultipleInstances)
9185 return pszName;
9186 RTStrPrintf(pszBuf, cbBuf, "%s_%u", pszName, pDbgInfo->aEntries[idx].Label.uData);
9187 return pszBuf;
9188 }
9189
9190 case kIemTbDbgEntryType_NativeOffset:
9191 if (pDbgInfo->aEntries[idx].NativeOffset.offNative != offNative)
9192 return NULL;
9193 break;
9194 }
9195 break;
9196 }
9197 }
9198 }
9199#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9200 }
9201 else
9202 {
9203 PCIEMNATIVEPERCHUNKCTX const pChunkCtx = pSymCtx->pCtx;
9204 if (pChunkCtx)
9205 for (uint32_t i = 1; i < RT_ELEMENTS(pChunkCtx->apExitLabels); i++)
9206 if ((PIEMNATIVEINSTR)uAddress == pChunkCtx->apExitLabels[i])
9207 return iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true /*fCommonCode*/);
9208 }
9209 RT_NOREF(pszBuf, cbBuf);
9210 return NULL;
9211}
9212
9213#ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9214
9215/**
9216 * @callback_method_impl{FNDISGETSYMBOL}
9217 */
9218static DECLCALLBACK(int) iemNativeDisasmGetSymbolCb(PCDISSTATE pDis, uint32_t u32Sel, RTUINTPTR uAddress,
9219 char *pszBuf, size_t cchBuf, RTINTPTR *poff, void *pvUser)
9220{
9221 const char * const pszSym = iemNativeDisasmGetSymbol((PIEMNATIVDISASMSYMCTX)pvUser, uAddress, pszBuf, cchBuf);
9222 if (pszSym)
9223 {
9224 *poff = 0;
9225 if (pszSym != pszBuf)
9226 return RTStrCopy(pszBuf, cchBuf, pszSym);
9227 return VINF_SUCCESS;
9228 }
9229 RT_NOREF(pDis, u32Sel);
9230 return VERR_SYMBOL_NOT_FOUND;
9231}
9232
9233#else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9234
9235/**
9236 * Annotates an instruction decoded by the capstone disassembler.
9237 */
9238static const char *
9239iemNativeDisasmAnnotateCapstone(PIEMNATIVDISASMSYMCTX pSymCtx, cs_insn const *pInstr, char *pszBuf, size_t cchBuf)
9240{
9241# if defined(RT_ARCH_ARM64)
9242 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9243 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9244 {
9245        /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9246 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9247 char const *psz = strchr(pInstr->op_str, '[');
9248 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9249 {
9250 uint32_t const offVCpu = psz[3] == '8'? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
9251 int32_t off = -1;
9252 psz += 4;
9253 if (*psz == ']')
9254 off = 0;
9255 else if (*psz == ',')
9256 {
9257 psz = RTStrStripL(psz + 1);
9258 if (*psz == '#')
9259 off = RTStrToInt32(&psz[1]);
9260 /** @todo deal with index registers and LSL as well... */
9261 }
9262 if (off >= 0)
9263 return iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9264 }
9265 }
9266 else if (pInstr->id == ARM64_INS_B || pInstr->id == ARM64_INS_BL)
9267 {
9268 const char *pszAddr = strchr(pInstr->op_str, '#');
9269 if (pszAddr)
9270 {
9271 uint64_t uAddr = RTStrToUInt64(pszAddr + 1);
9272 if (uAddr != 0)
9273 return iemNativeDisasmGetSymbol(pSymCtx, uAddr, pszBuf, cchBuf);
9274 }
9275 }
9276# endif
9277 RT_NOREF(pSymCtx, pInstr, pszBuf, cchBuf);
9278 return NULL;
9279}
9280#endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9281
9282
9283DECLHIDDEN(void) iemNativeDisassembleTb(PVMCPU pVCpu, PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
9284{
9285 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
9286#if defined(RT_ARCH_AMD64)
9287 static const char * const a_apszMarkers[] =
9288 {
9289 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
9290 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
9291 };
9292#endif
9293
9294 char szDisBuf[512];
9295 DISSTATE Dis;
9296 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
9297 uint32_t const cNative = pTb->Native.cInstructions;
9298 uint32_t offNative = 0;
9299#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9300 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
9301#endif
9302 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9303 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9304 : DISCPUMODE_64BIT;
9305#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9306 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, iemExecMemGetTbChunkCtx(pVCpu, pTb), pDbgInfo };
9307#else
9308 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, iemExecMemGetTbChunkCtx(pVCpu, pTb) };
9309#endif
9310#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9311 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
9312#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9313 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
9314#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9315# error "Port me"
9316#else
9317 csh hDisasm = ~(size_t)0;
9318# if defined(RT_ARCH_AMD64)
9319 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
9320# elif defined(RT_ARCH_ARM64)
9321 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
9322# else
9323# error "Port me"
9324# endif
9325 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
9326
9327 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
9328 //Assert(rcCs == CS_ERR_OK);
9329#endif
9330
9331 /*
9332 * Print TB info.
9333 */
9334 pHlp->pfnPrintf(pHlp,
9335 "pTb=%p: GCPhysPc=%RGp (%%%RGv) cInstructions=%u LB %#x cRanges=%u\n"
9336 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9337 pTb, pTb->GCPhysPc,
9338#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9339 pTb->pDbgInfo ? pTb->pDbgInfo->FlatPc : RTGCPTR_MAX,
9340#else
9341 pTb->FlatPc,
9342#endif
9343 pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9344 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9345#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9346 if (pDbgInfo && pDbgInfo->cEntries > 1)
9347 {
9348 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9349
9350 /*
9351 * This disassembly is driven by the debug info which follows the native
9352         * code and indicates where the next guest instruction starts,
9353 * where labels are and such things.
9354 */
9355 uint32_t idxThreadedCall = 0;
9356 uint32_t idxGuestInstr = 0;
9357 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9358 uint8_t idxRange = UINT8_MAX;
9359 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9360 uint32_t offRange = 0;
9361 uint32_t offOpcodes = 0;
9362 uint32_t const cbOpcodes = pTb->cbOpcodes;
9363 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9364 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9365 uint32_t iDbgEntry = 1;
9366 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9367
9368 while (offNative < cNative)
9369 {
9370 /* If we're at or have passed the point where the next chunk of debug
9371 info starts, process it. */
9372 if (offDbgNativeNext <= offNative)
9373 {
9374 offDbgNativeNext = UINT32_MAX;
9375 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9376 {
9377 switch ((IEMTBDBGENTRYTYPE)pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9378 {
9379 case kIemTbDbgEntryType_GuestInstruction:
9380 {
9381 /* Did the exec flag change? */
9382 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9383 {
9384 pHlp->pfnPrintf(pHlp,
9385 " fExec change %#08x -> %#08x %s\n",
9386 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9387 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9388 szDisBuf, sizeof(szDisBuf)));
9389 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9390 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9391 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9392 : DISCPUMODE_64BIT;
9393 }
9394
9395                        /* New opcode range? We need to fend off a spurious debug info entry here for cases
9396 where the compilation was aborted before the opcode was recorded and the actual
9397 instruction was translated to a threaded call. This may happen when we run out
9398 of ranges, or when some complicated interrupts/FFs are found to be pending or
9399 similar. So, we just deal with it here rather than in the compiler code as it
9400 is a lot simpler to do here. */
9401 if ( idxRange == UINT8_MAX
9402 || idxRange >= cRanges
9403 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9404 {
9405 idxRange += 1;
9406 if (idxRange < cRanges)
9407 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9408 else
9409 continue;
9410 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9411 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9412 + (pTb->aRanges[idxRange].idxPhysPage == 0
9413 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9414 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9415 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9416 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9417 pTb->aRanges[idxRange].idxPhysPage);
9418 GCPhysPc += offRange;
9419 }
9420
9421 /* Disassemble the instruction. */
9422 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
9423 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9424 uint32_t cbInstr = 1;
9425 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9426 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9427 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9428 if (RT_SUCCESS(rc))
9429 {
9430 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9431 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9432 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9433 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9434
9435 static unsigned const s_offMarker = 55;
9436 static char const s_szMarker[] = " ; <--- guest";
9437 if (cch < s_offMarker)
9438 {
9439 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9440 cch = s_offMarker;
9441 }
9442 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9443 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9444
9445 pHlp->pfnPrintf(pHlp, "\n %%%%%RGp: %s #%u\n", GCPhysPc, szDisBuf, idxGuestInstr);
9446 }
9447 else
9448 {
9449 pHlp->pfnPrintf(pHlp, "\n %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9450 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9451 cbInstr = 1;
9452 }
9453 idxGuestInstr++;
9454 GCPhysPc += cbInstr;
9455 offOpcodes += cbInstr;
9456 offRange += cbInstr;
9457 continue;
9458 }
9459
9460 case kIemTbDbgEntryType_ThreadedCall:
9461 pHlp->pfnPrintf(pHlp,
9462 " Call #%u to %s (%u args) - %s\n",
9463 idxThreadedCall,
9464 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9465 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9466 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9467 idxThreadedCall++;
9468 continue;
9469
9470 case kIemTbDbgEntryType_GuestRegShadowing:
9471 {
9472 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9473 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9474 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9475 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9476 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9477 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9478 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
9479 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9480 else
9481 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9482 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9483 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9484 continue;
9485 }
9486
9487 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9488 {
9489 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9490 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9491 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9492 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9493 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9494 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9495 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9496 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9497 else
9498 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9499 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9500 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9501 continue;
9502 }
9503
9504 case kIemTbDbgEntryType_Label:
9505 {
9506 const char *pszName = iemNativeGetLabelName((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel);
9507 if (pDbgInfo->aEntries[iDbgEntry].Label.enmLabel >= kIemNativeLabelType_FirstWithMultipleInstances)
9508 {
9509 const char *pszComment = pDbgInfo->aEntries[iDbgEntry].Label.enmLabel == kIemNativeLabelType_Else
9510 ? " ; regs state restored pre-if-block" : "";
9511 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9512 }
9513 else
9514 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9515 continue;
9516 }
9517
9518 case kIemTbDbgEntryType_NativeOffset:
9519 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9520 Assert(offDbgNativeNext >= offNative);
9521 break;
9522
9523# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9524 case kIemTbDbgEntryType_DelayedPcUpdate:
9525 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9526 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9527 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9528 continue;
9529# endif
9530
9531# ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9532 case kIemTbDbgEntryType_GuestRegDirty:
9533 {
9534 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9535 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
9536 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
9537 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
9538 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
9539 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
9540 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
9541 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
9542 pszGstReg, pszHstReg);
9543 continue;
9544 }
9545
9546 case kIemTbDbgEntryType_GuestRegWriteback:
9547 pHlp->pfnPrintf(pHlp, " Writing dirty %s registers (gst %#RX32)\n",
9548 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
9549 (uint64_t)pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg
9550 << (pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.cShift * 25));
9551 continue;
9552# endif
9553
9554# ifdef IEMNATIVE_WITH_EFLAGS_POSTPONING
9555 case kIemTbDbgEntryType_PostponedEFlagsCalc:
9556 {
9557 const char *pszOp = "!unknown!";
9558 switch ((IEMNATIVE_POSTPONED_EFL_OP_T)pDbgInfo->aEntries[iDbgEntry].PostponedEflCalc.enmOp)
9559 {
9560 case kIemNativePostponedEflOp_Logical: pszOp = "logical"; break;
9561 case kIemNativePostponedEflOp_Invalid: break;
9562 case kIemNativePostponedEflOp_End: break;
9563 }
9564 pHlp->pfnPrintf(pHlp, " Postponed EFLAGS calc #%u: %s %u bits\n",
9565 pDbgInfo->aEntries[iDbgEntry].PostponedEflCalc.idxEmit, pszOp,
9566 pDbgInfo->aEntries[iDbgEntry].PostponedEflCalc.cOpBits);
9567 continue;
9568 }
9569# endif
9570 default:
9571 AssertFailed();
9572 continue;
9573 }
9574 /* Break out of the loop at kIemTbDbgEntryType_NativeOffset. */
9575 iDbgEntry++;
9576 break;
9577 }
9578 }
9579
9580 /*
9581 * Disassemble the next native instruction.
9582 */
9583 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9584# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9585 uint32_t cbInstr = sizeof(paNative[0]);
9586 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9587 if (RT_SUCCESS(rc))
9588 {
9589# if defined(RT_ARCH_AMD64)
9590 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9591 {
9592 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
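                     /* The marker's 32-bit payload packs the call index in bits 0..14, a
                        'recompiled' flag in bit 15, and the threaded function number in the
                        high word; see the iemNativeEmitMarker call in iemNativeRecompile. */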
9593 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9594 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9595 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9596 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9597 uInfo & 0x8000 ? "recompiled" : "todo");
9598 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9599 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9600 else
9601 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9602 }
9603 else
9604# endif
9605 {
9606 const char *pszAnnotation = NULL;
9607# ifdef RT_ARCH_AMD64
9608 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9609 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9610 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9611 iemNativeDisasmGetSymbolCb, &SymCtx);
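                 /* If the instruction addresses memory relative to the fixed pVMCPU register,
                    try to resolve the displacement to a field name so it can be appended as an
                    annotation below. */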
9612 PCDISOPPARAM pMemOp;
9613 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[0].fUse))
9614 pMemOp = &Dis.aParams[0];
9615 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[1].fUse))
9616 pMemOp = &Dis.aParams[1];
9617 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[2].fUse))
9618 pMemOp = &Dis.aParams[2];
9619 else
9620 pMemOp = NULL;
9621 if ( pMemOp
9622 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9623 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9624 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9625 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9626
9627# elif defined(RT_ARCH_ARM64)
9628 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9629 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9630 iemNativeDisasmGetSymbolCb, &SymCtx);
9631# else
9632# error "Port me"
9633# endif
9634 if (pszAnnotation)
9635 {
9636 static unsigned const s_offAnnotation = 55;
9637 size_t const cchAnnotation = strlen(pszAnnotation);
9638 size_t cchDis = strlen(szDisBuf);
9639 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
9640 {
9641 if (cchDis < s_offAnnotation)
9642 {
9643 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9644 cchDis = s_offAnnotation;
9645 }
9646 szDisBuf[cchDis++] = ' ';
9647 szDisBuf[cchDis++] = ';';
9648 szDisBuf[cchDis++] = ' ';
9649 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9650 }
9651 }
9652 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9653 }
9654 }
9655 else
9656 {
9657# if defined(RT_ARCH_AMD64)
9658 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9659 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9660# elif defined(RT_ARCH_ARM64)
9661 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9662# else
9663# error "Port me"
9664# endif
9665 cbInstr = sizeof(paNative[0]);
9666 }
9667 offNative += cbInstr / sizeof(paNative[0]);
9668
9669# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9670 cs_insn *pInstr;
9671 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9672 (uintptr_t)pNativeCur, 1, &pInstr);
9673 if (cInstrs > 0)
9674 {
9675 Assert(cInstrs == 1);
9676 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9677 size_t const cchOp = strlen(pInstr->op_str);
9678# if defined(RT_ARCH_AMD64)
9679 if (pszAnnotation)
9680 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9681 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9682 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9683 else
9684 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9685 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9686
9687# else
9688 if (pszAnnotation)
9689 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9690 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9691 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9692 else
9693 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9694 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9695# endif
9696 offNative += pInstr->size / sizeof(*pNativeCur);
9697 cs_free(pInstr, cInstrs);
9698 }
9699 else
9700 {
9701# if defined(RT_ARCH_AMD64)
9702 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9703 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9704# else
9705 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9706# endif
9707 offNative++;
9708 }
9709# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9710 }
9711 }
9712 else
9713#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9714 {
9715 /*
9716 * No debug info, just disassemble the x86 code and then the native code.
9717 *
9718 * First the guest code:
9719 */
9720 for (unsigned i = 0; i < pTb->cRanges; i++)
9721 {
9722 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9723 + (pTb->aRanges[i].idxPhysPage == 0
9724 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9725 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9726 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9727 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9728 unsigned off = pTb->aRanges[i].offOpcodes;
9729 /** @todo this ain't working when crossing pages! */
9730 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9731 while (off < cbOpcodes)
9732 {
9733 uint32_t cbInstr = 1;
9734 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9735 &pTb->pabOpcodes[off], cbOpcodes - off,
9736 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9737 if (RT_SUCCESS(rc))
9738 {
9739 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9740 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9741 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9742 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9743 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9744 GCPhysPc += cbInstr;
9745 off += cbInstr;
9746 }
9747 else
9748 {
9749 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - disassembly failure %Rrc\n",
9750 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9751 break;
9752 }
9753 }
9754 }
9755
9756 /*
9757 * Then the native code:
9758 */
9759 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9760 while (offNative < cNative)
9761 {
9762 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9763#ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9764 uint32_t cbInstr = sizeof(paNative[0]);
9765 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9766 if (RT_SUCCESS(rc))
9767 {
9768# if defined(RT_ARCH_AMD64)
9769 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9770 {
9771 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9772 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9773 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9774 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9775 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9776 uInfo & 0x8000 ? "recompiled" : "todo");
9777 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9778 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9779 else
9780 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9781 }
9782 else
9783# endif
9784 {
9785# ifdef RT_ARCH_AMD64
9786 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9787 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9788 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9789 iemNativeDisasmGetSymbolCb, &SymCtx);
9790# elif defined(RT_ARCH_ARM64)
9791 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9792 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9793 iemNativeDisasmGetSymbolCb, &SymCtx);
9794# else
9795# error "Port me"
9796# endif
9797 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9798 }
9799 }
9800 else
9801 {
9802# if defined(RT_ARCH_AMD64)
9803 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9804 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9805# else
9806 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9807# endif
9808 cbInstr = sizeof(paNative[0]);
9809 }
9810 offNative += cbInstr / sizeof(paNative[0]);
9811
9812#else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9813 cs_insn *pInstr;
9814 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9815 (uintptr_t)pNativeCur, 1, &pInstr);
9816 if (cInstrs > 0)
9817 {
9818 Assert(cInstrs == 1);
9819 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9820 size_t const cchOp = strlen(pInstr->op_str);
9821# if defined(RT_ARCH_AMD64)
9822 if (pszAnnotation)
9823 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9824 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9825 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9826 else
9827 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9828 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9829
9830# else
9831 if (pszAnnotation)
9832 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9833 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9834 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9835 else
9836 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9837 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9838# endif
9839 offNative += pInstr->size / sizeof(*pNativeCur);
9840 cs_free(pInstr, cInstrs);
9841 }
9842 else
9843 {
9844# if defined(RT_ARCH_AMD64)
9845 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9846 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9847# else
9848 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9849# endif
9850 offNative++;
9851 }
9852#endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9853 }
9854 }
9855
9856#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9857 /* Cleanup. */
9858 cs_close(&hDisasm);
9859#endif
9860}
9861
9862
9863/** Emit alignment padding between labels / functions. */
9864DECL_INLINE_THROW(uint32_t)
9865iemNativeRecompileEmitAlignmentPadding(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fAlignMask)
9866{
9867 if (off & fAlignMask)
9868 {
9869 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, fAlignMask + 1);
9870 while (off & fAlignMask)
9871#if defined(RT_ARCH_AMD64)
9872 pCodeBuf[off++] = 0xcc;
9873#elif defined(RT_ARCH_ARM64)
9874 pCodeBuf[off++] = Armv8A64MkInstrBrk(0xcccc);
9875#else
9876# error "port me"
9877#endif
9878 }
9879 return off;
9880}
9881
9882
9883/**
9884 * Called when a new chunk is allocated to emit common per-chunk code.
9885 *
9886 * Allocates a per-chunk context directly from the chunk itself and places the
9887 * common code there.
9888 *
9889 * @returns VBox status code.
9890 * @param pVCpu The cross context virtual CPU structure of the calling
9891 * thread.
9892 * @param idxChunk The index of the chunk being added and requiring a
9893 * common code context.
9894 * @param ppCtx Where to return the pointer to the chunk context start.
9895 */
9896DECLHIDDEN(int) iemNativeRecompileAttachExecMemChunkCtx(PVMCPU pVCpu, uint32_t idxChunk, PCIEMNATIVEPERCHUNKCTX *ppCtx)
9897{
9898 *ppCtx = NULL;
9899
9900 /*
9901 * Allocate a new recompiler state (since we're likely to be called while
9902 * the default one is fully loaded already with a recompiled TB).
9903 *
9904 * This is a bit of overkill, but this isn't a frequently used code path.
9905 */
9906 PIEMRECOMPILERSTATE pReNative = iemNativeInit(pVCpu, NULL);
9907 AssertReturn(pReNative, VERR_NO_MEMORY);
9908
9909#if defined(RT_ARCH_AMD64)
9910 uint32_t const fAlignMask = 15;
9911#elif defined(RT_ARCH_ARM64)
9912 uint32_t const fAlignMask = 31 / 4;
9913#else
9914# error "port me"
9915#endif
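     /* Note: fAlignMask is in units of IEMNATIVEINSTR entries, i.e. bytes on AMD64 (15 yields
        16-byte alignment, padded with int3) and 32-bit words on ARM64 (7 yields 32-byte
        alignment, padded with brk); see iemNativeRecompileEmitAlignmentPadding above. */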
9916 uint32_t aoffLabels[kIemNativeLabelType_LastTbExit + 1] = {0};
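     /* aoffLabels records the instruction offset of each exit label's code within the common
        blob; the offsets are converted into absolute RX pointers in pCtx->apExitLabels below. */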
9917 int rc = VINF_SUCCESS;
9918 uint32_t off = 0;
9919
9920 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9921 {
9922 /*
9923 * Emit the epilog code.
9924 */
9925 aoffLabels[kIemNativeLabelType_ReturnSuccess] = off;
9926 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
9927 uint32_t const offReturnWithStatus = off;
9928 off = iemNativeEmitCoreEpilog(pReNative, off);
9929
9930 /*
9931 * Generate special jump labels. All of these get a copy of the epilog code.
9932 */
9933 static struct
9934 {
9935 IEMNATIVELABELTYPE enmExitReason;
9936 uint32_t (*pfnEmitCore)(PIEMRECOMPILERSTATE pReNative, uint32_t off);
9937 } const s_aSpecialWithEpilogs[] =
9938 {
9939 { kIemNativeLabelType_NonZeroRetOrPassUp, iemNativeEmitCoreRcFiddling },
9940 { kIemNativeLabelType_ReturnBreak, iemNativeEmitCoreReturnBreak },
9941 { kIemNativeLabelType_ReturnBreakFF, iemNativeEmitCoreReturnBreakFF },
9942 { kIemNativeLabelType_ReturnWithFlags, iemNativeEmitCoreReturnWithFlags },
9943 };
9944 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSpecialWithEpilogs); i++)
9945 {
9946 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9947 Assert(aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] == 0);
9948 aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] = off;
9949 off = s_aSpecialWithEpilogs[i].pfnEmitCore(pReNative, off);
9950 off = iemNativeEmitCoreEpilog(pReNative, off);
9951 }
9952
9953 /*
9954 * Do what iemNativeEmitReturnBreakViaLookup does.
9955 */
9956 static struct
9957 {
9958 IEMNATIVELABELTYPE enmExitReason;
9959 uintptr_t pfnHelper;
9960 } const s_aViaLookup[] =
9961 {
9962 { kIemNativeLabelType_ReturnBreakViaLookup,
9963 (uintptr_t)iemNativeHlpReturnBreakViaLookup<false /*a_fWithIrqCheck*/> },
9964 { kIemNativeLabelType_ReturnBreakViaLookupWithIrq,
9965 (uintptr_t)iemNativeHlpReturnBreakViaLookup<true /*a_fWithIrqCheck*/> },
9966 { kIemNativeLabelType_ReturnBreakViaLookupWithTlb,
9967 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<false /*a_fWithIrqCheck*/> },
9968 { kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq,
9969 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<true /*a_fWithIrqCheck*/> },
9970 };
9971 uint32_t const offReturnBreak = aoffLabels[kIemNativeLabelType_ReturnBreak]; Assert(offReturnBreak != 0);
9972 for (uint32_t i = 0; i < RT_ELEMENTS(s_aViaLookup); i++)
9973 {
9974 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9975 Assert(aoffLabels[s_aViaLookup[i].enmExitReason] == 0);
9976 aoffLabels[s_aViaLookup[i].enmExitReason] = off;
9977 off = iemNativeEmitCoreViaLookupDoOne(pReNative, off, offReturnBreak, s_aViaLookup[i].pfnHelper);
9978 }
9979
9980 /*
9981 * Generate simple TB tail labels that just call a helper with a pVCpu
9982 * argument and either return or longjmp/throw a non-zero status.
9983 */
9984 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9985 static struct
9986 {
9987 IEMNATIVELABELTYPE enmExitReason;
9988 bool fWithEpilog;
9989 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9990 } const s_aSimpleTailLabels[] =
9991 {
9992 { kIemNativeLabelType_RaiseDe, false, iemNativeHlpExecRaiseDe },
9993 { kIemNativeLabelType_RaiseUd, false, iemNativeHlpExecRaiseUd },
9994 { kIemNativeLabelType_RaiseSseRelated, false, iemNativeHlpExecRaiseSseRelated },
9995 { kIemNativeLabelType_RaiseAvxRelated, false, iemNativeHlpExecRaiseAvxRelated },
9996 { kIemNativeLabelType_RaiseSseAvxFpRelated, false, iemNativeHlpExecRaiseSseAvxFpRelated },
9997 { kIemNativeLabelType_RaiseNm, false, iemNativeHlpExecRaiseNm },
9998 { kIemNativeLabelType_RaiseGp0, false, iemNativeHlpExecRaiseGp0 },
9999 { kIemNativeLabelType_RaiseMf, false, iemNativeHlpExecRaiseMf },
10000 { kIemNativeLabelType_RaiseXf, false, iemNativeHlpExecRaiseXf },
10001 { kIemNativeLabelType_ObsoleteTb, true, iemNativeHlpObsoleteTb },
10002 { kIemNativeLabelType_NeedCsLimChecking, true, iemNativeHlpNeedCsLimChecking },
10003 { kIemNativeLabelType_CheckBranchMiss, true, iemNativeHlpCheckBranchMiss },
10004 };
10005 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSimpleTailLabels); i++)
10006 {
10007 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
10008 Assert(!aoffLabels[s_aSimpleTailLabels[i].enmExitReason]);
10009 aoffLabels[s_aSimpleTailLabels[i].enmExitReason] = off;
10010
10011 /* int pfnCallback(PVMCPUCC pVCpu) */
10012 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10013 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)s_aSimpleTailLabels[i].pfnCallback);
10014
10015 /* If the callback is supposed to return with a status code we inline the epilog
10016 sequence for better speed. Otherwise, if the callback shouldn't return because
10017 it throws/longjmps, we just jump to the return sequence to be on the safe side. */
10018 if (s_aSimpleTailLabels[i].fWithEpilog)
10019 off = iemNativeEmitCoreEpilog(pReNative, off);
10020 else
10021 {
10022#ifdef VBOX_STRICT
10023 off = iemNativeEmitBrk(pReNative, off, 0x2201);
10024#endif
10025 off = iemNativeEmitJmpToFixed(pReNative, off, offReturnWithStatus);
10026 }
10027 }
10028
10029
10030#ifdef VBOX_STRICT
10031 /* Make sure we've generated code for all labels. */
10032 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(aoffLabels); i++)
10033 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_ReturnSuccess);
10034#endif
10035 }
10036 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10037 {
10038 Log(("iemNativeRecompileAttachExecMemChunkCtx: Caught %Rrc while recompiling!\n", rc));
10039 iemNativeTerm(pReNative);
10040 return rc;
10041 }
10042 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10043
10044 /*
10045 * Allocate memory for the context (first) and the common code (last).
10046 */
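     /* Rough layout of the allocation: [IEMNATIVEPERCHUNKCTX, padded to 64 bytes][common code];
        the RX code pointer is therefore just the context pointer advanced by cbCtx. */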
10047 PIEMNATIVEPERCHUNKCTX pCtx;
10048 uint32_t const cbCtx = RT_ALIGN_32(sizeof(*pCtx), 64);
10049 uint32_t const cbCode = off * sizeof(IEMNATIVEINSTR);
10050 PIEMNATIVEINSTR paFinalCommonCodeRx = NULL;
10051 pCtx = (PIEMNATIVEPERCHUNKCTX)iemExecMemAllocatorAllocFromChunk(pVCpu, idxChunk, cbCtx + cbCode, &paFinalCommonCodeRx);
10052 AssertLogRelMsgReturnStmt(pCtx, ("cbCtx=%#x cbCode=%#x idxChunk=%#x\n", cbCtx, cbCode, idxChunk),
10053 iemNativeTerm(pReNative), VERR_OUT_OF_RESOURCES);
10054
10055 /*
10056 * Copy over the generated code.
10057 * There should be no fixups or labels defined here.
10058 */
10059 paFinalCommonCodeRx = (PIEMNATIVEINSTR)((uintptr_t)paFinalCommonCodeRx + cbCtx);
10060 memcpy((PIEMNATIVEINSTR)((uintptr_t)pCtx + cbCtx), pReNative->pInstrBuf, cbCode);
10061
10062 Assert(pReNative->cFixups == 0);
10063 Assert(pReNative->cLabels == 0);
10064
10065 /*
10066 * Initialize the context.
10067 */
10068 AssertCompile(kIemNativeLabelType_Invalid == 0);
10069 AssertCompile(RT_ELEMENTS(pCtx->apExitLabels) == RT_ELEMENTS(aoffLabels));
10070 pCtx->apExitLabels[kIemNativeLabelType_Invalid] = 0;
10071 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(pCtx->apExitLabels); i++)
10072 {
10073 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_ReturnSuccess);
10074 pCtx->apExitLabels[i] = &paFinalCommonCodeRx[aoffLabels[i]];
10075 Log10((" apExitLabels[%u]=%p %s\n", i, pCtx->apExitLabels[i], iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true)));
10076 }
10077
10078 iemExecMemAllocatorReadyForUse(pVCpu, pCtx, cbCtx + cbCode);
10079
10080 iemNativeTerm(pReNative);
10081 *ppCtx = pCtx;
10082 return VINF_SUCCESS;
10083}
10084
10085
10086/**
10087 * Recompiles the given threaded TB into a native one.
10088 *
10089 * In case of failure the translation block will be returned as-is.
10090 *
10091 * @returns pTb.
10092 * @param pVCpu The cross context virtual CPU structure of the calling
10093 * thread.
10094 * @param pTb The threaded translation block to recompile to native.
10095 */
10096IEM_DECL_MSC_GUARD_IGNORE DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
10097{
10098#if 0 /* For profiling the native recompiler code. */
10099l_profile_again:
10100#endif
10101 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
10102
10103 /*
10104 * The first time thru, we allocate the recompiler state and save it;
10105 * all the other times we'll just reuse the saved one after a quick reset.
10106 */
10107 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
10108 if (RT_LIKELY(pReNative))
10109 iemNativeReInit(pReNative, pTb);
10110 else
10111 {
10112 pReNative = iemNativeInit(pVCpu, pTb);
10113 AssertReturn(pReNative, pTb);
10114 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative; /* save it */
10115 }
10116
10117#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
10118 /*
10119 * First do liveness analysis. This is done backwards.
10120 */
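     /* Sketch of the pass below: the final entry is initialised as all-unused, then the
        per-function liveness callbacks run from the last call to the first, each deriving
        entry [idxCall - 1] from entry [idxCall] and the call itself; the forward recompilation
        loop consumes the result via pReNative->idxCurCall. */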
10121 {
10122 uint32_t idxCall = pTb->Thrd.cCalls;
10123 if (idxCall <= pReNative->cLivenessEntriesAlloc)
10124 { /* likely */ }
10125 else
10126 {
10127 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
10128 while (idxCall > cAlloc)
10129 cAlloc *= 2;
10130 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
10131 AssertReturn(pvNew, pTb);
10132 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
10133 pReNative->cLivenessEntriesAlloc = cAlloc;
10134 }
10135 AssertReturn(idxCall > 0, pTb);
10136 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
10137
10138 /* The initial (final) entry. */
10139 idxCall--;
10140 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
10141
10142 /* Loop backwards thru the calls and fill in the other entries. */
10143 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
10144 while (idxCall > 0)
10145 {
10146 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
10147 Assert(pfnLiveness);
10148 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
10149 pCallEntry--;
10150 idxCall--;
10151 }
10152 }
10153#endif
10154
10155 /*
10156 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
10157 * for aborting if an error happens.
10158 */
10159 uint32_t cCallsLeft = pTb->Thrd.cCalls;
10160#ifdef LOG_ENABLED
10161 uint32_t const cCallsOrg = cCallsLeft;
10162#endif
10163 uint32_t off = 0;
10164 int rc = VINF_SUCCESS;
10165 IEMNATIVE_TRY_SETJMP(pReNative, rc)
10166 {
10167 /*
10168 * Convert the calls to native code.
10169 */
10170#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10171 int32_t iGstInstr = -1;
10172#endif
10173#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
10174 uint32_t cThreadedCalls = 0;
10175 uint32_t cRecompiledCalls = 0;
10176#endif
10177#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(IEM_WITH_INTRA_TB_JUMPS) || defined(VBOX_STRICT) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10178 uint32_t idxCurCall = 0;
10179#endif
10180 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
10181 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
10182 while (cCallsLeft-- > 0)
10183 {
10184 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
10185#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10186 pReNative->idxCurCall = idxCurCall;
10187#endif
10188
10189#ifdef IEM_WITH_INTRA_TB_JUMPS
10190 /*
10191 * Define label for jump targets (currently only the first entry).
10192 */
10193 if (!(pCallEntry->fFlags & IEMTHREADEDCALLENTRY_F_JUMP_TARGET))
10194 { /* likely */ }
10195 else
10196 {
10197 iemNativeLabelCreate(pReNative, kIemNativeLabelType_LoopJumpTarget, off);
10198 Assert(idxCurCall == 0); /** @todo when jumping elsewhere, we have to save the register state. */
10199 }
10200#endif
10201
10202 /*
10203 * Debug info, assembly markup and statistics.
10204 */
10205#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
10206 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
10207 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
10208#endif
10209#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10210 iemNativeDbgInfoAddNativeOffset(pReNative, off);
10211 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
10212 {
10213 if (iGstInstr < (int32_t)pTb->cInstructions)
10214 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
10215 else
10216 Assert(iGstInstr == pTb->cInstructions);
10217 iGstInstr = pCallEntry->idxInstr;
10218 }
10219 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
10220#endif
10221#if defined(VBOX_STRICT)
10222 off = iemNativeEmitMarker(pReNative, off,
10223 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
10224#endif
10225#if defined(VBOX_STRICT)
10226 iemNativeRegAssertSanity(pReNative);
10227#endif
10228#ifdef VBOX_WITH_STATISTICS
10229 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
10230#endif
10231
10232#if 0
10233 if ( pTb->GCPhysPc == 0x00000000000c1240
10234 && idxCurCall == 67)
10235 off = iemNativeEmitBrk(pReNative, off, 0xf000);
10236#endif
10237
10238 /*
10239 * Actual work.
10240 */
10241 Log2(("%u[%u]: %s%s (off=%#x)\n", idxCurCall, pCallEntry->idxInstr,
10242 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "(recompiled)" : "(todo)", off));
10243 if (pfnRecom) /** @todo stats on this. */
10244 {
10245 off = pfnRecom(pReNative, off, pCallEntry);
10246 STAM_REL_STATS({cRecompiledCalls++;});
10247 }
10248 else
10249 {
10250 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
10251 STAM_REL_STATS({cThreadedCalls++;});
10252 }
10253 Assert(off <= pReNative->cInstrBufAlloc);
10254 Assert(pReNative->cCondDepth == 0);
10255
10256#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
10257 if (LogIs2Enabled())
10258 {
10259 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
10260# ifndef IEMLIVENESS_EXTENDED_LAYOUT
10261 static const char s_achState[] = "CUXI";
10262# else
10263 /* 0123 4567 89ab cdef */
10264 /* CCCC CCCC */
10265 /* WWWW WWWW */
10266 /* RR RR RR RR */
10267 /* P P P P P P P P */
10268 static const char s_achState[] = "UxRr" "WwMm" "CcQq" "KkNn";
10269# endif
10270
10271 char szGpr[17];
10272 for (unsigned i = 0; i < 16; i++)
10273 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
10274 szGpr[16] = '\0';
10275
10276 char szSegBase[X86_SREG_COUNT + 1];
10277 char szSegLimit[X86_SREG_COUNT + 1];
10278 char szSegAttrib[X86_SREG_COUNT + 1];
10279 char szSegSel[X86_SREG_COUNT + 1];
10280 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
10281 {
10282 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
10283 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
10284 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
10285 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
10286 }
10287 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
10288 = szSegSel[X86_SREG_COUNT] = '\0';
10289
10290 char szEFlags[IEMLIVENESSBIT_IDX_EFL_COUNT + 1];
10291 for (unsigned i = 0; i < IEMLIVENESSBIT_IDX_EFL_COUNT; i++)
10292 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
10293 szEFlags[IEMLIVENESSBIT_IDX_EFL_COUNT] = '\0';
10294
10295 Log2(("liveness: gpr=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
10296 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
10297 }
10298#endif
10299
10300 /*
10301 * Advance.
10302 */
10303 pCallEntry++;
10304#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(IEM_WITH_INTRA_TB_JUMPS) || defined(VBOX_STRICT) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10305 idxCurCall++;
10306#endif
10307 }
10308
10309 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
10310 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
10311 if (!cThreadedCalls)
10312 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
10313
10314 IEMNATIVE_ASSERT_EFLAGS_SKIPPING_AND_POSTPONING(pReNative, UINT32_MAX);
10315
10316#ifdef VBOX_WITH_STATISTICS
10317 off = iemNativeEmitNativeTbExitStats(pReNative, off, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbFinished));
10318#endif
10319
10320 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
10321 off = iemNativeRegFlushPendingWrites(pReNative, off);
10322
10323 /*
10324 * Jump to the common per-chunk epilog code.
10325 */
10326 //off = iemNativeEmitBrk(pReNative, off, 0x1227);
10327 off = iemNativeEmitTbExit<kIemNativeLabelType_ReturnSuccess, true, false>(pReNative, off);
10328
10329 /*
10330 * Generate tail labels with jumps to the common per-chunk code on non-x86 hosts.
10331 */
10332#ifndef RT_ARCH_AMD64
10333 Assert(!(pReNative->bmLabelTypes & ( RT_BIT_64(kIemNativeLabelType_ReturnSuccess)
10334 | RT_BIT_64(kIemNativeLabelType_Invalid) )));
10335 AssertCompile(kIemNativeLabelType_Invalid == 0);
10336 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1U) - 2U);
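         /* The mask keeps bits 1 thru kIemNativeLabelType_LastTbExit: subtracting 2 from
            RT_BIT_64(kIemNativeLabelType_LastTbExit + 1) also clears bit 0, which is
            kIemNativeLabelType_Invalid. */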
10337 if (fTailLabels)
10338 {
10339 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, kIemNativeLabelType_LastTbExit + 1);
10340 do
10341 {
10342 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10343 fTailLabels &= ~RT_BIT_64(enmLabel);
10344
10345 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10346 AssertContinue(idxLabel != UINT32_MAX);
10347 iemNativeLabelDefine(pReNative, idxLabel, off);
10348
10349 iemNativeAddTbExitFixup(pReNative, off, enmLabel);
10350# ifdef RT_ARCH_ARM64
10351 pCodeBuf[off++] = Armv8A64MkInstrB(-1);
10352# else
10353# error "port me"
10354# endif
10355 } while (fTailLabels);
10356 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
10357 }
10358#else
10359 Assert(!(pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1) - 1U))); /* Should not be used! */
10360#endif
10361 }
10362 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10363 {
10364 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
10365 return pTb;
10366 }
10367 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10368 Assert(off <= pReNative->cInstrBufAlloc);
10369
10370 /*
10371 * Make sure all labels have been defined.
10372 */
10373 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
10374#ifdef VBOX_STRICT
10375 uint32_t const cLabels = pReNative->cLabels;
10376 for (uint32_t i = 0; i < cLabels; i++)
10377 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
10378#endif
10379
10380#if 0 /* For profiling the native recompiler code. */
10381 if (pTb->Thrd.cCalls >= 136)
10382 {
10383 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10384 goto l_profile_again;
10385 }
10386#endif
10387
10388 /*
10389 * Allocate executable memory, copy over the code we've generated.
10390 */
10391 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
10392 if (pTbAllocator->pDelayedFreeHead)
10393 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
10394
10395 PIEMNATIVEINSTR paFinalInstrBufRx = NULL;
10396 PCIEMNATIVEPERCHUNKCTX pCtx = NULL;
10397 PIEMNATIVEINSTR const paFinalInstrBuf = iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb,
10398 &paFinalInstrBufRx, &pCtx);
10399
10400 AssertReturn(paFinalInstrBuf, pTb);
10401 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
10402
10403 /*
10404 * Apply fixups.
10405 */
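     /* Rough worked example for the ARM64 RelImm26At0 case below: with a zero addend, a branch
        emitted at instruction offset 0x40 targeting a label at offset 0x10 yields offDisp = -0x30
        in 4-byte instruction units, which is masked into bits [25:0] of the B/BL encoding while
        the top six opcode bits are preserved; hence the +/- 2^25 instruction range assertion. */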
10406 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
10407 uint32_t const cFixups = pReNative->cFixups;
10408 for (uint32_t i = 0; i < cFixups; i++)
10409 {
10410 Assert(paFixups[i].off < off);
10411 Assert(paFixups[i].idxLabel < cLabels);
10412 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
10413 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
10414 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
10415 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
10416 switch (paFixups[i].enmType)
10417 {
10418#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10419 case kIemNativeFixupType_Rel32:
10420 Assert(paFixups[i].off + 4 <= off);
10421 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10422 continue;
10423
10424#elif defined(RT_ARCH_ARM64)
10425 case kIemNativeFixupType_RelImm26At0:
10426 {
10427 Assert(paFixups[i].off < off);
10428 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10429 Assert(offDisp >= -33554432 && offDisp < 33554432);
10430 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10431 continue;
10432 }
10433
10434 case kIemNativeFixupType_RelImm19At5:
10435 {
10436 Assert(paFixups[i].off < off);
10437 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10438 Assert(offDisp >= -262144 && offDisp < 262144);
10439 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
10440 continue;
10441 }
10442
10443 case kIemNativeFixupType_RelImm14At5:
10444 {
10445 Assert(paFixups[i].off < off);
10446 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10447 Assert(offDisp >= -8192 && offDisp < 8192);
10448 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
10449 continue;
10450 }
10451
10452#endif
10453 case kIemNativeFixupType_Invalid:
10454 case kIemNativeFixupType_End:
10455 break;
10456 }
10457 AssertFailed();
10458 }
10459
10460 /*
10461 * Apply TB exit fixups.
10462 */
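     /* Unlike the label fixups above, these resolve against the per-chunk common code
        (pCtx->apExitLabels) rather than labels within the TB, so the displacement is
        computed from the final RX addresses. */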
10463 PIEMNATIVEEXITFIXUP const paTbExitFixups = pReNative->paTbExitFixups;
10464 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
10465 for (uint32_t i = 0; i < cTbExitFixups; i++)
10466 {
10467 Assert(paTbExitFixups[i].off < off);
10468 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(paTbExitFixups[i].enmExitReason));
10469 RTPTRUNION const Ptr = { &paFinalInstrBuf[paTbExitFixups[i].off] };
10470
10471#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10472 Assert(paTbExitFixups[i].off + 4 <= off);
10473 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off + 4];
10474 Assert(offDisp >= INT32_MIN && offDisp <= INT32_MAX);
10475 *Ptr.pi32 = (int32_t)offDisp;
10476
10477#elif defined(RT_ARCH_ARM64)
10478 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off];
10479 Assert(offDisp >= -33554432 && offDisp < 33554432);
10480 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10481
10482#else
10483# error "Port me!"
10484#endif
10485 }
10486
10487 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBufRx, off * sizeof(IEMNATIVEINSTR));
10488 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
10489
10490 /*
10491 * Convert the translation block.
10492 */
10493 RTMemFree(pTb->Thrd.paCalls);
10494 pTb->Native.paInstructions = paFinalInstrBufRx;
10495 pTb->Native.cInstructions = off;
10496 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
10497#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10498 pReNative->pDbgInfo->FlatPc = pTb->FlatPc;
10499 pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
10500 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
10501#endif
10502
10503 Assert(pTbAllocator->cThreadedTbs > 0);
10504 pTbAllocator->cThreadedTbs -= 1;
10505 pTbAllocator->cNativeTbs += 1;
10506 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
10507
10508#ifdef LOG_ENABLED
10509 /*
10510 * Disassemble to the log if enabled.
10511 */
10512 if (LogIs3Enabled())
10513 {
10514 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
10515 iemNativeDisassembleTb(pVCpu, pTb, DBGFR3InfoLogHlp());
10516# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
10517 RTLogFlush(NULL);
10518# endif
10519 }
10520#endif
10521 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
10522
10523 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10524 return pTb;
10525}
10526