VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp@ 106061

Last change on this file since 106061 was 106061, checked in by vboxsync, 5 months ago

Copyright year updates by scm.

1/* $Id: IEMAllN8veRecompiler.cpp 106061 2024-09-16 14:03:52Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : Delayed PC updating.
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023-2024 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include <VBox/vmm/tm.h>
55#include "IEMInternal.h"
56#include <VBox/vmm/vmcc.h>
57#include <VBox/log.h>
58#include <VBox/err.h>
59#include <VBox/dis.h>
60#include <VBox/param.h>
61#include <iprt/assert.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
71# include "/opt/local/include/capstone/capstone.h"
72#endif
73
74#include "IEMInline.h"
75#include "IEMThreadedFunctions.h"
76#include "IEMN8veRecompiler.h"
77#include "IEMN8veRecompilerEmit.h"
78#include "IEMN8veRecompilerTlbLookup.h"
79#include "IEMNativeFunctions.h"
80
81
82/*
83 * Narrow down configs here to avoid wasting time on unused configs.
84 * Note! Same checks in IEMAllThrdRecompiler.cpp.
85 */
86
87#ifndef IEM_WITH_CODE_TLB
88# error The code TLB must be enabled for the recompiler.
89#endif
90
91#ifndef IEM_WITH_DATA_TLB
92# error The data TLB must be enabled for the recompiler.
93#endif
94
95#ifndef IEM_WITH_SETJMP
96# error The setjmp approach must be enabled for the recompiler.
97#endif
98
99/** @todo eliminate this clang build hack. */
100#if RT_CLANG_PREREQ(4, 0)
101# pragma GCC diagnostic ignored "-Wunused-function"
102#endif
103
104
105/*********************************************************************************************************************************
106* Internal Functions *
107*********************************************************************************************************************************/
108#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
109static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
110#endif
111DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
112DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
113 IEMNATIVEGSTREG enmGstReg, uint32_t off);
114DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
115static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode = false);
116
117
118
119/*********************************************************************************************************************************
120* Native Recompilation *
121*********************************************************************************************************************************/
122
123
124/**
125 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
126 */
127IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
128{
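    /* Credit the guest instructions executed in this TB so far (idxInstr) before
       handing the status to the common fiddling code; VINF_IEM_REEXEC_BREAK is
       treated as success. */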
129 pVCpu->iem.s.cInstructions += idxInstr;
130 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
131}
132
133
134/**
135 * Helper for iemNativeHlpReturnBreakViaLookup and iemNativeHlpReturnBreakViaLookupWithTlb.
136 */
137DECL_FORCE_INLINE(bool) iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(PVMCPU pVCpu)
138{
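    /* Mask out force-flags that do not require breaking out of TB execution
       (CR3 syncs, TLB flushes and UNHALT). */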
139 uint64_t fCpu = pVCpu->fLocalForcedActions;
140 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
141 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
142 | VMCPU_FF_TLB_FLUSH
143 | VMCPU_FF_UNHALT );
144 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
145 if (RT_LIKELY( ( !fCpu
146 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
147 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
148 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
149 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
150 return false;
151 return true;
152}
153
154
155/**
156 * Used by TB code to look up and switch directly to the next TB (direct linking) when the guest physical PC is already known.
157 */
158template <bool const a_fWithIrqCheck>
159IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookup,(PVMCPUCC pVCpu, uint8_t idxTbLookup,
160 uint32_t fFlags, RTGCPHYS GCPhysPc))
161{
162 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
163 Assert(idxTbLookup < pTb->cTbLookupEntries);
164 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
165#if 1
166 PIEMTB const pNewTb = *ppNewTb;
167 if (pNewTb)
168 {
169# ifdef VBOX_STRICT
170 uint64_t const uFlatPcAssert = pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base;
171 AssertMsg( (uFlatPcAssert & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.uInstrBufPc
172 && (GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.GCPhysInstrBuf
173 && (GCPhysPc & GUEST_PAGE_OFFSET_MASK) == (uFlatPcAssert & GUEST_PAGE_OFFSET_MASK),
174 ("GCPhysPc=%RGp uFlatPcAssert=%#RX64 uInstrBufPc=%#RX64 GCPhysInstrBuf=%RGp\n",
175 GCPhysPc, uFlatPcAssert, pVCpu->iem.s.uInstrBufPc, pVCpu->iem.s.GCPhysInstrBuf));
176# endif
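        /* The lookup table entry is only a cached candidate; make sure it actually
           targets the same guest physical PC before switching to it. */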
177 if (pNewTb->GCPhysPc == GCPhysPc)
178 {
179# ifdef VBOX_STRICT
180 uint32_t fAssertFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
181 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
182 fAssertFlags |= IEMTB_F_INHIBIT_SHADOW;
183 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
184 fAssertFlags |= IEMTB_F_INHIBIT_NMI;
185# if 1 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. */
186 Assert(IEM_F_MODE_X86_IS_FLAT(fFlags));
187# else
188 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
189 {
190 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
191 if (offFromLim < X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
192 fAssertFlags |= IEMTB_F_CS_LIM_CHECKS;
193 }
194# endif
195 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
196 AssertMsg(fFlags == fAssertFlags, ("fFlags=%#RX32 fAssertFlags=%#RX32 cs:rip=%04x:%#010RX64\n",
197 fFlags, fAssertFlags, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
198#endif
199
200 /*
201 * Check flags + type.
202 */
203 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
204 {
205 /*
206 * Check for interrupts and stuff.
207 */
208 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithTlb.
209 * The main problems are the statistics and to some degree the logging. :/ */
210 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
211 {
212 /* Do polling. */
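                    /* Only touch the timer APIs once the TB poll budget is used up. */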
213 if ( RT_LIKELY((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
214 || iemPollTimers(pVCpu->CTX_SUFF(pVM), pVCpu) == VINF_SUCCESS)
215 {
216 /*
217 * Success. Update statistics and switch to the next TB.
218 */
219 if (a_fWithIrqCheck)
220 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1Irq);
221 else
222 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoIrq);
223
224 pNewTb->cUsed += 1;
225 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
226 pVCpu->iem.s.pCurTbR3 = pNewTb;
227 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
228 pVCpu->iem.s.cTbExecNative += 1;
229 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
230 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
231 return (uintptr_t)pNewTb->Native.paInstructions;
232 }
233 }
234 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: IRQ or FF pending\n"));
235 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1PendingIrq);
236 }
237 else
238 {
239 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
240 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
241 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchFlags);
242 }
243 }
244 else
245 {
246 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
247 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
248 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchGCPhysPc);
249 }
250 }
251 else
252 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoTb);
253#else
254 NOREF(GCPhysPc);
255#endif
256
257 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
258 return 0;
259}
260
261
262/**
263 * Used by TB code to look up and switch directly to the next TB (direct linking), doing a TLB lookup to determine the guest physical PC first.
264 */
265template <bool const a_fWithIrqCheck>
266IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookupWithTlb,(PVMCPUCC pVCpu, uint8_t idxTbLookup))
267{
268 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
269 Assert(idxTbLookup < pTb->cTbLookupEntries);
270 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
271#if 1
272 PIEMTB const pNewTb = *ppNewTb;
273 if (pNewTb)
274 {
275 /*
276 * Calculate the flags for the next TB and check if they match.
277 */
278 uint32_t fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
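        /* Interrupt shadow / NMI inhibition is part of the TB lookup key, so fold it into the flags. */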
279 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
280 { /* likely */ }
281 else
282 {
283 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
284 fFlags |= IEMTB_F_INHIBIT_SHADOW;
285 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
286 fFlags |= IEMTB_F_INHIBIT_NMI;
287 }
288 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
289 {
290 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
291 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
292 { /* likely */ }
293 else
294 fFlags |= IEMTB_F_CS_LIM_CHECKS;
295 }
296 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
297
298 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
299 {
300 /*
301 * Do the TLB lookup for flat RIP and compare the result with the next TB.
302 *
303 * Note! This replicates iemGetPcWithPhysAndCode and iemGetPcWithPhysAndCodeMissed.
304 */
305 /* Calc the effective PC. */
306 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
307 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
308 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
309
310 /* Advance within the current buffer (PAGE) when possible. */
311 RTGCPHYS GCPhysPc;
312 uint64_t off;
313 if ( pVCpu->iem.s.pbInstrBuf
314 && (off = uPc - pVCpu->iem.s.uInstrBufPc) < pVCpu->iem.s.cbInstrBufTotal) /*ugly*/
315 {
316 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
317 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
318 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
319 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
320 else
321 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
322 GCPhysPc = pVCpu->iem.s.GCPhysInstrBuf + off;
323 }
324 else
325 {
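            /* The PC is outside the current instruction buffer; reset it and do a
               full opcode fetch to (re)establish the physical mapping. */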
326 pVCpu->iem.s.pbInstrBuf = NULL;
327 pVCpu->iem.s.offCurInstrStart = 0;
328 pVCpu->iem.s.offInstrNextByte = 0;
329 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
330 GCPhysPc = pVCpu->iem.s.pbInstrBuf ? pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart : NIL_RTGCPHYS;
331 }
332
333 if (pNewTb->GCPhysPc == GCPhysPc)
334 {
335 /*
336 * Check for interrupts and stuff.
337 */
338 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithPc.
339 * The main problems are the statistics and to some degree the logging. :/ */
340 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
341 {
342 /* Do polling. */
343 if ( RT_LIKELY((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
344 || iemPollTimers(pVCpu->CTX_SUFF(pVM), pVCpu) == VINF_SUCCESS)
345 {
346 /*
347 * Success. Update statistics and switch to the next TB.
348 */
349 if (a_fWithIrqCheck)
350 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2Irq);
351 else
352 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoIrq);
353
354 pNewTb->cUsed += 1;
355 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
356 pVCpu->iem.s.pCurTbR3 = pNewTb;
357 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
358 pVCpu->iem.s.cTbExecNative += 1;
359 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
360 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
361 return (uintptr_t)pNewTb->Native.paInstructions;
362 }
363 }
364 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: IRQ or FF pending\n"));
365 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2PendingIrq);
366 }
367 else
368 {
369 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
370 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
371 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchGCPhysPc);
372 }
373 }
374 else
375 {
376 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
377 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
378 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchFlags);
379 }
380 }
381 else
382 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb);
383#else
384 NOREF(fFlags);
385 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb); /* just for some stats, even if misleading */
386#endif
387
388 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
389 return 0;
390}
391
392
393/**
394 * Used by TB code when it wants to raise a \#DE.
395 */
396IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
397{
398 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseDe);
399 iemRaiseDivideErrorJmp(pVCpu);
400#ifndef _MSC_VER
401 return VINF_IEM_RAISED_XCPT; /* not reached */
402#endif
403}
404
405
406/**
407 * Used by TB code when it wants to raise a \#UD.
408 */
409IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
410{
411 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseUd);
412 iemRaiseUndefinedOpcodeJmp(pVCpu);
413#ifndef _MSC_VER
414 return VINF_IEM_RAISED_XCPT; /* not reached */
415#endif
416}
417
418
419/**
420 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
421 *
422 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
423 */
424IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
425{
426 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseRelated);
427 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
428 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
429 iemRaiseUndefinedOpcodeJmp(pVCpu);
430 else
431 iemRaiseDeviceNotAvailableJmp(pVCpu);
432#ifndef _MSC_VER
433 return VINF_IEM_RAISED_XCPT; /* not reached */
434#endif
435}
436
437
438/**
439 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
440 *
441 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
442 */
443IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
444{
445 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseAvxRelated);
446 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
447 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
448 iemRaiseUndefinedOpcodeJmp(pVCpu);
449 else
450 iemRaiseDeviceNotAvailableJmp(pVCpu);
451#ifndef _MSC_VER
452 return VINF_IEM_RAISED_XCPT; /* not reached */
453#endif
454}
455
456
457/**
458 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
459 *
460 * See IEM_MC_CALL_AVX_XXX/IEM_MC_CALL_SSE_XXX.
461 */
462IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
463{
464 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseAvxFpRelated);
465 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
466 iemRaiseSimdFpExceptionJmp(pVCpu);
467 else
468 iemRaiseUndefinedOpcodeJmp(pVCpu);
469#ifndef _MSC_VER
470 return VINF_IEM_RAISED_XCPT; /* not reached */
471#endif
472}
473
474
475/**
476 * Used by TB code when it wants to raise a \#NM.
477 */
478IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
479{
480 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseNm);
481 iemRaiseDeviceNotAvailableJmp(pVCpu);
482#ifndef _MSC_VER
483 return VINF_IEM_RAISED_XCPT; /* not reached */
484#endif
485}
486
487
488/**
489 * Used by TB code when it wants to raise a \#GP(0).
490 */
491IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
492{
493 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseGp0);
494 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
495#ifndef _MSC_VER
496 return VINF_IEM_RAISED_XCPT; /* not reached */
497#endif
498}
499
500
501/**
502 * Used by TB code when it wants to raise a \#MF.
503 */
504IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
505{
506 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseMf);
507 iemRaiseMathFaultJmp(pVCpu);
508#ifndef _MSC_VER
509 return VINF_IEM_RAISED_XCPT; /* not reached */
510#endif
511}
512
513
514/**
515 * Used by TB code when it wants to raise a \#XF.
516 */
517IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
518{
519 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseXf);
520 iemRaiseSimdFpExceptionJmp(pVCpu);
521#ifndef _MSC_VER
522 return VINF_IEM_RAISED_XCPT; /* not reached */
523#endif
524}
525
526
527/**
528 * Used by TB code when detecting opcode changes.
529 * @see iemThreadeFuncWorkerObsoleteTb
530 */
531IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
532{
533 /* We set fSafeToFree to false because we're being called in the context
534 of a TB callback function, which for native TBs means we cannot release
535 the executable memory until we've returned all the way back to iemTbExec,
536 as that return path goes via the native code generated for the TB. */
537 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
538 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitObsoleteTb);
539 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
540 return VINF_IEM_REEXEC_BREAK;
541}
542
543
544/**
545 * Used by TB code when we need to switch to a TB with CS.LIM checking.
546 */
547IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
548{
549 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
550 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
551 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
552 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
553 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
554 return VINF_IEM_REEXEC_BREAK;
555}
556
557
558/**
559 * Used by TB code when we missed a PC check after a branch.
560 */
561IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
562{
563 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
564 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
565 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
566 pVCpu->iem.s.pbInstrBuf));
567 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
568 return VINF_IEM_REEXEC_BREAK;
569}
570
571
572
573/*********************************************************************************************************************************
574* Helpers: Segmented memory fetches and stores. *
575*********************************************************************************************************************************/
576
577/**
578 * Used by TB code to load unsigned 8-bit data w/ segmentation.
579 */
580IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
581{
582#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
583 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
584#else
585 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
586#endif
587}
588
589
590/**
591 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
592 * to 16 bits.
593 */
594IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
595{
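    /* The int8_t -> int16_t cast does the sign extension; the outer unsigned casts
       then zero-extend the 16-bit result into the 64-bit return register. */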
596#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
597 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
598#else
599 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
600#endif
601}
602
603
604/**
605 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
606 * to 32 bits.
607 */
608IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
609{
610#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
611 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
612#else
613 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
614#endif
615}
616
617/**
618 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
619 * to 64 bits.
620 */
621IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
622{
623#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
624 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
625#else
626 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
627#endif
628}
629
630
631/**
632 * Used by TB code to load unsigned 16-bit data w/ segmentation.
633 */
634IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
635{
636#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
637 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
638#else
639 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
640#endif
641}
642
643
644/**
645 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
646 * to 32 bits.
647 */
648IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
649{
650#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
651 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
652#else
653 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
654#endif
655}
656
657
658/**
659 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
660 * to 64 bits.
661 */
662IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
663{
664#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
665 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
666#else
667 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
668#endif
669}
670
671
672/**
673 * Used by TB code to load unsigned 32-bit data w/ segmentation.
674 */
675IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
676{
677#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
678 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
679#else
680 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
681#endif
682}
683
684
685/**
686 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
687 * to 64 bits.
688 */
689IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
690{
691#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
692 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
693#else
694 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
695#endif
696}
697
698
699/**
700 * Used by TB code to load unsigned 64-bit data w/ segmentation.
701 */
702IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
703{
704#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
705 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
706#else
707 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
708#endif
709}
710
711
712#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
713/**
714 * Used by TB code to load 128-bit data w/ segmentation.
715 */
716IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
717{
718#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
719 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
720#else
721 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
722#endif
723}
724
725
726/**
727 * Used by TB code to load 128-bit data w/ segmentation and SSE alignment checking.
728 */
729IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
730{
731#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
732 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
733#else
734 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
735#endif
736}
737
738
739/**
740 * Used by TB code to load 128-bit data w/ segmentation, without alignment checking.
741 */
742IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
743{
744#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
745 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
746#else
747 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
748#endif
749}
750
751
752/**
753 * Used by TB code to load 256-bit data w/ segmentation, without alignment checking.
754 */
755IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
756{
757#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
758 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
759#else
760 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
761#endif
762}
763
764
765/**
766 * Used by TB code to load 256-bit data w/ segmentation and AVX alignment checking.
767 */
768IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
769{
770#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
771 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
772#else
773 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
774#endif
775}
776#endif
777
778
779/**
780 * Used by TB code to store unsigned 8-bit data w/ segmentation.
781 */
782IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
783{
784#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
785 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
786#else
787 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
788#endif
789}
790
791
792/**
793 * Used by TB code to store unsigned 16-bit data w/ segmentation.
794 */
795IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
796{
797#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
798 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
799#else
800 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
801#endif
802}
803
804
805/**
806 * Used by TB code to store unsigned 32-bit data w/ segmentation.
807 */
808IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
809{
810#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
811 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
812#else
813 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
814#endif
815}
816
817
818/**
819 * Used by TB code to store unsigned 64-bit data w/ segmentation.
820 */
821IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
822{
823#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
824 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
825#else
826 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
827#endif
828}
829
830
831#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
832/**
833 * Used by TB code to store unsigned 128-bit data w/ segmentation and SSE alignment checking.
834 */
835IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
836{
837#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
838 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
839#else
840 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
841#endif
842}
843
844
845/**
846 * Used by TB code to store unsigned 128-bit data w/ segmentation, without alignment checking.
847 */
848IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
849{
850#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
851 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
852#else
853 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
854#endif
855}
856
857
858/**
859 * Used by TB code to store unsigned 256-bit data w/ segmentation, without alignment checking.
860 */
861IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
862{
863#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
864 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
865#else
866 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
867#endif
868}
869
870
871/**
872 * Used by TB code to store unsigned 256-bit data w/ segmentation and AVX alignment checking.
873 */
874IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
875{
876#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
877 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
878#else
879 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
880#endif
881}
882#endif
883
884
885
886/**
887 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
888 */
889IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
890{
891#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
892 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
893#else
894 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
895#endif
896}
897
898
899/**
900 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
901 */
902IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
903{
904#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
905 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
906#else
907 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
908#endif
909}
910
911
912/**
913 * Used by TB code to store a 32-bit selector value onto a generic stack.
914 *
915 * Intel CPUs don't write the whole dword, thus the special function.
916 */
917IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
918{
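    /* Only the low 16 bits of the stack slot are written, matching the Intel
       behaviour noted above. */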
919#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
920 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
921#else
922 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
923#endif
924}
925
926
927/**
928 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
929 */
930IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
931{
932#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
933 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
934#else
935 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
936#endif
937}
938
939
940/**
941 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
942 */
943IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
944{
945#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
946 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
947#else
948 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
949#endif
950}
951
952
953/**
954 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
955 */
956IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
957{
958#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
959 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
960#else
961 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
962#endif
963}
964
965
966/**
967 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
968 */
969IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
970{
971#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
972 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
973#else
974 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
975#endif
976}
977
978
979
980/*********************************************************************************************************************************
981* Helpers: Flat memory fetches and stores. *
982*********************************************************************************************************************************/
983
984/**
985 * Used by TB code to load unsigned 8-bit data w/ flat address.
986 * @note Zero extending the value to 64-bit to simplify assembly.
987 */
988IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
989{
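    /* Passing UINT8_MAX as the segment register index selects the flat
       (unsegmented) address handling in the safe helper. */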
990#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
991 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
992#else
993 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
994#endif
995}
996
997
998/**
999 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1000 * to 16 bits.
1001 * @note Zero extending the value to 64-bit to simplify assembly.
1002 */
1003IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1004{
1005#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1006 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1007#else
1008 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1009#endif
1010}
1011
1012
1013/**
1014 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1015 * to 32 bits.
1016 * @note Zero extending the value to 64-bit to simplify assembly.
1017 */
1018IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1019{
1020#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1021 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1022#else
1023 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1024#endif
1025}
1026
1027
1028/**
1029 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1030 * to 64 bits.
1031 */
1032IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1033{
1034#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1035 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1036#else
1037 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1038#endif
1039}
1040
1041
1042/**
1043 * Used by TB code to load unsigned 16-bit data w/ flat address.
1044 * @note Zero extending the value to 64-bit to simplify assembly.
1045 */
1046IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1047{
1048#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1049 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1050#else
1051 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1052#endif
1053}
1054
1055
1056/**
1057 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1058 * to 32 bits.
1059 * @note Zero extending the value to 64-bit to simplify assembly.
1060 */
1061IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1062{
1063#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1064 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1065#else
1066 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1067#endif
1068}
1069
1070
1071/**
1072 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1073 * to 64 bits.
1074 * @note Zero extending the value to 64-bit to simplify assembly.
1075 */
1076IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1077{
1078#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1079 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1080#else
1081 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1082#endif
1083}
1084
1085
1086/**
1087 * Used by TB code to load unsigned 32-bit data w/ flat address.
1088 * @note Zero extending the value to 64-bit to simplify assembly.
1089 */
1090IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1091{
1092#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1093 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1094#else
1095 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1096#endif
1097}
1098
1099
1100/**
1101 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
1102 * to 64 bits.
1103 * @note Zero extending the value to 64-bit to simplify assembly.
1104 */
1105IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1106{
1107#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1108 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1109#else
1110 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1111#endif
1112}
1113
1114
1115/**
1116 * Used by TB code to load unsigned 64-bit data w/ flat address.
1117 */
1118IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1119{
1120#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1121 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1122#else
1123 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
1124#endif
1125}
1126
1127
1128#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1129/**
1130 * Used by TB code to load unsigned 128-bit data w/ flat address.
1131 */
1132IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1133{
1134#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1135 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1136#else
1137 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, GCPtrMem);
1138#endif
1139}
1140
1141
1142/**
1143 * Used by TB code to load unsigned 128-bit data w/ flat address and SSE alignment checking.
1144 */
1145IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1146{
1147#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1148 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1149#else
1150 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, GCPtrMem);
1151#endif
1152}
1153
1154
1155/**
1156 * Used by TB code to load unsigned 128-bit data w/ flat address, without alignment checking.
1157 */
1158IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1159{
1160#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1161 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1162#else
1163 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, GCPtrMem);
1164#endif
1165}
1166
1167
1168/**
1169 * Used by TB code to load unsigned 256-bit data w/ flat address, without alignment checking.
1170 */
1171IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1172{
1173#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1174 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1175#else
1176 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, GCPtrMem);
1177#endif
1178}
1179
1180
1181/**
1182 * Used by TB code to load unsigned 256-bit data w/ flat address and AVX alignment checking.
1183 */
1184IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1185{
1186#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1187 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1188#else
1189 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, GCPtrMem);
1190#endif
1191}
1192#endif
1193
1194
1195/**
1196 * Used by TB code to store unsigned 8-bit data w/ flat address.
1197 */
1198IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
1199{
1200#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1201 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
1202#else
1203 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
1204#endif
1205}
1206
1207
1208/**
1209 * Used by TB code to store unsigned 16-bit data w/ flat address.
1210 */
1211IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1212{
1213#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1214 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
1215#else
1216 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
1217#endif
1218}
1219
1220
1221/**
1222 * Used by TB code to store unsigned 32-bit data w/ flat address.
1223 */
1224IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1225{
1226#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1227 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
1228#else
1229 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
1230#endif
1231}
1232
1233
1234/**
1235 * Used by TB code to store unsigned 64-bit data w/ flat address.
1236 */
1237IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1238{
1239#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1240 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
1241#else
1242 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
1243#endif
1244}
1245
1246
1247#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1248/**
1249 * Used by TB code to store unsigned 128-bit data w/ flat address and SSE alignment checking.
1250 */
1251IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1252{
1253#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1254 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1255#else
1256 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
1257#endif
1258}
1259
1260
1261/**
1262 * Used by TB code to store unsigned 128-bit data w/ flat address, without alignment checking.
1263 */
1264IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1265{
1266#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1267 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1268#else
1269 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
1270#endif
1271}
1272
1273
1274/**
1275 * Used by TB code to store unsigned 256-bit data w/ flat address, without alignment checking.
1276 */
1277IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1278{
1279#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1280 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1281#else
1282 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
1283#endif
1284}
1285
1286
1287/**
1288 * Used by TB code to store unsigned 256-bit data w/ flat address and AVX alignment checking.
1289 */
1290IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1291{
1292#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1293 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1294#else
1295 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
1296#endif
1297}
1298#endif
1299
1300
1301
1302/**
1303 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
1304 */
1305IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1306{
1307#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1308 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1309#else
1310 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1311#endif
1312}
1313
1314
1315/**
1316 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
1317 */
1318IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1319{
1320#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1321 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1322#else
1323 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1324#endif
1325}
1326
1327
1328/**
1329 * Used by TB code to store a segment selector value onto a flat stack.
1330 *
1331 * Intel CPUs don't write the whole dword, thus the special function.
1332 */
1333IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1334{
1335#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1336 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1337#else
1338 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1339#endif
1340}
1341
1342
1343/**
1344 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
1345 */
1346IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1347{
1348#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1349 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1350#else
1351 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1352#endif
1353}
1354
1355
1356/**
1357 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
1358 */
1359IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1360{
1361#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1362 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1363#else
1364 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
1365#endif
1366}
1367
1368
1369/**
1370 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
1371 */
1372IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1373{
1374#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1375 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1376#else
1377 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
1378#endif
1379}
1380
1381
1382/**
1383 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
1384 */
1385IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1386{
1387#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1388 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1389#else
1390 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
1391#endif
1392}
1393
1394
1395
1396/*********************************************************************************************************************************
1397* Helpers: Segmented memory mapping. *
1398*********************************************************************************************************************************/
1399
1400/**
1401 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
1402 * segmentation.
1403 */
1404IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1405 RTGCPTR GCPtrMem, uint8_t iSegReg))
1406{
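    /* pbUnmapInfo receives the bookkeeping token that the TB later hands back to
       the corresponding unmap/commit helper. */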
1407#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1408 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1409#else
1410 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1411#endif
1412}
1413
1414
1415/**
1416 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1417 */
1418IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1419 RTGCPTR GCPtrMem, uint8_t iSegReg))
1420{
1421#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1422 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1423#else
1424 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1425#endif
1426}
1427
1428
1429/**
1430 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1431 */
1432IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1433 RTGCPTR GCPtrMem, uint8_t iSegReg))
1434{
1435#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1436 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1437#else
1438 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1439#endif
1440}
1441
1442
1443/**
1444 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1445 */
1446IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1447 RTGCPTR GCPtrMem, uint8_t iSegReg))
1448{
1449#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1450 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1451#else
1452 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1453#endif
1454}
1455
1456
1457/**
1458 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
1459 * segmentation.
1460 */
1461IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1462 RTGCPTR GCPtrMem, uint8_t iSegReg))
1463{
1464#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1465 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1466#else
1467 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1468#endif
1469}
1470
1471
1472/**
1473 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1474 */
1475IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1476 RTGCPTR GCPtrMem, uint8_t iSegReg))
1477{
1478#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1479 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1480#else
1481 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1482#endif
1483}
1484
1485
1486/**
1487 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1488 */
1489IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1490 RTGCPTR GCPtrMem, uint8_t iSegReg))
1491{
1492#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1493 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1494#else
1495 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1496#endif
1497}
1498
1499
1500/**
1501 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1502 */
1503IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1504 RTGCPTR GCPtrMem, uint8_t iSegReg))
1505{
1506#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1507 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1508#else
1509 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1510#endif
1511}
1512
1513
1514/**
1515 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
1516 * segmentation.
1517 */
1518IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1519 RTGCPTR GCPtrMem, uint8_t iSegReg))
1520{
1521#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1522 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1523#else
1524 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1525#endif
1526}
1527
1528
1529/**
1530 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1531 */
1532IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1533 RTGCPTR GCPtrMem, uint8_t iSegReg))
1534{
1535#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1536 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1537#else
1538 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1539#endif
1540}
1541
1542
1543/**
1544 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1545 */
1546IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1547 RTGCPTR GCPtrMem, uint8_t iSegReg))
1548{
1549#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1550 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1551#else
1552 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1553#endif
1554}
1555
1556
1557/**
1558 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1559 */
1560IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1561 RTGCPTR GCPtrMem, uint8_t iSegReg))
1562{
1563#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1564 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1565#else
1566 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1567#endif
1568}
1569
1570
1571/**
1572 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
1573 * segmentation.
1574 */
1575IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1576 RTGCPTR GCPtrMem, uint8_t iSegReg))
1577{
1578#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1579 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1580#else
1581 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1582#endif
1583}
1584
1585
1586/**
1587 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1588 */
1589IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1590 RTGCPTR GCPtrMem, uint8_t iSegReg))
1591{
1592#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1593 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1594#else
1595 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1596#endif
1597}
1598
1599
1600/**
1601 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1602 */
1603IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1604 RTGCPTR GCPtrMem, uint8_t iSegReg))
1605{
1606#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1607 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1608#else
1609 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1610#endif
1611}
1612
1613
1614/**
1615 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1616 */
1617IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1618 RTGCPTR GCPtrMem, uint8_t iSegReg))
1619{
1620#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1621 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1622#else
1623 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1624#endif
1625}
1626
1627
1628/**
1629 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1630 */
1631IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1632 RTGCPTR GCPtrMem, uint8_t iSegReg))
1633{
1634#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1635 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1636#else
1637 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1638#endif
1639}
1640
1641
1642/**
1643 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
1644 */
1645IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1646 RTGCPTR GCPtrMem, uint8_t iSegReg))
1647{
1648#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1649 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1650#else
1651 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1652#endif
1653}
1654
1655
1656/**
1657 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
1658 * segmentation.
1659 */
1660IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1661 RTGCPTR GCPtrMem, uint8_t iSegReg))
1662{
1663#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1664 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1665#else
1666 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1667#endif
1668}
1669
1670
1671/**
1672 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
1673 */
1674IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1675 RTGCPTR GCPtrMem, uint8_t iSegReg))
1676{
1677#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1678 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1679#else
1680 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1681#endif
1682}
1683
1684
1685/**
1686 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
1687 */
1688IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1689 RTGCPTR GCPtrMem, uint8_t iSegReg))
1690{
1691#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1692 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1693#else
1694 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1695#endif
1696}
1697
1698
1699/**
1700 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
1701 */
1702IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1703 RTGCPTR GCPtrMem, uint8_t iSegReg))
1704{
1705#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1706 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1707#else
1708 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1709#endif
1710}
1711
1712
1713/*********************************************************************************************************************************
1714* Helpers: Flat memory mapping. *
1715*********************************************************************************************************************************/
1716
1717/**
1718 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
1719 * address.
1720 */
1721IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1722{
1723#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1724 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1725#else
1726 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1727#endif
1728}
1729
1730
1731/**
1732 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
1733 */
1734IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1735{
1736#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1737 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1738#else
1739 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1740#endif
1741}
1742
1743
1744/**
1745 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
1746 */
1747IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1748{
1749#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1750 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1751#else
1752 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1753#endif
1754}
1755
1756
1757/**
1758 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
1759 */
1760IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1761{
1762#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1763 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1764#else
1765 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1766#endif
1767}
1768
1769
1770/**
1771 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
1772 * address.
1773 */
1774IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1775{
1776#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1777 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1778#else
1779 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1780#endif
1781}
1782
1783
1784/**
1785 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
1786 */
1787IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1788{
1789#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1790 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1791#else
1792 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1793#endif
1794}
1795
1796
1797/**
1798 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
1799 */
1800IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1801{
1802#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1803 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1804#else
1805 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1806#endif
1807}
1808
1809
1810/**
1811 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
1812 */
1813IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1814{
1815#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1816 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1817#else
1818 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1819#endif
1820}
1821
1822
1823/**
1824 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
1825 * address.
1826 */
1827IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1828{
1829#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1830 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1831#else
1832 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1833#endif
1834}
1835
1836
1837/**
1838 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
1839 */
1840IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1841{
1842#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1843 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1844#else
1845 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1846#endif
1847}
1848
1849
1850/**
1851 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
1852 */
1853IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1854{
1855#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1856 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1857#else
1858 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1859#endif
1860}
1861
1862
1863/**
1864 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
1865 */
1866IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1867{
1868#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1869 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1870#else
1871 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1872#endif
1873}
1874
1875
1876/**
1877 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
1878 * address.
1879 */
1880IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1881{
1882#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1883 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1884#else
1885 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1886#endif
1887}
1888
1889
1890/**
1891 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
1892 */
1893IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1894{
1895#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1896 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1897#else
1898 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1899#endif
1900}
1901
1902
1903/**
1904 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
1905 */
1906IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1907{
1908#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1909 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1910#else
1911 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1912#endif
1913}
1914
1915
1916/**
1917 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
1918 */
1919IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1920{
1921#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1922 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1923#else
1924 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1925#endif
1926}
1927
1928
1929/**
1930 * Used by TB code to map 80-bit float data writeonly w/ flat address.
1931 */
1932IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1933{
1934#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1935 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1936#else
1937 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1938#endif
1939}
1940
1941
1942/**
1943 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
1944 */
1945IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1946{
1947#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1948 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1949#else
1950 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1951#endif
1952}
1953
1954
1955/**
1956 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
1957 * address.
1958 */
1959IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1960{
1961#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1962 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1963#else
1964 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1965#endif
1966}
1967
1968
1969/**
1970 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
1971 */
1972IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1973{
1974#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1975 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1976#else
1977 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1978#endif
1979}
1980
1981
1982/**
1983 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
1984 */
1985IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1986{
1987#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1988 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1989#else
1990 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1991#endif
1992}
1993
1994
1995/**
1996 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
1997 */
1998IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1999{
2000#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2001 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2002#else
2003 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2004#endif
2005}
2006
2007
2008/*********************************************************************************************************************************
2009* Helpers: Commit, rollback & unmap *
2010*********************************************************************************************************************************/
2011
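/*
 * Illustrative sketch, not part of the original source: the mapping helpers
 * above and the commit-and-unmap helpers in this section are always used as a
 * pair by the recompiled TB code.  Conceptually the emitted code performs the
 * equivalent of the following write-only 32-bit store w/ segmentation
 * (GCPtrMem, iSegReg and uValue being placeholder names):
 *
 *     uint8_t   bUnmapInfo;
 *     uint32_t *pu32Dst = iemNativeHlpMemMapDataU32Wo(pVCpu, &bUnmapInfo, GCPtrMem, iSegReg);
 *     *pu32Dst = uValue;
 *     iemNativeHlpMemCommitAndUnmapWo(pVCpu, bUnmapInfo);
 *
 * The actual TB code of course loads the arguments into the calling
 * convention registers and calls the helpers indirectly.
 */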
2012/**
2013 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2014 */
2015IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2016{
2017 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2018}
2019
2020
2021/**
2022 * Used by TB code to commit and unmap a read-write memory mapping.
2023 */
2024IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2025{
2026 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2027}
2028
2029
2030/**
2031 * Used by TB code to commit and unmap a write-only memory mapping.
2032 */
2033IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2034{
2035 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2036}
2037
2038
2039/**
2040 * Used by TB code to commit and unmap a read-only memory mapping.
2041 */
2042IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2043{
2044 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2045}
2046
2047
2048/**
2049 * Reinitializes the native recompiler state.
2050 *
2051 * Called before starting a new recompile job.
2052 */
2053static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2054{
2055 pReNative->cLabels = 0;
2056 pReNative->bmLabelTypes = 0;
2057 pReNative->cFixups = 0;
2058#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2059 pReNative->cTbExitFixups = 0;
2060#endif
2061#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2062 pReNative->pDbgInfo->cEntries = 0;
2063 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
2064#endif
2065 pReNative->pTbOrg = pTb;
2066 pReNative->cCondDepth = 0;
2067 pReNative->uCondSeqNo = 0;
2068 pReNative->uCheckIrqSeqNo = 0;
2069 pReNative->uTlbSeqNo = 0;
2070
2071#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2072 pReNative->Core.offPc = 0;
2073# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(VBOX_WITH_STATISTICS)
2074 pReNative->idxInstrPlusOneOfLastPcUpdate = 0;
2075# endif
2076# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2077 pReNative->Core.fDebugPcInitialized = false;
2078# endif
2079#endif
2080#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2081 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2082#endif
2083 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2084#if IEMNATIVE_HST_GREG_COUNT < 32
2085 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2086#endif
2087 ;
2088 pReNative->Core.bmHstRegsWithGstShadow = 0;
2089 pReNative->Core.bmGstRegShadows = 0;
2090#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2091 pReNative->Core.bmGstRegShadowDirty = 0;
2092#endif
2093 pReNative->Core.bmVars = 0;
2094 pReNative->Core.bmStack = 0;
2095 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2096 pReNative->Core.u64ArgVars = UINT64_MAX;
2097
2098 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 23);
2099 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2100 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2101 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2102 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2103 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2104 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2105 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2106 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2107 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2108 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2109 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2110 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2111 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2112 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
2113 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
2114 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
2115 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
2116 pReNative->aidxUniqueLabels[17] = UINT32_MAX;
2117 pReNative->aidxUniqueLabels[18] = UINT32_MAX;
2118 pReNative->aidxUniqueLabels[19] = UINT32_MAX;
2119 pReNative->aidxUniqueLabels[20] = UINT32_MAX;
2120 pReNative->aidxUniqueLabels[21] = UINT32_MAX;
2121 pReNative->aidxUniqueLabels[22] = UINT32_MAX;
2122
2123 pReNative->idxLastCheckIrqCallNo = UINT32_MAX;
2124
2125 /* Full host register reinit: */
2126 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2127 {
2128 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2129 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2130 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2131 }
2132
2133 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2134 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2135#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2136 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2137#endif
2138#ifdef IEMNATIVE_REG_FIXED_TMP0
2139 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2140#endif
2141#ifdef IEMNATIVE_REG_FIXED_TMP1
2142 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2143#endif
2144#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2145 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
2146#endif
2147 );
2148 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2149 {
2150 fRegs &= ~RT_BIT_32(idxReg);
2151 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2152 }
2153
2154 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2155#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2156 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2157#endif
2158#ifdef IEMNATIVE_REG_FIXED_TMP0
2159 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2160#endif
2161#ifdef IEMNATIVE_REG_FIXED_TMP1
2162 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
2163#endif
2164#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2165 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
2166#endif
2167
2168#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2169 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
2170# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
2171 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
2172# endif
2173 ;
2174 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
2175 pReNative->Core.bmGstSimdRegShadows = 0;
2176 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
2177 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
2178
2179 /* Full host register reinit: */
2180 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
2181 {
2182 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
2183 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
2184 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
2185 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
2186 }
2187
2188 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
2189 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2190 {
2191 fRegs &= ~RT_BIT_32(idxReg);
2192 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2193 }
2194
2195#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
2196 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2197#endif
2198
2199#endif
2200
2201 return pReNative;
2202}
2203
2204
2205/**
2206 * Used when done emitting the per-chunk code and for iemNativeInit bailout.
2207 */
2208static void iemNativeTerm(PIEMRECOMPILERSTATE pReNative)
2209{
2210 RTMemFree(pReNative->pInstrBuf);
2211 RTMemFree(pReNative->paLabels);
2212 RTMemFree(pReNative->paFixups);
2213#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2214 RTMemFree(pReNative->paTbExitFixups);
2215#endif
2216#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2217 RTMemFree(pReNative->pDbgInfo);
2218#endif
2219 RTMemFree(pReNative);
2220}
2221
2222
2223/**
2224 * Allocates and initializes the native recompiler state.
2225 *
2226 * This is called the first time an EMT wants to recompile something.
2227 *
2228 * @returns Pointer to the new recompiler state.
2229 * @param pVCpu The cross context virtual CPU structure of the calling
2230 * thread.
2231 * @param pTb The TB that's about to be recompiled. When this is NULL,
2232 * the recompiler state is for emitting the common per-chunk
2233 * code from iemNativeRecompileAttachExecMemChunkCtx.
2234 * @thread EMT(pVCpu)
2235 */
2236static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2237{
2238 VMCPU_ASSERT_EMT(pVCpu);
2239
2240 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2241 AssertReturn(pReNative, NULL);
2242
2243 /*
2244 * Try allocate all the buffers and stuff we need.
2245 */
2246 uint32_t const cFactor = pTb ? 1 : 32 /* per-chunk stuff doesn't really need anything but the code buffer */;
2247 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2248 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K / cFactor);
2249 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K / cFactor);
2250#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2251 pReNative->paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemAllocZ(sizeof(IEMNATIVEEXITFIXUP) * _8K / cFactor);
2252#endif
2253#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2254 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K / cFactor]));
2255#endif
2256 if (RT_LIKELY( pReNative->pInstrBuf
2257 && pReNative->paLabels
2258 && pReNative->paFixups)
2259#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2260 && pReNative->paTbExitFixups
2261#endif
2262#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2263 && pReNative->pDbgInfo
2264#endif
2265 )
2266 {
2267 /*
2268 * Set the buffer & array sizes on success.
2269 */
2270 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2271 pReNative->cLabelsAlloc = _8K / cFactor;
2272 pReNative->cFixupsAlloc = _16K / cFactor;
2273#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2274 pReNative->cTbExitFixupsAlloc = _8K / cFactor;
2275#endif
2276#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2277 pReNative->cDbgInfoAlloc = _16K / cFactor;
2278#endif
2279
2280 /* Other constant stuff: */
2281 pReNative->pVCpu = pVCpu;
2282
2283 /*
2284 * Done, just reinit it.
2285 */
2286 return iemNativeReInit(pReNative, pTb);
2287 }
2288
2289 /*
2290 * Failed. Cleanup and return.
2291 */
2292 AssertFailed();
2293 iemNativeTerm(pReNative);
2294 return NULL;
2295}
2296
2297
2298/**
2299 * Creates a label
2300 *
2301 * If the label does not yet have a defined position,
2302 * call iemNativeLabelDefine() later to set it.
2303 *
2304 * @returns Label ID. Throws VBox status code on failure, so no need to check
2305 * the return value.
2306 * @param pReNative The native recompile state.
2307 * @param enmType The label type.
2308 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2309 * label is not yet defined (default).
2310 * @param uData Data associated with the label. Only applicable to
2311 * certain types of labels. Default is zero.
2312 */
2313DECL_HIDDEN_THROW(uint32_t)
2314iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2315 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2316{
2317 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2318#if defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE) && defined(RT_ARCH_AMD64)
2319 Assert(enmType >= kIemNativeLabelType_LoopJumpTarget);
2320#endif
2321
2322 /*
2323 * Locate existing label definition.
2324 *
2325 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2326 * and uData is zero.
2327 */
2328 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2329 uint32_t const cLabels = pReNative->cLabels;
2330 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2331#ifndef VBOX_STRICT
2332 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2333 && offWhere == UINT32_MAX
2334 && uData == 0
2335#endif
2336 )
2337 {
2338#ifndef VBOX_STRICT
2339 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2340 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2341 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2342 if (idxLabel < pReNative->cLabels)
2343 return idxLabel;
2344#else
2345 for (uint32_t i = 0; i < cLabels; i++)
2346 if ( paLabels[i].enmType == enmType
2347 && paLabels[i].uData == uData)
2348 {
2349 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2350 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2351 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2352 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2353 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2354 return i;
2355 }
2356 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2357 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2358#endif
2359 }
2360
2361 /*
2362 * Make sure we've got room for another label.
2363 */
2364 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2365 { /* likely */ }
2366 else
2367 {
2368 uint32_t cNew = pReNative->cLabelsAlloc;
2369 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2370 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2371 cNew *= 2;
2372 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* IEMNATIVEFIXUP::idxLabel type restricts this */
2373 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2374 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2375 pReNative->paLabels = paLabels;
2376 pReNative->cLabelsAlloc = cNew;
2377 }
2378
2379 /*
2380 * Define a new label.
2381 */
2382 paLabels[cLabels].off = offWhere;
2383 paLabels[cLabels].enmType = enmType;
2384 paLabels[cLabels].uData = uData;
2385 pReNative->cLabels = cLabels + 1;
2386
2387 Assert((unsigned)enmType < 64);
2388 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2389
2390 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2391 {
2392 Assert(uData == 0);
2393 pReNative->aidxUniqueLabels[enmType] = cLabels;
2394 }
2395
2396 if (offWhere != UINT32_MAX)
2397 {
2398#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2399 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2400 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2401#endif
2402 }
2403 return cLabels;
2404}
2405
2406
2407/**
2408 * Defines the location of an existing label.
2409 *
2410 * @param pReNative The native recompile state.
2411 * @param idxLabel The label to define.
2412 * @param offWhere The position.
2413 */
2414DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2415{
2416 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2417 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2418 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2419 pLabel->off = offWhere;
2420#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2421 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2422 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2423#endif
2424}
2425
2426
2427/**
2428 * Looks up a label.
2429 *
2430 * @returns Label ID if found, UINT32_MAX if not.
2431 */
2432DECLHIDDEN(uint32_t) iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2433 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/) RT_NOEXCEPT
2434{
2435 Assert((unsigned)enmType < 64);
2436 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2437 {
2438 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2439 return pReNative->aidxUniqueLabels[enmType];
2440
2441 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2442 uint32_t const cLabels = pReNative->cLabels;
2443 for (uint32_t i = 0; i < cLabels; i++)
2444 if ( paLabels[i].enmType == enmType
2445 && paLabels[i].uData == uData
2446 && ( paLabels[i].off == offWhere
2447 || offWhere == UINT32_MAX
2448 || paLabels[i].off == UINT32_MAX))
2449 return i;
2450 }
2451 return UINT32_MAX;
2452}
2453
2454
2455/**
2456 * Adds a fixup.
2457 *
2458 * @throws VBox status code (int) on failure.
2459 * @param pReNative The native recompile state.
2460 * @param offWhere The instruction offset of the fixup location.
2461 * @param idxLabel The target label ID for the fixup.
2462 * @param enmType The fixup type.
2463 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2464 */
2465DECL_HIDDEN_THROW(void)
2466iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2467 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2468{
2469 Assert(idxLabel <= UINT16_MAX);
2470 Assert((unsigned)enmType <= UINT8_MAX);
2471#ifdef RT_ARCH_ARM64
2472 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
2473 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
2474 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
2475#endif
2476
2477 /*
2478 * Make sure we've room.
2479 */
2480 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2481 uint32_t const cFixups = pReNative->cFixups;
2482 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2483 { /* likely */ }
2484 else
2485 {
2486 uint32_t cNew = pReNative->cFixupsAlloc;
2487 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2488 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2489 cNew *= 2;
2490 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2491 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2492 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2493 pReNative->paFixups = paFixups;
2494 pReNative->cFixupsAlloc = cNew;
2495 }
2496
2497 /*
2498 * Add the fixup.
2499 */
2500 paFixups[cFixups].off = offWhere;
2501 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2502 paFixups[cFixups].enmType = enmType;
2503 paFixups[cFixups].offAddend = offAddend;
2504 pReNative->cFixups = cFixups + 1;
2505}
2506
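/*
 * Illustrative sketch, not part of the original source, of how the label and
 * fixup APIs above are typically combined for a forward branch (enmLabelType,
 * enmFixupType and offJmp are placeholders):
 *
 *     uint32_t const idxLabel = iemNativeLabelCreate(pReNative, enmLabelType); // off stays UINT32_MAX for now
 *     // ... emit the branch instruction at native offset offJmp ...
 *     iemNativeAddFixup(pReNative, offJmp, idxLabel, enmFixupType);
 *     // ... emit the code being branched over ...
 *     iemNativeLabelDefine(pReNative, idxLabel, off);                          // resolve to the current offset
 *
 * Once the label is defined, the recorded fixup tells the final pass where to
 * patch the branch displacement.
 */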
2507
2508#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2509/**
2510 * Adds a fixup to the per chunk tail code.
2511 *
2512 * @throws VBox status code (int) on failure.
2513 * @param pReNative The native recompile state.
2514 * @param offWhere The instruction offset of the fixup location.
2515 * @param enmExitReason The exit reason to jump to.
2516 */
2517DECL_HIDDEN_THROW(void)
2518iemNativeAddTbExitFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, IEMNATIVELABELTYPE enmExitReason)
2519{
2520 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
2521
2522 /*
2523 * Make sure we've room.
2524 */
2525 PIEMNATIVEEXITFIXUP paTbExitFixups = pReNative->paTbExitFixups;
2526 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
2527 if (RT_LIKELY(cTbExitFixups < pReNative->cTbExitFixupsAlloc))
2528 { /* likely */ }
2529 else
2530 {
2531 uint32_t cNew = pReNative->cTbExitFixupsAlloc;
2532 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2533 AssertStmt(cTbExitFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2534 cNew *= 2;
2535 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2536 paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemRealloc(paTbExitFixups, cNew * sizeof(paTbExitFixups[0]));
2537 AssertStmt(paTbExitFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2538 pReNative->paTbExitFixups = paTbExitFixups;
2539 pReNative->cTbExitFixupsAlloc = cNew;
2540 }
2541
2542 /*
2543 * Add the fixup.
2544 */
2545 paTbExitFixups[cTbExitFixups].off = offWhere;
2546 paTbExitFixups[cTbExitFixups].enmExitReason = enmExitReason;
2547 pReNative->cTbExitFixups = cTbExitFixups + 1;
2548}
2549#endif
2550
2551
2552/**
2553 * Slow code path for iemNativeInstrBufEnsure.
2554 */
2555DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2556{
2557 /* Double the buffer size till we meet the request. */
2558 uint32_t cNew = pReNative->cInstrBufAlloc;
2559 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
2560 do
2561 cNew *= 2;
2562 while (cNew < off + cInstrReq);
2563
2564 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2565#ifdef RT_ARCH_ARM64
2566 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2567#else
2568 uint32_t const cbMaxInstrBuf = _2M;
2569#endif
2570 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2571
2572 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2573 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2574
2575#ifdef VBOX_STRICT
2576 pReNative->offInstrBufChecked = off + cInstrReq;
2577#endif
2578 pReNative->cInstrBufAlloc = cNew;
2579 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2580}
2581
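/*
 * Illustrative note, not part of the original source: the growth policy above
 * simply doubles the current allocation until the request fits.  For example,
 * with cInstrBufAlloc at 16384 entries and off + cInstrReq at 50000, the loop
 * doubles to 32768 and then 65536 entries before stopping, subject to the
 * _1M/_2M byte cap dictated by the host branch instruction range.
 */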
2582#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2583
2584/**
2585 * Grows the static debug info array used during recompilation.
2586 *
2587 * @returns Pointer to the new debug info block; throws VBox status code on
2588 * failure, so no need to check the return value.
2589 */
2590DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2591{
2592 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2593 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2594 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2595 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2596 pReNative->pDbgInfo = pDbgInfo;
2597 pReNative->cDbgInfoAlloc = cNew;
2598 return pDbgInfo;
2599}
2600
2601
2602/**
2603 * Adds a new debug info uninitialized entry, returning the pointer to it.
2604 */
2605DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2606{
2607 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2608 { /* likely */ }
2609 else
2610 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2611 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2612}
2613
2614
2615/**
2616 * Debug Info: Adds a native offset record, if necessary.
2617 */
2618DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2619{
2620 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2621
2622 /*
2623 * Do we need this one?
2624 */
2625 uint32_t const offPrev = pDbgInfo->offNativeLast;
2626 if (offPrev == off)
2627 return;
2628 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2629
2630 /*
2631 * Add it.
2632 */
2633 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2634 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2635 pEntry->NativeOffset.offNative = off;
2636 pDbgInfo->offNativeLast = off;
2637}
2638
2639
2640/**
2641 * Debug Info: Record info about a label.
2642 */
2643static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2644{
2645 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2646 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2647 pEntry->Label.uUnused = 0;
2648 pEntry->Label.enmLabel = (uint8_t)enmType;
2649 pEntry->Label.uData = uData;
2650}
2651
2652
2653/**
2654 * Debug Info: Record info about a threaded call.
2655 */
2656static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2657{
2658 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2659 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2660 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2661 pEntry->ThreadedCall.uUnused = 0;
2662 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2663}
2664
2665
2666/**
2667 * Debug Info: Record info about a new guest instruction.
2668 */
2669static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2670{
2671 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2672 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2673 pEntry->GuestInstruction.uUnused = 0;
2674 pEntry->GuestInstruction.fExec = fExec;
2675}
2676
2677
2678/**
2679 * Debug Info: Record info about guest register shadowing.
2680 */
2681DECL_HIDDEN_THROW(void)
2682iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2683 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
2684{
2685 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2686 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2687 pEntry->GuestRegShadowing.uUnused = 0;
2688 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2689 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2690 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2691#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2692 Assert( idxHstReg != UINT8_MAX
2693 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
2694#endif
2695}
2696
2697
2698# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2699/**
2700 * Debug Info: Record info about guest SIMD register shadowing.
2701 */
2702DECL_HIDDEN_THROW(void)
2703iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
2704 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
2705{
2706 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2707 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
2708 pEntry->GuestSimdRegShadowing.uUnused = 0;
2709 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
2710 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
2711 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
2712}
2713# endif
2714
2715
2716# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2717/**
2718 * Debug Info: Record info about delayed RIP updates.
2719 */
2720DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint64_t offPc, uint32_t cInstrSkipped)
2721{
2722 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2723 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
2724 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
2725 pEntry->DelayedPcUpdate.offPc = offPc; /** @todo support larger values */
2726}
2727# endif
2728
2729# if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
2730
2731/**
2732 * Debug Info: Record info about a dirty guest register.
2733 */
2734DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
2735 uint8_t idxGstReg, uint8_t idxHstReg)
2736{
2737 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2738 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
2739 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
2740 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
2741 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
2742}
2743
2744
2745/**
2746 * Debug Info: Record info about a dirty guest register writeback operation.
2747 */
2748DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
2749{
2750 unsigned const cBitsGstRegMask = 25;
2751 uint32_t const fGstRegMask = RT_BIT_32(cBitsGstRegMask) - 1U;
2752
2753 /* The first block of 25 bits: */
2754 if (fGstReg & fGstRegMask)
2755 {
2756 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2757 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2758 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2759 pEntry->GuestRegWriteback.cShift = 0;
2760 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2761 fGstReg &= ~(uint64_t)fGstRegMask;
2762 if (!fGstReg)
2763 return;
2764 }
2765
2766 /* The second block of 25 bits: */
2767 fGstReg >>= cBitsGstRegMask;
2768 if (fGstReg & fGstRegMask)
2769 {
2770 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2771 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2772 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2773 pEntry->GuestRegWriteback.cShift = 1;
2774 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2775 fGstReg &= ~(uint64_t)fGstRegMask;
2776 if (!fGstReg)
2777 return;
2778 }
2779
2780 /* The last block with 14 bits: */
2781 fGstReg >>= cBitsGstRegMask;
2782 Assert(fGstReg & fGstRegMask);
2783 Assert((fGstReg & ~(uint64_t)fGstRegMask) == 0);
2784 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2785 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2786 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2787 pEntry->GuestRegWriteback.cShift = 2;
2788 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2789}
2790
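/*
 * Illustrative worked example, not part of the original source: the 64-bit
 * dirty mask is split into up to three debug entries of 25+25+14 bits.  A
 * mask with only bit 30 set skips the first block and is recorded in the
 * second one with cShift = 1 and fGstReg = RT_BIT_32(30 - 25), the consumer
 * presumably recovering the original position as fGstReg << (cShift * 25).
 */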
2791# endif /* defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) */
2792
2793#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2794
2795
2796/*********************************************************************************************************************************
2797* Register Allocator *
2798*********************************************************************************************************************************/
2799
2800/**
2801 * Register parameter indexes (indexed by argument number).
2802 */
2803DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2804{
2805 IEMNATIVE_CALL_ARG0_GREG,
2806 IEMNATIVE_CALL_ARG1_GREG,
2807 IEMNATIVE_CALL_ARG2_GREG,
2808 IEMNATIVE_CALL_ARG3_GREG,
2809#if defined(IEMNATIVE_CALL_ARG4_GREG)
2810 IEMNATIVE_CALL_ARG4_GREG,
2811# if defined(IEMNATIVE_CALL_ARG5_GREG)
2812 IEMNATIVE_CALL_ARG5_GREG,
2813# if defined(IEMNATIVE_CALL_ARG6_GREG)
2814 IEMNATIVE_CALL_ARG6_GREG,
2815# if defined(IEMNATIVE_CALL_ARG7_GREG)
2816 IEMNATIVE_CALL_ARG7_GREG,
2817# endif
2818# endif
2819# endif
2820#endif
2821};
2822AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2823
2824/**
2825 * Call register masks indexed by argument count.
2826 */
2827DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2828{
2829 0,
2830 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2831 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2832 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2833 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2834 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2835#if defined(IEMNATIVE_CALL_ARG4_GREG)
2836 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2837 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2838# if defined(IEMNATIVE_CALL_ARG5_GREG)
2839 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2840 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2841# if defined(IEMNATIVE_CALL_ARG6_GREG)
2842 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2843 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2844 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2845# if defined(IEMNATIVE_CALL_ARG7_GREG)
2846 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2847 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2848 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2849# endif
2850# endif
2851# endif
2852#endif
2853};
2854
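/*
 * Illustrative note, not part of the original source: for a helper taking
 * three arguments, g_aidxIemNativeCallRegs[0..2] give the host registers the
 * arguments must be loaded into, while g_afIemNativeCallRegs[3] gives the
 * combined mask of those same registers, e.g.:
 *
 *     uint8_t  const idxRegArg2 = g_aidxIemNativeCallRegs[2]; // register for the third argument
 *     uint32_t const fArgRegs   = g_afIemNativeCallRegs[3];   // mask of ARG0..ARG2
 */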
2855#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2856/**
2857 * BP offset of the stack argument slots.
2858 *
2859 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2860 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2861 */
2862DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2863{
2864 IEMNATIVE_FP_OFF_STACK_ARG0,
2865# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2866 IEMNATIVE_FP_OFF_STACK_ARG1,
2867# endif
2868# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2869 IEMNATIVE_FP_OFF_STACK_ARG2,
2870# endif
2871# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2872 IEMNATIVE_FP_OFF_STACK_ARG3,
2873# endif
2874};
2875AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2876#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2877
2878/**
2879 * Info about shadowed guest register values.
2880 * @see IEMNATIVEGSTREG
2881 */
2882DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
2883{
2884#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2885 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2886 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2887 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2888 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2889 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2890 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2891 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2892 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2893 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2894 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2895 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2896 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2897 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2898 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2899 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2900 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2901 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2902 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
2903 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
2904 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
2905 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2906 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2907 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2908 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2909 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2910 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2911 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
2912 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
2913 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
2914 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
2915 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
2916 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
2917 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2918 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2919 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2920 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2921 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2922 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2923 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2924 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2925 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2926 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2927 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2928 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2929 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
2930 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
2931 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
2932 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2933#undef CPUMCTX_OFF_AND_SIZE
2934};
2935AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
2936
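/*
 * Illustrative note, not part of the original source: each entry pairs the
 * byte offset of the shadowed field within VMCPU with its size, e.g.
 * g_aGstShadowInfo[kIemNativeGstReg_Pc].off is the offset of cpum.GstCtx.rip
 * and g_aGstShadowInfo[kIemNativeGstReg_Pc].cb equals sizeof(uint64_t).  The
 * .cb member is what iemNativeEmitStoreGprWithGstShadowReg() below keys its
 * store-size switch on.
 */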
2937
2938/** Host CPU general purpose register names. */
2939DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2940{
2941#ifdef RT_ARCH_AMD64
2942 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2943#elif defined(RT_ARCH_ARM64)
2944 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2945 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2946#else
2947# error "port me"
2948#endif
2949};
2950
2951
2952#if 0 /* unused */
2953/**
2954 * Tries to locate a suitable register in the given register mask.
2955 *
2956 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2957 * failed.
2958 *
2959 * @returns Host register number on success, returns UINT8_MAX on failure.
2960 */
2961static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2962{
2963 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2964 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2965 if (fRegs)
2966 {
2967 /** @todo pick better here: */
2968 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2969
2970 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2971 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2972 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2973 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2974
2975 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2976 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2977 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2978 return idxReg;
2979 }
2980 return UINT8_MAX;
2981}
2982#endif /* unused */
2983
2984#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2985
2986/**
2987 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
2988 *
2989 * @returns New code buffer offset on success, UINT32_MAX on failure.
2990 * @param pReNative The native recompile state.
2991 * @param off The current code buffer position.
2992 * @param enmGstReg The guest register to store to.
2993 * @param idxHstReg The host register to store from.
2994 */
2995DECL_FORCE_INLINE_THROW(uint32_t)
2996iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
2997{
2998 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
2999 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3000
3001 switch (g_aGstShadowInfo[enmGstReg].cb)
3002 {
3003 case sizeof(uint64_t):
3004 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3005 case sizeof(uint32_t):
3006 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3007 case sizeof(uint16_t):
3008 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3009# if 0 /* not present in the table. */
3010 case sizeof(uint8_t):
3011 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3012# endif
3013 default:
3014 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3015 }
3016}
3017
3018
3019/**
3020 * Emits code to flush a pending write of the given guest register,
3021 * version with alternative core state.
3022 *
3023 * @returns New code buffer offset.
3024 * @param pReNative The native recompile state.
3025 * @param off Current code buffer position.
3026 * @param pCore Alternative core state.
3027 * @param enmGstReg The guest register to flush.
3028 */
3029DECL_HIDDEN_THROW(uint32_t)
3030iemNativeRegFlushPendingWriteEx(PIEMRECOMPILERSTATE pReNative, uint32_t off, PIEMNATIVECORESTATE pCore, IEMNATIVEGSTREG enmGstReg)
3031{
3032 uint8_t const idxHstReg = pCore->aidxGstRegShadows[enmGstReg];
3033
3034 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
3035 && enmGstReg <= kIemNativeGstReg_GprLast)
3036 || enmGstReg == kIemNativeGstReg_MxCsr);
3037 Assert( idxHstReg != UINT8_MAX
3038 && pCore->bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3039 Log12(("iemNativeRegFlushPendingWriteEx: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3040 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3041
3042 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3043
3044 pCore->bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3045 return off;
3046}
3047
3048
3049/**
3050 * Emits code to flush a pending write of the given guest register.
3051 *
3052 * @returns New code buffer offset.
3053 * @param pReNative The native recompile state.
3054 * @param off Current code buffer position.
3055 * @param enmGstReg The guest register to flush.
3056 */
3057DECL_HIDDEN_THROW(uint32_t)
3058iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
3059{
3060 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3061
3062 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
3063 && enmGstReg <= kIemNativeGstReg_GprLast)
3064 || enmGstReg == kIemNativeGstReg_MxCsr);
3065 Assert( idxHstReg != UINT8_MAX
3066 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3067 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3068 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3069
3070 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3071
3072 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3073 return off;
3074}
3075
3076
3077/**
3078 * Flush the given set of guest registers if marked as dirty.
3079 *
3080 * @returns New code buffer offset.
3081 * @param pReNative The native recompile state.
3082 * @param off Current code buffer position.
3083 * @param fFlushGstReg The guest register set to flush (default is flush everything).
3084 */
3085DECL_HIDDEN_THROW(uint32_t)
3086iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
3087{
3088 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
3089 if (bmGstRegShadowDirty)
3090 {
3091# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3092 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3093 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
3094# endif
3095 do
3096 {
3097 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3098 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3099 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3100 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3101 } while (bmGstRegShadowDirty);
3102 }
3103
3104 return off;
3105}
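/*
 * A stand-alone sketch of the bitmap-walk idiom used by the flush loops above
 * and below: find the lowest set bit (ASMBitFirstSetU64 returns a 1-based
 * index), clear it, handle that register, repeat until the mask is empty.
 * The handler callback is hypothetical; the block is kept under '#if 0'.
 */
#if 0 /* illustrative sketch */
static void iemNativeSketchWalkDirtyMask(uint64_t bmDirty, void (*pfnHandleGstReg)(unsigned idxGstReg))
{
    while (bmDirty)
    {
        unsigned const idxGstReg = ASMBitFirstSetU64(bmDirty) - 1;
        bmDirty &= ~RT_BIT_64(idxGstReg);
        pfnHandleGstReg(idxGstReg);
    }
}
#endif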
3106
3107
3108/**
3109 * Flush all shadowed guest registers marked as dirty for the given host register.
3110 *
3111 * @returns New code buffer offset.
3112 * @param pReNative The native recompile state.
3113 * @param off Current code buffer position.
3114 * @param idxHstReg The host register.
3115 *
3116 * @note This doesn't do any unshadowing of guest registers from the host register.
3117 */
3118DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
3119{
3120 /* We need to flush any pending guest register writes this host register shadows. */
3121 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3122 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
3123 {
3124# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3125 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3126 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
3127# endif
3128 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
3129 do
3130 {
3131 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3132 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3133 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3134 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3135 } while (bmGstRegShadowDirty);
3136 }
3137
3138 return off;
3139}
3140
3141#endif /* IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK */
3142
3143
3144/**
3145 * Locate a register, possibly freeing one up.
3146 *
3147 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3148 * failed.
3149 *
3150 * @returns Host register number on success. Returns UINT8_MAX if no registers
3151 * were found; the caller is supposed to deal with this and raise an
3152 * allocation type specific status code (if desired).
3153 *
3154 * @throws VBox status code if we run into trouble spilling a variable or
3155 * recording debug info. Does NOT throw anything if we're out of
3156 * registers, though.
3157 */
3158static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3159 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3160{
3161 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3162 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3163 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3164
3165 /*
3166 * Try a freed register that's shadowing a guest register.
3167 */
3168 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3169 if (fRegs)
3170 {
3171 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3172
3173#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3174 /*
3175 * When we have liveness information, we use it to kick out all shadowed
3176 * guest registers that will not be needed any more in this TB. If we're
3177 * lucky, this may prevent us from ending up here again.
3178 *
3179 * Note! We must consider the previous entry here so we don't free
3180 * anything that the current threaded function requires (current
3181 * entry is produced by the next threaded function).
3182 */
3183 uint32_t const idxCurCall = pReNative->idxCurCall;
3184 if (idxCurCall > 0)
3185 {
3186 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3187
3188# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3189 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3190 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3191 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
3192#else
3193 /* Construct a mask of the registers not in the read or write state.
3194 Note! We could skip writes, if they aren't from us, as this is just
3195 a hack to prevent trashing registers that have just been written
3196 or will be written when we retire the current instruction. */
3197 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3198 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3199 & IEMLIVENESSBIT_MASK;
3200#endif
3201 /* Merge EFLAGS. */
3202 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3203 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3204 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3205 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3206 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3207
3208 /* If it matches any shadowed registers. */
3209 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3210 {
3211#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3212 /* Writeback any dirty shadow registers we are about to unshadow. */
3213 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
3214#endif
3215
3216 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3217 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3218 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3219
3220 /* See if we've got any unshadowed registers we can return now. */
3221 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3222 if (fUnshadowedRegs)
3223 {
3224 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3225 return (fPreferVolatile
3226 ? ASMBitFirstSetU32(fUnshadowedRegs)
3227 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3228 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3229 - 1;
3230 }
3231 }
3232 }
3233#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3234
3235 unsigned const idxReg = (fPreferVolatile
3236 ? ASMBitFirstSetU32(fRegs)
3237 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3238 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3239 - 1;
3240
3241 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3242 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3243 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3244 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3245
3246#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3247 /* We need to flush any pending guest register writes this host register shadows. */
3248 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
3249#endif
3250
3251 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3252 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3253 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3254 return idxReg;
3255 }
3256
3257 /*
3258 * Try free up a variable that's in a register.
3259 *
3260 * We do two rounds here, first evacuating variables that don't need to be
3261 * saved on the stack, then in the second round moving things to the stack.
3262 */
3263 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3264 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3265 {
3266 uint32_t fVars = pReNative->Core.bmVars;
3267 while (fVars)
3268 {
3269 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3270 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3271#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3272 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
3273 { fVars &= ~RT_BIT_32(idxVar); continue; } /* Clear the bit first so the loop cannot spin forever on this variable. */
3274#endif
3275
3276 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3277 && (RT_BIT_32(idxReg) & fRegMask)
3278 && ( iLoop == 0
3279 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3280 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3281 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3282 {
3283 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3284 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3285 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3286 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3287 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3288 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3289#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3290 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3291#endif
3292
3293 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3294 {
3295 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3296 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3297 }
3298
3299 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3300 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3301
3302 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3303 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3304 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3305 return idxReg;
3306 }
3307 fVars &= ~RT_BIT_32(idxVar);
3308 }
3309 }
3310
3311 return UINT8_MAX;
3312}
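/*
 * A stand-alone sketch of the shift-and-AND cascade used in the EFLAGS merge
 * above (the fTmp >> 3 / >> 2 / >> 1 sequence): for a field of seven
 * consecutive liveness bits starting at iBase, the cascade leaves bit iBase
 * set only when all seven bits were set, i.e. only when every tracked EFLAGS
 * group may be freed.  iBase and the helper name are illustrative; the block
 * is kept under '#if 0' and never compiled.
 */
#if 0 /* illustrative sketch */
static uint64_t iemNativeSketchAndReduce7(uint64_t fMask, unsigned iBase)
{
    uint64_t fTmp = fMask & (fMask >> 3);   /* bit i = fMask[i] & fMask[i+3] */
    fTmp &= fTmp >> 2;                      /* bit i now covers bits i, i+2, i+3, i+5 */
    fTmp &= fTmp >> 1;                      /* bit iBase now covers bits iBase..iBase+6 */
    return (fMask & (RT_BIT_64(iBase) - 1)) /* keep everything below the field */
         | (fTmp  & RT_BIT_64(iBase));      /* plus the single merged bit */
}
#endif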
3313
3314
3315/**
3316 * Reassigns a variable to a different register specified by the caller.
3317 *
3318 * @returns The new code buffer position.
3319 * @param pReNative The native recompile state.
3320 * @param off The current code buffer position.
3321 * @param idxVar The variable index.
3322 * @param idxRegOld The old host register number.
3323 * @param idxRegNew The new host register number.
3324 * @param pszCaller The caller for logging.
3325 */
3326static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3327 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3328{
3329 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3330 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3331#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3332 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3333#endif
3334 RT_NOREF(pszCaller);
3335
3336#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3337 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3338#endif
3339 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3340
3341 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3342#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3343 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3344#endif
3345 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3346 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3347 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3348
3349 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3350 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3351 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3352 if (fGstRegShadows)
3353 {
3354 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3355 | RT_BIT_32(idxRegNew);
3356 while (fGstRegShadows)
3357 {
3358 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3359 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3360
3361 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3362 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3363 }
3364 }
3365
3366 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3367 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3368 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3369 return off;
3370}
3371
3372
3373/**
3374 * Moves a variable to a different register or spills it onto the stack.
3375 *
3376 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3377 * kinds can easily be recreated if needed later.
3378 *
3379 * @returns The new code buffer position.
3380 * @param pReNative The native recompile state.
3381 * @param off The current code buffer position.
3382 * @param idxVar The variable index.
3383 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3384 * call-volatile registers.
3385 */
3386DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3387 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3388{
3389 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3390 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3391 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3392 Assert(!pVar->fRegAcquired);
3393
3394 uint8_t const idxRegOld = pVar->idxReg;
3395 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3396 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3397 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3398 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3399 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3400 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3401 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3402 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3403#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3404 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3405#endif
3406
3407
3408 /** @todo Add statistics on this.*/
3409 /** @todo Implement basic variable liveness analysis (python) so variables
3410 * can be freed immediately once they are no longer used. Without this we
3411 * risk trashing registers and stack for dead variables.
3412 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3413
3414 /*
3415 * First try move it to a different register, as that's cheaper.
3416 */
3417 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3418 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3419 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3420 if (fRegs)
3421 {
3422 /* Avoid using shadow registers, if possible. */
3423 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3424 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3425 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3426 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3427 }
3428
3429 /*
3430 * Otherwise we must spill the register onto the stack.
3431 */
3432 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3433 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3434 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3435 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3436
3437 pVar->idxReg = UINT8_MAX;
3438 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3439 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3440 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3441 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3442 return off;
3443}
3444
3445
3446/**
3447 * Allocates a temporary host general purpose register.
3448 *
3449 * This may emit code to save register content onto the stack in order to free
3450 * up a register.
3451 *
3452 * @returns The host register number; throws VBox status code on failure,
3453 * so no need to check the return value.
3454 * @param pReNative The native recompile state.
3455 * @param poff Pointer to the variable with the code buffer position.
3456 * This will be updated if we need to move a variable from
3457 * register to stack in order to satisfy the request.
3458 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3459 * registers (@c true, default) or the other way around
3460 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3461 */
3462DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3463{
3464 /*
3465 * Try find a completely unused register, preferably a call-volatile one.
3466 */
3467 uint8_t idxReg;
3468 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3469 & ~pReNative->Core.bmHstRegsWithGstShadow
3470 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3471 if (fRegs)
3472 {
3473 if (fPreferVolatile)
3474 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3475 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3476 else
3477 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3478 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3479 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3480 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3481 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3482 }
3483 else
3484 {
3485 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3486 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3487 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3488 }
3489 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3490}
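/*
 * A minimal usage sketch of the temporary register allocator: allocate a
 * scratch GPR (which may emit spill code and update the offset), emit code
 * using it, then hand it back.  The immediate value and helper name are
 * illustrative only; kept under '#if 0' so it is never compiled.
 */
#if 0 /* illustrative sketch */
static uint32_t iemNativeSketchUseTmpReg(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
    off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x1234));
    /* ... emit whatever needs the scratch register ... */
    iemNativeRegFreeTmp(pReNative, idxTmpReg);
    return off;
}
#endif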
3491
3492
3493/**
3494 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3495 * registers.
3496 *
3497 * @returns The host register number; throws VBox status code on failure,
3498 * so no need to check the return value.
3499 * @param pReNative The native recompile state.
3500 * @param poff Pointer to the variable with the code buffer position.
3501 * This will be updated if we need to move a variable from
3502 * register to stack in order to satisfy the request.
3503 * @param fRegMask Mask of acceptable registers.
3504 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3505 * registers (@c true, default) or the other way around
3506 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3507 */
3508DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3509 bool fPreferVolatile /*= true*/)
3510{
3511 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3512 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3513
3514 /*
3515 * Try find a completely unused register, preferably a call-volatile one.
3516 */
3517 uint8_t idxReg;
3518 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3519 & ~pReNative->Core.bmHstRegsWithGstShadow
3520 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3521 & fRegMask;
3522 if (fRegs)
3523 {
3524 if (fPreferVolatile)
3525 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3526 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3527 else
3528 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3529 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3530 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3531 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3532 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3533 }
3534 else
3535 {
3536 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3537 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3538 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3539 }
3540 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3541}
3542
3543
3544/**
3545 * Allocates a temporary register for loading an immediate value into.
3546 *
3547 * This will emit code to load the immediate, unless there happens to be an
3548 * unused register with the value already loaded.
3549 *
3550 * The caller must not modify the returned register; it must be considered
3551 * read-only. Free using iemNativeRegFreeTmpImm.
3552 *
3553 * @returns The host register number; throws VBox status code on failure, so no
3554 * need to check the return value.
3555 * @param pReNative The native recompile state.
3556 * @param poff Pointer to the variable with the code buffer position.
3557 * @param uImm The immediate value that the register must hold upon
3558 * return.
3559 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3560 * registers (@c true, default) or the other way around
3561 * (@c false).
3562 *
3563 * @note Reusing immediate values has not been implemented yet.
3564 */
3565DECL_HIDDEN_THROW(uint8_t)
3566iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3567{
3568 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3569 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3570 return idxReg;
3571}
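/*
 * A companion sketch for the immediate variant: the returned register must be
 * treated as read-only and released with the matching iemNativeRegFreeTmpImm
 * call.  The value and helper name are illustrative; kept under '#if 0'.
 */
#if 0 /* illustrative sketch */
static uint32_t iemNativeSketchUseImmReg(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxImmReg = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xfff));
    /* ... emit code that only reads idxImmReg ... */
    iemNativeRegFreeTmpImm(pReNative, idxImmReg);
    return off;
}
#endif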
3572
3573
3574/**
3575 * Allocates a temporary host general purpose register for keeping a guest
3576 * register value.
3577 *
3578 * Since we may already have a register holding the guest register value,
3579 * code will be emitted to do the loading if that's not the case. Code may also
3580 * be emitted if we have to free up a register to satisfy the request.
3581 *
3582 * @returns The host register number; throws VBox status code on failure, so no
3583 * need to check the return value.
3584 * @param pReNative The native recompile state.
3585 * @param poff Pointer to the variable with the code buffer
3586 * position. This will be updated if we need to move a
3587 * variable from register to stack in order to satisfy
3588 * the request.
3589 * @param enmGstReg The guest register that is to be updated.
3590 * @param enmIntendedUse How the caller will be using the host register.
3591 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
3592 * register is okay (default). The ASSUMPTION here is
3593 * that the caller has already flushed all volatile
3594 * registers, so this is only applied if we allocate a
3595 * new register.
3596 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
3597 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3598 */
3599DECL_HIDDEN_THROW(uint8_t)
3600iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
3601 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
3602 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
3603{
3604 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3605#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3606 AssertMsg( fSkipLivenessAssert
3607 || pReNative->idxCurCall == 0
3608 || enmGstReg == kIemNativeGstReg_Pc
3609 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3610 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3611 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
3612 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3613 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
3614 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3615#endif
3616 RT_NOREF(fSkipLivenessAssert);
3617#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3618 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3619#endif
3620 uint32_t const fRegMask = !fNoVolatileRegs
3621 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
3622 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3623
3624 /*
3625 * First check if the guest register value is already in a host register.
3626 */
3627 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3628 {
3629 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3630 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3631 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3632 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3633
3634 /* It's not supposed to be allocated... */
3635 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3636 {
3637 /*
3638 * If the register will trash the guest shadow copy, try find a
3639 * completely unused register we can use instead. If that fails,
3640 * we need to disassociate the host reg from the guest reg.
3641 */
3642 /** @todo would be nice to know if preserving the register is in any way helpful. */
3643 /* If the purpose is calculations, try duplicate the register value as
3644 we'll be clobbering the shadow. */
3645 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3646 && ( ~pReNative->Core.bmHstRegs
3647 & ~pReNative->Core.bmHstRegsWithGstShadow
3648 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3649 {
3650 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
3651
3652 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3653
3654 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3655 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3656 g_apszIemNativeHstRegNames[idxRegNew]));
3657 idxReg = idxRegNew;
3658 }
3659 /* If the current register matches the restrictions, go ahead and allocate
3660 it for the caller. */
3661 else if (fRegMask & RT_BIT_32(idxReg))
3662 {
3663 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3664 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3665 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3666 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3667 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
3668 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3669 else
3670 {
3671 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3672 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3673 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3674 }
3675 }
3676 /* Otherwise, allocate a register that satisfies the caller and transfer
3677 the shadowing if compatible with the intended use. (This basically
3678 means the caller wants a non-volatile register (RSP push/pop scenario).)
3679 else
3680 {
3681 Assert(fNoVolatileRegs);
3682 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
3683 !fNoVolatileRegs
3684 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
3685 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3686 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3687 {
3688 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3689 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
3690 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
3691 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3692 }
3693 else
3694 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3695 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3696 g_apszIemNativeHstRegNames[idxRegNew]));
3697 idxReg = idxRegNew;
3698 }
3699 }
3700 else
3701 {
3702 /*
3703 * Oops. Shadowed guest register already allocated!
3704 *
3705 * Allocate a new register, copy the value and, if updating, the
3706 * guest shadow copy assignment to the new register.
3707 */
3708 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3709 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3710 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
3711 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
3712
3713 /** @todo share register for readonly access. */
3714 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
3715 enmIntendedUse == kIemNativeGstRegUse_Calculation);
3716
3717 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3718 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3719
3720 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3721 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3722 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3723 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3724 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3725 else
3726 {
3727 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3728 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3729 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3730 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3731 }
3732 idxReg = idxRegNew;
3733 }
3734 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
3735
3736#ifdef VBOX_STRICT
3737 /* Strict builds: Check that the value is correct. */
3738 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3739#endif
3740
3741#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3742 /** @todo r=aeichner Implement for registers other than GPR as well. */
3743 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3744 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3745 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3746 && enmGstReg <= kIemNativeGstReg_GprLast)
3747 || enmGstReg == kIemNativeGstReg_MxCsr))
3748 {
3749# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3750 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3751 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
3752# endif
3753 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3754 }
3755#endif
3756
3757 return idxReg;
3758 }
3759
3760 /*
3761 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3762 */
3763 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3764
3765 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3766 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3767
3768 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3769 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3770 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3771 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3772
3773#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3774 /** @todo r=aeichner Implement for registers other than GPR as well. */
3775 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3776 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3777 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3778 && enmGstReg <= kIemNativeGstReg_GprLast)
3779 || enmGstReg == kIemNativeGstReg_MxCsr))
3780 {
3781# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3782 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3783 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
3784# endif
3785 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3786 }
3787#endif
3788
3789 return idxRegNew;
3790}
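/*
 * A minimal usage sketch for the guest register allocator above: acquire a
 * host register shadowing a guest GPR for update; any required load is
 * emitted automatically and, with delayed writeback enabled, the register is
 * marked dirty and flushed later by iemNativeRegFlushPendingWrite or
 * iemNativeRegFlushDirtyGuest.  kIemNativeGstReg_GprFirst is used purely as
 * an example of a GPR entry; kept under '#if 0' so it is never compiled.
 */
#if 0 /* illustrative sketch */
static uint32_t iemNativeSketchUpdateGuestGpr(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t const idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_GprFirst,
                                                           kIemNativeGstRegUse_ForUpdate);
    /* ... emit code modifying idxReg; without delayed writeback the caller
           has to emit the store back to CPUMCTX itself ... */
    iemNativeRegFreeTmp(pReNative, idxReg);
    return off;
}
#endif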
3791
3792
3793/**
3794 * Allocates a temporary host general purpose register that already holds the
3795 * given guest register value.
3796 *
3797 * The use case for this function is places where the shadowing state cannot be
3798 * modified due to branching and such. This will fail if we don't have a
3799 * current shadow copy handy or if it's incompatible. The only code that will
3800 * be emitted here is value checking code in strict builds.
3801 *
3802 * The intended use can only be readonly!
3803 *
3804 * @returns The host register number, UINT8_MAX if not present.
3805 * @param pReNative The native recompile state.
3806 * @param poff Pointer to the instruction buffer offset.
3807 * Will be updated in strict builds if a register is
3808 * found.
3809 * @param enmGstReg The guest register that is to be fetched.
3810 * @note In strict builds, this may throw instruction buffer growth failures.
3811 * Non-strict builds will not throw anything.
3812 * @sa iemNativeRegAllocTmpForGuestReg
3813 */
3814DECL_HIDDEN_THROW(uint8_t)
3815iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3816{
3817 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3818#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3819 AssertMsg( pReNative->idxCurCall == 0
3820 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3821 || enmGstReg == kIemNativeGstReg_Pc
3822 || enmGstReg == kIemNativeGstReg_EFlags /** @todo EFlags shadowing+liveness is weird and needs fixing (@bugref{10720}) */,
3823 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3824#endif
3825
3826 /*
3827 * First check if the guest register value is already in a host register.
3828 */
3829 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3830 {
3831 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3832 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3833 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3834 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3835
3836 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3837 {
3838 /*
3839 * We only do readonly use here, so easy compared to the other
3840 * variant of this code.
3841 */
3842 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3843 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3844 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3845 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3846 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3847
3848#ifdef VBOX_STRICT
3849 /* Strict builds: Check that the value is correct. */
3850 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3851#else
3852 RT_NOREF(poff);
3853#endif
3854 return idxReg;
3855 }
3856 }
3857
3858 return UINT8_MAX;
3859}
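/*
 * A minimal sketch of the read-only fast path: use the shadow only if it is
 * already in a host register, otherwise fall back to the allocating variant.
 * The choice of kIemNativeGstReg_Pc and the helper name are illustrative;
 * kept under '#if 0' so it is never compiled.
 */
#if 0 /* illustrative sketch */
static uint32_t iemNativeSketchReadGuestRegIfCached(PIEMRECOMPILERSTATE pReNative, uint32_t off)
{
    uint8_t idxReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off, kIemNativeGstReg_Pc);
    if (idxReg == UINT8_MAX)
        idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ReadOnly);
    /* ... read-only use of idxReg ... */
    iemNativeRegFreeTmp(pReNative, idxReg);
    return off;
}
#endif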
3860
3861
3862/**
3863 * Allocates argument registers for a function call.
3864 *
3865 * @returns New code buffer offset on success; throws VBox status code on failure, so no
3866 * need to check the return value.
3867 * @param pReNative The native recompile state.
3868 * @param off The current code buffer offset.
3869 * @param cArgs The number of arguments the function call takes.
3870 */
3871DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3872{
3873 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
3874 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
3875 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3876 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3877
3878 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3879 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3880 else if (cArgs == 0)
3881 return off;
3882
3883 /*
3884 * Do we get lucky and all registers are free and not shadowing anything?
3885 */
3886 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
3887 for (uint32_t i = 0; i < cArgs; i++)
3888 {
3889 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3890 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3891 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3892 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3893 }
3894 /*
3895 * Okay, not lucky so we have to free up the registers.
3896 */
3897 else
3898 for (uint32_t i = 0; i < cArgs; i++)
3899 {
3900 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3901 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
3902 {
3903 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3904 {
3905 case kIemNativeWhat_Var:
3906 {
3907 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3908 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3909 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
3910 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3911 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
3912#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3913 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3914#endif
3915
3916 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
3917 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3918 else
3919 {
3920 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3921 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3922 }
3923 break;
3924 }
3925
3926 case kIemNativeWhat_Tmp:
3927 case kIemNativeWhat_Arg:
3928 case kIemNativeWhat_rc:
3929 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3930 default:
3931 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
3932 }
3933
3934 }
3935 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3936 {
3937 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3938 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3939 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3940#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3941 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3942#endif
3943 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3944 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3945 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3946 }
3947 else
3948 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3949 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3950 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3951 }
3952 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
3953 return off;
3954}
3955
3956
3957DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
3958
3959
3960#if 0
3961/**
3962 * Frees a register assignment of any type.
3963 *
3964 * @param pReNative The native recompile state.
3965 * @param idxHstReg The register to free.
3966 *
3967 * @note Does not update variables.
3968 */
3969DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3970{
3971 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3972 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3973 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
3974 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
3975 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
3976 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
3977 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
3978 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
3979 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
3980 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
3981 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3982 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3983 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3984 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3985
3986 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3987 /* no flushing, right:
3988 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3989 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3990 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3991 */
3992}
3993#endif
3994
3995
3996/**
3997 * Frees a temporary register.
3998 *
3999 * Any shadow copies of guest registers assigned to the host register will not
4000 * be flushed by this operation.
4001 */
4002DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4003{
4004 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4005 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4006 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4007 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4008 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4009}
4010
4011
4012/**
4013 * Frees a temporary immediate register.
4014 *
4015 * It is assumed that the caller has not modified the register, so it still holds
4016 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4017 */
4018DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4019{
4020 iemNativeRegFreeTmp(pReNative, idxHstReg);
4021}
4022
4023
4024/**
4025 * Frees a register assigned to a variable.
4026 *
4027 * The register will be disassociated from the variable.
4028 */
4029DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4030{
4031 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4032 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4033 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4034 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4035 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4036#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4037 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4038#endif
4039
4040 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4041 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4042 if (!fFlushShadows)
4043 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4044 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4045 else
4046 {
4047 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4048 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4049#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4050 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
4051#endif
4052 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4053 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4054 uint64_t fGstRegShadows = fGstRegShadowsOld;
4055 while (fGstRegShadows)
4056 {
4057 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4058 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4059
4060 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4061 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4062 }
4063 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4064 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4065 }
4066}
4067
4068
4069#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4070# if defined(LOG_ENABLED) || defined(IEMNATIVE_WITH_TB_DEBUG_INFO)
4071/** Host CPU SIMD register names. */
4072DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4073{
4074# ifdef RT_ARCH_AMD64
4075 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4076# elif defined(RT_ARCH_ARM64)
4077 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4078 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4079# else
4080# error "port me"
4081# endif
4082};
4083# endif
4084
4085
4086/**
4087 * Frees a SIMD register assigned to a variable.
4088 *
4089 * The register will be disassociated from the variable.
4090 */
4091DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4092{
4093 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
4094 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4095 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
4096 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4097 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4098 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4099
4100 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4101 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
4102 if (!fFlushShadows)
4103 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4104 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
4105 else
4106 {
4107 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4108 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
4109 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4110 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4111 uint64_t fGstRegShadows = fGstRegShadowsOld;
4112 while (fGstRegShadows)
4113 {
4114 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4115 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4116
4117 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4118 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4119 }
4120 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4121 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4122 }
4123}
4124
4125
4126/**
4127 * Reassigns a variable to a different SIMD register specified by the caller.
4128 *
4129 * @returns The new code buffer position.
4130 * @param pReNative The native recompile state.
4131 * @param off The current code buffer position.
4132 * @param idxVar The variable index.
4133 * @param idxRegOld The old host register number.
4134 * @param idxRegNew The new host register number.
4135 * @param pszCaller The caller for logging.
4136 */
4137static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4138 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4139{
4140 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4141 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4142 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4143 RT_NOREF(pszCaller);
4144
4145 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4146 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
4147 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
4148
4149 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4150 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4151 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4152
4153 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4154 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
4156
4157 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
4158 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
4159 else
4160 {
4161 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
4162 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
4163 }
4164
4165 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4166 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4167 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
4168 if (fGstRegShadows)
4169 {
4170 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4171 | RT_BIT_32(idxRegNew);
4172 while (fGstRegShadows)
4173 {
4174 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4175 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4176
4177 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
4178 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
4179 }
4180 }
4181
4182 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4183 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4184 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
4185 return off;
4186}
4187
4188
4189/**
4190 * Moves a variable to a different register or spills it onto the stack.
4191 *
4192 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4193 * kinds can easily be recreated if needed later.
4194 *
4195 * @returns The new code buffer position.
4196 * @param pReNative The native recompile state.
4197 * @param off The current code buffer position.
4198 * @param idxVar The variable index.
4199 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4200 * call-volatile registers.
4201 */
4202DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4203 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
4204{
4205 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4206 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4207 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4208 Assert(!pVar->fRegAcquired);
4209 Assert(!pVar->fSimdReg);
4210
4211 uint8_t const idxRegOld = pVar->idxReg;
4212 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
4213 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
4214 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4215 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
4216 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
4217 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4218 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
4219 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4220 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4221 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4222
4223 /** @todo Add statistics on this.*/
4224 /** @todo Implement basic variable liveness analysis (python) so variables
4225 * can be freed immediately once they are no longer used. Without this we
4226 * risk trashing registers and stack for dead variables.
4227 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4228
4229 /*
4230 * First try move it to a different register, as that's cheaper.
4231 */
4232 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4233 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
4234 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
4235 if (fRegs)
4236 {
4237 /* Avoid using shadow registers, if possible. */
4238 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
4239 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
4240 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4241 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
4242 }
4243
4244 /*
4245 * Otherwise we must spill the register onto the stack.
4246 */
4247 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4248 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4249 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4250
4251 if (pVar->cbVar == sizeof(RTUINT128U))
4252 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4253 else
4254 {
4255 Assert(pVar->cbVar == sizeof(RTUINT256U));
4256 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4257 }
4258
4259 pVar->idxReg = UINT8_MAX;
4260 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4261 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
4262 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4263 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4264 return off;
4265}
4266
4267
4268/**
4269 * Called right before emitting a call instruction to move anything important
4270 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
4271 * optionally freeing argument variables.
4272 *
4273 * @returns New code buffer offset, UINT32_MAX on failure.
4274 * @param pReNative The native recompile state.
4275 * @param off The code buffer offset.
4276 * @param cArgs The number of arguments the function call takes.
4277 *                      It is presumed that the host registers for these have
4278 *                      already been allocated and won't need moving, just
4279 *                      freeing.
4280 * @param fKeepVars Mask of variables that should keep their register
4281 * assignments. Caller must take care to handle these.
4282 */
4283DECL_HIDDEN_THROW(uint32_t)
4284iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4285{
4286 Assert(!cArgs); RT_NOREF(cArgs);
4287
4288 /* fKeepVars will reduce this mask. */
4289 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
4290
4291 /*
4292 * Move anything important out of volatile registers.
4293 */
4294 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4295#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
4296 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
4297#endif
4298 ;
4299
4300 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
4301 if (!fSimdRegsToMove)
4302 { /* likely */ }
4303 else
4304 {
4305 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
4306 while (fSimdRegsToMove != 0)
4307 {
4308 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
4309 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
4310
4311 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
4312 {
4313 case kIemNativeWhat_Var:
4314 {
4315                    uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
4316 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4317 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4318 Assert(pVar->idxReg == idxSimdReg);
4319 Assert(pVar->fSimdReg);
4320 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4321 {
4322 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
4323 idxVar, pVar->enmKind, pVar->idxReg));
4324 if (pVar->enmKind != kIemNativeVarKind_Stack)
4325 pVar->idxReg = UINT8_MAX;
4326 else
4327 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
4328 }
4329 else
4330 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
4331 continue;
4332 }
4333
4334 case kIemNativeWhat_Arg:
4335 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
4336 continue;
4337
4338 case kIemNativeWhat_rc:
4339 case kIemNativeWhat_Tmp:
4340 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
4341 continue;
4342
4343 case kIemNativeWhat_FixedReserved:
4344#ifdef RT_ARCH_ARM64
4345 continue; /* On ARM the upper half of the virtual 256-bit register. */
4346#endif
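                    /* On other hosts this falls through to the assertion below. */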
4347
4348 case kIemNativeWhat_FixedTmp:
4349 case kIemNativeWhat_pVCpuFixed:
4350 case kIemNativeWhat_pCtxFixed:
4351 case kIemNativeWhat_PcShadow:
4352 case kIemNativeWhat_Invalid:
4353 case kIemNativeWhat_End:
4354 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4355 }
4356 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4357 }
4358 }
4359
4360 /*
4361 * Do the actual freeing.
4362 */
4363 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
4364 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
4365 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
4366 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
4367
4368 /* If there are guest register shadows in any call-volatile register, we
4369       have to clear the corresponding guest register masks for each register. */
4370 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
4371 if (fHstSimdRegsWithGstShadow)
4372 {
4373 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4374 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
4375 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
4376 do
4377 {
4378 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
4379 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
4380
4381 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
4382
4383#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4384 /*
4385 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4386 * to call volatile registers).
4387 */
4388 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4389 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
4390 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
4391#endif
4392 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4393 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
4394
4395 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
4396 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
4397 } while (fHstSimdRegsWithGstShadow != 0);
4398 }
4399
4400 return off;
4401}
4402#endif
4403
4404
4405/**
4406 * Called right before emitting a call instruction to move anything important
4407 * out of call-volatile registers, free and flush the call-volatile registers,
4408 * optionally freeing argument variables.
4409 *
4410 * @returns New code buffer offset, UINT32_MAX on failure.
4411 * @param pReNative The native recompile state.
4412 * @param off The code buffer offset.
4413 * @param cArgs The number of arguments the function call takes.
4414 *                      It is presumed that the host registers for these have
4415 *                      already been allocated and won't need moving, just
4416 *                      freeing.
4417 * @param fKeepVars Mask of variables that should keep their register
4418 * assignments. Caller must take care to handle these.
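 *
 * Rough call-site sketch (illustrative only; the real call emitters in this
 * file thread more state through than shown here, and fGstRegsClobbered is a
 * placeholder for whatever guest shadow set the caller wants dropped after
 * the call):
 * @code
 *      off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2, 0);
 *      ... load the argument registers and emit the helper call ...
 *      iemNativeRegFlushGuestShadows(pReNative, fGstRegsClobbered);
 * @endcode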
4419 */
4420DECL_HIDDEN_THROW(uint32_t)
4421iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4422{
4423 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4424
4425 /* fKeepVars will reduce this mask. */
4426 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK;
4427
4428#ifdef RT_ARCH_ARM64
4429    AssertCompile(IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK == UINT32_C(0x37fff));
4430#endif
4431
4432 /*
4433 * Move anything important out of volatile registers.
4434 */
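    /* Only the first RT_ELEMENTS(g_aidxIemNativeCallRegs) arguments are passed in
       registers, so clamp cArgs when computing the set of registers to preserve. */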
4435 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4436 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4437 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK
4438#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4439 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4440#endif
4441 & ~g_afIemNativeCallRegs[cArgs];
4442
4443 fRegsToMove &= pReNative->Core.bmHstRegs;
4444 if (!fRegsToMove)
4445 { /* likely */ }
4446 else
4447 {
4448 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4449 while (fRegsToMove != 0)
4450 {
4451 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4452 fRegsToMove &= ~RT_BIT_32(idxReg);
4453
4454 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4455 {
4456 case kIemNativeWhat_Var:
4457 {
4458 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4459 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4460 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4461 Assert(pVar->idxReg == idxReg);
4462#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4463 Assert(!pVar->fSimdReg);
4464#endif
4465 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4466 {
4467 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4468 idxVar, pVar->enmKind, pVar->idxReg));
4469 if (pVar->enmKind != kIemNativeVarKind_Stack)
4470 pVar->idxReg = UINT8_MAX;
4471 else
4472 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4473 }
4474 else
4475 fRegsToFree &= ~RT_BIT_32(idxReg);
4476 continue;
4477 }
4478
4479 case kIemNativeWhat_Arg:
4480 AssertMsgFailed(("What?!?: %u\n", idxReg));
4481 continue;
4482
4483 case kIemNativeWhat_rc:
4484 case kIemNativeWhat_Tmp:
4485 AssertMsgFailed(("Missing free: %u\n", idxReg));
4486 continue;
4487
4488 case kIemNativeWhat_FixedTmp:
4489 case kIemNativeWhat_pVCpuFixed:
4490 case kIemNativeWhat_pCtxFixed:
4491 case kIemNativeWhat_PcShadow:
4492 case kIemNativeWhat_FixedReserved:
4493 case kIemNativeWhat_Invalid:
4494 case kIemNativeWhat_End:
4495 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4496 }
4497 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4498 }
4499 }
4500
4501 /*
4502 * Do the actual freeing.
4503 */
4504 if (pReNative->Core.bmHstRegs & fRegsToFree)
4505 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4506 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4507 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4508
4509 /* If there are guest register shadows in any call-volatile register, we
4510       have to clear the corresponding guest register masks for each register. */
4511 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4512 if (fHstRegsWithGstShadow)
4513 {
4514 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4515 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK,
4516 fHstRegsWithGstShadow));
4517 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4518 do
4519 {
4520 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4521 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4522
4523 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4524
4525#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4526 /*
4527 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4528 * to call volatile registers).
4529 */
4530 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4531 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
4532 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4533#endif
4534
4535 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4536 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4537 } while (fHstRegsWithGstShadow != 0);
4538 }
4539
4540#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4541 /* Now for the SIMD registers, no argument support for now. */
4542 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
4543#endif
4544
4545 return off;
4546}
4547
4548
4549/**
4550 * Flushes a set of guest register shadow copies.
4551 *
4552 * This is usually done after calling a threaded function or a C-implementation
4553 * of an instruction.
4554 *
4555 * @param pReNative The native recompile state.
4556 * @param fGstRegs Set of guest registers to flush.
4557 */
4558DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4559{
4560 /*
4561 * Reduce the mask by what's currently shadowed
4562 */
4563 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4564 fGstRegs &= bmGstRegShadowsOld;
4565 if (fGstRegs)
4566 {
4567 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4568 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4569 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4570 if (bmGstRegShadowsNew)
4571 {
4572 /*
4573 * Partial.
4574 */
4575 do
4576 {
4577 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4578 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4579 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4580 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4581 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4582#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4583 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4584#endif
4585
4586 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4587 fGstRegs &= ~fInThisHstReg;
4588 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4589 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4590 if (!fGstRegShadowsNew)
4591 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4592 } while (fGstRegs != 0);
4593 }
4594 else
4595 {
4596 /*
4597 * Clear all.
4598 */
4599 do
4600 {
4601 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4602 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4603 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4604 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4605 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4606#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4607 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4608#endif
4609
4610 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4611 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4612 } while (fGstRegs != 0);
4613 pReNative->Core.bmHstRegsWithGstShadow = 0;
4614 }
4615 }
4616}
4617
4618
4619/**
4620 * Flushes guest register shadow copies held by a set of host registers.
4621 *
4622 * This is used with the TLB lookup code for ensuring that we don't carry on
4623 * with any guest shadows in volatile registers, as these will get corrupted by
4624 * a TLB miss.
4625 *
4626 * @param pReNative The native recompile state.
4627 * @param fHstRegs Set of host registers to flush guest shadows for.
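 *
 * Typically handed the call-volatile set, e.g. (illustrative sketch):
 * @code
 *      iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
 * @endcode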
4628 */
4629DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4630{
4631 /*
4632 * Reduce the mask by what's currently shadowed.
4633 */
4634 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4635 fHstRegs &= bmHstRegsWithGstShadowOld;
4636 if (fHstRegs)
4637 {
4638 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4639 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4640 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4641 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4642 if (bmHstRegsWithGstShadowNew)
4643 {
4644 /*
4645 * Partial (likely).
4646 */
4647 uint64_t fGstShadows = 0;
4648 do
4649 {
4650 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4651 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4652 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4653 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4654#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4655 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4656#endif
4657
4658 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4659 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4660 fHstRegs &= ~RT_BIT_32(idxHstReg);
4661 } while (fHstRegs != 0);
4662 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4663 }
4664 else
4665 {
4666 /*
4667 * Clear all.
4668 */
4669 do
4670 {
4671 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4672 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4673 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4674 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4675#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4676 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4677#endif
4678
4679 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4680 fHstRegs &= ~RT_BIT_32(idxHstReg);
4681 } while (fHstRegs != 0);
4682 pReNative->Core.bmGstRegShadows = 0;
4683 }
4684 }
4685}
4686
4687
4688/**
4689 * Restores guest shadow copies in volatile registers.
4690 *
4691 * This is used after calling a helper function (think TLB miss) to restore the
4692 * register state of volatile registers.
4693 *
4694 * @param pReNative The native recompile state.
4695 * @param off The code buffer offset.
4696 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4697 * be active (allocated) w/o asserting. Hack.
4698 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4699 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4700 */
4701DECL_HIDDEN_THROW(uint32_t)
4702iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4703{
4704 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4705 if (fHstRegs)
4706 {
4707 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4708 do
4709 {
4710 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4711
4712            /* It's not fatal if a register is active holding a variable that
4713               shadows a guest register, ASSUMING all pending guest register
4714               writes were flushed prior to the helper call.  However, we'll be
4715               emitting duplicate restores, so it wastes code space. */
4716 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4717 RT_NOREF(fHstRegsActiveShadows);
4718
4719 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4720#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4721 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
4722#endif
4723 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4724 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4725 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4726
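            /* If the host register shadows several guest registers they all hold
               the same value, so reloading the first one restores the register. */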
4727 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4728 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4729
4730 fHstRegs &= ~RT_BIT_32(idxHstReg);
4731 } while (fHstRegs != 0);
4732 }
4733 return off;
4734}
4735
4736
4737
4738
4739/*********************************************************************************************************************************
4740* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4741*********************************************************************************************************************************/
4742#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4743
4744/**
4745 * Info about shadowed guest SIMD register values.
4746 * @see IEMNATIVEGSTSIMDREG
4747 */
4748static struct
4749{
4750 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4751 uint32_t offXmm;
4752 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4753 uint32_t offYmm;
4754 /** Name (for logging). */
4755 const char *pszName;
4756} const g_aGstSimdShadowInfo[] =
4757{
4758#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
4759 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
4760 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
4761 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
4762 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
4763 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
4764 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
4765 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
4766 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
4767 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
4768 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
4769 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
4770 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
4771 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
4772 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
4773 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
4774 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
4775 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
4776#undef CPUMCTX_OFF_AND_SIZE
4777};
4778AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
4779
4780
4781/**
4782 * Frees a temporary SIMD register.
4783 *
4784 * Any shadow copies of guest registers assigned to the host register will not
4785 * be flushed by this operation.
4786 */
4787DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
4788{
4789 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
4790 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
4791 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
4792 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
4793 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
4794}
4795
4796
4797/**
4798 * Emits code to flush a pending write of the given guest SIMD register, if any, clearing its dirty state afterwards.
4799 *
4800 * @returns New code buffer offset.
4801 * @param pReNative The native recompile state.
4802 * @param off Current code buffer position.
4803 * @param enmGstSimdReg The guest SIMD register to flush.
4804 */
4805DECL_HIDDEN_THROW(uint32_t)
4806iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
4807{
4808 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4809
4810 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
4811 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
4812 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
4813 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
4814
4815 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
4816 {
4817 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4818 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
4819 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
4820 }
4821
4822 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
4823 {
4824 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4825 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
4826 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
4827 }
4828
4829 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
4830 return off;
4831}
4832
4833
4834/**
4835 * Flush the given set of guest SIMD registers if marked as dirty.
4836 *
4837 * @returns New code buffer offset.
4838 * @param pReNative The native recompile state.
4839 * @param off Current code buffer position.
4840 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
4841 */
4842DECL_HIDDEN_THROW(uint32_t)
4843iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
4844{
4845 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4846 & fFlushGstSimdReg;
4847 if (bmGstSimdRegShadowDirty)
4848 {
4849# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4850 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4851 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4852# endif
4853
4854 do
4855 {
4856 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4857 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4858 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4859 } while (bmGstSimdRegShadowDirty);
4860 }
4861
4862 return off;
4863}
4864
4865
4866#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4867/**
4868 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
4869 *
4870 * @returns New code buffer offset.
4871 * @param pReNative The native recompile state.
4872 * @param off Current code buffer position.
4873 * @param idxHstSimdReg The host SIMD register.
4874 *
4875 * @note This doesn't do any unshadowing of guest registers from the host register.
4876 */
4877DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
4878{
4879 /* We need to flush any pending guest register writes this host register shadows. */
4880 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4881 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
4882 if (bmGstSimdRegShadowDirty)
4883 {
4884# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4885 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4886 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4887# endif
4888
4889 do
4890 {
4891 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4892 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4893 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4894 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
4895 } while (bmGstSimdRegShadowDirty);
4896 }
4897
4898 return off;
4899}
4900#endif
4901
4902
4903/**
4904 * Locate a register, possibly freeing one up.
4905 *
4906 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4907 * failed.
4908 *
4909 * @returns Host register number on success.  Returns UINT8_MAX if no register was
4910 *          found; the caller is supposed to deal with this and raise an
4911 *          allocation-type specific status code (if desired).
4912 *
4913 * @throws  VBox status code if we run into trouble spilling a variable or
4914 *          recording debug info.  Does NOT throw anything if we're out of
4915 *          registers, though.
4916 */
4917static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4918 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
4919{
4920 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
4921 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4922 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4923
4924 /*
4925 * Try a freed register that's shadowing a guest register.
4926 */
4927 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
4928 if (fRegs)
4929 {
4930 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
4931
4932#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4933 /*
4934         * When we have liveness information, we use it to kick out all shadowed
4935         * guest registers that will not be needed any more in this TB.  If we're
4936 * lucky, this may prevent us from ending up here again.
4937 *
4938 * Note! We must consider the previous entry here so we don't free
4939 * anything that the current threaded function requires (current
4940 * entry is produced by the next threaded function).
4941 */
4942 uint32_t const idxCurCall = pReNative->idxCurCall;
4943 if (idxCurCall > 0)
4944 {
4945 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
4946
4947# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4948 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
4949 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
4950 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED */
4951#else
4952 /* Construct a mask of the registers not in the read or write state.
4953               Note! We could skip writes, if they aren't from us, as this is just
4954 a hack to prevent trashing registers that have just been written
4955 or will be written when we retire the current instruction. */
4956 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4957 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4958 & IEMLIVENESSBIT_MASK;
4959#endif
4960 /* If it matches any shadowed registers. */
4961 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
4962 {
4963 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
4964 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
4965 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
4966
4967 /* See if we've got any unshadowed registers we can return now. */
4968 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4969 if (fUnshadowedRegs)
4970 {
4971 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
4972 return (fPreferVolatile
4973 ? ASMBitFirstSetU32(fUnshadowedRegs)
4974 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4975 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4976 - 1;
4977 }
4978 }
4979 }
4980#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4981
4982 unsigned const idxReg = (fPreferVolatile
4983 ? ASMBitFirstSetU32(fRegs)
4984 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4985 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
4986 - 1;
4987
4988 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
4989 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
4990 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
4991 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
4992
4993 /* We need to flush any pending guest register writes this host SIMD register shadows. */
4994 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
4995
4996 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4997 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
4998 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
4999 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5000 return idxReg;
5001 }
5002
5003 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
5004
5005 /*
5006 * Try free up a variable that's in a register.
5007 *
5008     * We do two rounds here, first evacuating variables we don't need to
5009     * save on the stack, then in the second round moving things to the stack.
5010 */
5011 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
5012 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5013 {
5014 uint32_t fVars = pReNative->Core.bmVars;
5015 while (fVars)
5016 {
5017 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5018 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
5019 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non SIMD variables here. */
5020 continue;
5021
5022 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5023 && (RT_BIT_32(idxReg) & fRegMask)
5024 && ( iLoop == 0
5025 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
5026 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5027 && !pReNative->Core.aVars[idxVar].fRegAcquired)
5028 {
5029 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
5030 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5031 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5032 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5033 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
5034 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5035
5036                if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5037                {
5038                    uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
                        /* Spill with a vector store matching the variable size (cf. iemNativeSimdRegMoveOrSpillStackVar). */
                        if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
                            *poff = iemNativeEmitStoreVecRegByBpU128(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
                        else
                            *poff = iemNativeEmitStoreVecRegByBpU256(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5040                }
5041
5042 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5043 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5044
5045 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5046                pReNative->Core.bmGstSimdRegShadows        &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5047 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5048 return idxReg;
5049 }
5050 fVars &= ~RT_BIT_32(idxVar);
5051 }
5052 }
5053
5054 AssertFailed();
5055 return UINT8_MAX;
5056}
5057
5058
5059/**
5060 * Flushes a set of guest register shadow copies.
5061 *
5062 * This is usually done after calling a threaded function or a C-implementation
5063 * of an instruction.
5064 *
5065 * @param pReNative The native recompile state.
5066 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5067 */
5068DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5069{
5070 /*
5071 * Reduce the mask by what's currently shadowed
5072 */
5073 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5074 fGstSimdRegs &= bmGstSimdRegShadows;
5075 if (fGstSimdRegs)
5076 {
5077 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5078 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5079 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5080 if (bmGstSimdRegShadowsNew)
5081 {
5082 /*
5083 * Partial.
5084 */
5085 do
5086 {
5087 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5088 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5089 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5090 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5091 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5092 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5093
5094 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5095 fGstSimdRegs &= ~fInThisHstReg;
5096 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5097 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5098 if (!fGstRegShadowsNew)
5099 {
5100 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5101 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5102 }
5103 } while (fGstSimdRegs != 0);
5104 }
5105 else
5106 {
5107 /*
5108 * Clear all.
5109 */
5110 do
5111 {
5112 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5113 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5114 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5115 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5116 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5117 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5118
5119 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5120 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5121 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5122 } while (fGstSimdRegs != 0);
5123 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5124 }
5125 }
5126}
5127
5128
5129/**
5130 * Allocates a temporary host SIMD register.
5131 *
5132 * This may emit code to save register content onto the stack in order to free
5133 * up a register.
5134 *
5135 * @returns The host register number; throws VBox status code on failure,
5136 * so no need to check the return value.
5137 * @param pReNative The native recompile state.
5138 * @param poff Pointer to the variable with the code buffer position.
5139 *                      This will be updated if we need to move a variable from
5140 *                      register to stack in order to satisfy the request.
5141 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5142 * registers (@c true, default) or the other way around
5143 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
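 *
 * Illustrative pairing with the corresponding free (sketch only):
 * @code
 *      uint8_t const idxSimdRegTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
 *      ... emit code using idxSimdRegTmp ...
 *      iemNativeSimdRegFreeTmp(pReNative, idxSimdRegTmp);
 * @endcode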
5144 */
5145DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5146{
5147 /*
5148 * Try find a completely unused register, preferably a call-volatile one.
5149 */
5150 uint8_t idxSimdReg;
5151    uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5152                   & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5153 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5154 if (fRegs)
5155 {
5156 if (fPreferVolatile)
5157 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5158 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5159 else
5160 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5161 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5162 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5163 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5164
5165 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5166 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5167 }
5168 else
5169 {
5170 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5171 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5172 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5173 }
5174
5175 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5176 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5177}
5178
5179
5180/**
5181 * Alternative version of iemNativeSimdRegAllocTmp that takes a mask of acceptable
5182 * registers.
5183 *
5184 * @returns The host register number; throws VBox status code on failure,
5185 * so no need to check the return value.
5186 * @param pReNative The native recompile state.
5187 * @param poff Pointer to the variable with the code buffer position.
5188 *                      This will be updated if we need to move a variable from
5189 *                      register to stack in order to satisfy the request.
5190 * @param fRegMask Mask of acceptable registers.
5191 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5192 * registers (@c true, default) or the other way around
5193 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5194 */
5195DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5196 bool fPreferVolatile /*= true*/)
5197{
5198 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5199 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5200
5201 /*
5202 * Try find a completely unused register, preferably a call-volatile one.
5203 */
5204 uint8_t idxSimdReg;
5205 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5206 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5207 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5208 & fRegMask;
5209 if (fRegs)
5210 {
5211 if (fPreferVolatile)
5212 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5213 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5214 else
5215 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5216 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5217 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5218 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5219
5220 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5221 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5222 }
5223 else
5224 {
5225 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5226 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5227 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5228 }
5229
5230 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5231 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5232}
5233
5234
5235/**
5236 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5237 *
5238 * @param pReNative The native recompile state.
5239 * @param idxHstSimdReg The host SIMD register to update the state for.
5240 * @param enmLoadSz The load size to set.
5241 */
5242DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5243 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5244{
5245 /* Everything valid already? -> nothing to do. */
5246 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5247 return;
5248
5249 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5250 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5251 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5252 {
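        /* One 128-bit half is already valid and we're adding the complementary
           half, so the whole 256-bit value is valid now (asserted below). */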
5253 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5254 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5255 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5256 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5257 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5258 }
5259}
5260
5261
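/**
 * Worker for making sure @a idxHstSimdRegDst holds the @a enmLoadSzDst part of
 * guest register @a enmGstSimdRegDst: copies from @a idxHstSimdRegSrc when that
 * register already has the required part loaded and loads from CPUMCTX otherwise.
 */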
5262static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
5263 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5264{
5265    /* Easy case first: either the destination loads the same range the source has already loaded, or the source has loaded everything. */
5266 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5267 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5268 {
5269# ifdef RT_ARCH_ARM64
5270 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5271 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5272# endif
5273
5274 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5275 {
5276 switch (enmLoadSzDst)
5277 {
5278 case kIemNativeGstSimdRegLdStSz_256:
5279 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5280 break;
5281 case kIemNativeGstSimdRegLdStSz_Low128:
5282 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5283 break;
5284 case kIemNativeGstSimdRegLdStSz_High128:
5285 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5286 break;
5287 default:
5288 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5289 }
5290
5291 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5292 }
5293 }
5294 else
5295 {
5296        /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
5297 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
5298 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
5299 }
5300
5301 return off;
5302}
5303
5304
5305/**
5306 * Allocates a temporary host SIMD register for keeping a guest
5307 * SIMD register value.
5308 *
5309 * Since we may already have a register holding the guest register value,
5310 * code will be emitted to do the loading if that's not the case. Code may also
5311 * be emitted if we have to free up a register to satisfy the request.
5312 *
5313 * @returns The host register number; throws VBox status code on failure, so no
5314 * need to check the return value.
5315 * @param pReNative The native recompile state.
5316 * @param poff Pointer to the variable with the code buffer
5317 *                      position.  This will be updated if we need to move a
5318 * variable from register to stack in order to satisfy
5319 * the request.
5320 * @param   enmGstSimdReg   The guest SIMD register that is to be updated.
5321 * @param enmIntendedUse How the caller will be using the host register.
5322 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5323 * register is okay (default). The ASSUMPTION here is
5324 * that the caller has already flushed all volatile
5325 * registers, so this is only applied if we allocate a
5326 * new register.
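 *
 * Illustrative call (sketch only; iYRegDst stands in for whatever guest
 * register index the caller obtained from the decoder):
 * @code
 *      uint8_t const idxSimdReg = iemNativeSimdRegAllocTmpForGuestSimdReg(pReNative, &off,
 *                                                                         IEMNATIVEGSTSIMDREG_SIMD(iYRegDst),
 *                                                                         kIemNativeGstSimdRegLdStSz_Low128,
 *                                                                         kIemNativeGstRegUse_ForUpdate);
 * @endcode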
5327 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5328 */
5329DECL_HIDDEN_THROW(uint8_t)
5330iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5331 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5332 bool fNoVolatileRegs /*= false*/)
5333{
5334 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5335#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5336 AssertMsg( pReNative->idxCurCall == 0
5337 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5338 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5339 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5340 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5341 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5342 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5343#endif
5344#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5345 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5346#endif
5347 uint32_t const fRegMask = !fNoVolatileRegs
5348 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5349 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5350
5351 /*
5352 * First check if the guest register value is already in a host register.
5353 */
5354 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5355 {
5356 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5357 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5358 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5359 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5360
5361 /* It's not supposed to be allocated... */
5362 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5363 {
5364 /*
5365 * If the register will trash the guest shadow copy, try find a
5366 * completely unused register we can use instead. If that fails,
5367 * we need to disassociate the host reg from the guest reg.
5368 */
5369 /** @todo would be nice to know if preserving the register is in any way helpful. */
5370            /* If the purpose is calculations, try to duplicate the register value as
5371 we'll be clobbering the shadow. */
5372 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5373 && ( ~pReNative->Core.bmHstSimdRegs
5374 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5375 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5376 {
5377 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5378
5379 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5380
5381 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5382 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5383 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5384 idxSimdReg = idxRegNew;
5385 }
5386 /* If the current register matches the restrictions, go ahead and allocate
5387 it for the caller. */
5388 else if (fRegMask & RT_BIT_32(idxSimdReg))
5389 {
5390 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5391 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5392 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5393 {
5394 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5395 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
5396 else
5397 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5398 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5399 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5400 }
5401 else
5402 {
5403 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5404 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5405 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5406 }
5407 }
5408 /* Otherwise, allocate a register that satisfies the caller and transfer
5409 the shadowing if compatible with the intended use. (This basically
5410               means the caller wants a non-volatile register (RSP push/pop scenario).) */
5411 else
5412 {
5413 Assert(fNoVolatileRegs);
5414 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5415 !fNoVolatileRegs
5416 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5417 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5418 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5419 {
5420 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5421                    Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5422 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5423 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5424 }
5425 else
5426 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5427 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5428 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5429 idxSimdReg = idxRegNew;
5430 }
5431 }
5432 else
5433 {
5434 /*
5435 * Oops. Shadowed guest register already allocated!
5436 *
5437 * Allocate a new register, copy the value and, if updating, the
5438 * guest shadow copy assignment to the new register.
5439 */
5440 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5441 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5442 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5443 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5444
5445 /** @todo share register for readonly access. */
5446 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5447 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5448
5449 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5450 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5451 else
5452 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5453
5454 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5455 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5456 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5457 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5458 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5459 else
5460 {
5461 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5462 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5463 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5464 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5465 }
5466 idxSimdReg = idxRegNew;
5467 }
5468 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5469
5470#ifdef VBOX_STRICT
5471 /* Strict builds: Check that the value is correct. */
5472 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5473 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5474#endif
5475
5476 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5477 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5478 {
5479# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5480 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5481 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
5482# endif
5483
5484 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5485 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5486 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5487 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5488 else
5489 {
5490 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5491 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5492 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5493 }
5494 }
5495
5496 return idxSimdReg;
5497 }
5498
5499 /*
5500     * Allocate a new register, load it with the guest value and designate it as a copy of the guest SIMD register.
5501 */
5502 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5503
5504 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5505 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5506 else
5507 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5508
5509 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5510 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5511
5512 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5513 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5514 {
5515# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5516 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5517 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
5518# endif
5519
5520 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5521 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5522 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5523 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5524 else
5525 {
5526 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5527 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5528 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5529 }
5530 }
5531
5532 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5533 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5534
5535 return idxRegNew;
5536}
5537
5538
5539/**
5540 * Flushes guest SIMD register shadow copies held by a set of host registers.
5541 *
5542 * This is used when calling an external helper, to ensure that we don't carry on
5543 * with any guest shadows in volatile registers, as these will get clobbered across the call.
5544 *
5545 * @param pReNative The native recompile state.
5546 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
5547 */
5548DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
5549{
5550 /*
5551 * Reduce the mask by what's currently shadowed.
5552 */
5553 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
5554 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
5555 if (fHstSimdRegs)
5556 {
5557 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
5558 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5559 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
5560 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
5561 if (bmHstSimdRegsWithGstShadowNew)
5562 {
5563 /*
5564 * Partial (likely).
5565 */
5566 uint64_t fGstShadows = 0;
5567 do
5568 {
5569 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5570 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5571 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5572 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5573 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5574 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5575
5576 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5577 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5578 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5579 } while (fHstSimdRegs != 0);
5580 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
5581 }
5582 else
5583 {
5584 /*
5585 * Clear all.
5586 */
5587 do
5588 {
5589 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5590 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5591 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5592 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5593 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5594 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5595
5596 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5597 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5598 } while (fHstSimdRegs != 0);
5599 pReNative->Core.bmGstSimdRegShadows = 0;
5600 }
5601 }
5602}
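
/*
 * Illustrative sketch only (not compiled, hence the #if 0): roughly how a caller would use the
 * flush-by-host-mask helper right before invoking an external helper that may clobber
 * caller-saved SIMD registers.  The mask name IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK and the
 * helper pfnSomeSimdHelper are assumed/hypothetical here; real call sites pass whatever
 * volatile-register mask and helper apply.
 */
#if 0
    /* Drop guest SIMD shadows living in caller-saved host SIMD registers, so the
       shadowing bookkeeping stays valid across the helper call below. */
    iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
    off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)pfnSomeSimdHelper);
#endif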
5603#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5604
5605
5606
5607/*********************************************************************************************************************************
5608* Code emitters for flushing pending guest register writes and sanity checks *
5609*********************************************************************************************************************************/
5610
5611#ifdef VBOX_STRICT
5612/**
5613 * Does internal register allocator sanity checks.
5614 */
5615DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5616{
5617 /*
5618 * Iterate host registers building a guest shadowing set.
5619 */
5620 uint64_t bmGstRegShadows = 0;
5621 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5622 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5623 while (bmHstRegsWithGstShadow)
5624 {
5625 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5626 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5627 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5628
5629 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5630 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5631 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5632 bmGstRegShadows |= fThisGstRegShadows;
5633 while (fThisGstRegShadows)
5634 {
5635 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5636 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5637 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5638 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5639 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5640 }
5641 }
5642 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5643 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5644 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5645
5646 /*
5647 * Now the other way around, checking the guest to host index array.
5648 */
5649 bmHstRegsWithGstShadow = 0;
5650 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5651 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5652 while (bmGstRegShadows)
5653 {
5654 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5655 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5656 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5657
5658 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5659 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5660 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5661 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5662 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5663 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5664 }
5665 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5666 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5667 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5668}
5669#endif /* VBOX_STRICT */
5670
5671
5672/**
5673 * Flushes any delayed guest register writes.
5674 *
5675 * This must be called prior to calling CImpl functions and any helpers that use
5676 * the guest state (like raising exceptions) and such.
5677 *
5678 * @note This function does not flush any shadowing information for guest registers. This needs to be done by
5679 * the caller if it wishes to do so.
5680 */
5681DECL_HIDDEN_THROW(uint32_t)
5682iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
5683{
5684#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5685 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5686 off = iemNativeEmitPcWriteback(pReNative, off);
5687#else
5688 RT_NOREF(pReNative, fGstShwExcept);
5689#endif
5690
5691#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5692 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
5693#endif
5694
5695#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5696 off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
5697#endif
5698
5699 return off;
5700}
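
/*
 * Illustrative sketch only (not compiled): the two exception masks let a caller keep selected
 * pending writes unflushed.  For instance, a caller that intends to update RIP itself could
 * keep the delayed PC write pending while flushing everything else:
 */
#if 0
    off = iemNativeRegFlushPendingWritesSlow(pReNative, off,
                                             RT_BIT_64(kIemNativeGstReg_Pc) /*fGstShwExcept*/,
                                             0 /*fGstSimdShwExcept*/);
#endif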
5701
5702#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5703
5704# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
5705
5706/**
5707 * Checks if the value in @a idxPcReg matches IEMCPU::uPcUpdatingDebug.
5708 */
5709DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcDebugCheckWithReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxPcReg)
5710{
5711 Assert(idxPcReg != IEMNATIVE_REG_FIXED_TMP0);
5712 Assert(pReNative->Core.fDebugPcInitialized);
5713
5714 /* cmp [pVCpu->iem.s.uPcUpdatingDebug], pcreg */
5715# ifdef RT_ARCH_AMD64
5716 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5717 pCodeBuf[off++] = X86_OP_REX_W | (idxPcReg >= 8 ? X86_OP_REX_R : 0);
5718 pCodeBuf[off++] = 0x3b;
5719 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, idxPcReg & 7, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
5720# else
5721 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5722 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
5723 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, idxPcReg);
5724# endif
5725
5726 uint32_t offFixup = off;
5727 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 1, kIemNativeInstrCond_e);
5728 off = iemNativeEmitBrkEx(pCodeBuf, off, UINT32_C(0x2200));
5729 iemNativeFixupFixedJump(pReNative, offFixup, off);
5730
5731 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5732 return off;
5733}
5734
5735
5736/**
5737 * Checks that the current RIP+offPc matches IEMCPU::uPcUpdatingDebug.
5738 */
5739DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcDebugCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5740{
5741 if (pReNative->Core.fDebugPcInitialized)
5742 {
5743 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc);
5744 if (pReNative->Core.offPc)
5745 {
5746 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5747 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 8);
5748 off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxTmpReg, idxPcReg, pReNative->Core.offPc);
5749 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5750 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxTmpReg);
5751 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5752 }
5753 else
5754 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
5755 iemNativeRegFreeTmp(pReNative, idxPcReg);
5756 }
5757 return off;
5758}
5759
5760# endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG */
5761
5762/**
5763 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
5764 */
5765DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5766{
5767 Assert(pReNative->Core.offPc);
5768# if !defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && !defined(VBOX_WITH_STATISTICS)
5769 Log4(("iemNativeEmitPcWritebackSlow: offPc=%#RX64 -> 0; off=%#x\n", pReNative->Core.offPc, off));
5770# else
5771 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
5772 uint8_t idxCurCall = pReNative->idxCurCall;
5773 uint8_t idxInstr = pReNative->pTbOrg->Thrd.paCalls[idxCurCall].idxInstr; /* unreliable */
5774 while (idxInstr == 0 && idxInstr + 1 < idxOldInstrPlusOne && idxCurCall > 0)
5775 idxInstr = pReNative->pTbOrg->Thrd.paCalls[--idxCurCall].idxInstr;
5776 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
5777 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
5778 Log4(("iemNativeEmitPcWritebackSlow: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u\n",
5779 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped));
5780
5781 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
5782
5783# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5784 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5785 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
5786# endif
5787# endif
5788
5789# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5790 /* Allocate a temporary PC register. */
5791 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5792
5793 /* Perform the addition and store the result. */
5794 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5795 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5796# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
5797 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
5798# endif
5799
5800 /* Free but don't flush the PC register. */
5801 iemNativeRegFreeTmp(pReNative, idxPcReg);
5802# else
5803 /* Compare the shadow with the context value, they should match. */
5804 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5805 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5806# endif
5807
5808 pReNative->Core.offPc = 0;
5809
5810 return off;
5811}
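
/*
 * Illustrative sketch only (not compiled): the native code emitted above is conceptually
 * equivalent to the following C, executed only when the PC value is actually needed rather
 * than after every recompiled instruction:
 */
#if 0
    pVCpu->cpum.GstCtx.rip += pReNative->Core.offPc;    /* add the accumulated instruction bytes */
    /* (the recompile-time accumulator pReNative->Core.offPc is then reset to zero above) */
#endif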
5812
5813#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5814
5815
5816/*********************************************************************************************************************************
5817* Code Emitters (larger snippets) *
5818*********************************************************************************************************************************/
5819
5820/**
5821 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5822 * extending to 64-bit width.
5823 *
5824 * @returns New code buffer offset on success, UINT32_MAX on failure.
5825 * @param pReNative The recompiler state.
5826 * @param off The current code buffer position.
5827 * @param idxHstReg The host register to load the guest register value into.
5828 * @param enmGstReg The guest register to load.
5829 *
5830 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5831 * that is something the caller needs to do if applicable.
5832 */
5833DECL_HIDDEN_THROW(uint32_t)
5834iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5835{
5836 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5837 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5838
5839 switch (g_aGstShadowInfo[enmGstReg].cb)
5840 {
5841 case sizeof(uint64_t):
5842 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5843 case sizeof(uint32_t):
5844 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5845 case sizeof(uint16_t):
5846 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5847#if 0 /* not present in the table. */
5848 case sizeof(uint8_t):
5849 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5850#endif
5851 default:
5852 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5853 }
5854}
5855
5856
5857#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5858/**
5859 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5860 *
5861 * @returns New code buffer offset on success, UINT32_MAX on failure.
5862 * @param pReNative The recompiler state.
5863 * @param off The current code buffer position.
5864 * @param idxHstSimdReg The host register to load the guest register value into.
5865 * @param enmGstSimdReg The guest register to load.
5866 * @param enmLoadSz The load size of the register.
5867 *
5868 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
5869 * that is something the caller needs to do if applicable.
5870 */
5871DECL_HIDDEN_THROW(uint32_t)
5872iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5873 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5874{
5875 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5876
5877 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5878 switch (enmLoadSz)
5879 {
5880 case kIemNativeGstSimdRegLdStSz_256:
5881 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5882 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5883 case kIemNativeGstSimdRegLdStSz_Low128:
5884 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5885 case kIemNativeGstSimdRegLdStSz_High128:
5886 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5887 default:
5888 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5889 }
5890}
5891#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5892
5893#ifdef VBOX_STRICT
5894
5895/**
5896 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
5897 *
5898 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5899 * Trashes EFLAGS on AMD64.
5900 */
5901DECL_HIDDEN_THROW(uint32_t)
5902iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5903{
5904# ifdef RT_ARCH_AMD64
5905 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5906
5907 /* rol reg64, 32 */
5908 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5909 pbCodeBuf[off++] = 0xc1;
5910 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5911 pbCodeBuf[off++] = 32;
5912
5913 /* test reg32, ffffffffh */
5914 if (idxReg >= 8)
5915 pbCodeBuf[off++] = X86_OP_REX_B;
5916 pbCodeBuf[off++] = 0xf7;
5917 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5918 pbCodeBuf[off++] = 0xff;
5919 pbCodeBuf[off++] = 0xff;
5920 pbCodeBuf[off++] = 0xff;
5921 pbCodeBuf[off++] = 0xff;
5922
5923 /* je/jz +1 */
5924 pbCodeBuf[off++] = 0x74;
5925 pbCodeBuf[off++] = 0x01;
5926
5927 /* int3 */
5928 pbCodeBuf[off++] = 0xcc;
5929
5930 /* rol reg64, 32 */
5931 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5932 pbCodeBuf[off++] = 0xc1;
5933 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5934 pbCodeBuf[off++] = 32;
5935
5936# elif defined(RT_ARCH_ARM64)
5937 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5938 /* lsr tmp0, reg64, #32 */
5939 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5940 /* cbz tmp0, +1 */
5941 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5942 /* brk #0x1100 */
5943 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5944
5945# else
5946# error "Port me!"
5947# endif
5948 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5949 return off;
5950}
5951
5952
5953/**
5954 * Emitting code that checks that the content of register @a idxReg is the same
5955 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5956 * instruction if that's not the case.
5957 *
5958 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5959 * Trashes EFLAGS on AMD64.
5960 */
5961DECL_HIDDEN_THROW(uint32_t)
5962iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5963{
5964#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5965 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5966 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
5967 return off;
5968#endif
5969
5970# ifdef RT_ARCH_AMD64
5971 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5972
5973 /* cmp reg, [mem] */
5974 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5975 {
5976 if (idxReg >= 8)
5977 pbCodeBuf[off++] = X86_OP_REX_R;
5978 pbCodeBuf[off++] = 0x38;
5979 }
5980 else
5981 {
5982 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5983 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5984 else
5985 {
5986 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5987 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5988 else
5989 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5990 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5991 if (idxReg >= 8)
5992 pbCodeBuf[off++] = X86_OP_REX_R;
5993 }
5994 pbCodeBuf[off++] = 0x39;
5995 }
5996 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
5997
5998 /* je/jz +1 */
5999 pbCodeBuf[off++] = 0x74;
6000 pbCodeBuf[off++] = 0x01;
6001
6002 /* int3 */
6003 pbCodeBuf[off++] = 0xcc;
6004
6005 /* For values smaller than the register size, we must check that the rest
6006 of the register is all zeros. */
6007 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6008 {
6009 /* test reg64, imm32 */
6010 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6011 pbCodeBuf[off++] = 0xf7;
6012 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6013 pbCodeBuf[off++] = 0;
6014 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6015 pbCodeBuf[off++] = 0xff;
6016 pbCodeBuf[off++] = 0xff;
6017
6018 /* je/jz +1 */
6019 pbCodeBuf[off++] = 0x74;
6020 pbCodeBuf[off++] = 0x01;
6021
6022 /* int3 */
6023 pbCodeBuf[off++] = 0xcc;
6024 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6025 }
6026 else
6027 {
6028 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6029 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6030 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
6031 }
6032
6033# elif defined(RT_ARCH_ARM64)
6034 /* mov TMP0, [gstreg] */
6035 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6036
6037 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6038 /* sub tmp0, tmp0, idxReg */
6039 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6040 /* cbz tmp0, +1 */
6041 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6042 /* brk #0x1000+enmGstReg */
6043 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6044 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6045
6046# else
6047# error "Port me!"
6048# endif
6049 return off;
6050}
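
/*
 * Illustrative sketch only (not compiled): the strict check emitted above corresponds to the
 * following C-level assertion on the shadowed value.  uHstRegValue is a placeholder for the
 * value held in @a idxReg, the comparison width follows g_aGstShadowInfo[enmGstReg].cb, and
 * ASMBreakpoint() stands in for the emitted int3/brk.
 */
#if 0
    if (uHstRegValue != *(uint64_t const *)((uintptr_t)pVCpu + g_aGstShadowInfo[enmGstReg].off))
        ASMBreakpoint();    /* shadow register and CPUMCTX have diverged */
#endif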
6051
6052
6053# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6054# ifdef RT_ARCH_AMD64
6055/**
6056 * Helper for AMD64 to emit code which checks the low 128 bits of the given SIMD register against the given vCPU offset.
6057 */
6058DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
6059{
6060 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6061 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6062 if (idxSimdReg >= 8)
6063 pbCodeBuf[off++] = X86_OP_REX_R;
6064 pbCodeBuf[off++] = 0x0f;
6065 pbCodeBuf[off++] = 0x38;
6066 pbCodeBuf[off++] = 0x29;
6067 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
6068
6069 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6070 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6071 pbCodeBuf[off++] = X86_OP_REX_W
6072 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6073 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6074 pbCodeBuf[off++] = 0x0f;
6075 pbCodeBuf[off++] = 0x3a;
6076 pbCodeBuf[off++] = 0x16;
6077 pbCodeBuf[off++] = 0xeb;
6078 pbCodeBuf[off++] = 0x00;
6079
6080 /* cmp tmp0, 0xffffffffffffffff. */
6081 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6082 pbCodeBuf[off++] = 0x83;
6083 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6084 pbCodeBuf[off++] = 0xff;
6085
6086 /* je/jz +1 */
6087 pbCodeBuf[off++] = 0x74;
6088 pbCodeBuf[off++] = 0x01;
6089
6090 /* int3 */
6091 pbCodeBuf[off++] = 0xcc;
6092
6093 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6094 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6095 pbCodeBuf[off++] = X86_OP_REX_W
6096 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6097 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6098 pbCodeBuf[off++] = 0x0f;
6099 pbCodeBuf[off++] = 0x3a;
6100 pbCodeBuf[off++] = 0x16;
6101 pbCodeBuf[off++] = 0xeb;
6102 pbCodeBuf[off++] = 0x01;
6103
6104 /* cmp tmp0, 0xffffffffffffffff. */
6105 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6106 pbCodeBuf[off++] = 0x83;
6107 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6108 pbCodeBuf[off++] = 0xff;
6109
6110 /* je/jz +1 */
6111 pbCodeBuf[off++] = 0x74;
6112 pbCodeBuf[off++] = 0x01;
6113
6114 /* int3 */
6115 pbCodeBuf[off++] = 0xcc;
6116
6117 return off;
6118}
6119# endif
6120
6121
6122/**
6123 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6124 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6125 * instruction if that's not the case.
6126 *
6127 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6128 * Trashes EFLAGS on AMD64.
6129 */
6130DECL_HIDDEN_THROW(uint32_t)
6131iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6132 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6133{
6134 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6135 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6136 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6137 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6138 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6139 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6140 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6141 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6142 return off;
6143
6144# ifdef RT_ARCH_AMD64
6145 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6146 {
6147 /* movdqa vectmp0, idxSimdReg */
6148 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6149
6150 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6151
6152 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6153 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6154 }
6155
6156 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6157 {
6158 /* Due to the fact that CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
6159 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
6160
6161 /* vextracti128 vectmp0, idxSimdReg, 1 */
6162 pbCodeBuf[off++] = X86_OP_VEX3;
6163 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
6164 | X86_OP_VEX3_BYTE1_X
6165 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
6166 | 0x03; /* Opcode map */
6167 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
6168 pbCodeBuf[off++] = 0x39;
6169 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
6170 pbCodeBuf[off++] = 0x01;
6171
6172 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6173 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6174 }
6175# elif defined(RT_ARCH_ARM64)
6176 /* mov vectmp0, [gstreg] */
6177 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6178
6179 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6180 {
6181 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6182 /* eor vectmp0, vectmp0, idxSimdReg */
6183 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6184 /* uaddlv vectmp0, vectmp0.16B */
6185 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
6186 /* umov tmp0, vectmp0.H[0] */
6187 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6188 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6189 /* cbz tmp0, +1 */
6190 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6191 /* brk #0x1000+enmGstReg */
6192 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6193 }
6194
6195 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6196 {
6197 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6198 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6199 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6200 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
6201 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
6202 /* umov tmp0, (vectmp0 + 1).H[0] */
6203 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6204 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6205 /* cbz tmp0, +1 */
6206 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6207 /* brk #0x1000+enmGstReg */
6208 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6209 }
6210
6211# else
6212# error "Port me!"
6213# endif
6214
6215 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6216 return off;
6217}
6218# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6219
6220
6221/**
6222 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6223 * important bits.
6224 *
6225 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6226 * Trashes EFLAGS on AMD64.
6227 */
6228DECL_HIDDEN_THROW(uint32_t)
6229iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6230{
6231 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6232 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6233 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6234 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6235
6236#ifdef RT_ARCH_AMD64
6237 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6238
6239 /* je/jz +1 */
6240 pbCodeBuf[off++] = 0x74;
6241 pbCodeBuf[off++] = 0x01;
6242
6243 /* int3 */
6244 pbCodeBuf[off++] = 0xcc;
6245
6246# elif defined(RT_ARCH_ARM64)
6247 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6248
6249 /* b.eq +1 */
6250 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6251 /* brk #0x2000 */
6252 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6253
6254# else
6255# error "Port me!"
6256# endif
6257 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6258
6259 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6260 return off;
6261}
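
/*
 * Illustrative sketch only (not compiled): the strict check emitted above is equivalent to:
 */
#if 0
    if ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) != (fExec & IEMTB_F_KEY_MASK))
        ASMBreakpoint();    /* the TB was compiled for a different execution mode */
#endif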
6262
6263#endif /* VBOX_STRICT */
6264
6265
6266#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6267/**
6268 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6269 */
6270DECL_HIDDEN_THROW(uint32_t)
6271iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6272{
6273 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6274
6275 fEflNeeded &= X86_EFL_STATUS_BITS;
6276 if (fEflNeeded)
6277 {
6278# ifdef RT_ARCH_AMD64
6279 /* test dword [pVCpu + offVCpu], imm32 */
6280 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 13);
6281 if (fEflNeeded <= 0xff)
6282 {
6283 pCodeBuf[off++] = 0xf6;
6284 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6285 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6286 }
6287 else
6288 {
6289 pCodeBuf[off++] = 0xf7;
6290 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6291 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6292 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6293 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6294 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6295 }
6296
6297 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 3, kIemNativeInstrCond_e);
6298 pCodeBuf[off++] = 0xcc;
6299
6300 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6301
6302# else
6303 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6304 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6305 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6306# ifdef RT_ARCH_ARM64
6307 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6308 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6309# else
6310# error "Port me!"
6311# endif
6312 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6313# endif
6314 }
6315 return off;
6316}
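
/*
 * Illustrative sketch only (not compiled): the check emitted above corresponds to:
 */
#if 0
    if (pVCpu->iem.s.fSkippingEFlags & fEflNeeded)  /* fEflNeeded already masked by X86_EFL_STATUS_BITS */
        ASMBreakpoint();                            /* a consumer needs EFLAGS bits that were skipped */
#endif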
6317#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6318
6319
6320/**
6321 * Emits code for checking the return code of a call and rcPassUp, returning
6322 * from the code if either is non-zero.
6323 */
6324DECL_HIDDEN_THROW(uint32_t)
6325iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6326{
6327#ifdef RT_ARCH_AMD64
6328 /*
6329 * AMD64: eax = call status code.
6330 */
6331
6332 /* edx = rcPassUp */
6333 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6334# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6335 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6336# endif
6337
6338 /* edx = eax | rcPassUp */
6339 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6340 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6341 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6342 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6343
6344 /* Jump to non-zero status return path. */
6345 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6346
6347 /* done. */
6348
6349#elif RT_ARCH_ARM64
6350 /*
6351 * ARM64: w0 = call status code.
6352 */
6353# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6354 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6355# endif
6356 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6357
6358 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6359
6360 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6361
6362 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pu32CodeBuf, off, ARMV8_A64_REG_X4, true /*f64Bit*/,
6363 kIemNativeLabelType_NonZeroRetOrPassUp);
6364
6365#else
6366# error "port me"
6367#endif
6368 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6369 RT_NOREF_PV(idxInstr);
6370 return off;
6371}
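
/*
 * Illustrative sketch only (not compiled): the sequence emitted above is the native equivalent
 * of the following C logic, with rcCall standing in for the helper's return value (eax/w0):
 */
#if 0
    if ((rcCall | pVCpu->iem.s.rcPassUp) != 0)  /* helper failed or a status was passed up */
        goto NonZeroRetOrPassUp;                /* -> rc fiddling + common return path */
#endif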
6372
6373
6374/**
6375 * Emits a call to a CImpl function or something similar.
6376 */
6377DECL_HIDDEN_THROW(uint32_t)
6378iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6379 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6380{
6381 /* Writeback everything. */
6382 off = iemNativeRegFlushPendingWrites(pReNative, off);
6383
6384 /*
6385 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6386 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6387 */
6388 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6389 fGstShwFlush
6390 | RT_BIT_64(kIemNativeGstReg_Pc)
6391 | RT_BIT_64(kIemNativeGstReg_EFlags));
6392 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6393
6394 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6395
6396 /*
6397 * Load the parameters.
6398 */
6399#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6400 /* Special-case the hidden VBOXSTRICTRC return pointer. */
6401 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6402 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6403 if (cAddParams > 0)
6404 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6405 if (cAddParams > 1)
6406 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6407 if (cAddParams > 2)
6408 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6409 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6410
6411#else
6412 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6413 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6414 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6415 if (cAddParams > 0)
6416 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6417 if (cAddParams > 1)
6418 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6419 if (cAddParams > 2)
6420# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6421 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6422# else
6423 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6424# endif
6425#endif
6426
6427 /*
6428 * Make the call.
6429 */
6430 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6431
6432#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6433 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6434#endif
6435
6436#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6437 pReNative->Core.fDebugPcInitialized = false;
6438 Log4(("fDebugPcInitialized=false cimpl off=%#x (v2)\n", off));
6439#endif
6440
6441 /*
6442 * Check the status code.
6443 */
6444 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6445}
6446
6447
6448/**
6449 * Emits a call to a threaded worker function.
6450 */
6451DECL_HIDDEN_THROW(uint32_t)
6452iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6453{
6454 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6455
6456 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6457 off = iemNativeRegFlushPendingWrites(pReNative, off);
6458
6459 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6460 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6461
6462#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6463 /* The threaded function may throw / long jmp, so set current instruction
6464 number if we're counting. */
6465 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6466#endif
6467
6468 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6469
6470#ifdef RT_ARCH_AMD64
6471 /* Load the parameters and emit the call. */
6472# ifdef RT_OS_WINDOWS
6473# ifndef VBOXSTRICTRC_STRICT_ENABLED
6474 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6475 if (cParams > 0)
6476 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6477 if (cParams > 1)
6478 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6479 if (cParams > 2)
6480 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6481# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6482 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6483 if (cParams > 0)
6484 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6485 if (cParams > 1)
6486 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6487 if (cParams > 2)
6488 {
6489 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6490 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6491 }
6492 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6493# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6494# else
6495 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6496 if (cParams > 0)
6497 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6498 if (cParams > 1)
6499 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6500 if (cParams > 2)
6501 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6502# endif
6503
6504 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6505
6506# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6507 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6508# endif
6509
6510#elif RT_ARCH_ARM64
6511 /*
6512 * ARM64:
6513 */
6514 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6515 if (cParams > 0)
6516 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6517 if (cParams > 1)
6518 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6519 if (cParams > 2)
6520 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6521
6522 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6523
6524#else
6525# error "port me"
6526#endif
6527
6528#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6529 pReNative->Core.fDebugPcInitialized = false;
6530 Log4(("fDebugPcInitialized=false todo off=%#x (v2)\n", off));
6531#endif
6532
6533 /*
6534 * Check the status code.
6535 */
6536 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6537
6538 return off;
6539}
6540
6541#ifdef VBOX_WITH_STATISTICS
6542
6543/**
6544 * Emits code to update the threaded call statistics.
6545 */
6546DECL_INLINE_THROW(uint32_t)
6547iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6548{
6549 /*
6550 * Update threaded function stats.
6551 */
6552 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6553 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6554# if defined(RT_ARCH_ARM64)
6555 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6556 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6557 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6558 iemNativeRegFreeTmp(pReNative, idxTmp1);
6559 iemNativeRegFreeTmp(pReNative, idxTmp2);
6560# else
6561 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6562# endif
6563 return off;
6564}
6565
6566
6567/**
6568 * Emits code to update the TB exit reason statistics.
6569 */
6570DECL_INLINE_THROW(uint32_t)
6571iemNativeEmitNativeTbExitStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t const offVCpu)
6572{
6573 uint8_t const idxStatsTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6574 uint8_t const idxStatsTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6575 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxStatsTmp1, idxStatsTmp2, offVCpu);
6576 iemNativeRegFreeTmp(pReNative, idxStatsTmp1);
6577 iemNativeRegFreeTmp(pReNative, idxStatsTmp2);
6578
6579 return off;
6580}
6581
6582#endif /* VBOX_WITH_STATISTICS */
6583
6584/**
6585 * Worker for iemNativeEmitViaLookupDoOne and iemNativeRecompileAttachExecMemChunkCtx.
6586 */
6587static uint32_t
6588iemNativeEmitCoreViaLookupDoOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offReturnBreak, uintptr_t pfnHelper)
6589{
6590 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6591 off = iemNativeEmitCallImm(pReNative, off, pfnHelper);
6592
6593 /* Jump to ReturnBreak if the return register is NULL. */
6594 off = iemNativeEmitTestIfGprIsZeroAndJmpToFixed(pReNative, off, IEMNATIVE_CALL_RET_GREG,
6595 true /*f64Bit*/, offReturnBreak);
6596
6597 /* Okay, continue executing the next TB. */
6598 off = iemNativeEmitJmpViaGpr(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6599 return off;
6600}
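
/*
 * Illustrative sketch only (not compiled): the emitted sequence corresponds to this control
 * flow around the lookup helper.  The helper's signature (takes pVCpu, returns the next TB's
 * native entry point or NULL) is an assumption inferred from the emitted code.
 */
#if 0
    uintptr_t const uNextTb = ((uintptr_t (*)(PVMCPUCC))pfnHelper)(pVCpu);
    if (!uNextTb)
        goto ReturnBreak;               /* no suitable TB found -> VINF_IEM_REEXEC_BREAK */
    ((void (*)(void))uNextTb)();        /* otherwise jump straight into the next TB */
#endif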
6601
6602#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6603
6604/**
6605 * Worker for iemNativeEmitReturnBreakViaLookup.
6606 */
6607static uint32_t iemNativeEmitViaLookupDoOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offReturnBreak,
6608 IEMNATIVELABELTYPE enmLabel, uintptr_t pfnHelper)
6609{
6610 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
6611 if (idxLabel != UINT32_MAX)
6612 {
6613 iemNativeLabelDefine(pReNative, idxLabel, off);
6614 off = iemNativeEmitCoreViaLookupDoOne(pReNative, off, offReturnBreak, pfnHelper);
6615 }
6616 return off;
6617}
6618
6619
6620/**
6621 * Emits the code at the ReturnBreakViaLookup, ReturnBreakViaLookupWithIrq,
6622 * ReturnBreakViaLookupWithTlb and ReturnBreakViaLookupWithTlbAndIrq labels
6623 * (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS or jumps to the next TB).
6624 */
6625static uint32_t iemNativeEmitReturnBreakViaLookup(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnBreakLabel)
6626{
6627 uint32_t const offReturnBreak = pReNative->paLabels[idxReturnBreakLabel].off;
6628 Assert(offReturnBreak < off);
6629
6630 /*
6631 * The lookup table index is in IEMNATIVE_CALL_ARG1_GREG for all.
6632 * The GCPhysPc is in IEMNATIVE_CALL_ARG2_GREG for ReturnBreakViaLookupWithPc.
6633 */
6634 off = iemNativeEmitViaLookupDoOne(pReNative, off, offReturnBreak, kIemNativeLabelType_ReturnBreakViaLookup,
6635 (uintptr_t)iemNativeHlpReturnBreakViaLookup<false /*a_fWithIrqCheck*/>);
6636 off = iemNativeEmitViaLookupDoOne(pReNative, off, offReturnBreak, kIemNativeLabelType_ReturnBreakViaLookupWithIrq,
6637 (uintptr_t)iemNativeHlpReturnBreakViaLookup<true /*a_fWithIrqCheck*/>);
6638 off = iemNativeEmitViaLookupDoOne(pReNative, off, offReturnBreak, kIemNativeLabelType_ReturnBreakViaLookupWithTlb,
6639 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<false /*a_fWithIrqCheck*/>);
6640 off = iemNativeEmitViaLookupDoOne(pReNative, off, offReturnBreak, kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq,
6641 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<true /*a_fWithIrqCheck*/>);
6642 return off;
6643}
6644
6645#endif /* !IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE */
6646
6647/**
6648 * Emits the code at the ReturnWithFlags label (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6649 */
6650static uint32_t iemNativeEmitCoreReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6651{
6652 /* set the return status */
6653 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6654}
6655
6656
6657#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6658/**
6659 * Emits the code at the ReturnWithFlags label (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6660 */
6661static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6662{
6663 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6664 if (idxLabel != UINT32_MAX)
6665 {
6666 iemNativeLabelDefine(pReNative, idxLabel, off);
6667 /* set the return status */
6668 off = iemNativeEmitCoreReturnWithFlags(pReNative, off);
6669 /* jump back to the return sequence. */
6670 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6671 }
6672 return off;
6673}
6674#endif
6675
6676
6677/**
6678 * Emits the code at the ReturnBreakFF label (returns VINF_IEM_REEXEC_BREAK_FF).
6679 */
6680static uint32_t iemNativeEmitCoreReturnBreakFF(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6681{
6682 /* set the return status */
6683 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK_FF);
6684}
6685
6686
6687#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6688/**
6689 * Emits the code at the ReturnBreakFF label (returns VINF_IEM_REEXEC_BREAK_FF).
6690 */
6691static uint32_t iemNativeEmitReturnBreakFF(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6692{
6693 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreakFF);
6694 if (idxLabel != UINT32_MAX)
6695 {
6696 iemNativeLabelDefine(pReNative, idxLabel, off);
6697 /* set the return status */
6698 off = iemNativeEmitCoreReturnBreakFF(pReNative, off);
6699 /* jump back to the return sequence. */
6700 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6701 }
6702 return off;
6703}
6704#endif
6705
6706
6707/**
6708 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6709 */
6710static uint32_t iemNativeEmitCoreReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6711{
6712 /* set the return status */
6713 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6714}
6715
6716
6717#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6718/**
6719 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6720 */
6721static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6722{
6723 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6724 if (idxLabel != UINT32_MAX)
6725 {
6726 iemNativeLabelDefine(pReNative, idxLabel, off);
6727 /* set the return status */
6728 off = iemNativeEmitCoreReturnBreak(pReNative, off);
6729 /* jump back to the return sequence. */
6730 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6731 }
6732 return off;
6733}
6734#endif
6735
6736
6737/**
6738 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6739 */
6740static uint32_t iemNativeEmitCoreRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6741{
6742 /*
6743 * Generate the rc + rcPassUp fiddling code.
6744 */
6745 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6746#ifdef RT_ARCH_AMD64
6747# ifdef RT_OS_WINDOWS
6748# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6749 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6750# endif
6751 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6752 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6753# else
6754 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6755 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6756# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6757 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6758# endif
6759# endif
6760# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6761 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6762# endif
6763
6764#else
6765 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6766 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6767 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6768#endif
6769
6770 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6771 return off;
6772}
6773
6774
6775#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6776/**
6777 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6778 */
6779static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6780{
6781 /*
6782 * Generate the rc + rcPassUp fiddling code if needed.
6783 */
6784 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6785 if (idxLabel != UINT32_MAX)
6786 {
6787 iemNativeLabelDefine(pReNative, idxLabel, off);
6788 off = iemNativeEmitCoreRcFiddling(pReNative, off);
6789 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6790 }
6791 return off;
6792}
6793#endif
6794
6795
6796/**
6797 * Emits a standard epilog.
6798 */
6799static uint32_t iemNativeEmitCoreEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6800{
6801 pReNative->Core.bmHstRegs |= RT_BIT_32(IEMNATIVE_CALL_RET_GREG); /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
6802
6803 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6804
6805 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
6806 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
6807
6808 /*
6809 * Restore registers and return.
6810 */
6811#ifdef RT_ARCH_AMD64
6812 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6813
6814 /* Reposition esp at the r15 restore point. */
6815 pbCodeBuf[off++] = X86_OP_REX_W;
6816 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6817 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6818 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6819
6820 /* Pop non-volatile registers and return */
6821 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6822 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6823 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6824 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6825 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6826 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6827 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6828 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6829# ifdef RT_OS_WINDOWS
6830 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6831 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6832# endif
6833 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6834 pbCodeBuf[off++] = 0xc9; /* leave */
6835 pbCodeBuf[off++] = 0xc3; /* ret */
6836 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6837
6838#elif RT_ARCH_ARM64
6839 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6840
6841 /* ldp x19, x20, [sp #IEMNATIVE_FRAME_VAR_SIZE]! ; Unallocate the variable space and restore x19+x20. */
6842 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6843 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6844 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6845 IEMNATIVE_FRAME_VAR_SIZE / 8);
6846 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6847 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6848 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6849 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6850 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6851 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6852 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6853 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6854 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6855 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6856 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6857 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6858
6859 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6860 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6861 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6862 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6863
6864 /* retab / ret */
6865# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6866 if (1)
6867 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6868 else
6869# endif
6870 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6871
6872#else
6873# error "port me"
6874#endif
6875 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6876
6877 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK. */
6878 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
6879
6880 return off;
6881}
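/*
 * Note on the arm64 addressing forms used in the epilog above and the prolog
 * below: the pre-index form ("ldp x19, x20, [sp, #imm]!") updates SP as part
 * of the load/store, while the signed-offset form ("ldp x21, x22, [sp, #imm]")
 * leaves SP untouched.  That is why only the first pair operation and the
 * final add/sub adjust SP, while the pairs in between use fixed offsets.
 */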
6882
6883
6884#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6885/**
6886 * Emits a standard epilog, defining the common return label.
6887 */
6888static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6889{
6890 /*
6891 * Define label for common return point.
6892 */
6893 *pidxReturnLabel = UINT32_MAX;
6894 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6895 *pidxReturnLabel = idxReturn;
6896
6897 /*
6898 * Emit the code.
6899 */
6900 return iemNativeEmitCoreEpilog(pReNative, off);
6901}
6902#endif
6903
6904
6905#ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
6906/**
6907 * Emits a standard prolog.
6908 */
6909static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6910{
6911#ifdef RT_ARCH_AMD64
6912 /*
6913 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6914 * reserving 64 bytes for stack variables plus 4 non-register argument
6915 * slots. Fixed register assignment: xBX = pVCpu;
6916 *
6917 * Since we always do the same register spilling, we can use the same
6918 * unwind description for all the code.
6919 */
6920 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6921 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6922 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6923 pbCodeBuf[off++] = 0x8b;
6924 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6925 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6926 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6927# ifdef RT_OS_WINDOWS
6928 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6929 pbCodeBuf[off++] = 0x8b;
6930 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6931 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6932 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6933# else
6934 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6935 pbCodeBuf[off++] = 0x8b;
6936 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6937# endif
6938 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6939 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6940 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6941 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6942 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6943 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6944 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6945 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6946
6947# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6948 /* Save the frame pointer. */
6949 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6950# endif
6951
6952 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6953 X86_GREG_xSP,
6954 IEMNATIVE_FRAME_ALIGN_SIZE
6955 + IEMNATIVE_FRAME_VAR_SIZE
6956 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6957 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6958 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6959 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6960 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6961
6962#elif RT_ARCH_ARM64
6963 /*
6964 * We set up a stack frame exactly like on x86, only we have to push the
6965 * return address ourselves here. We save all non-volatile registers.
6966 */
6967 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6968
6969# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we have been
6970                      * unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
6971                      * definitely the dwarf stepping code, but until that is found it's very tedious to figure out whether it's
6972                      * in any way conditional, so just emit this instruction now and hope for the best... */
6973 /* pacibsp */
6974 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6975# endif
6976
6977    /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE]! ; Allocate space for saving registers and place x19+x20 at the bottom. */
6978 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6979 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6980 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6981 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6982 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6983 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6984 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6985 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6986 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6987 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6988 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6989 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6990 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6991 /* Save the BP and LR (ret address) registers at the top of the frame. */
6992 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6993 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6994 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6995 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
6996 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
6997 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
6998
6999 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
7000 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
7001
7002 /* mov r28, r0 */
7003 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7004 /* mov r27, r1 */
7005 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7006
7007# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7008 /* Save the frame pointer. */
7009 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7010 ARMV8_A64_REG_X2);
7011# endif
7012
7013#else
7014# error "port me"
7015#endif
7016 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7017 return off;
7018}
7019#endif
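/*
 * Rough sketch of the AMD64 frame the prolog above sets up (offsets relative
 * to RBP; the exact distance to the r15 slot is what IEMNATIVE_FP_OFF_LAST_PUSH
 * encodes, 5 or 7 pushes depending on the host ABI):
 *
 *      rbp + 8 : return address
 *      rbp + 0 : saved rbp
 *      rbp - 8 : saved rbx
 *                [saved rsi, rdi - Windows only]
 *                saved r12, r13, r14, r15      <- where the epilog's 'lea rsp, [rbp - cPushes * 8]' points
 *                variable area (IEMNATIVE_FRAME_VAR_SIZE)
 *                stack + shadow argument slots and alignment padding
 *
 * The arm64 prolog mirrors this with a single save area of
 * IEMNATIVE_FRAME_SAVE_REG_SIZE bytes (x19..x28 + BP + LR) followed by the
 * variable area below it.
 */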
7020
7021
7022/*********************************************************************************************************************************
7023* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7024*********************************************************************************************************************************/
7025
7026/**
7027 * Internal work that allocates a variable with kind set to
7028 * kIemNativeVarKind_Invalid and no current stack allocation.
7029 *
7030 * The kind will either be set by the caller or later when the variable is first
7031 * assigned a value.
7032 *
7033 * @returns Unpacked index.
7034 * @internal
7035 */
7036static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7037{
7038 Assert(cbType > 0 && cbType <= 64);
7039 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7040 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7041 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7042 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7043 pReNative->Core.aVars[idxVar].cbVar = cbType;
7044 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7045 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7046 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7047 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7048 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7049 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7050 pReNative->Core.aVars[idxVar].u.uValue = 0;
7051#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7052 pReNative->Core.aVars[idxVar].fSimdReg = false;
7053#endif
7054 return idxVar;
7055}
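#if 0 /* Minimal standalone sketch (illustrative only, not used by the recompiler) of the
         bitmap search above: the first clear bit in bmVars is the first free variable
         slot, which is what ASMBitFirstSetU32(~bmVars) - 1 computes in the real code. */
static int exampleFindFreeVar(uint32_t bmVars)
{
    uint32_t const bmFree = ~bmVars;            /* free slots become set bits */
    if (!bmFree)
        return -1;                              /* all 32 slots are taken */
    int iVar = 0;
    while (!(bmFree & (1U << iVar)))            /* index of the least significant set bit */
        iVar++;
    return iVar;
}
#endif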
7056
7057
7058/**
7059 * Internal work that allocates an argument variable w/o setting enmKind.
7060 *
7061 * @returns Unpacked index.
7062 * @internal
7063 */
7064static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7065{
7066 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7067 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7068 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7069
7070 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7071 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7072 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7073 return idxVar;
7074}
7075
7076
7077/**
7078 * Gets the stack slot for a stack variable, allocating one if necessary.
7079 *
7080 * Calling this function implies that the stack slot will contain a valid
7081 * variable value. The caller deals with any register currently assigned to the
7082 * variable, typically by spilling it into the stack slot.
7083 *
7084 * @returns The stack slot number.
7085 * @param pReNative The recompiler state.
7086 * @param idxVar The variable.
7087 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7088 */
7089DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7090{
7091 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7092 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7093 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7094
7095 /* Already got a slot? */
7096 uint8_t const idxStackSlot = pVar->idxStackSlot;
7097 if (idxStackSlot != UINT8_MAX)
7098 {
7099 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7100 return idxStackSlot;
7101 }
7102
7103 /*
7104 * A single slot is easy to allocate.
7105 * Allocate them from the top end, closest to BP, to reduce the displacement.
7106 */
7107 if (pVar->cbVar <= sizeof(uint64_t))
7108 {
7109 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7110 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7111 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7112 pVar->idxStackSlot = (uint8_t)iSlot;
7113 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7114 return (uint8_t)iSlot;
7115 }
7116
7117 /*
7118 * We need more than one stack slot.
7119 *
7120 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7121 */
7122 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7123 Assert(pVar->cbVar <= 64);
7124 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7125 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7126 uint32_t bmStack = pReNative->Core.bmStack;
7127 while (bmStack != UINT32_MAX)
7128 {
7129 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7130 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7131 iSlot = (iSlot - 1) & ~fBitAlignMask;
7132 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7133 {
7134 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7135 pVar->idxStackSlot = (uint8_t)iSlot;
7136 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7137 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7138 return (uint8_t)iSlot;
7139 }
7140
7141 bmStack |= (fBitAllocMask << iSlot);
7142 }
7143 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7144}
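#if 0 /* Simplified standalone sketch (illustrative only) of the multi-slot search above,
         assuming 8-byte slots and a 32-slot bitmap like bmStack.  The real code scans
         from the top using ASMBitLastSetU32 and the fBitAlignMask/fBitAllocMask pair;
         this linear version just makes the natural-alignment requirement explicit. */
static uint32_t exampleFindVarSlots(uint32_t bmStack, uint8_t cbVar)
{
    uint32_t const cSlots     = ((uint32_t)cbVar + 7) / 8;   /* 16 -> 2, 32 -> 4, 64 -> 8 */
    uint32_t const fAllocMask = (1U << cSlots) - 1U;         /*       0x3,     0xf,  0xff */
    /* Walk the naturally aligned candidates from the top (closest to BP) downwards. */
    for (uint32_t iSlot = 32; iSlot >= cSlots; iSlot -= cSlots)
        if (!(bmStack & (fAllocMask << (iSlot - cSlots))))
            return iSlot - cSlots;                           /* whole run of cSlots slots is free */
    return UINT32_MAX;                                       /* out of stack slots */
}
#endif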
7145
7146
7147/**
7148 * Changes the variable to a stack variable.
7149 *
7150 * Currently this is only possible the first time the variable is used;
7151 * switching later can be implemented but hasn't been done.
7152 *
7153 * @param pReNative The recompiler state.
7154 * @param idxVar The variable.
7155 * @throws VERR_IEM_VAR_IPE_2
7156 */
7157DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7158{
7159 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7160 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7161 if (pVar->enmKind != kIemNativeVarKind_Stack)
7162 {
7163 /* We could in theory transition from immediate to stack as well, but it
7164 would involve the caller doing work storing the value on the stack. So,
7165 till that's required we only allow transition from invalid. */
7166 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7167 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7168 pVar->enmKind = kIemNativeVarKind_Stack;
7169
7170 /* Note! We don't allocate a stack slot here, that's only done when a
7171 slot is actually needed to hold a variable value. */
7172 }
7173}
7174
7175
7176/**
7177 * Sets the variable to a constant (immediate) value.
7178 *
7179 * This does not require stack storage as we know the value and can always
7180 * reload it, unless of course it's referenced.
7181 *
7182 * @param pReNative The recompiler state.
7183 * @param idxVar The variable.
7184 * @param uValue The immediate value.
7185 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7186 */
7187DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7188{
7189 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7190 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7191 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7192 {
7193 /* Only simple transitions for now. */
7194 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7195 pVar->enmKind = kIemNativeVarKind_Immediate;
7196 }
7197 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7198
7199 pVar->u.uValue = uValue;
7200 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7201 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7202 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7203}
7204
7205
7206/**
7207 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7208 *
7209 * This does not require stack storage as we know the value and can always
7210 * reload it. Loading is postponed till needed.
7211 *
7212 * @param pReNative The recompiler state.
7213 * @param idxVar The variable. Unpacked.
7214 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7215 *
7216 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7217 * @internal
7218 */
7219static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7220{
7221 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7222 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7223
7224 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7225 {
7226 /* Only simple transitions for now. */
7227 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7228 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7229 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7230 }
7231 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7232
7233 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7234
7235 /* Update the other variable, ensure it's a stack variable. */
7236 /** @todo handle variables with const values... that'll go boom now. */
7237 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7238 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7239}
7240
7241
7242/**
7243 * Sets the variable to a reference (pointer) to a guest register reference.
7244 *
7245 * This does not require stack storage as we know the value and can always
7246 * reload it. Loading is postponed till needed.
7247 *
7248 * @param pReNative The recompiler state.
7249 * @param idxVar The variable.
7250 * @param   enmRegClass     The class of guest registers to reference.
7251 * @param idxReg The register within @a enmRegClass to reference.
7252 *
7253 * @throws VERR_IEM_VAR_IPE_2
7254 */
7255DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7256 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7257{
7258 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7259 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7260
7261 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7262 {
7263 /* Only simple transitions for now. */
7264 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7265 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7266 }
7267 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7268
7269 pVar->u.GstRegRef.enmClass = enmRegClass;
7270 pVar->u.GstRegRef.idx = idxReg;
7271}
7272
7273
7274DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7275{
7276 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7277}
7278
7279
7280DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7281{
7282 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7283
7284    /* Since we're using a generic uint64_t value type, we must truncate it if
7285       the variable is smaller, otherwise we may end up with a too large value
7286       when scaling up an imm8 w/ sign-extension.
7287
7288       This caused trouble with an "add bx, 0xffff" instruction (around f000:ac60
7289       in the BIOS, bx=1) when running on ARM, because clang expects 16-bit
7290       register parameters to have bits 16 and up set to zero. Instead of
7291       setting x1 = 0xffff we ended up with x1 = 0xffffffffffffffff and the wrong
7292       CF value in the result. */
7293 switch (cbType)
7294 {
7295 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7296 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7297 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7298 }
7299 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7300 return idxVar;
7301}
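#if 0 /* Standalone illustration (not part of the recompiler) of why the masking above is
         needed: an 8-bit immediate that was sign-extended to 64 bits must be cut back
         down to the operand size before being passed as a narrow helper argument. */
static uint64_t exampleSignExtendAndTruncate(int8_t i8Imm, uint8_t cbOp)
{
    uint64_t uValue = (uint64_t)(int64_t)i8Imm;     /* -1 -> 0xffffffffffffffff       */
    if (cbOp < 8)
        uValue &= (UINT64_C(1) << (cbOp * 8)) - 1;  /* cbOp=2: -> 0xffff, as bx needs */
    return uValue;
}
#endif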
7302
7303
7304DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7305{
7306 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7307 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7308 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7309 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7310 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7311 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7312
7313 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7314 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7315 return idxArgVar;
7316}
7317
7318
7319DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7320{
7321 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7322 /* Don't set to stack now, leave that to the first use as for instance
7323 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7324 return idxVar;
7325}
7326
7327
7328DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7329{
7330 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7331
7332    /* Since we're using a generic uint64_t value type, we must truncate it if
7333       the variable is smaller, otherwise we may end up with a too large value
7334       when scaling up an imm8 w/ sign-extension. */
7335 switch (cbType)
7336 {
7337 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7338 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7339 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7340 }
7341 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7342 return idxVar;
7343}
7344
7345
7346DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff,
7347 uint8_t cbType, uint8_t idxVarOther)
7348{
7349 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7350 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7351
7352 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquire(pReNative, idxVarOther, poff, true /*fInitialized*/);
7353 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
7354
7355/** @todo combine MOV and AND using MOVZX/similar. */
7356 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
7357
7358    /* Truncate the value to this variable's size. */
7359 switch (cbType)
7360 {
7361 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
7362 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
7363 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
7364 }
7365
7366 iemNativeVarRegisterRelease(pReNative, idxVarOther);
7367 iemNativeVarRegisterRelease(pReNative, idxVar);
7368 return idxVar;
7369}
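/*
 * Note: the 32-bit AND emitters above suffice for all three truncation cases
 * because 32-bit register operations zero the upper half of the destination
 * on both AMD64 and ARM64, so the uint32_t case needs no 64-bit masking.
 */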
7370
7371
7372/**
7373 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7374 * fixed till we call iemNativeVarRegisterRelease.
7375 *
7376 * @returns The host register number.
7377 * @param pReNative The recompiler state.
7378 * @param idxVar The variable.
7379 * @param poff Pointer to the instruction buffer offset.
7380 * In case a register needs to be freed up or the value
7381 * loaded off the stack.
7382 * @param fInitialized Set if the variable must already have been initialized.
7383 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7384 * the case.
7385 * @param idxRegPref Preferred register number or UINT8_MAX.
7386 */
7387DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7388 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7389{
7390 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7391 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7392 Assert(pVar->cbVar <= 8);
7393 Assert(!pVar->fRegAcquired);
7394
7395 uint8_t idxReg = pVar->idxReg;
7396 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7397 {
7398 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7399 && pVar->enmKind < kIemNativeVarKind_End);
7400 pVar->fRegAcquired = true;
7401 return idxReg;
7402 }
7403
7404 /*
7405 * If the kind of variable has not yet been set, default to 'stack'.
7406 */
7407 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7408 && pVar->enmKind < kIemNativeVarKind_End);
7409 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7410 iemNativeVarSetKindToStack(pReNative, idxVar);
7411
7412 /*
7413     * We have to allocate a register for the variable, even if it's a stack one,
7414     * as we don't know whether modifications are being made to it before it is
7415     * finalized (todo: analyze and insert hints about that?).
7416     *
7417     * If we can, we try to get the correct register for argument variables. This
7418     * is assuming that most argument variables are fetched as close as possible
7419     * to the actual call, so that there aren't any interfering hidden calls
7420     * (memory accesses, etc.) in between.
7421     *
7422     * If we cannot, or it's a local variable, we make sure no argument registers
7423     * that will be used by this MC block will be allocated here, and we always
7424     * prefer non-volatile registers to avoid having to spill stuff for internal
7425     * calls.
7426 */
7427 /** @todo Detect too early argument value fetches and warn about hidden
7428 * calls causing less optimal code to be generated in the python script. */
7429
7430 uint8_t const uArgNo = pVar->uArgNo;
7431 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7432 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7433 {
7434 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7435
7436#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7437 /* Writeback any dirty shadow registers we are about to unshadow. */
7438 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
7439#endif
7440
7441 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7442 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7443 }
7444 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7445 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7446 {
7447 /** @todo there must be a better way for this and boot cArgsX? */
7448 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7449 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7450 & ~pReNative->Core.bmHstRegsWithGstShadow
7451 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7452 & fNotArgsMask;
7453 if (fRegs)
7454 {
7455 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7456 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7457 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7458 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7459 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7460 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7461 }
7462 else
7463 {
7464 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7465 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7466 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7467 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7468 }
7469 }
7470 else
7471 {
7472 idxReg = idxRegPref;
7473 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7474 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7475 }
7476 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7477 pVar->idxReg = idxReg;
7478
7479#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7480 pVar->fSimdReg = false;
7481#endif
7482
7483 /*
7484 * Load it off the stack if we've got a stack slot.
7485 */
7486 uint8_t const idxStackSlot = pVar->idxStackSlot;
7487 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7488 {
7489 Assert(fInitialized);
7490 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7491 switch (pVar->cbVar)
7492 {
7493 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7494 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7495 case 3: AssertFailed(); RT_FALL_THRU();
7496 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7497 default: AssertFailed(); RT_FALL_THRU();
7498 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7499 }
7500 }
7501 else
7502 {
7503 Assert(idxStackSlot == UINT8_MAX);
7504 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7505 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7506 else
7507 {
7508 /*
7509 * Convert from immediate to stack/register. This is currently only
7510 * required by IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR, IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR
7511 * and IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR in connection with BT, BTS, BTR, and BTC.
7512 */
7513 AssertStmt(fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7514 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u uValue=%RX64 converting from immediate to stack\n",
7515 idxVar, idxReg, pVar->u.uValue));
7516 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7517 pVar->enmKind = kIemNativeVarKind_Stack;
7518 }
7519 }
7520
7521 pVar->fRegAcquired = true;
7522 return idxReg;
7523}
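#if 0 /* Standalone sketch (illustrative only) of the register picking preference used
         above: among the registers that are free, not shadowing guest state, not fixed
         and not reserved for call arguments, prefer the non-volatile ones and take the
         highest numbered candidate, the same result ASMBitLastSetU32() - 1 gives. */
static int examplePickHostReg(uint32_t fFreeUsable, uint32_t fVolatileMask)
{
    uint32_t const fPreferred  = fFreeUsable & ~fVolatileMask;
    uint32_t       fCandidates = fPreferred ? fPreferred : fFreeUsable;
    if (!fCandidates)
        return -1;                              /* nothing free, the caller has to spill */
    int iReg = 31;
    while (!(fCandidates & (1U << iReg)))       /* highest set bit */
        iReg--;
    return iReg;
}
#endif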
7524
7525
7526#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7527/**
7528 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7529 * fixed till we call iemNativeVarRegisterRelease.
7530 *
7531 * @returns The host register number.
7532 * @param pReNative The recompiler state.
7533 * @param idxVar The variable.
7534 * @param poff Pointer to the instruction buffer offset.
7535 * In case a register needs to be freed up or the value
7536 * loaded off the stack.
7537 * @param fInitialized Set if the variable must already have been initialized.
7538 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7539 * the case.
7540 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7541 */
7542DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7543 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7544{
7545 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7546 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7547 Assert( pVar->cbVar == sizeof(RTUINT128U)
7548 || pVar->cbVar == sizeof(RTUINT256U));
7549 Assert(!pVar->fRegAcquired);
7550
7551 uint8_t idxReg = pVar->idxReg;
7552 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7553 {
7554 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7555 && pVar->enmKind < kIemNativeVarKind_End);
7556 pVar->fRegAcquired = true;
7557 return idxReg;
7558 }
7559
7560 /*
7561 * If the kind of variable has not yet been set, default to 'stack'.
7562 */
7563 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7564 && pVar->enmKind < kIemNativeVarKind_End);
7565 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7566 iemNativeVarSetKindToStack(pReNative, idxVar);
7567
7568 /*
7569     * We have to allocate a register for the variable, even if it's a stack one,
7570     * as we don't know whether modifications are being made to it before it is
7571     * finalized (todo: analyze and insert hints about that?).
7572     *
7573     * If we can, we try to get the correct register for argument variables. This
7574     * is assuming that most argument variables are fetched as close as possible
7575     * to the actual call, so that there aren't any interfering hidden calls
7576     * (memory accesses, etc.) in between.
7577     *
7578     * If we cannot, or it's a local variable, we make sure no argument registers
7579     * that will be used by this MC block will be allocated here, and we always
7580     * prefer non-volatile registers to avoid having to spill stuff for internal
7581     * calls.
7582 */
7583 /** @todo Detect too early argument value fetches and warn about hidden
7584 * calls causing less optimal code to be generated in the python script. */
7585
7586 uint8_t const uArgNo = pVar->uArgNo;
7587 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7588
7589    /* SIMD is a bit simpler for now because there is no support for arguments. */
7590 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7591 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7592 {
7593 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7594 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7595 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7596 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7597 & fNotArgsMask;
7598 if (fRegs)
7599 {
7600 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7601 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7602 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7603 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7604 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7605 }
7606 else
7607 {
7608 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7609 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7610 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7611 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7612 }
7613 }
7614 else
7615 {
7616 idxReg = idxRegPref;
7617 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7618 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7619 }
7620 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7621
7622 pVar->fSimdReg = true;
7623 pVar->idxReg = idxReg;
7624
7625 /*
7626 * Load it off the stack if we've got a stack slot.
7627 */
7628 uint8_t const idxStackSlot = pVar->idxStackSlot;
7629 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7630 {
7631 Assert(fInitialized);
7632 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7633 switch (pVar->cbVar)
7634 {
7635 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7636 default: AssertFailed(); RT_FALL_THRU();
7637 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7638 }
7639 }
7640 else
7641 {
7642 Assert(idxStackSlot == UINT8_MAX);
7643 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7644 }
7645 pVar->fRegAcquired = true;
7646 return idxReg;
7647}
7648#endif
7649
7650
7651/**
7652 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7653 * guest register.
7654 *
7655 * This function makes sure there is a register for it and sets it to be the
7656 * current shadow copy of @a enmGstReg.
7657 *
7658 * @returns The host register number.
7659 * @param pReNative The recompiler state.
7660 * @param idxVar The variable.
7661 * @param enmGstReg The guest register this variable will be written to
7662 * after this call.
7663 * @param poff Pointer to the instruction buffer offset.
7664 * In case a register needs to be freed up or if the
7665 * variable content needs to be loaded off the stack.
7666 *
7667 * @note       We DO NOT expect @a idxVar to be an argument variable,
7668 *             because this function is only used in the commit stage
7669 *             of an instruction.
7670 */
7671DECL_HIDDEN_THROW(uint8_t)
7672iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7673{
7674 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7675 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7676 Assert(!pVar->fRegAcquired);
7677 AssertMsgStmt( pVar->cbVar <= 8
7678 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7679 || pVar->enmKind == kIemNativeVarKind_Stack),
7680 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7681 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7682 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7683
7684 /*
7685 * This shouldn't ever be used for arguments, unless it's in a weird else
7686 * branch that doesn't do any calling and even then it's questionable.
7687 *
7688 * However, in case someone writes crazy wrong MC code and does register
7689 * updates before making calls, just use the regular register allocator to
7690 * ensure we get a register suitable for the intended argument number.
7691 */
7692 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7693
7694 /*
7695 * If there is already a register for the variable, we transfer/set the
7696 * guest shadow copy assignment to it.
7697 */
7698 uint8_t idxReg = pVar->idxReg;
7699 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7700 {
7701#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7702 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
7703 {
7704# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7705 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
7706 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
7707# endif
7708 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
7709 }
7710#endif
7711
7712 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7713 {
7714 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7715 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7716 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7717 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7718 }
7719 else
7720 {
7721 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7722 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7723 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7724 }
7725 /** @todo figure this one out. We need some way of making sure the register isn't
7726 * modified after this point, just in case we start writing crappy MC code. */
7727 pVar->enmGstReg = enmGstReg;
7728 pVar->fRegAcquired = true;
7729 return idxReg;
7730 }
7731 Assert(pVar->uArgNo == UINT8_MAX);
7732
7733 /*
7734 * Because this is supposed to be the commit stage, we just tag along with the
7735 * temporary register allocator and upgrade it to a variable register.
7736 */
7737 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7738 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7739 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7740 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7741 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7742 pVar->idxReg = idxReg;
7743
7744 /*
7745 * Now we need to load the register value.
7746 */
7747 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7748 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7749 else
7750 {
7751 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7752 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7753 switch (pVar->cbVar)
7754 {
7755 case sizeof(uint64_t):
7756 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7757 break;
7758 case sizeof(uint32_t):
7759 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7760 break;
7761 case sizeof(uint16_t):
7762 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7763 break;
7764 case sizeof(uint8_t):
7765 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7766 break;
7767 default:
7768 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7769 }
7770 }
7771
7772 pVar->fRegAcquired = true;
7773 return idxReg;
7774}
7775
7776
7777/**
7778 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7779 *
7780 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7781 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7782 * requirement of flushing anything in volatile host registers when making a
7783 * call.
7784 *
7785 * @returns New @a off value.
7786 * @param pReNative The recompiler state.
7787 * @param off The code buffer position.
7788 * @param fHstRegsNotToSave Set of registers not to save & restore.
7789 */
7790DECL_HIDDEN_THROW(uint32_t)
7791iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7792{
7793 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK & ~fHstRegsNotToSave;
7794 if (fHstRegs)
7795 {
7796 do
7797 {
7798 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7799 fHstRegs &= ~RT_BIT_32(idxHstReg);
7800
7801 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7802 {
7803 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7804 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7805 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7806 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7807 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7808 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7809 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7810 {
7811 case kIemNativeVarKind_Stack:
7812 {
7813 /* Temporarily spill the variable register. */
7814 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7815 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7816 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7817 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7818 continue;
7819 }
7820
7821 case kIemNativeVarKind_Immediate:
7822 case kIemNativeVarKind_VarRef:
7823 case kIemNativeVarKind_GstRegRef:
7824 /* It is weird to have any of these loaded at this point. */
7825 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7826 continue;
7827
7828 case kIemNativeVarKind_End:
7829 case kIemNativeVarKind_Invalid:
7830 break;
7831 }
7832 AssertFailed();
7833 }
7834 else
7835 {
7836 /*
7837 * Allocate a temporary stack slot and spill the register to it.
7838 */
7839 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7840 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7841 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7842 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7843 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7844 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7845 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7846 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7847 }
7848 } while (fHstRegs);
7849 }
7850#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7851
7852 /*
7853     * Guest register shadows are flushed to CPUMCTX at the moment and don't need a stack slot allocated,
7854     * which would be more difficult anyway due to them spanning multiple stack slots and having different
7855     * sizes (besides, we only have a limited number of slots at the moment).
7856     *
7857     * However, the shadows need to be flushed out as the guest SIMD register might get corrupted by
7858     * the callee. This asserts that the registers were written back earlier and are not in the dirty state.
7859 */
7860 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
7861
7862 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7863 if (fHstRegs)
7864 {
7865 do
7866 {
7867 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7868 fHstRegs &= ~RT_BIT_32(idxHstReg);
7869
7870 /* Fixed reserved and temporary registers don't need saving. */
7871 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved
7872 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp)
7873 continue;
7874
7875 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7876
7877 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7878 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7879 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7880 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7881 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7882 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7883 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7884 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7885 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7886 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7887 {
7888 case kIemNativeVarKind_Stack:
7889 {
7890 /* Temporarily spill the variable register. */
7891 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7892 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7893 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7894 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7895 if (cbVar == sizeof(RTUINT128U))
7896 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7897 else
7898 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7899 continue;
7900 }
7901
7902 case kIemNativeVarKind_Immediate:
7903 case kIemNativeVarKind_VarRef:
7904 case kIemNativeVarKind_GstRegRef:
7905 /* It is weird to have any of these loaded at this point. */
7906 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7907 continue;
7908
7909 case kIemNativeVarKind_End:
7910 case kIemNativeVarKind_Invalid:
7911 break;
7912 }
7913 AssertFailed();
7914 } while (fHstRegs);
7915 }
7916#endif
7917 return off;
7918}
7919
7920
7921/**
7922 * Emit code to restore volatile registers after a call to a helper.
7923 *
7924 * @returns New @a off value.
7925 * @param pReNative The recompiler state.
7926 * @param off The code buffer position.
7927 * @param fHstRegsNotToSave Set of registers not to save & restore.
7928 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7929 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7930 */
7931DECL_HIDDEN_THROW(uint32_t)
7932iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7933{
7934 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK & ~fHstRegsNotToSave;
7935 if (fHstRegs)
7936 {
7937 do
7938 {
7939 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7940 fHstRegs &= ~RT_BIT_32(idxHstReg);
7941
7942 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7943 {
7944 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7945 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7946 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7947 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7948 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7949 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7950 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7951 {
7952 case kIemNativeVarKind_Stack:
7953 {
7954 /* Unspill the variable register. */
7955 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7956 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7957 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7958 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7959 continue;
7960 }
7961
7962 case kIemNativeVarKind_Immediate:
7963 case kIemNativeVarKind_VarRef:
7964 case kIemNativeVarKind_GstRegRef:
7965 /* It is weird to have any of these loaded at this point. */
7966 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7967 continue;
7968
7969 case kIemNativeVarKind_End:
7970 case kIemNativeVarKind_Invalid:
7971 break;
7972 }
7973 AssertFailed();
7974 }
7975 else
7976 {
7977 /*
7978 * Restore from temporary stack slot.
7979 */
7980 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7981 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7982 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7983 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7984
7985 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7986 }
7987 } while (fHstRegs);
7988 }
7989#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7990 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7991 if (fHstRegs)
7992 {
7993 do
7994 {
7995 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7996 fHstRegs &= ~RT_BIT_32(idxHstReg);
7997
7998 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
7999 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
8000 continue;
8001 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8002
8003 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8004 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8005 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8006 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8007 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8008 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8009 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8010 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8011 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8012 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8013 {
8014 case kIemNativeVarKind_Stack:
8015 {
8016 /* Unspill the variable register. */
8017 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8018 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8019 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8020 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8021
8022 if (cbVar == sizeof(RTUINT128U))
8023 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8024 else
8025 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8026 continue;
8027 }
8028
8029 case kIemNativeVarKind_Immediate:
8030 case kIemNativeVarKind_VarRef:
8031 case kIemNativeVarKind_GstRegRef:
8032 /* It is weird to have any of these loaded at this point. */
8033 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8034 continue;
8035
8036 case kIemNativeVarKind_End:
8037 case kIemNativeVarKind_Invalid:
8038 break;
8039 }
8040 AssertFailed();
8041 } while (fHstRegs);
8042 }
8043#endif
8044 return off;
8045}
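#if 0 /* Sketch of how the save/restore pair above is meant to bracket a helper call
         (illustrative only; the real call sites live in the TLB-miss code paths and
         supply the actual fHstRegsNotToSave mask and the helper invocation): */
    off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
    /* ... emit the helper call here ... */
    off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
#endif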
8046
8047
8048/**
8049 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8050 * Worker that frees the stack slots for variable @a idxVar, if any are allocated.
8051 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8052 *
8053 * ASSUMES that @a idxVar is valid and unpacked.
8054 */
8055DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8056{
8057 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8058 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8059 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8060 {
8061 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8062 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8063 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
8064 Assert(cSlots > 0);
8065 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8066 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8067 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8068 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8069 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8070 }
8071 else
8072 Assert(idxStackSlot == UINT8_MAX);
8073}
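#if 0 /* Worked example (illustrative only) of the mask math above, assuming 8-byte slots:
         a 32 byte variable that got slots 4..7 yields cSlots = 4 and fAllocMask = 0xf,
         so clearing fAllocMask << 4 releases exactly bits 4..7 of the bitmap again. */
static uint32_t exampleFreeVarSlots(uint32_t bmStack, uint8_t idxStackSlot, uint8_t cbVar)
{
    uint32_t const cSlots     = ((uint32_t)cbVar + 7) / 8;
    uint32_t const fAllocMask = (1U << cSlots) - 1U;
    return bmStack & ~(fAllocMask << idxStackSlot);
}
#endif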
8074
8075
8076/**
8077 * Worker that frees a single variable.
8078 *
8079 * ASSUMES that @a idxVar is valid and unpacked.
8080 */
8081DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8082{
8083 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8084 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8085 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8086
8087 /* Free the host register first if any assigned. */
8088 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8089#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8090 if ( idxHstReg != UINT8_MAX
8091 && pReNative->Core.aVars[idxVar].fSimdReg)
8092 {
8093 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8094 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8095 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8096 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8097 }
8098 else
8099#endif
8100 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8101 {
8102 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8103 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8104 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8105 }
8106
8107 /* Free argument mapping. */
8108 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8109 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8110 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8111
8112 /* Free the stack slots. */
8113 iemNativeVarFreeStackSlots(pReNative, idxVar);
8114
8115 /* Free the actual variable. */
8116 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8117 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8118}
8119
8120
8121/**
8122 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8123 */
8124DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8125{
8126 while (bmVars != 0)
8127 {
8128 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8129 bmVars &= ~RT_BIT_32(idxVar);
8130
8131#if 1 /** @todo optimize by simplifying this later... */
8132 iemNativeVarFreeOneWorker(pReNative, idxVar);
8133#else
8134 /* Only need to free the host register, the rest is done as bulk updates below. */
8135 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8136 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8137 {
8138 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8139 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8140 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8141 }
8142#endif
8143 }
8144#if 0 /** @todo optimize by simplifying this later... */
8145 pReNative->Core.bmVars = 0;
8146 pReNative->Core.bmStack = 0;
8147 pReNative->Core.u64ArgVars = UINT64_MAX;
8148#endif
8149}
8150
8151
8152
8153/*********************************************************************************************************************************
8154* Emitters for IEM_MC_CALL_CIMPL_XXX *
8155*********************************************************************************************************************************/
8156
8157/**
8158 * Emits code to load a reference to the given guest register into @a idxGprDst.
8159 */
8160DECL_HIDDEN_THROW(uint32_t)
8161iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8162 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8163{
8164#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8165    /** @todo If we are ever going to allow referencing the RIP register, we need to update the guest value here. */
8166#endif
8167
8168 /*
8169 * Get the offset relative to the CPUMCTX structure.
8170 */
8171 uint32_t offCpumCtx;
8172 switch (enmClass)
8173 {
8174 case kIemNativeGstRegRef_Gpr:
8175 Assert(idxRegInClass < 16);
8176 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8177 break;
8178
8179        case kIemNativeGstRegRef_GprHighByte:    /**< AH, CH, DH, BH */
8180 Assert(idxRegInClass < 4);
8181 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
8182 break;
8183
8184 case kIemNativeGstRegRef_EFlags:
8185 Assert(idxRegInClass == 0);
8186 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8187 break;
8188
8189 case kIemNativeGstRegRef_MxCsr:
8190 Assert(idxRegInClass == 0);
8191 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8192 break;
8193
8194 case kIemNativeGstRegRef_FpuReg:
8195 Assert(idxRegInClass < 8);
8196 AssertFailed(); /** @todo what kind of indexing? */
8197 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8198 break;
8199
8200 case kIemNativeGstRegRef_MReg:
8201 Assert(idxRegInClass < 8);
8202 AssertFailed(); /** @todo what kind of indexing? */
8203 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8204 break;
8205
8206 case kIemNativeGstRegRef_XReg:
8207 Assert(idxRegInClass < 16);
8208 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8209 break;
8210
8211 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8212 Assert(idxRegInClass == 0);
8213 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8214 break;
8215
8216 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8217 Assert(idxRegInClass == 0);
8218 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8219 break;
8220
8221 default:
8222 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8223 }
8224
8225 /*
8226 * Load the value into the destination register.
8227 */
8228#ifdef RT_ARCH_AMD64
8229 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8230
8231#elif defined(RT_ARCH_ARM64)
8232 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8233 Assert(offCpumCtx < 4096);
8234 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
8235
8236#else
8237# error "Port me!"
8238#endif
8239
8240 return off;
8241}
8242
8243
8244/**
8245 * Common code for CIMPL and AIMPL calls.
8246 *
8247 * These are calls that use argument variables and such. They should not be
8248 * confused with internal calls required to implement an MC operation,
8249 * like a TLB load and similar.
8250 *
8251 * Upon return all that is left to do is to load any hidden arguments and
8252 * perform the call. All argument variables are freed.
8253 *
8254 * @returns New code buffer offset; throws VBox status code on error.
8255 * @param pReNative The native recompile state.
8256 * @param off The code buffer offset.
8257 * @param cArgs The total number of arguments (includes hidden
8258 * count).
8259 * @param cHiddenArgs The number of hidden arguments. The hidden
8260 * arguments must not have any variable declared for
8261 * them, whereas all the regular arguments must
8262 * (tstIEMCheckMc ensures this).
8263 * @param fFlushPendingWrites Flag whether to flush pending writes (default true),
8264 * even if false, pending writes held in call volatile registers are still flushed.
8265 */
8266DECL_HIDDEN_THROW(uint32_t)
8267iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
8268 bool fFlushPendingWrites /*= true*/)
8269{
8270#ifdef VBOX_STRICT
8271 /*
8272 * Assert sanity.
8273 */
8274 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8275 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8276 for (unsigned i = 0; i < cHiddenArgs; i++)
8277 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8278 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8279 {
8280 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8281 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8282 }
8283 iemNativeRegAssertSanity(pReNative);
8284#endif
8285
8286 /* We don't know what the called function makes use of, so flush any pending register writes. */
8287 RT_NOREF(fFlushPendingWrites);
8288#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8289 if (fFlushPendingWrites)
8290#endif
8291 off = iemNativeRegFlushPendingWrites(pReNative, off);
8292
8293 /*
8294 * Before we do anything else, go over variables that are referenced and
8295 * make sure they are not in a register.
8296 */
8297 uint32_t bmVars = pReNative->Core.bmVars;
8298 if (bmVars)
8299 {
8300 do
8301 {
8302 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8303 bmVars &= ~RT_BIT_32(idxVar);
8304
8305 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8306 {
8307 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8308#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8309 if ( idxRegOld != UINT8_MAX
8310 && pReNative->Core.aVars[idxVar].fSimdReg)
8311 {
8312 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8313 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8314
8315 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8316 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8317 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8318 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8319 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8320 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8321 else
8322 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8323
8324 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8325 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8326
8327 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8328 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8329 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8330 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8331 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8332 }
8333 else
8334#endif
8335 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8336 {
8337 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8338 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8339 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8340 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8341 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8342
8343 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8344 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8345 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8346 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8347 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8348 }
8349 }
8350 } while (bmVars != 0);
8351#if 0 //def VBOX_STRICT
8352 iemNativeRegAssertSanity(pReNative);
8353#endif
8354 }
8355
8356 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8357
8358#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8359 /*
8360     * At the very first step, go over the host registers that will be used for arguments
8361     * and make sure they don't shadow anything which needs writing back first.
8362 */
8363 for (uint32_t i = 0; i < cRegArgs; i++)
8364 {
8365 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8366
8367 /* Writeback any dirty guest shadows before using this register. */
8368 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
8369 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
8370 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
8371 }
8372#endif
8373
8374 /*
8375 * First, go over the host registers that will be used for arguments and make
8376 * sure they either hold the desired argument or are free.
8377 */
8378 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8379 {
8380 for (uint32_t i = 0; i < cRegArgs; i++)
8381 {
8382 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8383 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8384 {
8385 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8386 {
8387 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8388 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8389 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8390 Assert(pVar->idxReg == idxArgReg);
8391 uint8_t const uArgNo = pVar->uArgNo;
8392 if (uArgNo == i)
8393                        { /* perfect */ }
8394 /* The variable allocator logic should make sure this is impossible,
8395 except for when the return register is used as a parameter (ARM,
8396 but not x86). */
8397#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8398 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8399 {
8400# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8401# error "Implement this"
8402# endif
8403 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8404 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8405 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8406 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8407 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8408 }
8409#endif
8410 else
8411 {
8412 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8413
8414 if (pVar->enmKind == kIemNativeVarKind_Stack)
8415 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8416 else
8417 {
8418 /* just free it, can be reloaded if used again */
8419 pVar->idxReg = UINT8_MAX;
8420 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8421 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8422 }
8423 }
8424 }
8425 else
8426 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8427 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8428 }
8429 }
8430#if 0 //def VBOX_STRICT
8431 iemNativeRegAssertSanity(pReNative);
8432#endif
8433 }
8434
8435 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8436
8437#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8438 /*
8439 * If there are any stack arguments, make sure they are in their place as well.
8440 *
8441     * We can use IEMNATIVE_CALL_ARG0_GREG as a temporary register since we (or
8442     * the caller) will be loading it later and it must be free (see the first loop).
8443 */
8444 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8445 {
8446 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8447 {
8448 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8449 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8450 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8451 {
8452 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8453 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8454 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8455 pVar->idxReg = UINT8_MAX;
8456 }
8457 else
8458 {
8459 /* Use ARG0 as temp for stuff we need registers for. */
8460 switch (pVar->enmKind)
8461 {
8462 case kIemNativeVarKind_Stack:
8463 {
8464 uint8_t const idxStackSlot = pVar->idxStackSlot;
8465 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8466 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8467 iemNativeStackCalcBpDisp(idxStackSlot));
8468 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8469 continue;
8470 }
8471
8472 case kIemNativeVarKind_Immediate:
8473 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8474 continue;
8475
8476 case kIemNativeVarKind_VarRef:
8477 {
8478 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8479 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8480 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8481 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8482 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8483# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8484 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8485 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8486 if ( fSimdReg
8487 && idxRegOther != UINT8_MAX)
8488 {
8489 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8490 if (cbVar == sizeof(RTUINT128U))
8491 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8492 else
8493 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8494 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8495 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8496 }
8497 else
8498# endif
8499 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8500 {
8501 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8502 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8503 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8504 }
8505 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8506 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8507 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8508 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8509 continue;
8510 }
8511
8512 case kIemNativeVarKind_GstRegRef:
8513 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8514 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8515 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8516 continue;
8517
8518 case kIemNativeVarKind_Invalid:
8519 case kIemNativeVarKind_End:
8520 break;
8521 }
8522 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8523 }
8524 }
8525# if 0 //def VBOX_STRICT
8526 iemNativeRegAssertSanity(pReNative);
8527# endif
8528 }
8529#else
8530 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8531#endif
8532
8533 /*
8534 * Make sure the argument variables are loaded into their respective registers.
8535 *
8536 * We can optimize this by ASSUMING that any register allocations are for
8537     * registers that have already been loaded and are ready. The previous step
8538 * saw to that.
8539 */
8540 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8541 {
8542 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8543 {
8544 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8545 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8546 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8547 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8548 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8549 else
8550 {
8551 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8552 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8553 {
8554 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8555 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8556 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8557 | RT_BIT_32(idxArgReg);
8558 pVar->idxReg = idxArgReg;
8559 }
8560 else
8561 {
8562 /* Use ARG0 as temp for stuff we need registers for. */
8563 switch (pVar->enmKind)
8564 {
8565 case kIemNativeVarKind_Stack:
8566 {
8567 uint8_t const idxStackSlot = pVar->idxStackSlot;
8568 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8569 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8570 continue;
8571 }
8572
8573 case kIemNativeVarKind_Immediate:
8574 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8575 continue;
8576
8577 case kIemNativeVarKind_VarRef:
8578 {
8579 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8580 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8581 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8582 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8583 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8584 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8585#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8586 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8587 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8588 if ( fSimdReg
8589 && idxRegOther != UINT8_MAX)
8590 {
8591 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8592 if (cbVar == sizeof(RTUINT128U))
8593 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8594 else
8595 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8596 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8597 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8598 }
8599 else
8600#endif
8601 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8602 {
8603 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8604 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8605 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8606 }
8607 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8608 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8609 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8610 continue;
8611 }
8612
8613 case kIemNativeVarKind_GstRegRef:
8614 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8615 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8616 continue;
8617
8618 case kIemNativeVarKind_Invalid:
8619 case kIemNativeVarKind_End:
8620 break;
8621 }
8622 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8623 }
8624 }
8625 }
8626#if 0 //def VBOX_STRICT
8627 iemNativeRegAssertSanity(pReNative);
8628#endif
8629 }
8630#ifdef VBOX_STRICT
8631 else
8632 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8633 {
8634 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8635 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8636 }
8637#endif
8638
8639 /*
8640 * Free all argument variables (simplified).
8641 * Their lifetime always expires with the call they are for.
8642 */
8643 /** @todo Make the python script check that arguments aren't used after
8644 * IEM_MC_CALL_XXXX. */
8645     /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8646      *        an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8647 * an argument value. There is also some FPU stuff. */
8648 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8649 {
8650 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8651 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8652
8653 /* no need to free registers: */
8654 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8655 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8656 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8657 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8658 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8659 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8660
8661 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8662 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8663 iemNativeVarFreeStackSlots(pReNative, idxVar);
8664 }
8665 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8666
8667 /*
8668 * Flush volatile registers as we make the call.
8669 */
8670 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8671
8672 return off;
8673}
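
#if 0 /* Illustration only - not part of the recompiler. */
/**
 * Hedged sketch of how a call emitter is expected to use the common worker
 * above, going by its documentation: place the visible arguments, then load the
 * hidden ones and perform the call. That hidden argument 0 is pVCpu and the
 * final call/status handling (omitted here) are assumptions for illustration;
 * the real IEM_MC_CALL_CIMPL_XXX emitters differ in detail.
 */
static uint32_t iemNativeEmitCallSketch(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgsIncHidden)
{
    /* Place all regular argument variables into their calling convention
       registers (or stack slots) and free them. */
    off = iemNativeEmitCallCommon(pReNative, off, cArgsIncHidden, 1 /*cHiddenArgs*/, true /*fFlushPendingWrites*/);

    /* Load the hidden first argument (assumed to be pVCpu); the actual call
       emission and status handling helpers are omitted here. */
    off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
    return off;
}
#endif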
8674
8675
8676
8677/*********************************************************************************************************************************
8678* TLB Lookup. *
8679*********************************************************************************************************************************/
8680
8681/**
8682 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8683 */
8684DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint64_t uSegAndSizeAndAccessAndDisp)
8685{
8686 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccessAndDisp);
8687 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccessAndDisp);
8688 uint32_t const fAccess = (uint32_t)uSegAndSizeAndAccessAndDisp >> 16;
8689 uint8_t const offDisp = RT_BYTE5(uSegAndSizeAndAccessAndDisp);
8690 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64+%#x LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, offDisp, cbMem, fAccess, uResult));
8691
8692 /* Do the lookup manually. */
8693 RTGCPTR const GCPtrFlat = (iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base) + offDisp;
8694 uint64_t const uTagNoRev = IEMTLB_CALC_TAG_NO_REV(GCPtrFlat);
8695 PCIEMTLBENTRY pTlbe = IEMTLB_TAG_TO_EVEN_ENTRY(&pVCpu->iem.s.DataTlb, uTagNoRev);
8696 if (RT_LIKELY( pTlbe->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevision)
8697 || (pTlbe = pTlbe + 1)->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevisionGlobal)))
8698 {
8699 /*
8700 * Check TLB page table level access flags.
8701 */
8702 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8703 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
8704 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8705 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8706 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8707 | IEMTLBE_F_PG_UNASSIGNED
8708 | IEMTLBE_F_PT_NO_ACCESSED
8709 | fNoWriteNoDirty | fNoUser);
8710 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8711 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8712 {
8713 /*
8714 * Return the address.
8715 */
8716 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8717 if ((uintptr_t)pbAddr == uResult)
8718 return;
8719 RT_NOREF(cbMem);
8720 AssertFailed();
8721 }
8722 else
8723 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8724 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8725 }
8726 else
8727 AssertFailed();
8728 RT_BREAKPOINT();
8729}
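
#if 0 /* Illustration only - not part of the recompiler. */
/**
 * Going by the RT_BYTEx/shift extraction at the top of the checker above, the
 * uSegAndSizeAndAccessAndDisp parameter appears to be laid out as:
 *      byte 0      iSegReg (UINT8_MAX for flat accesses),
 *      byte 1      cbMem,
 *      bytes 2+3   fAccess (low 16 bits only),
 *      byte 4      offDisp.
 * A minimal packing sketch under that assumption - the emitter side that builds
 * the value is not shown in this file and may differ in detail.
 */
DECL_FORCE_INLINE(uint64_t)
iemNativeTlbCheckPackArgsSketch(uint8_t iSegReg, uint8_t cbMem, uint32_t fAccess, uint8_t offDisp)
{
    Assert(!(fAccess & UINT32_C(0xffff0000))); /* assumed: only 16 access bits are carried. */
    return (uint64_t)iSegReg
         | ((uint64_t)cbMem   <<  8)
         | ((uint64_t)fAccess << 16)
         | ((uint64_t)offDisp << 32);
}
#endif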
8730
8731/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8732
8733
8734
8735/*********************************************************************************************************************************
8736* Recompiler Core. *
8737*********************************************************************************************************************************/
8738
8739/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8740static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8741{
8742 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8743 pDis->cbCachedInstr += cbMaxRead;
8744 RT_NOREF(cbMinRead);
8745 return VERR_NO_DATA;
8746}
8747
8748
8749DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8750{
8751 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8752 {
8753#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8754 ENTRY(fLocalForcedActions),
8755 ENTRY(iem.s.rcPassUp),
8756 ENTRY(iem.s.fExec),
8757 ENTRY(iem.s.pbInstrBuf),
8758 ENTRY(iem.s.uInstrBufPc),
8759 ENTRY(iem.s.GCPhysInstrBuf),
8760 ENTRY(iem.s.cbInstrBufTotal),
8761 ENTRY(iem.s.idxTbCurInstr),
8762 ENTRY(iem.s.fSkippingEFlags),
8763#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
8764 ENTRY(iem.s.uPcUpdatingDebug),
8765#endif
8766#ifdef VBOX_WITH_STATISTICS
8767 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8768 ENTRY(iem.s.StatNativeTlbHitsForStore),
8769 ENTRY(iem.s.StatNativeTlbHitsForStack),
8770 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8771 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8772 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8773 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8774 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8775#endif
8776 ENTRY(iem.s.DataTlb.uTlbRevision),
8777 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8778 ENTRY(iem.s.DataTlb.cTlbCoreHits),
8779 ENTRY(iem.s.DataTlb.cTlbInlineCodeHits),
8780 ENTRY(iem.s.DataTlb.cTlbNativeMissTag),
8781 ENTRY(iem.s.DataTlb.cTlbNativeMissFlagsAndPhysRev),
8782 ENTRY(iem.s.DataTlb.cTlbNativeMissAlignment),
8783 ENTRY(iem.s.DataTlb.cTlbNativeMissCrossPage),
8784 ENTRY(iem.s.DataTlb.cTlbNativeMissNonCanonical),
8785 ENTRY(iem.s.DataTlb.aEntries),
8786 ENTRY(iem.s.CodeTlb.uTlbRevision),
8787 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8788 ENTRY(iem.s.CodeTlb.cTlbCoreHits),
8789 ENTRY(iem.s.CodeTlb.cTlbNativeMissTag),
8790 ENTRY(iem.s.CodeTlb.cTlbNativeMissFlagsAndPhysRev),
8791 ENTRY(iem.s.CodeTlb.cTlbNativeMissAlignment),
8792 ENTRY(iem.s.CodeTlb.cTlbNativeMissCrossPage),
8793 ENTRY(iem.s.CodeTlb.cTlbNativeMissNonCanonical),
8794 ENTRY(iem.s.CodeTlb.aEntries),
8795 ENTRY(pVMR3),
8796 ENTRY(cpum.GstCtx.rax),
8797 ENTRY(cpum.GstCtx.ah),
8798 ENTRY(cpum.GstCtx.rcx),
8799 ENTRY(cpum.GstCtx.ch),
8800 ENTRY(cpum.GstCtx.rdx),
8801 ENTRY(cpum.GstCtx.dh),
8802 ENTRY(cpum.GstCtx.rbx),
8803 ENTRY(cpum.GstCtx.bh),
8804 ENTRY(cpum.GstCtx.rsp),
8805 ENTRY(cpum.GstCtx.rbp),
8806 ENTRY(cpum.GstCtx.rsi),
8807 ENTRY(cpum.GstCtx.rdi),
8808 ENTRY(cpum.GstCtx.r8),
8809 ENTRY(cpum.GstCtx.r9),
8810 ENTRY(cpum.GstCtx.r10),
8811 ENTRY(cpum.GstCtx.r11),
8812 ENTRY(cpum.GstCtx.r12),
8813 ENTRY(cpum.GstCtx.r13),
8814 ENTRY(cpum.GstCtx.r14),
8815 ENTRY(cpum.GstCtx.r15),
8816 ENTRY(cpum.GstCtx.es.Sel),
8817 ENTRY(cpum.GstCtx.es.u64Base),
8818 ENTRY(cpum.GstCtx.es.u32Limit),
8819 ENTRY(cpum.GstCtx.es.Attr),
8820 ENTRY(cpum.GstCtx.cs.Sel),
8821 ENTRY(cpum.GstCtx.cs.u64Base),
8822 ENTRY(cpum.GstCtx.cs.u32Limit),
8823 ENTRY(cpum.GstCtx.cs.Attr),
8824 ENTRY(cpum.GstCtx.ss.Sel),
8825 ENTRY(cpum.GstCtx.ss.u64Base),
8826 ENTRY(cpum.GstCtx.ss.u32Limit),
8827 ENTRY(cpum.GstCtx.ss.Attr),
8828 ENTRY(cpum.GstCtx.ds.Sel),
8829 ENTRY(cpum.GstCtx.ds.u64Base),
8830 ENTRY(cpum.GstCtx.ds.u32Limit),
8831 ENTRY(cpum.GstCtx.ds.Attr),
8832 ENTRY(cpum.GstCtx.fs.Sel),
8833 ENTRY(cpum.GstCtx.fs.u64Base),
8834 ENTRY(cpum.GstCtx.fs.u32Limit),
8835 ENTRY(cpum.GstCtx.fs.Attr),
8836 ENTRY(cpum.GstCtx.gs.Sel),
8837 ENTRY(cpum.GstCtx.gs.u64Base),
8838 ENTRY(cpum.GstCtx.gs.u32Limit),
8839 ENTRY(cpum.GstCtx.gs.Attr),
8840 ENTRY(cpum.GstCtx.rip),
8841 ENTRY(cpum.GstCtx.eflags),
8842 ENTRY(cpum.GstCtx.uRipInhibitInt),
8843 ENTRY(cpum.GstCtx.cr0),
8844 ENTRY(cpum.GstCtx.cr4),
8845 ENTRY(cpum.GstCtx.aXcr[0]),
8846 ENTRY(cpum.GstCtx.aXcr[1]),
8847#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8848 ENTRY(cpum.GstCtx.XState.x87.MXCSR),
8849 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8850 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8851 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8852 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8853 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8854 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8855 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8856 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8857 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8858 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8859 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8860 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8861 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8862 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8863 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8864 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8865 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8866 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8867 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8868 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8869 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8870 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8871 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8872 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8873 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8874 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8875 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8876 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8877 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8878 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8879 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8880 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8881#endif
8882#undef ENTRY
8883 };
8884#ifdef VBOX_STRICT
8885 static bool s_fOrderChecked = false;
8886 if (!s_fOrderChecked)
8887 {
8888 s_fOrderChecked = true;
8889 uint32_t offPrev = s_aMembers[0].off;
8890 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8891 {
8892 Assert(s_aMembers[i].off > offPrev);
8893 offPrev = s_aMembers[i].off;
8894 }
8895 }
8896#endif
8897
8898 /*
8899 * Binary lookup.
8900 */
8901 unsigned iStart = 0;
8902 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8903 for (;;)
8904 {
8905 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8906 uint32_t const offCur = s_aMembers[iCur].off;
8907 if (off < offCur)
8908 {
8909 if (iCur != iStart)
8910 iEnd = iCur;
8911 else
8912 break;
8913 }
8914 else if (off > offCur)
8915 {
8916 if (iCur + 1 < iEnd)
8917 iStart = iCur + 1;
8918 else
8919 break;
8920 }
8921 else
8922 return s_aMembers[iCur].pszName;
8923 }
8924#ifdef VBOX_WITH_STATISTICS
8925 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8926 return "iem.s.acThreadedFuncStats[iFn]";
8927#endif
8928 return NULL;
8929}
8930
8931
8932/**
8933 * Translates a label to a name.
8934 */
8935static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode /*= false*/)
8936{
8937 switch (enmLabel)
8938 {
8939#define STR_CASE_CMN(a_Label) case kIemNativeLabelType_ ## a_Label: return fCommonCode ? "Chunk_" #a_Label : #a_Label;
8940 STR_CASE_CMN(Invalid);
8941 STR_CASE_CMN(RaiseDe);
8942 STR_CASE_CMN(RaiseUd);
8943 STR_CASE_CMN(RaiseSseRelated);
8944 STR_CASE_CMN(RaiseAvxRelated);
8945 STR_CASE_CMN(RaiseSseAvxFpRelated);
8946 STR_CASE_CMN(RaiseNm);
8947 STR_CASE_CMN(RaiseGp0);
8948 STR_CASE_CMN(RaiseMf);
8949 STR_CASE_CMN(RaiseXf);
8950 STR_CASE_CMN(ObsoleteTb);
8951 STR_CASE_CMN(NeedCsLimChecking);
8952 STR_CASE_CMN(CheckBranchMiss);
8953#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
8954 STR_CASE_CMN(ReturnSuccess);
8955#else
8956 STR_CASE_CMN(Return);
8957#endif
8958 STR_CASE_CMN(ReturnBreak);
8959 STR_CASE_CMN(ReturnBreakFF);
8960 STR_CASE_CMN(ReturnWithFlags);
8961 STR_CASE_CMN(ReturnBreakViaLookup);
8962 STR_CASE_CMN(ReturnBreakViaLookupWithIrq);
8963 STR_CASE_CMN(ReturnBreakViaLookupWithTlb);
8964 STR_CASE_CMN(ReturnBreakViaLookupWithTlbAndIrq);
8965 STR_CASE_CMN(NonZeroRetOrPassUp);
8966#undef STR_CASE_CMN
8967#define STR_CASE_LBL(a_Label) case kIemNativeLabelType_ ## a_Label: return #a_Label;
8968 STR_CASE_LBL(LoopJumpTarget);
8969 STR_CASE_LBL(If);
8970 STR_CASE_LBL(Else);
8971 STR_CASE_LBL(Endif);
8972 STR_CASE_LBL(CheckIrq);
8973 STR_CASE_LBL(TlbLookup);
8974 STR_CASE_LBL(TlbMiss);
8975 STR_CASE_LBL(TlbDone);
8976 case kIemNativeLabelType_End: break;
8977 }
8978 return NULL;
8979}
8980
8981
8982/** Info for the symbol resolver used when disassembling. */
8983typedef struct IEMNATIVDISASMSYMCTX
8984{
8985 PVMCPU pVCpu;
8986 PCIEMTB pTb;
8987# ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
8988 PCIEMNATIVEPERCHUNKCTX pCtx;
8989# endif
8990# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8991 PCIEMTBDBG pDbgInfo;
8992# endif
8993} IEMNATIVDISASMSYMCTX;
8994typedef IEMNATIVDISASMSYMCTX *PIEMNATIVDISASMSYMCTX;
8995
8996
8997/**
8998 * Resolve address to symbol, if we can.
8999 */
9000static const char *iemNativeDisasmGetSymbol(PIEMNATIVDISASMSYMCTX pSymCtx, uintptr_t uAddress, char *pszBuf, size_t cbBuf)
9001{
9002#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE)
9003 PCIEMTB const pTb = pSymCtx->pTb;
9004 uintptr_t const offNative = (uAddress - (uintptr_t)pTb->Native.paInstructions) / sizeof(IEMNATIVEINSTR);
9005 if (offNative <= pTb->Native.cInstructions)
9006 {
9007# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9008 /*
9009 * Scan debug info for a matching label.
9010 * Since the debug info should be 100% linear, we can do a binary search here.
9011 */
9012 PCIEMTBDBG const pDbgInfo = pSymCtx->pDbgInfo;
9013 if (pDbgInfo)
9014 {
9015 uint32_t const cEntries = pDbgInfo->cEntries;
9016 uint32_t idxEnd = cEntries;
9017 uint32_t idxStart = 0;
9018 for (;;)
9019 {
9020 /* Find a NativeOffset record close to the midpoint. */
9021 uint32_t idx = idxStart + (idxEnd - idxStart) / 2;
9022 while (idx > idxStart && pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9023 idx--;
9024 if (pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9025 {
9026 idx = idxStart + (idxEnd - idxStart) / 2 + 1;
9027 while (idx < idxEnd && pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9028 idx++;
9029 if (idx >= idxEnd)
9030 break;
9031 }
9032
9033 /* Do the binary searching thing. */
9034 if (offNative < pDbgInfo->aEntries[idx].NativeOffset.offNative)
9035 {
9036 if (idx > idxStart)
9037 idxEnd = idx;
9038 else
9039 break;
9040 }
9041 else if (offNative > pDbgInfo->aEntries[idx].NativeOffset.offNative)
9042 {
9043 idx += 1;
9044 if (idx < idxEnd)
9045 idxStart = idx;
9046 else
9047 break;
9048 }
9049 else
9050 {
9051 /* Got a matching offset, scan forward till we hit a label, but
9052 stop when the native offset changes. */
9053 while (++idx < cEntries)
9054 switch (pDbgInfo->aEntries[idx].Gen.uType)
9055 {
9056 case kIemTbDbgEntryType_Label:
9057 {
9058 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)pDbgInfo->aEntries[idx].Label.enmLabel;
9059 const char * const pszName = iemNativeGetLabelName(enmLabel);
9060 if (enmLabel < kIemNativeLabelType_FirstWithMultipleInstances)
9061 return pszName;
9062 RTStrPrintf(pszBuf, cbBuf, "%s_%u", pszName, pDbgInfo->aEntries[idx].Label.uData);
9063 return pszBuf;
9064 }
9065
9066 case kIemTbDbgEntryType_NativeOffset:
9067 if (pDbgInfo->aEntries[idx].NativeOffset.offNative != offNative)
9068 return NULL;
9069 break;
9070 }
9071 break;
9072 }
9073 }
9074 }
9075# endif
9076 }
9077# ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
9078 else
9079 {
9080 PCIEMNATIVEPERCHUNKCTX const pChunkCtx = pSymCtx->pCtx;
9081 if (pChunkCtx)
9082 for (uint32_t i = 1; i < RT_ELEMENTS(pChunkCtx->apExitLabels); i++)
9083 if ((PIEMNATIVEINSTR)uAddress == pChunkCtx->apExitLabels[i])
9084 return iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true /*fCommonCode*/);
9085 }
9086# endif
9087#endif
9088 RT_NOREF(pSymCtx, uAddress, pszBuf, cbBuf);
9089 return NULL;
9090}
9091
9092#ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9093
9094/**
9095 * @callback_method_impl{FNDISGETSYMBOL}
9096 */
9097static DECLCALLBACK(int) iemNativeDisasmGetSymbolCb(PCDISSTATE pDis, uint32_t u32Sel, RTUINTPTR uAddress,
9098 char *pszBuf, size_t cchBuf, RTINTPTR *poff, void *pvUser)
9099{
9100 const char * const pszSym = iemNativeDisasmGetSymbol((PIEMNATIVDISASMSYMCTX)pvUser, uAddress, pszBuf, cchBuf);
9101 if (pszSym)
9102 {
9103 *poff = 0;
9104 if (pszSym != pszBuf)
9105 return RTStrCopy(pszBuf, cchBuf, pszSym);
9106 return VINF_SUCCESS;
9107 }
9108 RT_NOREF(pDis, u32Sel);
9109 return VERR_SYMBOL_NOT_FOUND;
9110}
9111
9112#else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9113
9114/**
9115 * Annotates an instruction decoded by the capstone disassembler.
9116 */
9117static const char *
9118iemNativeDisasmAnnotateCapstone(PIEMNATIVDISASMSYMCTX pSymCtx, cs_insn const *pInstr, char *pszBuf, size_t cchBuf)
9119{
9120# if defined(RT_ARCH_ARM64)
9121 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9122 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9123 {
9124         /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9125 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9126 char const *psz = strchr(pInstr->op_str, '[');
9127 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9128 {
9129             uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
9130 int32_t off = -1;
9131 psz += 4;
9132 if (*psz == ']')
9133 off = 0;
9134 else if (*psz == ',')
9135 {
9136 psz = RTStrStripL(psz + 1);
9137 if (*psz == '#')
9138 off = RTStrToInt32(&psz[1]);
9139 /** @todo deal with index registers and LSL as well... */
9140 }
9141 if (off >= 0)
9142 return iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9143 }
9144 }
9145 else if (pInstr->id == ARM64_INS_B || pInstr->id == ARM64_INS_BL)
9146 {
9147 const char *pszAddr = strchr(pInstr->op_str, '#');
9148 if (pszAddr)
9149 {
9150 uint64_t uAddr = RTStrToUInt64(pszAddr + 1);
9151 if (uAddr != 0)
9152 return iemNativeDisasmGetSymbol(pSymCtx, uAddr, pszBuf, cchBuf);
9153 }
9154 }
9155# endif
9156 RT_NOREF(pSymCtx, pInstr, pszBuf, cchBuf);
9157 return NULL;
9158}
9159#endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9160
9161
9162DECLHIDDEN(void) iemNativeDisassembleTb(PVMCPU pVCpu, PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
9163{
9164 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
9165#if defined(RT_ARCH_AMD64)
9166 static const char * const a_apszMarkers[] =
9167 {
9168 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
9169 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
9170 };
9171#endif
9172
9173 char szDisBuf[512];
9174 DISSTATE Dis;
9175 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
9176 uint32_t const cNative = pTb->Native.cInstructions;
9177 uint32_t offNative = 0;
9178#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9179 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
9180#endif
9181 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9182 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9183 : DISCPUMODE_64BIT;
9184#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
9185# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9186 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, iemExecMemGetTbChunkCtx(pVCpu, pTb), pDbgInfo };
9187# else
9188 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, iemExecMemGetTbChunkCtx(pVCpu, pTb) };
9189# endif
9190#elif defined(IEMNATIVE_WITH_TB_DEBUG_INFO)
9191 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, pDbgInfo };
9192#else
9193 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb };
9194#endif
9195#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9196 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
9197#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9198 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
9199#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9200# error "Port me"
9201#else
9202 csh hDisasm = ~(size_t)0;
9203# if defined(RT_ARCH_AMD64)
9204 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
9205# elif defined(RT_ARCH_ARM64)
9206 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
9207# else
9208# error "Port me"
9209# endif
9210 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
9211
9212 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
9213 //Assert(rcCs == CS_ERR_OK);
9214#endif
9215
9216 /*
9217 * Print TB info.
9218 */
9219 pHlp->pfnPrintf(pHlp,
9220 "pTb=%p: GCPhysPc=%RGp (%%%RGv) cInstructions=%u LB %#x cRanges=%u\n"
9221 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9222 pTb, pTb->GCPhysPc,
9223#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9224 pTb->pDbgInfo ? pTb->pDbgInfo->FlatPc : RTGCPTR_MAX,
9225#else
9226 pTb->FlatPc,
9227#endif
9228 pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9229 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9230#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9231 if (pDbgInfo && pDbgInfo->cEntries > 1)
9232 {
9233 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9234
9235 /*
9236 * This disassembly is driven by the debug info which follows the native
9237         * code and indicates where the next guest instruction starts, where the
9238         * labels are, and other such things.
9239 */
9240 uint32_t idxThreadedCall = 0;
9241 uint32_t idxGuestInstr = 0;
9242 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9243 uint8_t idxRange = UINT8_MAX;
9244 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9245 uint32_t offRange = 0;
9246 uint32_t offOpcodes = 0;
9247 uint32_t const cbOpcodes = pTb->cbOpcodes;
9248 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9249 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9250 uint32_t iDbgEntry = 1;
9251 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9252
9253 while (offNative < cNative)
9254 {
9255 /* If we're at or have passed the point where the next chunk of debug
9256 info starts, process it. */
9257 if (offDbgNativeNext <= offNative)
9258 {
9259 offDbgNativeNext = UINT32_MAX;
9260 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9261 {
9262 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9263 {
9264 case kIemTbDbgEntryType_GuestInstruction:
9265 {
9266 /* Did the exec flag change? */
9267 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9268 {
9269 pHlp->pfnPrintf(pHlp,
9270 " fExec change %#08x -> %#08x %s\n",
9271 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9272 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9273 szDisBuf, sizeof(szDisBuf)));
9274 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9275 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9276 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9277 : DISCPUMODE_64BIT;
9278 }
9279
9280                         /* New opcode range? We need to fend off a spurious debug info entry here for cases
9281 where the compilation was aborted before the opcode was recorded and the actual
9282 instruction was translated to a threaded call. This may happen when we run out
9283 of ranges, or when some complicated interrupts/FFs are found to be pending or
9284 similar. So, we just deal with it here rather than in the compiler code as it
9285 is a lot simpler to do here. */
9286 if ( idxRange == UINT8_MAX
9287 || idxRange >= cRanges
9288 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9289 {
9290 idxRange += 1;
9291 if (idxRange < cRanges)
9292 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9293 else
9294 continue;
9295 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9296 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9297 + (pTb->aRanges[idxRange].idxPhysPage == 0
9298 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9299 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9300 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9301 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9302 pTb->aRanges[idxRange].idxPhysPage);
9303 GCPhysPc += offRange;
9304 }
9305
9306 /* Disassemble the instruction. */
9307 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
9308 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9309 uint32_t cbInstr = 1;
9310 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9311 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9312 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9313 if (RT_SUCCESS(rc))
9314 {
9315 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9316 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9317 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9318 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9319
9320 static unsigned const s_offMarker = 55;
9321 static char const s_szMarker[] = " ; <--- guest";
9322 if (cch < s_offMarker)
9323 {
9324 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9325 cch = s_offMarker;
9326 }
9327 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9328 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9329
9330 pHlp->pfnPrintf(pHlp, "\n %%%%%RGp: %s #%u\n", GCPhysPc, szDisBuf, idxGuestInstr);
9331 }
9332 else
9333 {
9334 pHlp->pfnPrintf(pHlp, "\n %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9335 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9336 cbInstr = 1;
9337 }
9338 idxGuestInstr++;
9339 GCPhysPc += cbInstr;
9340 offOpcodes += cbInstr;
9341 offRange += cbInstr;
9342 continue;
9343 }
9344
9345 case kIemTbDbgEntryType_ThreadedCall:
9346 pHlp->pfnPrintf(pHlp,
9347 " Call #%u to %s (%u args) - %s\n",
9348 idxThreadedCall,
9349 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9350 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9351 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9352 idxThreadedCall++;
9353 continue;
9354
9355 case kIemTbDbgEntryType_GuestRegShadowing:
9356 {
9357 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9358 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9359 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9360 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9361 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9362 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9363 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
9364 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9365 else
9366 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9367 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9368 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9369 continue;
9370 }
9371
9372#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9373 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9374 {
9375 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9376 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9377 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9378 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9379 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9380 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9381 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9382 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9383 else
9384 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9385 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9386 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9387 continue;
9388 }
9389#endif
9390
9391 case kIemTbDbgEntryType_Label:
9392 {
9393 const char *pszName = iemNativeGetLabelName((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel);
9394 if (pDbgInfo->aEntries[iDbgEntry].Label.enmLabel >= kIemNativeLabelType_FirstWithMultipleInstances)
9395 {
9396 const char *pszComment = pDbgInfo->aEntries[iDbgEntry].Label.enmLabel == kIemNativeLabelType_Else
9397 ? " ; regs state restored pre-if-block" : "";
9398 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9399 }
9400 else
9401 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9402 continue;
9403 }
9404
9405 case kIemTbDbgEntryType_NativeOffset:
9406 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9407 Assert(offDbgNativeNext >= offNative);
9408 break;
9409
9410#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9411 case kIemTbDbgEntryType_DelayedPcUpdate:
9412 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9413 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9414 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9415 continue;
9416#endif
9417
9418#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9419 case kIemTbDbgEntryType_GuestRegDirty:
9420 {
9421 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9422 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
9423 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
9424 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
9425 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
9426 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
9427 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
9428 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
9429 pszGstReg, pszHstReg);
9430 continue;
9431 }
9432
9433 case kIemTbDbgEntryType_GuestRegWriteback:
9434                     pHlp->pfnPrintf(pHlp, " Writing dirty %s registers (gst %#RX64)\n",
9435 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
9436 (uint64_t)pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg
9437 << (pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.cShift * 25));
9438 continue;
9439#endif
9440
9441 default:
9442 AssertFailed();
9443 }
9444 iDbgEntry++;
9445 break;
9446 }
9447 }
9448
9449 /*
9450 * Disassemble the next native instruction.
9451 */
9452 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9453# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9454 uint32_t cbInstr = sizeof(paNative[0]);
9455 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9456 if (RT_SUCCESS(rc))
9457 {
9458# if defined(RT_ARCH_AMD64)
9459 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9460 {
9461 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9462 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9463 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9464 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9465 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9466 uInfo & 0x8000 ? "recompiled" : "todo");
9467 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9468 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9469 else
9470 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9471 }
9472 else
9473# endif
9474 {
9475 const char *pszAnnotation = NULL;
9476# ifdef RT_ARCH_AMD64
9477 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9478 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9479 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9480 iemNativeDisasmGetSymbolCb, &SymCtx);
9481 PCDISOPPARAM pMemOp;
9482 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[0].fUse))
9483 pMemOp = &Dis.aParams[0];
9484 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[1].fUse))
9485 pMemOp = &Dis.aParams[1];
9486 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[2].fUse))
9487 pMemOp = &Dis.aParams[2];
9488 else
9489 pMemOp = NULL;
9490 if ( pMemOp
9491 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9492 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9493 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9494 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9495
9496# elif defined(RT_ARCH_ARM64)
9497 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9498 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9499 iemNativeDisasmGetSymbolCb, &SymCtx);
9500# else
9501# error "Port me"
9502# endif
9503 if (pszAnnotation)
9504 {
9505 static unsigned const s_offAnnotation = 55;
9506 size_t const cchAnnotation = strlen(pszAnnotation);
9507 size_t cchDis = strlen(szDisBuf);
9508 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
9509 {
9510 if (cchDis < s_offAnnotation)
9511 {
9512 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9513 cchDis = s_offAnnotation;
9514 }
9515 szDisBuf[cchDis++] = ' ';
9516 szDisBuf[cchDis++] = ';';
9517 szDisBuf[cchDis++] = ' ';
9518 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9519 }
9520 }
9521 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9522 }
9523 }
9524 else
9525 {
9526# if defined(RT_ARCH_AMD64)
9527 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9528 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9529# elif defined(RT_ARCH_ARM64)
9530 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9531# else
9532# error "Port me"
9533# endif
9534 cbInstr = sizeof(paNative[0]);
9535 }
9536 offNative += cbInstr / sizeof(paNative[0]);
9537
9538# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9539 cs_insn *pInstr;
9540 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9541 (uintptr_t)pNativeCur, 1, &pInstr);
9542 if (cInstrs > 0)
9543 {
9544 Assert(cInstrs == 1);
9545 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9546 size_t const cchOp = strlen(pInstr->op_str);
9547# if defined(RT_ARCH_AMD64)
9548 if (pszAnnotation)
9549 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9550 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9551 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9552 else
9553 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9554 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9555
9556# else
9557 if (pszAnnotation)
9558 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9559 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9560 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9561 else
9562 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9563 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9564# endif
9565 offNative += pInstr->size / sizeof(*pNativeCur);
9566 cs_free(pInstr, cInstrs);
9567 }
9568 else
9569 {
9570# if defined(RT_ARCH_AMD64)
9571 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9572                             pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9573# else
9574 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9575# endif
9576 offNative++;
9577 }
9578# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9579 }
9580 }
9581 else
9582#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9583 {
9584 /*
9585 * No debug info, just disassemble the x86 code and then the native code.
9586 *
9587 * First the guest code:
9588 */
9589 for (unsigned i = 0; i < pTb->cRanges; i++)
9590 {
9591 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9592 + (pTb->aRanges[i].idxPhysPage == 0
9593 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9594 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9595 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9596 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9597 unsigned off = pTb->aRanges[i].offOpcodes;
9598 /** @todo this ain't working when crossing pages! */
9599 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9600 while (off < cbOpcodes)
9601 {
9602 uint32_t cbInstr = 1;
9603 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9604 &pTb->pabOpcodes[off], cbOpcodes - off,
9605 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9606 if (RT_SUCCESS(rc))
9607 {
9608 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9609 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9610 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9611 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9612 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9613 GCPhysPc += cbInstr;
9614 off += cbInstr;
9615 }
9616 else
9617 {
9618 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - disassembly failure %Rrc\n",
9619 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9620 break;
9621 }
9622 }
9623 }
9624
9625 /*
9626 * Then the native code:
9627 */
9628 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9629 while (offNative < cNative)
9630 {
9631 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9632# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9633 uint32_t cbInstr = sizeof(paNative[0]);
9634 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9635 if (RT_SUCCESS(rc))
9636 {
9637# if defined(RT_ARCH_AMD64)
9638 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9639 {
9640 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9641 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9642 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9643 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9644 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9645 uInfo & 0x8000 ? "recompiled" : "todo");
9646 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9647 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9648 else
9649 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9650 }
9651 else
9652# endif
9653 {
9654# ifdef RT_ARCH_AMD64
9655 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9656 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9657 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9658 iemNativeDisasmGetSymbolCb, &SymCtx);
9659# elif defined(RT_ARCH_ARM64)
9660 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9661 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9662 iemNativeDisasmGetSymbolCb, &SymCtx);
9663# else
9664# error "Port me"
9665# endif
9666 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9667 }
9668 }
9669 else
9670 {
9671# if defined(RT_ARCH_AMD64)
9672 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9673 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9674# else
9675 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9676# endif
9677 cbInstr = sizeof(paNative[0]);
9678 }
9679 offNative += cbInstr / sizeof(paNative[0]);
9680
9681# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9682 cs_insn *pInstr;
9683 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9684 (uintptr_t)pNativeCur, 1, &pInstr);
9685 if (cInstrs > 0)
9686 {
9687 Assert(cInstrs == 1);
9688 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9689 size_t const cchOp = strlen(pInstr->op_str);
9690# if defined(RT_ARCH_AMD64)
9691 if (pszAnnotation)
9692 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9693 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9694 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9695 else
9696 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9697 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9698
9699# else
9700 if (pszAnnotation)
9701 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9702 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9703 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9704 else
9705 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9706 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9707# endif
9708 offNative += pInstr->size / sizeof(*pNativeCur);
9709 cs_free(pInstr, cInstrs);
9710 }
9711 else
9712 {
9713# if defined(RT_ARCH_AMD64)
9714 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9715                                 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9716# else
9717 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9718# endif
9719 offNative++;
9720 }
9721# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9722 }
9723 }
9724
9725#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9726 /* Cleanup. */
9727 cs_close(&hDisasm);
9728#endif
9729}
9730
9731
9732#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
9733
9734/** Emit alignment padding between labels / functions. */
9735DECL_INLINE_THROW(uint32_t)
9736iemNativeRecompileEmitAlignmentPadding(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fAlignMask)
9737{
9738 if (off & fAlignMask)
9739 {
9740 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, fAlignMask + 1);
9741 while (off & fAlignMask)
9742# if defined(RT_ARCH_AMD64)
9743 pCodeBuf[off++] = 0xcc;
9744# elif defined(RT_ARCH_ARM64)
9745 pCodeBuf[off++] = Armv8A64MkInstrBrk(0xcccc);
9746# else
9747# error "port me"
9748# endif
9749 }
9750 return off;
9751}
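    /* Worked example (illustrative only, not part of the emitter itself): with the
       byte-granular instruction buffer used on AMD64 and fAlignMask = 15, an offset of
       0x103 gets thirteen 0xcc (int3) filler bytes so that the next label starts at
       0x110, i.e. on a 16-byte boundary:

           while (off & 15)            // pads 0x103, 0x104, ..., 0x10f
               pCodeBuf[off++] = 0xcc; // breakpoint filler, never executed

       On ARM64 the buffer holds 32-bit instructions, so the same mask value describes
       an alignment of fAlignMask + 1 instructions and the filler is a BRK instruction. */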
9752
9753
9754/**
9755 * Called when a new chunk is allocated to emit common per-chunk code.
9756 *
9757 * Allocates a per-chunk context directly from the chunk itself and places the
9758 * common code there.
9759 *
9760 * @returns Pointer to the chunk context start.
9761 * @param pVCpu The cross context virtual CPU structure of the calling
9762 * thread.
9763 * @param idxChunk The index of the chunk being added and requiring a
9764 * common code context.
9765 */
9766DECLHIDDEN(PCIEMNATIVEPERCHUNKCTX) iemNativeRecompileAttachExecMemChunkCtx(PVMCPU pVCpu, uint32_t idxChunk)
9767{
9768 /*
9769 * Allocate a new recompiler state (since we're likely to be called while
9770 * the default one is fully loaded already with a recompiled TB).
9771 *
9772 * This is a bit of overkill, but this isn't a frequently used code path.
9773 */
9774 PIEMRECOMPILERSTATE pReNative = iemNativeInit(pVCpu, NULL);
9775 AssertReturn(pReNative, NULL);
9776
9777# if defined(RT_ARCH_AMD64)
9778 uint32_t const fAlignMask = 15;
9779# elif defined(RT_ARCH_ARM64)
9780     uint32_t const fAlignMask = 31 / 4; /* = 7, i.e. 32-byte alignment in 4-byte instruction units. */
9781# else
9782# error "port me"
9783# endif
9784 uint32_t aoffLabels[kIemNativeLabelType_LastTbExit + 1] = {0};
9785 int rc = VINF_SUCCESS;
9786 uint32_t off = 0;
9787
9788 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9789 {
9790 /*
9791 * Emit the epilog code.
9792 */
9793 aoffLabels[kIemNativeLabelType_ReturnSuccess] = off;
9794 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
9795 uint32_t const offReturnWithStatus = off;
9796 off = iemNativeEmitCoreEpilog(pReNative, off);
9797
9798 /*
9799         * Generate special jump labels. All of these get a copy of the epilog code.
9800 */
9801 static struct
9802 {
9803 IEMNATIVELABELTYPE enmExitReason;
9804 uint32_t (*pfnEmitCore)(PIEMRECOMPILERSTATE pReNative, uint32_t off);
9805 } const s_aSpecialWithEpilogs[] =
9806 {
9807 { kIemNativeLabelType_NonZeroRetOrPassUp, iemNativeEmitCoreRcFiddling },
9808 { kIemNativeLabelType_ReturnBreak, iemNativeEmitCoreReturnBreak },
9809 { kIemNativeLabelType_ReturnBreakFF, iemNativeEmitCoreReturnBreakFF },
9810 { kIemNativeLabelType_ReturnWithFlags, iemNativeEmitCoreReturnWithFlags },
9811 };
9812 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSpecialWithEpilogs); i++)
9813 {
9814 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9815 Assert(aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] == 0);
9816 aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] = off;
9817 off = s_aSpecialWithEpilogs[i].pfnEmitCore(pReNative, off);
9818 off = iemNativeEmitCoreEpilog(pReNative, off);
9819 }
9820
9821 /*
9822 * Do what iemNativeEmitReturnBreakViaLookup does.
9823 */
9824 static struct
9825 {
9826 IEMNATIVELABELTYPE enmExitReason;
9827 uintptr_t pfnHelper;
9828 } const s_aViaLookup[] =
9829 {
9830 { kIemNativeLabelType_ReturnBreakViaLookup,
9831 (uintptr_t)iemNativeHlpReturnBreakViaLookup<false /*a_fWithIrqCheck*/> },
9832 { kIemNativeLabelType_ReturnBreakViaLookupWithIrq,
9833 (uintptr_t)iemNativeHlpReturnBreakViaLookup<true /*a_fWithIrqCheck*/> },
9834 { kIemNativeLabelType_ReturnBreakViaLookupWithTlb,
9835 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<false /*a_fWithIrqCheck*/> },
9836 { kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq,
9837 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<true /*a_fWithIrqCheck*/> },
9838 };
9839 uint32_t const offReturnBreak = aoffLabels[kIemNativeLabelType_ReturnBreak]; Assert(offReturnBreak != 0);
9840 for (uint32_t i = 0; i < RT_ELEMENTS(s_aViaLookup); i++)
9841 {
9842 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9843 Assert(aoffLabels[s_aViaLookup[i].enmExitReason] == 0);
9844 aoffLabels[s_aViaLookup[i].enmExitReason] = off;
9845 off = iemNativeEmitCoreViaLookupDoOne(pReNative, off, offReturnBreak, s_aViaLookup[i].pfnHelper);
9846 }
9847
9848 /*
9849         * Generate simple TB tail labels that just call a helper with a pVCpu
9850         * arg and either return or longjmp/throw a non-zero status.
9851 */
9852 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9853 static struct
9854 {
9855 IEMNATIVELABELTYPE enmExitReason;
9856 bool fWithEpilog;
9857 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9858 } const s_aSimpleTailLabels[] =
9859 {
9860 { kIemNativeLabelType_RaiseDe, false, iemNativeHlpExecRaiseDe },
9861 { kIemNativeLabelType_RaiseUd, false, iemNativeHlpExecRaiseUd },
9862 { kIemNativeLabelType_RaiseSseRelated, false, iemNativeHlpExecRaiseSseRelated },
9863 { kIemNativeLabelType_RaiseAvxRelated, false, iemNativeHlpExecRaiseAvxRelated },
9864 { kIemNativeLabelType_RaiseSseAvxFpRelated, false, iemNativeHlpExecRaiseSseAvxFpRelated },
9865 { kIemNativeLabelType_RaiseNm, false, iemNativeHlpExecRaiseNm },
9866 { kIemNativeLabelType_RaiseGp0, false, iemNativeHlpExecRaiseGp0 },
9867 { kIemNativeLabelType_RaiseMf, false, iemNativeHlpExecRaiseMf },
9868 { kIemNativeLabelType_RaiseXf, false, iemNativeHlpExecRaiseXf },
9869 { kIemNativeLabelType_ObsoleteTb, true, iemNativeHlpObsoleteTb },
9870 { kIemNativeLabelType_NeedCsLimChecking, true, iemNativeHlpNeedCsLimChecking },
9871 { kIemNativeLabelType_CheckBranchMiss, true, iemNativeHlpCheckBranchMiss },
9872 };
9873 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSimpleTailLabels); i++)
9874 {
9875 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9876 Assert(!aoffLabels[s_aSimpleTailLabels[i].enmExitReason]);
9877 aoffLabels[s_aSimpleTailLabels[i].enmExitReason] = off;
9878
9879 /* int pfnCallback(PVMCPUCC pVCpu) */
9880 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9881 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)s_aSimpleTailLabels[i].pfnCallback);
9882
9883 /* If the callback is supposed to return with a status code we inline the epilog
9884 sequence for better speed. Otherwise, if the callback shouldn't return because
9885 it throws/longjmps, we just jump to the return sequence to be on the safe side. */
9886 if (s_aSimpleTailLabels[i].fWithEpilog)
9887 off = iemNativeEmitCoreEpilog(pReNative, off);
9888 else
9889 {
9890# ifdef VBOX_STRICT
9891 off = iemNativeEmitBrk(pReNative, off, 0x2201);
9892# endif
9893 off = iemNativeEmitJmpToFixed(pReNative, off, offReturnWithStatus);
9894 }
9895 }
9896
9897
9898# ifdef VBOX_STRICT
9899     /* Make sure we've generated code for all labels. */
9900 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(aoffLabels); i++)
9901 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_ReturnSuccess);
9902# endif
9903 }
9904 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9905 {
9906 Log(("iemNativeRecompileAttachExecMemChunkCtx: Caught %Rrc while recompiling!\n", rc));
9907 iemNativeTerm(pReNative);
9908 return NULL;
9909 }
9910 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9911
9912 /*
9913 * Allocate memory for the context (first) and the common code (last).
9914 */
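    /* Resulting chunk layout (informal sketch):

           +0       IEMNATIVEPERCHUNKCTX    - cbCtx, sizeof(*pCtx) rounded up to 64 bytes
           +cbCtx   common tail/epilog code - cbCode = off * sizeof(IEMNATIVEINSTR)

       paFinalCommonCodeRx is advanced past the context below so the exit labels can be
       recorded as addresses within the executable (RX) mapping of the code part. */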
9915 PIEMNATIVEPERCHUNKCTX pCtx;
9916 uint32_t const cbCtx = RT_ALIGN_32(sizeof(*pCtx), 64);
9917 uint32_t const cbCode = off * sizeof(IEMNATIVEINSTR);
9918 PIEMNATIVEINSTR paFinalCommonCodeRx = NULL;
9919 pCtx = (PIEMNATIVEPERCHUNKCTX)iemExecMemAllocatorAllocFromChunk(pVCpu, idxChunk, cbCtx + cbCode, &paFinalCommonCodeRx);
9920 AssertLogRelMsgReturn(pCtx, ("cbCtx=%#x cbCode=%#x idxChunk=%#x\n", cbCtx, cbCode, idxChunk), NULL);
9921
9922 /*
9923 * Copy over the generated code.
9924 * There should be no fixups or labels defined here.
9925 */
9926 paFinalCommonCodeRx = (PIEMNATIVEINSTR)((uintptr_t)paFinalCommonCodeRx + cbCtx);
9927 memcpy((PIEMNATIVEINSTR)((uintptr_t)pCtx + cbCtx), pReNative->pInstrBuf, cbCode);
9928
9929 Assert(pReNative->cFixups == 0);
9930 Assert(pReNative->cLabels == 0);
9931
9932 /*
9933 * Initialize the context.
9934 */
9935 AssertCompile(kIemNativeLabelType_Invalid == 0);
9936 AssertCompile(RT_ELEMENTS(pCtx->apExitLabels) == RT_ELEMENTS(aoffLabels));
9937 pCtx->apExitLabels[kIemNativeLabelType_Invalid] = 0;
9938 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(pCtx->apExitLabels); i++)
9939 {
9940 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_ReturnSuccess);
9941 pCtx->apExitLabels[i] = &paFinalCommonCodeRx[aoffLabels[i]];
9942 Log10((" apExitLabels[%u]=%p %s\n", i, pCtx->apExitLabels[i], iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true)));
9943 }
9944
9945 iemExecMemAllocatorReadyForUse(pVCpu, pCtx, cbCtx + cbCode);
9946
9947 iemNativeTerm(pReNative);
9948 return pCtx;
9949}
9950
9951#endif /* IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE */
9952
9953/**
9954 * Recompiles the given threaded TB into a native one.
9955 *
9956 * In case of failure the translation block will be returned as-is.
9957 *
9958 * @returns pTb.
9959 * @param pVCpu The cross context virtual CPU structure of the calling
9960 * thread.
9961 * @param   pTb     The threaded translation block to recompile to native.
9962 */
9963DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9964{
9965#if 0 /* For profiling the native recompiler code. */
9966l_profile_again:
9967#endif
9968 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9969
9970 /*
9971     * The first time thru, we allocate the recompiler state and save it;
9972     * all the other times we'll just reuse the saved one after a quick reset.
9973 */
9974 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9975 if (RT_LIKELY(pReNative))
9976 iemNativeReInit(pReNative, pTb);
9977 else
9978 {
9979 pReNative = iemNativeInit(pVCpu, pTb);
9980 AssertReturn(pReNative, pTb);
9981 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative; /* save it */
9982 }
9983
9984#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9985 /*
9986 * First do liveness analysis. This is done backwards.
9987 */
9988 {
9989 uint32_t idxCall = pTb->Thrd.cCalls;
9990 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9991 { /* likely */ }
9992 else
9993 {
9994 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9995 while (idxCall > cAlloc)
9996 cAlloc *= 2;
9997 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9998 AssertReturn(pvNew, pTb);
9999 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
10000 pReNative->cLivenessEntriesAlloc = cAlloc;
10001 }
10002 AssertReturn(idxCall > 0, pTb);
10003 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
10004
10005 /* The initial (final) entry. */
10006 idxCall--;
10007 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
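        /* Informally: entry[cCalls - 1] starts out as "all unused" and each iteration of
           the loop below derives entry[i - 1] from call[i] and entry[i], so knowledge of
           what later calls still need flows backwards towards the start of the TB.  Calls
           without a dedicated liveness function fall back to the exception/call barrier
           initialization. */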
10008
10009 /* Loop backwards thru the calls and fill in the other entries. */
10010 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
10011 while (idxCall > 0)
10012 {
10013 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
10014 if (pfnLiveness)
10015 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
10016 else
10017 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
10018 pCallEntry--;
10019 idxCall--;
10020 }
10021
10022# ifdef VBOX_WITH_STATISTICS
10023     /* Check if there are any EFLAGS optimizations to be had here. This requires someone setting them
10024        to 'clobbered' rather than 'input'. */
10025 /** @todo */
10026# endif
10027 }
10028#endif
10029
10030 /*
10031 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
10032 * for aborting if an error happens.
10033 */
10034 uint32_t cCallsLeft = pTb->Thrd.cCalls;
10035#ifdef LOG_ENABLED
10036 uint32_t const cCallsOrg = cCallsLeft;
10037#endif
10038 uint32_t off = 0;
10039 int rc = VINF_SUCCESS;
10040 IEMNATIVE_TRY_SETJMP(pReNative, rc)
10041 {
10042#ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
10043 /*
10044 * Emit prolog code (fixed).
10045 */
10046 off = iemNativeEmitProlog(pReNative, off);
10047#endif
10048
10049 /*
10050 * Convert the calls to native code.
10051 */
10052#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10053 int32_t iGstInstr = -1;
10054#endif
10055#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
10056 uint32_t cThreadedCalls = 0;
10057 uint32_t cRecompiledCalls = 0;
10058#endif
10059#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(IEM_WITH_INTRA_TB_JUMPS) || defined(VBOX_STRICT) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10060 uint32_t idxCurCall = 0;
10061#endif
10062 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
10063 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
10064 while (cCallsLeft-- > 0)
10065 {
10066 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
10067#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10068 pReNative->idxCurCall = idxCurCall;
10069#endif
10070
10071#ifdef IEM_WITH_INTRA_TB_JUMPS
10072 /*
10073 * Define label for jump targets (currently only the first entry).
10074 */
10075 if (!(pCallEntry->fFlags & IEMTHREADEDCALLENTRY_F_JUMP_TARGET))
10076 { /* likely */ }
10077 else
10078 {
10079 iemNativeLabelCreate(pReNative, kIemNativeLabelType_LoopJumpTarget, off);
10080 Assert(idxCurCall == 0); /** @todo when jumping elsewhere, we have to save the register state. */
10081 }
10082#endif
10083
10084 /*
10085 * Debug info, assembly markup and statistics.
10086 */
10087#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
10088 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
10089 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
10090#endif
10091#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10092 iemNativeDbgInfoAddNativeOffset(pReNative, off);
10093 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
10094 {
10095 if (iGstInstr < (int32_t)pTb->cInstructions)
10096 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
10097 else
10098 Assert(iGstInstr == pTb->cInstructions);
10099 iGstInstr = pCallEntry->idxInstr;
10100 }
10101 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
10102#endif
10103#if defined(VBOX_STRICT)
10104 off = iemNativeEmitMarker(pReNative, off,
10105 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
10106#endif
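            /* Layout of the marker payload (matches the decoding in the disassembler above;
               illustrative):
                   bits  0..14 - index of this call within the TB,
                   bit      15 - set when the call was recompiled natively,
                   bits 16..31 - the IEMTHREADEDFUNCS value (threaded function number). */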
10107#if defined(VBOX_STRICT)
10108 iemNativeRegAssertSanity(pReNative);
10109#endif
10110#ifdef VBOX_WITH_STATISTICS
10111 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
10112#endif
10113
10114#if 0
10115 if ( pTb->GCPhysPc == 0x00000000000c1240
10116 && idxCurCall == 67)
10117 off = iemNativeEmitBrk(pReNative, off, 0xf000);
10118#endif
10119
10120 /*
10121 * Actual work.
10122 */
10123 Log2(("%u[%u]: %s%s (off=%#x)\n", idxCurCall, pCallEntry->idxInstr,
10124 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "(recompiled)" : "(todo)", off));
10125 if (pfnRecom) /** @todo stats on this. */
10126 {
10127 off = pfnRecom(pReNative, off, pCallEntry);
10128 STAM_REL_STATS({cRecompiledCalls++;});
10129 }
10130 else
10131 {
10132 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
10133 STAM_REL_STATS({cThreadedCalls++;});
10134 }
10135 Assert(off <= pReNative->cInstrBufAlloc);
10136 Assert(pReNative->cCondDepth == 0);
10137
10138#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
10139 if (LogIs2Enabled())
10140 {
10141 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
10142# ifndef IEMLIVENESS_EXTENDED_LAYOUT
10143 static const char s_achState[] = "CUXI";
10144# else
10145 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
10146# endif
10147
10148 char szGpr[17];
10149 for (unsigned i = 0; i < 16; i++)
10150 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
10151 szGpr[16] = '\0';
10152
10153 char szSegBase[X86_SREG_COUNT + 1];
10154 char szSegLimit[X86_SREG_COUNT + 1];
10155 char szSegAttrib[X86_SREG_COUNT + 1];
10156 char szSegSel[X86_SREG_COUNT + 1];
10157 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
10158 {
10159 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
10160 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
10161 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
10162 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
10163 }
10164 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
10165 = szSegSel[X86_SREG_COUNT] = '\0';
10166
10167 char szEFlags[8];
10168 for (unsigned i = 0; i < 7; i++)
10169 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
10170 szEFlags[7] = '\0';
10171
10172 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
10173 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
10174 }
10175#endif
10176
10177 /*
10178 * Advance.
10179 */
10180 pCallEntry++;
10181#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(IEM_WITH_INTRA_TB_JUMPS) || defined(VBOX_STRICT) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10182 idxCurCall++;
10183#endif
10184 }
10185
10186 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
10187 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
10188 if (!cThreadedCalls)
10189 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
10190
10191#ifdef VBOX_WITH_STATISTICS
10192 off = iemNativeEmitNativeTbExitStats(pReNative, off, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbFinished));
10193#endif
10194
10195 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
10196 off = iemNativeRegFlushPendingWrites(pReNative, off);
10197
10198#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
10199 /*
10200 * Successful return, so clear the return register (eax, w0).
10201 */
10202 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
10203
10204 /*
10205 * Emit the epilog code.
10206 */
10207 uint32_t idxReturnLabel;
10208 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
10209#else
10210 /*
10211 * Jump to the common per-chunk epilog code.
10212 */
10213 //off = iemNativeEmitBrk(pReNative, off, 0x1227);
10214 off = iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnSuccess);
10215#endif
10216
10217#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
10218 /*
10219 * Generate special jump labels.
10220 */
10221 off = iemNativeEmitRcFiddling(pReNative, off, idxReturnLabel);
10222
10223 bool const fReturnBreakViaLookup = RT_BOOL( pReNative->bmLabelTypes
10224 & ( RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookup)
10225 | RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookupWithIrq)
10226 | RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookupWithTlb)
10227 | RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq)));
10228 if (fReturnBreakViaLookup)
10229 {
10230 uint32_t const idxReturnBreakLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
10231 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
10232 off = iemNativeEmitReturnBreakViaLookup(pReNative, off, idxReturnBreakLabel);
10233 }
10234 else if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
10235 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
10236
10237 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreakFF))
10238 off = iemNativeEmitReturnBreakFF(pReNative, off, idxReturnLabel);
10239
10240 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
10241 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
10242
10243 /*
10244          * Generate simple TB tail labels that just call a helper with a pVCpu
10245          * arg and either return or longjmp/throw a non-zero status.
10246 *
10247 * The array entries must be ordered by enmLabel value so we can index
10248 * using fTailLabels bit numbers.
10249 */
10250 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
10251 static struct
10252 {
10253 IEMNATIVELABELTYPE enmLabel;
10254 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
10255 } const g_aSimpleTailLabels[] =
10256 {
10257 { kIemNativeLabelType_Invalid, NULL },
10258 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
10259 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
10260 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
10261 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
10262 { kIemNativeLabelType_RaiseSseAvxFpRelated, iemNativeHlpExecRaiseSseAvxFpRelated },
10263 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
10264 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
10265 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
10266 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
10267 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
10268 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
10269 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
10270 };
10271
10272 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
10273 AssertCompile(kIemNativeLabelType_Invalid == 0);
10274 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
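        /* Mask arithmetic, informally: RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 1U
           would select bits 0..LastSimple; subtracting 2U instead also clears bit 0, which
           is kIemNativeLabelType_Invalid, leaving only the simple tail labels the TB may
           actually have referenced.  ASMBitFirstSetU64 returns a 1-based bit number, hence
           the "- 1U" when converting back to a label type below. */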
10275 if (fTailLabels)
10276 {
10277 do
10278 {
10279 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10280 fTailLabels &= ~RT_BIT_64(enmLabel);
10281 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
10282
10283 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10284 Assert(idxLabel != UINT32_MAX);
10285 if (idxLabel != UINT32_MAX)
10286 {
10287 iemNativeLabelDefine(pReNative, idxLabel, off);
10288
10289 /* int pfnCallback(PVMCPUCC pVCpu) */
10290 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10291 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
10292
10293 /* jump back to the return sequence. */
10294 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
10295 }
10296
10297 } while (fTailLabels);
10298 }
10299
10300#else /* IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE */
10301 /*
10302 * Generate tail labels with jumps to the common per-chunk code.
10303 */
10304# ifndef RT_ARCH_AMD64
10305 Assert(!(pReNative->bmLabelTypes & ( RT_BIT_64(kIemNativeLabelType_ReturnSuccess)
10306 | RT_BIT_64(kIemNativeLabelType_Invalid) )));
10307 AssertCompile(kIemNativeLabelType_Invalid == 0);
10308 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1U) - 2U);
10309 if (fTailLabels)
10310 {
10311 do
10312 {
10313 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10314 fTailLabels &= ~RT_BIT_64(enmLabel);
10315
10316 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10317 AssertContinue(idxLabel != UINT32_MAX);
10318 iemNativeLabelDefine(pReNative, idxLabel, off);
10319 off = iemNativeEmitTbExit(pReNative, off, enmLabel);
10320 } while (fTailLabels);
10321 }
10322# else
10323 Assert(!(pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1) - 1U))); /* Should not be used! */
10324# endif
10325#endif /* IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE */
10326 }
10327 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10328 {
10329 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
10330 return pTb;
10331 }
10332 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10333 Assert(off <= pReNative->cInstrBufAlloc);
10334
10335 /*
10336      * Make sure all labels have been defined.
10337 */
10338 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
10339#ifdef VBOX_STRICT
10340 uint32_t const cLabels = pReNative->cLabels;
10341 for (uint32_t i = 0; i < cLabels; i++)
10342 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
10343#endif
10344
10345#if 0 /* For profiling the native recompiler code. */
10346 if (pTb->Thrd.cCalls >= 136)
10347 {
10348 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10349 goto l_profile_again;
10350 }
10351#endif
10352
10353 /*
10354 * Allocate executable memory, copy over the code we've generated.
10355 */
10356 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
10357 if (pTbAllocator->pDelayedFreeHead)
10358 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
10359
10360 PIEMNATIVEINSTR paFinalInstrBufRx = NULL;
10361#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
10362 PCIEMNATIVEPERCHUNKCTX pCtx = NULL;
10363 PIEMNATIVEINSTR const paFinalInstrBuf = iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb,
10364 &paFinalInstrBufRx, &pCtx);
10365
10366#else
10367 PIEMNATIVEINSTR const paFinalInstrBuf = iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb,
10368 &paFinalInstrBufRx, NULL);
10369#endif
10370 AssertReturn(paFinalInstrBuf, pTb);
10371 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
10372
10373 /*
10374 * Apply fixups.
10375 */
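    /* Informally, every fixup records the buffer offset of the field to patch, the label
       it refers to and an addend; the value stored is always
           paLabels[idxLabel].off - paFixups[i].off + offAddend
       which is a displacement in IEMNATIVEINSTR units (bytes on AMD64, 32-bit words on
       ARM64) - only how that displacement is encoded differs between the cases below. */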
10376 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
10377 uint32_t const cFixups = pReNative->cFixups;
10378 for (uint32_t i = 0; i < cFixups; i++)
10379 {
10380 Assert(paFixups[i].off < off);
10381 Assert(paFixups[i].idxLabel < cLabels);
10382 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
10383 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
10384 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
10385 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
10386 switch (paFixups[i].enmType)
10387 {
10388#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10389 case kIemNativeFixupType_Rel32:
10390 Assert(paFixups[i].off + 4 <= off);
10391 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10392 continue;
10393
10394#elif defined(RT_ARCH_ARM64)
10395 case kIemNativeFixupType_RelImm26At0:
10396 {
10397 Assert(paFixups[i].off < off);
10398 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10399 Assert(offDisp >= -33554432 && offDisp < 33554432);
10400 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10401 continue;
10402 }
10403
10404 case kIemNativeFixupType_RelImm19At5:
10405 {
10406 Assert(paFixups[i].off < off);
10407 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10408 Assert(offDisp >= -262144 && offDisp < 262144);
10409 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
10410 continue;
10411 }
10412
10413 case kIemNativeFixupType_RelImm14At5:
10414 {
10415 Assert(paFixups[i].off < off);
10416 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10417 Assert(offDisp >= -8192 && offDisp < 8192);
10418 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
10419 continue;
10420 }
10421
10422#endif
10423 case kIemNativeFixupType_Invalid:
10424 case kIemNativeFixupType_End:
10425 break;
10426 }
10427 AssertFailed();
10428 }
10429
10430#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
10431 /*
10432 * Apply TB exit fixups.
10433 */
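    /* Unlike the regular fixups above, these resolve against the per-chunk common code:
       the target comes from pCtx->apExitLabels[] (an address in the chunk's executable
       mapping), so the displacement is computed between RX addresses rather than between
       offsets inside this TB's instruction buffer. */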
10434 PIEMNATIVEEXITFIXUP const paTbExitFixups = pReNative->paTbExitFixups;
10435 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
10436 for (uint32_t i = 0; i < cTbExitFixups; i++)
10437 {
10438 Assert(paTbExitFixups[i].off < off);
10439 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(paTbExitFixups[i].enmExitReason));
10440 RTPTRUNION const Ptr = { &paFinalInstrBuf[paTbExitFixups[i].off] };
10441
10442# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10443 Assert(paTbExitFixups[i].off + 4 <= off);
10444 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off + 4];
10445 Assert(offDisp >= INT32_MIN && offDisp <= INT32_MAX);
10446 *Ptr.pi32 = (int32_t)offDisp;
10447
10448# elif defined(RT_ARCH_ARM64)
10449 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off];
10450 Assert(offDisp >= -33554432 && offDisp < 33554432);
10451 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10452
10453# else
10454# error "Port me!"
10455# endif
10456 }
10457#endif
10458
10459 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBufRx, off * sizeof(IEMNATIVEINSTR));
10460 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
10461
10462 /*
10463 * Convert the translation block.
10464 */
10465 RTMemFree(pTb->Thrd.paCalls);
10466 pTb->Native.paInstructions = paFinalInstrBufRx;
10467 pTb->Native.cInstructions = off;
10468 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
10469#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10470 pReNative->pDbgInfo->FlatPc = pTb->FlatPc;
10471     pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
10472 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
10473#endif
10474
10475 Assert(pTbAllocator->cThreadedTbs > 0);
10476 pTbAllocator->cThreadedTbs -= 1;
10477 pTbAllocator->cNativeTbs += 1;
10478 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
10479
10480#ifdef LOG_ENABLED
10481 /*
10482 * Disassemble to the log if enabled.
10483 */
10484 if (LogIs3Enabled())
10485 {
10486 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
10487 iemNativeDisassembleTb(pVCpu, pTb, DBGFR3InfoLogHlp());
10488# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
10489 RTLogFlush(NULL);
10490# endif
10491 }
10492#endif
10493 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
10494
10495 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10496 return pTb;
10497}
10498