VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompiler.cpp @ 105997

Last change on this file was 105997, checked in by vboxsync, 6 months ago

VMM/IEM: Introduce a ReturnZero label when using per-chunk tail code, saving one instruction per TB. (todo 16) bugref:10720

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 455.6 KB
1/* $Id: IEMAllN8veRecompiler.cpp 105997 2024-09-10 08:55:10Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler
4 *
5 * Logging group IEM_RE_NATIVE assignments:
6 * - Level 1 (Log) : ...
7 * - Flow (LogFlow) : ...
8 * - Level 2 (Log2) : Details calls as they're recompiled.
9 * - Level 3 (Log3) : Disassemble native code after recompiling.
10 * - Level 4 (Log4) : Delayed PC updating.
11 * - Level 5 (Log5) : ...
12 * - Level 6 (Log6) : ...
13 * - Level 7 (Log7) : ...
14 * - Level 8 (Log8) : ...
15 * - Level 9 (Log9) : ...
16 * - Level 10 (Log10): ...
17 * - Level 11 (Log11): Variable allocator.
18 * - Level 12 (Log12): Register allocator.
19 */
20
21/*
22 * Copyright (C) 2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
48#define IEM_WITH_OPAQUE_DECODER_STATE
49#define VMCPU_INCL_CPUM_GST_CTX
50#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
51#include <VBox/vmm/iem.h>
52#include <VBox/vmm/cpum.h>
53#include <VBox/vmm/dbgf.h>
54#include <VBox/vmm/tm.h>
55#include "IEMInternal.h"
56#include <VBox/vmm/vmcc.h>
57#include <VBox/log.h>
58#include <VBox/err.h>
59#include <VBox/dis.h>
60#include <VBox/param.h>
61#include <iprt/assert.h>
62#include <iprt/mem.h>
63#include <iprt/string.h>
64#if defined(RT_ARCH_AMD64)
65# include <iprt/x86.h>
66#elif defined(RT_ARCH_ARM64)
67# include <iprt/armv8.h>
68#endif
69
70#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
71# include "/opt/local/include/capstone/capstone.h"
72#endif
73
74#include "IEMInline.h"
75#include "IEMThreadedFunctions.h"
76#include "IEMN8veRecompiler.h"
77#include "IEMN8veRecompilerEmit.h"
78#include "IEMN8veRecompilerTlbLookup.h"
79#include "IEMNativeFunctions.h"
80
81
82/*
83 * Narrow down configs here to avoid wasting time on unused configs.
84 * Note! Same checks in IEMAllThrdRecompiler.cpp.
85 */
86
87#ifndef IEM_WITH_CODE_TLB
88# error The code TLB must be enabled for the recompiler.
89#endif
90
91#ifndef IEM_WITH_DATA_TLB
92# error The data TLB must be enabled for the recompiler.
93#endif
94
95#ifndef IEM_WITH_SETJMP
96# error The setjmp approach must be enabled for the recompiler.
97#endif
98
99/** @todo eliminate this clang build hack. */
100#if RT_CLANG_PREREQ(4, 0)
101# pragma GCC diagnostic ignored "-Wunused-function"
102#endif
103
104
105/*********************************************************************************************************************************
106* Internal Functions *
107*********************************************************************************************************************************/
108#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
109static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData);
110#endif
111DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowing(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, uint32_t off);
112DECL_FORCE_INLINE(void) iemNativeRegClearGstRegShadowingOne(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg,
113 IEMNATIVEGSTREG enmGstReg, uint32_t off);
114DECL_INLINE_THROW(void) iemNativeVarRegisterRelease(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar);
115static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode = false);
116
117
118
119/*********************************************************************************************************************************
120* Native Recompilation *
121*********************************************************************************************************************************/
122
123
124/**
125 * Used by TB code when encountering a non-zero status or rcPassUp after a call.
126 */
127IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecStatusCodeFiddling,(PVMCPUCC pVCpu, int rc, uint8_t idxInstr))
128{
129 pVCpu->iem.s.cInstructions += idxInstr;
130 return VBOXSTRICTRC_VAL(iemExecStatusCodeFiddling(pVCpu, rc == VINF_IEM_REEXEC_BREAK ? VINF_SUCCESS : rc));
131}
132
133
134/**
135 * Helping iemNativeHlpReturnBreakViaLookup and iemNativeHlpReturnBreakViaLookupWithTlb.
136 */
137DECL_FORCE_INLINE(bool) iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(PVMCPU pVCpu)
138{
139 uint64_t fCpu = pVCpu->fLocalForcedActions;
140 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
141 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
142 | VMCPU_FF_TLB_FLUSH
143 | VMCPU_FF_UNHALT );
144 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
145 if (RT_LIKELY( ( !fCpu
146 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
147 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
148 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
149 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
150 return false;
151 return true;
152}
153
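/*
 * Illustrative sketch, not part of the upstream sources: the helper above keeps
 * only the force-flags that require leaving the recompiled TB, and treats a
 * pending APIC/PIC interrupt as harmless while interrupts are disabled or an
 * interrupt shadow is active.  Reduced to its bare pattern with hypothetical
 * parameter names (the VM-wide force-flag check is left out):
 */
#if 0 /* illustrative sketch only */
static bool mustLeaveTbSketch(uint64_t fPending, uint64_t fIgnorable, uint64_t fMaskableIrqs,
                              bool fIrqsEnabledAndNotShadowed)
{
    uint64_t const fRelevant = fPending & ~fIgnorable;
    if (!fRelevant)
        return false;                           /* nothing pending at all */
    if (!(fRelevant & ~fMaskableIrqs) && !fIrqsEnabledAndNotShadowed)
        return false;                           /* only maskable IRQs, and they cannot be delivered now */
    return true;                                /* something must be serviced outside the TB */
}
#endif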
154
155/**
156 * Used by TB code to look up the next TB via the TB lookup table and jump straight to it when possible (PC already known).
157 */
158template <bool const a_fWithIrqCheck>
159IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookup,(PVMCPUCC pVCpu, uint8_t idxTbLookup,
160 uint32_t fFlags, RTGCPHYS GCPhysPc))
161{
162 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
163 Assert(idxTbLookup < pTb->cTbLookupEntries);
164 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
165#if 1
166 PIEMTB const pNewTb = *ppNewTb;
167 if (pNewTb)
168 {
169# ifdef VBOX_STRICT
170 uint64_t const uFlatPcAssert = pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base;
171 AssertMsg( (uFlatPcAssert & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.uInstrBufPc
172 && (GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == pVCpu->iem.s.GCPhysInstrBuf
173 && (GCPhysPc & GUEST_PAGE_OFFSET_MASK) == (uFlatPcAssert & GUEST_PAGE_OFFSET_MASK),
174 ("GCPhysPc=%RGp uFlatPcAssert=%#RX64 uInstrBufPc=%#RX64 GCPhysInstrBuf=%RGp\n",
175 GCPhysPc, uFlatPcAssert, pVCpu->iem.s.uInstrBufPc, pVCpu->iem.s.GCPhysInstrBuf));
176# endif
177 if (pNewTb->GCPhysPc == GCPhysPc)
178 {
179# ifdef VBOX_STRICT
180 uint32_t fAssertFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
181 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
182 fAssertFlags |= IEMTB_F_INHIBIT_SHADOW;
183 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
184 fAssertFlags |= IEMTB_F_INHIBIT_NMI;
185# if 1 /** @todo breaks on IP/EIP/RIP wraparound tests in bs3-cpu-weird-1. */
186 Assert(IEM_F_MODE_X86_IS_FLAT(fFlags));
187# else
188 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
189 {
190 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
191 if (offFromLim < X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
192 fAssertFlags |= IEMTB_F_CS_LIM_CHECKS;
193 }
194# endif
195 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
196 AssertMsg(fFlags == fAssertFlags, ("fFlags=%#RX32 fAssertFlags=%#RX32 cs:rip=%04x:%#010RX64\n",
197 fFlags, fAssertFlags, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
198#endif
199
200 /*
201 * Check them + type.
202 */
203 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
204 {
205 /*
206 * Check for interrupts and stuff.
207 */
208 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithTlb.
209 * The main problems are the statistics and, to some degree, the logging. :/ */
210 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
211 {
212 /* Do polling. */
213 if ( RT_LIKELY((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
214 || iemPollTimers(pVCpu->CTX_SUFF(pVM), pVCpu) == VINF_SUCCESS)
215 {
216 /*
217 * Success. Update statistics and switch to the next TB.
218 */
219 if (a_fWithIrqCheck)
220 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1Irq);
221 else
222 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoIrq);
223
224 pNewTb->cUsed += 1;
225 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
226 pVCpu->iem.s.pCurTbR3 = pNewTb;
227 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
228 pVCpu->iem.s.cTbExecNative += 1;
229 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
230 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
231 return (uintptr_t)pNewTb->Native.paInstructions;
232 }
233 }
234 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: IRQ or FF pending\n"));
235 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1PendingIrq);
236 }
237 else
238 {
239 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
240 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
241 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchFlags);
242 }
243 }
244 else
245 {
246 Log10(("iemNativeHlpReturnBreakViaLookupWithPc: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
247 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
248 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1MismatchGCPhysPc);
249 }
250 }
251 else
252 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking1NoTb);
253#else
254 NOREF(GCPhysPc);
255#endif
256
257 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
258 return 0;
259}
260
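/*
 * Illustrative sketch, not part of the upstream sources: the helper above
 * returns either the address of the next TB's native instructions or 0.  The
 * code emitted at the tail of a TB conceptually uses that value as below (in
 * reality it is a jump in generated code, not a C call):
 */
#if 0 /* illustrative sketch only */
typedef uintptr_t (*PFNTBLOOKUPSKETCH)(PVMCPUCC pVCpu, uint8_t idxTbLookup, uint32_t fFlags, RTGCPHYS GCPhysPc);

static void tbTailSketch(PVMCPUCC pVCpu, PFNTBLOOKUPSKETCH pfnLookup,
                         uint8_t idxTbLookup, uint32_t fFlags, RTGCPHYS GCPhysPc)
{
    uintptr_t const uNextNative = pfnLookup(pVCpu, idxTbLookup, fFlags, GCPhysPc);
    if (uNextNative)
        ((void (*)(void))uNextNative)();        /* continue directly in the next TB's native code */
    /* else: no direct link; control goes back to the outer execution loop, which
       consults the pVCpu->iem.s.ppTbLookupEntryR3 pointer set by the helper. */
}
#endif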
261
262/**
263 * Same as iemNativeHlpReturnBreakViaLookup, but the flat PC is first translated to a physical address via the code TLB.
264 */
265template <bool const a_fWithIrqCheck>
266IEM_DECL_NATIVE_HLP_DEF(uintptr_t, iemNativeHlpReturnBreakViaLookupWithTlb,(PVMCPUCC pVCpu, uint8_t idxTbLookup))
267{
268 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
269 Assert(idxTbLookup < pTb->cTbLookupEntries);
270 PIEMTB * const ppNewTb = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxTbLookup);
271#if 1
272 PIEMTB const pNewTb = *ppNewTb;
273 if (pNewTb)
274 {
275 /*
276 * Calculate the flags for the next TB and check if they match.
277 */
278 uint32_t fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK) | IEMTB_F_TYPE_NATIVE;
279 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
280 { /* likely */ }
281 else
282 {
283 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_SHADOW)
284 fFlags |= IEMTB_F_INHIBIT_SHADOW;
285 if (pVCpu->cpum.GstCtx.rflags.uBoth & CPUMCTX_INHIBIT_NMI)
286 fFlags |= IEMTB_F_INHIBIT_NMI;
287 }
288 if (!IEM_F_MODE_X86_IS_FLAT(fFlags))
289 {
290 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
291 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
292 { /* likely */ }
293 else
294 fFlags |= IEMTB_F_CS_LIM_CHECKS;
295 }
296 Assert(!(fFlags & ~(IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)));
297
298 if ((pNewTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == fFlags)
299 {
300 /*
301 * Do the TLB lookup for flat RIP and compare the result with the next TB.
302 *
303 * Note! This replicates iemGetPcWithPhysAndCode and iemGetPcWithPhysAndCodeMissed.
304 */
305 /* Calc the effective PC. */
306 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
307 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
308 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
309
310 /* Advance within the current buffer (PAGE) when possible. */
311 RTGCPHYS GCPhysPc;
312 uint64_t off;
313 if ( pVCpu->iem.s.pbInstrBuf
314 && (off = uPc - pVCpu->iem.s.uInstrBufPc) < pVCpu->iem.s.cbInstrBufTotal) /*ugly*/
315 {
316 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
317 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
318 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
319 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
320 else
321 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
322 GCPhysPc = pVCpu->iem.s.GCPhysInstrBuf + off;
323 }
324 else
325 {
326 pVCpu->iem.s.pbInstrBuf = NULL;
327 pVCpu->iem.s.offCurInstrStart = 0;
328 pVCpu->iem.s.offInstrNextByte = 0;
329 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
330 GCPhysPc = pVCpu->iem.s.pbInstrBuf ? pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart : NIL_RTGCPHYS;
331 }
332
333 if (pNewTb->GCPhysPc == GCPhysPc)
334 {
335 /*
336 * Check for interrupts and stuff.
337 */
338 /** @todo We duplicate code here that's also in iemNativeHlpReturnBreakViaLookupWithPc.
339 * The main problems are the statistics and, to some degree, the logging. :/ */
340 if (!a_fWithIrqCheck || !iemNativeHlpReturnBreakViaLookupIsIrqOrForceFlagPending(pVCpu) )
341 {
342 /* Do polling. */
343 if ( RT_LIKELY((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
344 || iemPollTimers(pVCpu->CTX_SUFF(pVM), pVCpu) == VINF_SUCCESS)
345 {
346 /*
347 * Success. Update statistics and switch to the next TB.
348 */
349 if (a_fWithIrqCheck)
350 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2Irq);
351 else
352 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoIrq);
353
354 pNewTb->cUsed += 1;
355 pNewTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
356 pVCpu->iem.s.pCurTbR3 = pNewTb;
357 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pNewTb, 0);
358 pVCpu->iem.s.cTbExecNative += 1;
359 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: match at %04x:%08RX64 (%RGp): pTb=%p[%#x]-> %p\n",
360 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pTb, idxTbLookup, pNewTb));
361 return (uintptr_t)pNewTb->Native.paInstructions;
362 }
363 }
364 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: IRQ or FF pending\n"));
365 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2PendingIrq);
366 }
367 else
368 {
369 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: GCPhysPc mismatch at %04x:%08RX64: %RGp vs %RGp (pTb=%p[%#x]-> %p)\n",
370 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhysPc, pNewTb->GCPhysPc, pTb, idxTbLookup, pNewTb));
371 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchGCPhysPc);
372 }
373 }
374 else
375 {
376 Log10(("iemNativeHlpReturnBreakViaLookupWithTlb: fFlags mismatch at %04x:%08RX64: %#x vs %#x (pTb=%p[%#x]-> %p)\n",
377 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, fFlags, pNewTb->fFlags, pTb, idxTbLookup, pNewTb));
378 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2MismatchFlags);
379 }
380 }
381 else
382 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb);
383#else
384 NOREF(fFlags);
385 STAM_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitDirectLinking2NoTb); /* just for some stats, even if misleading */
386#endif
387
388 pVCpu->iem.s.ppTbLookupEntryR3 = ppNewTb;
389 return 0;
390}
391
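/*
 * Illustrative sketch, not part of the upstream sources: the TLB variant above
 * first tries to reuse the already mapped instruction buffer when the new PC
 * still falls inside it, and only refetches via iemOpcodeFetchBytesJmp when it
 * does not.  A minimal standalone version of that range check (hypothetical
 * names):
 */
#if 0 /* illustrative sketch only */
static bool tbPcStillInInstrBufSketch(uint64_t uPc, uint64_t uInstrBufPc,
                                      uint32_t cbInstrBufTotal, uint64_t *poffWithinBuf)
{
    /* Unsigned subtraction on purpose: a PC below the buffer start wraps to a
       huge value and fails the '< cbInstrBufTotal' test in a single compare. */
    uint64_t const off = uPc - uInstrBufPc;
    if (off < cbInstrBufTotal)
    {
        *poffWithinBuf = off;                   /* GCPhysPc = GCPhysInstrBuf + off */
        return true;
    }
    return false;                               /* buffer must be refilled */
}
#endif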
392
393/**
394 * Used by TB code when it wants to raise a \#DE.
395 */
396IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseDe,(PVMCPUCC pVCpu))
397{
398 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseDe);
399 iemRaiseDivideErrorJmp(pVCpu);
400#ifndef _MSC_VER
401 return VINF_IEM_RAISED_XCPT; /* not reached */
402#endif
403}
404
405
406/**
407 * Used by TB code when it wants to raise a \#UD.
408 */
409IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseUd,(PVMCPUCC pVCpu))
410{
411 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseUd);
412 iemRaiseUndefinedOpcodeJmp(pVCpu);
413#ifndef _MSC_VER
414 return VINF_IEM_RAISED_XCPT; /* not reached */
415#endif
416}
417
418
419/**
420 * Used by TB code when it wants to raise an SSE related \#UD or \#NM.
421 *
422 * See IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT.
423 */
424IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseRelated,(PVMCPUCC pVCpu))
425{
426 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseRelated);
427 if ( (pVCpu->cpum.GstCtx.cr0 & X86_CR0_EM)
428 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSFXSR))
429 iemRaiseUndefinedOpcodeJmp(pVCpu);
430 else
431 iemRaiseDeviceNotAvailableJmp(pVCpu);
432#ifndef _MSC_VER
433 return VINF_IEM_RAISED_XCPT; /* not reached */
434#endif
435}
436
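/*
 * Illustrative sketch, not part of the upstream sources: once a raise is
 * unavoidable, the choice above follows IEM_MC_MAYBE_RAISE_SSE_RELATED_XCPT:
 * CR0.EM set or CR4.OSFXSR clear means the SSE form is not available (#UD),
 * otherwise the remaining cause is the lazy FPU/SSE state switch (#NM):
 */
#if 0 /* illustrative sketch only */
static unsigned sseRaiseDecisionSketch(uint64_t uCr0, uint64_t uCr4)
{
    if ((uCr0 & X86_CR0_EM) || !(uCr4 & X86_CR4_OSFXSR))
        return 6;                               /* #UD - undefined opcode */
    return 7;                                   /* #NM - device not available (e.g. CR0.TS set) */
}
#endif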
437
438/**
439 * Used by TB code when it wants to raise an AVX related \#UD or \#NM.
440 *
441 * See IEM_MC_MAYBE_RAISE_AVX_RELATED_XCPT.
442 */
443IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseAvxRelated,(PVMCPUCC pVCpu))
444{
445 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseAvxRelated);
446 if ( (pVCpu->cpum.GstCtx.aXcr[0] & (XSAVE_C_YMM | XSAVE_C_SSE)) != (XSAVE_C_YMM | XSAVE_C_SSE)
447 || !(pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXSAVE))
448 iemRaiseUndefinedOpcodeJmp(pVCpu);
449 else
450 iemRaiseDeviceNotAvailableJmp(pVCpu);
451#ifndef _MSC_VER
452 return VINF_IEM_RAISED_XCPT; /* not reached */
453#endif
454}
455
456
457/**
458 * Used by TB code when it wants to raise an SSE/AVX floating point exception related \#UD or \#XF.
459 *
460 * See IEM_MC_CALL_AVX_XXX/IEM_MC_CALL_SSE_XXX.
461 */
462IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseSseAvxFpRelated,(PVMCPUCC pVCpu))
463{
464 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseSseAvxFpRelated);
465 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_OSXMMEEXCPT)
466 iemRaiseSimdFpExceptionJmp(pVCpu);
467 else
468 iemRaiseUndefinedOpcodeJmp(pVCpu);
469#ifndef _MSC_VER
470 return VINF_IEM_RAISED_XCPT; /* not reached */
471#endif
472}
473
474
475/**
476 * Used by TB code when it wants to raise a \#NM.
477 */
478IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseNm,(PVMCPUCC pVCpu))
479{
480 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseNm);
481 iemRaiseDeviceNotAvailableJmp(pVCpu);
482#ifndef _MSC_VER
483 return VINF_IEM_RAISED_XCPT; /* not reached */
484#endif
485}
486
487
488/**
489 * Used by TB code when it wants to raise a \#GP(0).
490 */
491IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseGp0,(PVMCPUCC pVCpu))
492{
493 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseGp0);
494 iemRaiseGeneralProtectionFault0Jmp(pVCpu);
495#ifndef _MSC_VER
496 return VINF_IEM_RAISED_XCPT; /* not reached */
497#endif
498}
499
500
501/**
502 * Used by TB code when it wants to raise a \#MF.
503 */
504IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseMf,(PVMCPUCC pVCpu))
505{
506 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseMf);
507 iemRaiseMathFaultJmp(pVCpu);
508#ifndef _MSC_VER
509 return VINF_IEM_RAISED_XCPT; /* not reached */
510#endif
511}
512
513
514/**
515 * Used by TB code when it wants to raise a \#XF.
516 */
517IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpExecRaiseXf,(PVMCPUCC pVCpu))
518{
519 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitRaiseXf);
520 iemRaiseSimdFpExceptionJmp(pVCpu);
521#ifndef _MSC_VER
522 return VINF_IEM_RAISED_XCPT; /* not reached */
523#endif
524}
525
526
527/**
528 * Used by TB code when detecting opcode changes.
529 * @see iemThreadeFuncWorkerObsoleteTb
530 */
531IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpObsoleteTb,(PVMCPUCC pVCpu))
532{
533 /* We set fSafeToFree to false because we're being called in the context
534 of a TB callback function, which for native TBs means we cannot release
535 the executable memory until we've returned all the way back to iemTbExec, as
536 that return path goes via the native code generated for the TB. */
537 Log7(("TB obsolete: %p at %04x:%08RX64\n", pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
538 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitObsoleteTb);
539 iemThreadedTbObsolete(pVCpu, pVCpu->iem.s.pCurTbR3, false /*fSafeToFree*/);
540 return VINF_IEM_REEXEC_BREAK;
541}
542
543
544/**
545 * Used by TB code when we need to switch to a TB with CS.LIM checking.
546 */
547IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpNeedCsLimChecking,(PVMCPUCC pVCpu))
548{
549 Log7(("TB need CS.LIM: %p at %04x:%08RX64; offFromLim=%#RX64 CS.LIM=%#RX32 CS.BASE=%#RX64\n",
550 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
551 (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.rip,
552 pVCpu->cpum.GstCtx.cs.u32Limit, pVCpu->cpum.GstCtx.cs.u64Base));
553 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckNeedCsLimChecking);
554 return VINF_IEM_REEXEC_BREAK;
555}
556
557
558/**
559 * Used by TB code when we missed a PC check after a branch.
560 */
561IEM_DECL_NATIVE_HLP_DEF(int, iemNativeHlpCheckBranchMiss,(PVMCPUCC pVCpu))
562{
563 Log7(("TB jmp miss: %p at %04x:%08RX64; GCPhysWithOffset=%RGp, pbInstrBuf=%p\n",
564 pVCpu->iem.s.pCurTbR3, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
565 pVCpu->iem.s.GCPhysInstrBuf + pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base - pVCpu->iem.s.uInstrBufPc,
566 pVCpu->iem.s.pbInstrBuf));
567 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatCheckBranchMisses);
568 return VINF_IEM_REEXEC_BREAK;
569}
570
571
572
573/*********************************************************************************************************************************
574* Helpers: Segmented memory fetches and stores. *
575*********************************************************************************************************************************/
576
577/**
578 * Used by TB code to load unsigned 8-bit data w/ segmentation.
579 */
580IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
581{
582#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
583 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
584#else
585 return (uint64_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
586#endif
587}
588
589
590/**
591 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
592 * to 16 bits.
593 */
594IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
595{
596#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
597 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
598#else
599 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
600#endif
601}
602
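/*
 * Illustrative sketch, not part of the upstream sources: the cast chain used by
 * the sign-extending fetch helpers first sign-extends the byte to the requested
 * width and then zero-extends the result into the 64-bit return register:
 */
#if 0 /* illustrative sketch only */
static uint64_t sxU8ToU16Sketch(uint8_t bValue)
{
    /* (int8_t)   reinterpret the byte as signed,
       (int16_t)  sign-extend it to 16 bits,
       (uint16_t) keep only the low 16 bits,
       (uint64_t) zero-extend into the full return register. */
    return (uint64_t)(uint16_t)(int16_t)(int8_t)bValue;
}
/* sxU8ToU16Sketch(0x80) == UINT64_C(0xff80); sxU8ToU16Sketch(0x7f) == UINT64_C(0x007f). */
#endif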
603
604/**
605 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
606 * to 32 bits.
607 */
608IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
609{
610#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
611 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
612#else
613 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
614#endif
615}
616
617/**
618 * Used by TB code to load signed 8-bit data w/ segmentation, sign extending it
619 * to 64 bits.
620 */
621IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
622{
623#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
624 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem);
625#else
626 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8Jmp(pVCpu, iSegReg, GCPtrMem);
627#endif
628}
629
630
631/**
632 * Used by TB code to load unsigned 16-bit data w/ segmentation.
633 */
634IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
635{
636#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
637 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
638#else
639 return (uint64_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
640#endif
641}
642
643
644/**
645 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
646 * to 32 bits.
647 */
648IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
649{
650#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
651 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
652#else
653 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
654#endif
655}
656
657
658/**
659 * Used by TB code to load signed 16-bit data w/ segmentation, sign extending it
660 * to 64 bits.
661 */
662IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
663{
664#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
665 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem);
666#else
667 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16Jmp(pVCpu, iSegReg, GCPtrMem);
668#endif
669}
670
671
672/**
673 * Used by TB code to load unsigned 32-bit data w/ segmentation.
674 */
675IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
676{
677#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
678 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
679#else
680 return (uint64_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
681#endif
682}
683
684
685/**
686 * Used by TB code to load signed 32-bit data w/ segmentation, sign extending it
687 * to 64 bits.
688 */
689IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
690{
691#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
692 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem);
693#else
694 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32Jmp(pVCpu, iSegReg, GCPtrMem);
695#endif
696}
697
698
699/**
700 * Used by TB code to load unsigned 64-bit data w/ segmentation.
701 */
702IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg))
703{
704#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
705 return iemMemFetchDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem);
706#else
707 return iemMemFetchDataU64Jmp(pVCpu, iSegReg, GCPtrMem);
708#endif
709}
710
711
712#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
713/**
714 * Used by TB code to load 128-bit data w/ segmentation.
715 */
716IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
717{
718#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
719 iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
720#else
721 iemMemFetchDataU128Jmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
722#endif
723}
724
725
726/**
727 * Used by TB code to load 128-bit data w/ segmentation.
728 */
729IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
730{
731#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
732 iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
733#else
734 iemMemFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
735#endif
736}
737
738
739/**
740 * Used by TB code to load 128-bit data w/ segmentation.
741 */
742IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT128U pu128Dst))
743{
744#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
745 iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
746#else
747 iemMemFetchDataU128NoAcJmp(pVCpu, pu128Dst, iSegReg, GCPtrMem);
748#endif
749}
750
751
752/**
753 * Used by TB code to load 256-bit data w/ segmentation.
754 */
755IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
756{
757#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
758 iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
759#else
760 iemMemFetchDataU256NoAcJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
761#endif
762}
763
764
765/**
766 * Used by TB code to load 256-bit data w/ segmentation.
767 */
768IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PRTUINT256U pu256Dst))
769{
770#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
771 iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
772#else
773 iemMemFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, iSegReg, GCPtrMem);
774#endif
775}
776#endif
777
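/*
 * Illustrative sketch, not part of the upstream sources: the 128-bit and
 * 256-bit fetch helpers above return their result through an output pointer
 * rather than by value, so the generated code can pass the address of the
 * destination directly.  A hypothetical caller:
 */
#if 0 /* illustrative sketch only */
static void fetchXmmSketch(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg)
{
    RTUINT128U uValue;                                              /* caller-provided destination */
    iemNativeHlpMemFetchDataU128(pVCpu, GCPtrMem, iSegReg, &uValue);
    /* uValue.s.Lo and uValue.s.Hi now hold the two 64-bit halves. */
}
#endif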
778
779/**
780 * Used by TB code to store unsigned 8-bit data w/ segmentation.
781 */
782IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint8_t u8Value))
783{
784#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
785 iemMemStoreDataU8SafeJmp(pVCpu, iSegReg, GCPtrMem, u8Value);
786#else
787 iemMemStoreDataU8Jmp(pVCpu, iSegReg, GCPtrMem, u8Value);
788#endif
789}
790
791
792/**
793 * Used by TB code to store unsigned 16-bit data w/ segmentation.
794 */
795IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint16_t u16Value))
796{
797#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
798 iemMemStoreDataU16SafeJmp(pVCpu, iSegReg, GCPtrMem, u16Value);
799#else
800 iemMemStoreDataU16Jmp(pVCpu, iSegReg, GCPtrMem, u16Value);
801#endif
802}
803
804
805/**
806 * Used by TB code to store unsigned 32-bit data w/ segmentation.
807 */
808IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint32_t u32Value))
809{
810#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
811 iemMemStoreDataU32SafeJmp(pVCpu, iSegReg, GCPtrMem, u32Value);
812#else
813 iemMemStoreDataU32Jmp(pVCpu, iSegReg, GCPtrMem, u32Value);
814#endif
815}
816
817
818/**
819 * Used by TB code to store unsigned 64-bit data w/ segmentation.
820 */
821IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, uint64_t u64Value))
822{
823#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
824 iemMemStoreDataU64SafeJmp(pVCpu, iSegReg, GCPtrMem, u64Value);
825#else
826 iemMemStoreDataU64Jmp(pVCpu, iSegReg, GCPtrMem, u64Value);
827#endif
828}
829
830
831#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
832/**
833 * Used by TB code to store unsigned 128-bit data w/ segmentation.
834 */
835IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
836{
837#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
838 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
839#else
840 iemMemStoreDataU128AlignedSseJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
841#endif
842}
843
844
845/**
846 * Used by TB code to store unsigned 128-bit data w/ segmentation.
847 */
848IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT128U pu128Src))
849{
850#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
851 iemMemStoreDataU128NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
852#else
853 iemMemStoreDataU128NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu128Src);
854#endif
855}
856
857
858/**
859 * Used by TB code to store unsigned 256-bit data w/ segmentation.
860 */
861IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
862{
863#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
864 iemMemStoreDataU256NoAcSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
865#else
866 iemMemStoreDataU256NoAcJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
867#endif
868}
869
870
871/**
872 * Used by TB code to store unsigned 256-bit data w/ segmentation.
873 */
874IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t iSegReg, PCRTUINT256U pu256Src))
875{
876#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
877 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
878#else
879 iemMemStoreDataU256AlignedAvxJmp(pVCpu, iSegReg, GCPtrMem, pu256Src);
880#endif
881}
882#endif
883
884
885
886/**
887 * Used by TB code to store an unsigned 16-bit value onto a generic stack.
888 */
889IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
890{
891#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
892 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
893#else
894 iemMemStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
895#endif
896}
897
898
899/**
900 * Used by TB code to store an unsigned 32-bit value onto a generic stack.
901 */
902IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
903{
904#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
905 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
906#else
907 iemMemStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
908#endif
909}
910
911
912/**
913 * Used by TB code to store a 32-bit selector value onto a generic stack.
914 *
915 * Intel CPUs don't write the whole dword, hence the special function.
916 */
917IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
918{
919#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
920 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
921#else
922 iemMemStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
923#endif
924}
925
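/*
 * Illustrative sketch, not part of the upstream sources: when a segment
 * register is pushed with a 32-bit operand size, Intel CPUs write only the
 * low 16 bits of the stack slot and leave the upper half of the dword as it
 * was, which is why the SReg store has its own helper.  Simplified, with
 * hypothetical names:
 */
#if 0 /* illustrative sketch only */
static void storeSRegToStackSlotSketch(uint8_t *pbStackSlot, uint32_t u32Value)
{
    uint16_t const uSel = (uint16_t)u32Value;   /* only the selector word... */
    memcpy(pbStackSlot, &uSel, sizeof(uSel));   /* ...is written; bytes 2 and 3 keep their old value */
}
#endif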
926
927/**
928 * Used by TB code to store an unsigned 64-bit value onto a generic stack.
929 */
930IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
931{
932#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
933 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
934#else
935 iemMemStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
936#endif
937}
938
939
940/**
941 * Used by TB code to fetch an unsigned 16-bit item off a generic stack.
942 */
943IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
944{
945#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
946 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
947#else
948 return iemMemFetchStackU16Jmp(pVCpu, GCPtrMem);
949#endif
950}
951
952
953/**
954 * Used by TB code to fetch an unsigned 32-bit item off a generic stack.
955 */
956IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
957{
958#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
959 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
960#else
961 return iemMemFetchStackU32Jmp(pVCpu, GCPtrMem);
962#endif
963}
964
965
966/**
967 * Used by TB code to fetch an unsigned 64-bit item off a generic stack.
968 */
969IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
970{
971#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
972 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
973#else
974 return iemMemFetchStackU64Jmp(pVCpu, GCPtrMem);
975#endif
976}
977
978
979
980/*********************************************************************************************************************************
981* Helpers: Flat memory fetches and stores. *
982*********************************************************************************************************************************/
983
984/**
985 * Used by TB code to load unsigned 8-bit data w/ flat address.
986 * @note Zero extending the value to 64-bit to simplify assembly.
987 */
988IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
989{
990#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
991 return (uint64_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
992#else
993 return (uint64_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
994#endif
995}
996
997
998/**
999 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1000 * to 16 bits.
1001 * @note Zero extending the value to 64-bit to simplify assembly.
1002 */
1003IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1004{
1005#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1006 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1007#else
1008 return (uint64_t)(uint16_t)(int16_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1009#endif
1010}
1011
1012
1013/**
1014 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1015 * to 32 bits.
1016 * @note Zero extending the value to 64-bit to simplify assembly.
1017 */
1018IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1019{
1020#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1021 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1022#else
1023 return (uint64_t)(uint32_t)(int32_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1024#endif
1025}
1026
1027
1028/**
1029 * Used by TB code to load signed 8-bit data w/ flat address, sign extending it
1030 * to 64 bits.
1031 */
1032IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU8_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1033{
1034#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1035 return (uint64_t)(int64_t)(int8_t)iemMemFetchDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1036#else
1037 return (uint64_t)(int64_t)(int8_t)iemMemFlatFetchDataU8Jmp(pVCpu, GCPtrMem);
1038#endif
1039}
1040
1041
1042/**
1043 * Used by TB code to load unsigned 16-bit data w/ flat address.
1044 * @note Zero extending the value to 64-bit to simplify assembly.
1045 */
1046IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1047{
1048#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1049 return (uint64_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1050#else
1051 return (uint64_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1052#endif
1053}
1054
1055
1056/**
1057 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1058 * to 32 bits.
1059 * @note Zero extending the value to 64-bit to simplify assembly.
1060 */
1061IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1062{
1063#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1064 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1065#else
1066 return (uint64_t)(uint32_t)(int32_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1067#endif
1068}
1069
1070
1071/**
1072 * Used by TB code to load signed 16-bit data w/ flat address, sign extending it
1073 * to 64 bits.
1074 * @note Zero extending the value to 64-bit to simplify assembly.
1075 */
1076IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU16_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1077{
1078#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1079 return (uint64_t)(int64_t)(int16_t)iemMemFetchDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1080#else
1081 return (uint64_t)(int64_t)(int16_t)iemMemFlatFetchDataU16Jmp(pVCpu, GCPtrMem);
1082#endif
1083}
1084
1085
1086/**
1087 * Used by TB code to load unsigned 32-bit data w/ flat address.
1088 * @note Zero extending the value to 64-bit to simplify assembly.
1089 */
1090IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1091{
1092#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1093 return (uint64_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1094#else
1095 return (uint64_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1096#endif
1097}
1098
1099
1100/**
1101 * Used by TB code to load signed 32-bit data w/ flat address, sign extending it
1102 * to 64 bits.
1103 * @note Zero extending the value to 64-bit to simplify assembly.
1104 */
1105IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU32_Sx_U64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1106{
1107#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1108 return (uint64_t)(int64_t)(int32_t)iemMemFetchDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1109#else
1110 return (uint64_t)(int64_t)(int32_t)iemMemFlatFetchDataU32Jmp(pVCpu, GCPtrMem);
1111#endif
1112}
1113
1114
1115/**
1116 * Used by TB code to load unsigned 64-bit data w/ flat address.
1117 */
1118IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpMemFlatFetchDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1119{
1120#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1121 return iemMemFetchDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem);
1122#else
1123 return iemMemFlatFetchDataU64Jmp(pVCpu, GCPtrMem);
1124#endif
1125}
1126
1127
1128#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1129/**
1130 * Used by TB code to load unsigned 128-bit data w/ flat address.
1131 */
1132IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1133{
1134#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1135 return iemMemFetchDataU128SafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1136#else
1137 return iemMemFlatFetchDataU128Jmp(pVCpu, pu128Dst, GCPtrMem);
1138#endif
1139}
1140
1141
1142/**
1143 * Used by TB code to load unsigned 128-bit data w/ flat address.
1144 */
1145IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1146{
1147#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1148 return iemMemFetchDataU128AlignedSseSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1149#else
1150 return iemMemFlatFetchDataU128AlignedSseJmp(pVCpu, pu128Dst, GCPtrMem);
1151#endif
1152}
1153
1154
1155/**
1156 * Used by TB code to load unsigned 128-bit data w/ flat address.
1157 */
1158IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT128U pu128Dst))
1159{
1160#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1161 return iemMemFetchDataU128NoAcSafeJmp(pVCpu, pu128Dst, UINT8_MAX, GCPtrMem);
1162#else
1163 return iemMemFlatFetchDataU128NoAcJmp(pVCpu, pu128Dst, GCPtrMem);
1164#endif
1165}
1166
1167
1168/**
1169 * Used by TB code to load unsigned 256-bit data w/ flat address.
1170 */
1171IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1172{
1173#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1174 return iemMemFetchDataU256NoAcSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1175#else
1176 return iemMemFlatFetchDataU256NoAcJmp(pVCpu, pu256Dst, GCPtrMem);
1177#endif
1178}
1179
1180
1181/**
1182 * Used by TB code to load unsigned 256-bit data w/ flat address.
1183 */
1184IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatFetchDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PRTUINT256U pu256Dst))
1185{
1186#ifdef IEMNATIVE_WITH_TLB_LOOKUP_FETCH
1187 return iemMemFetchDataU256AlignedAvxSafeJmp(pVCpu, pu256Dst, UINT8_MAX, GCPtrMem);
1188#else
1189 return iemMemFlatFetchDataU256AlignedAvxJmp(pVCpu, pu256Dst, GCPtrMem);
1190#endif
1191}
1192#endif
1193
1194
1195/**
1196 * Used by TB code to store unsigned 8-bit data w/ flat address.
1197 */
1198IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU8,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint8_t u8Value))
1199{
1200#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1201 iemMemStoreDataU8SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u8Value);
1202#else
1203 iemMemFlatStoreDataU8Jmp(pVCpu, GCPtrMem, u8Value);
1204#endif
1205}
1206
1207
1208/**
1209 * Used by TB code to store unsigned 16-bit data w/ flat address.
1210 */
1211IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1212{
1213#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1214 iemMemStoreDataU16SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u16Value);
1215#else
1216 iemMemFlatStoreDataU16Jmp(pVCpu, GCPtrMem, u16Value);
1217#endif
1218}
1219
1220
1221/**
1222 * Used by TB code to store unsigned 32-bit data w/ flat address.
1223 */
1224IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1225{
1226#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1227 iemMemStoreDataU32SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u32Value);
1228#else
1229 iemMemFlatStoreDataU32Jmp(pVCpu, GCPtrMem, u32Value);
1230#endif
1231}
1232
1233
1234/**
1235 * Used by TB code to store unsigned 64-bit data w/ flat address.
1236 */
1237IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1238{
1239#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1240 iemMemStoreDataU64SafeJmp(pVCpu, UINT8_MAX, GCPtrMem, u64Value);
1241#else
1242 iemMemFlatStoreDataU64Jmp(pVCpu, GCPtrMem, u64Value);
1243#endif
1244}
1245
1246
1247#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
1248/**
1249 * Used by TB code to store unsigned 128-bit data w/ flat address.
1250 */
1251IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128AlignedSse,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1252{
1253#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1254 iemMemStoreDataU128AlignedSseSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1255#else
1256 iemMemFlatStoreDataU128AlignedSseJmp(pVCpu, GCPtrMem, pu128Src);
1257#endif
1258}
1259
1260
1261/**
1262 * Used by TB code to store unsigned 128-bit data w/ flat address.
1263 */
1264IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU128NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT128U pu128Src))
1265{
1266#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1267 iemMemStoreDataU128NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu128Src);
1268#else
1269 iemMemFlatStoreDataU128NoAcJmp(pVCpu, GCPtrMem, pu128Src);
1270#endif
1271}
1272
1273
1274/**
1275 * Used by TB code to store unsigned 256-bit data w/ flat address.
1276 */
1277IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256NoAc,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1278{
1279#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1280 iemMemStoreDataU256NoAcSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1281#else
1282 iemMemFlatStoreDataU256NoAcJmp(pVCpu, GCPtrMem, pu256Src);
1283#endif
1284}
1285
1286
1287/**
1288 * Used by TB code to store unsigned 256-bit data w/ flat address.
1289 */
1290IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemFlatStoreDataU256AlignedAvx,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, PCRTUINT256U pu256Src))
1291{
1292#ifdef IEMNATIVE_WITH_TLB_LOOKUP_STORE
1293 iemMemStoreDataU256AlignedAvxSafeJmp(pVCpu, UINT8_MAX, GCPtrMem, pu256Src);
1294#else
1295 iemMemFlatStoreDataU256AlignedAvxJmp(pVCpu, GCPtrMem, pu256Src);
1296#endif
1297}
1298#endif
1299
1300
1301
1302/**
1303 * Used by TB code to store an unsigned 16-bit value onto a flat stack.
1304 */
1305IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint16_t u16Value))
1306{
1307#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1308 iemMemStoreStackU16SafeJmp(pVCpu, GCPtrMem, u16Value);
1309#else
1310 iemMemFlatStoreStackU16Jmp(pVCpu, GCPtrMem, u16Value);
1311#endif
1312}
1313
1314
1315/**
1316 * Used by TB code to store an unsigned 32-bit value onto a flat stack.
1317 */
1318IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1319{
1320#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1321 iemMemStoreStackU32SafeJmp(pVCpu, GCPtrMem, u32Value);
1322#else
1323 iemMemFlatStoreStackU32Jmp(pVCpu, GCPtrMem, u32Value);
1324#endif
1325}
1326
1327
1328/**
1329 * Used by TB code to store a segment selector value onto a flat stack.
1330 *
1331 * Intel CPUs don't write the whole dword, hence the special function.
1332 */
1333IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU32SReg,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint32_t u32Value))
1334{
1335#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1336 iemMemStoreStackU32SRegSafeJmp(pVCpu, GCPtrMem, u32Value);
1337#else
1338 iemMemFlatStoreStackU32SRegJmp(pVCpu, GCPtrMem, u32Value);
1339#endif
1340}
1341
1342
1343/**
1344 * Used by TB code to store an unsigned 64-bit value onto a flat stack.
1345 */
1346IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpStackFlatStoreU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem, uint64_t u64Value))
1347{
1348#ifdef IEMNATIVE_WITH_TLB_LOOKUP_PUSH
1349 iemMemStoreStackU64SafeJmp(pVCpu, GCPtrMem, u64Value);
1350#else
1351 iemMemFlatStoreStackU64Jmp(pVCpu, GCPtrMem, u64Value);
1352#endif
1353}
1354
1355
1356/**
1357 * Used by TB code to fetch an unsigned 16-bit item off a flat stack.
1358 */
1359IEM_DECL_NATIVE_HLP_DEF(uint16_t, iemNativeHlpStackFlatFetchU16,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1360{
1361#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1362 return iemMemFetchStackU16SafeJmp(pVCpu, GCPtrMem);
1363#else
1364 return iemMemFlatFetchStackU16Jmp(pVCpu, GCPtrMem);
1365#endif
1366}
1367
1368
1369/**
1370 * Used by TB code to fetch an unsigned 32-bit item off a flat stack.
1371 */
1372IEM_DECL_NATIVE_HLP_DEF(uint32_t, iemNativeHlpStackFlatFetchU32,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1373{
1374#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1375 return iemMemFetchStackU32SafeJmp(pVCpu, GCPtrMem);
1376#else
1377 return iemMemFlatFetchStackU32Jmp(pVCpu, GCPtrMem);
1378#endif
1379}
1380
1381
1382/**
1383 * Used by TB code to fetch an unsigned 64-bit item off a flat stack.
1384 */
1385IEM_DECL_NATIVE_HLP_DEF(uint64_t, iemNativeHlpStackFlatFetchU64,(PVMCPUCC pVCpu, RTGCPTR GCPtrMem))
1386{
1387#ifdef IEMNATIVE_WITH_TLB_LOOKUP_POP
1388 return iemMemFetchStackU64SafeJmp(pVCpu, GCPtrMem);
1389#else
1390 return iemMemFlatFetchStackU64Jmp(pVCpu, GCPtrMem);
1391#endif
1392}
1393
1394
1395
1396/*********************************************************************************************************************************
1397* Helpers: Segmented memory mapping. *
1398*********************************************************************************************************************************/
1399
1400/**
1401 * Used by TB code to map unsigned 8-bit data for atomic read-write w/
1402 * segmentation.
1403 */
1404IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1405 RTGCPTR GCPtrMem, uint8_t iSegReg))
1406{
1407#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1408 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1409#else
1410 return iemMemMapDataU8AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1411#endif
1412}
1413
1414
1415/**
1416 * Used by TB code to map unsigned 8-bit data read-write w/ segmentation.
1417 */
1418IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1419 RTGCPTR GCPtrMem, uint8_t iSegReg))
1420{
1421#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1422 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1423#else
1424 return iemMemMapDataU8RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1425#endif
1426}
1427
1428
1429/**
1430 * Used by TB code to map unsigned 8-bit data writeonly w/ segmentation.
1431 */
1432IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1433 RTGCPTR GCPtrMem, uint8_t iSegReg))
1434{
1435#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1436 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1437#else
1438 return iemMemMapDataU8WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1439#endif
1440}
1441
1442
1443/**
1444 * Used by TB code to map unsigned 8-bit data readonly w/ segmentation.
1445 */
1446IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1447 RTGCPTR GCPtrMem, uint8_t iSegReg))
1448{
1449#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1450 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1451#else
1452 return iemMemMapDataU8RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1453#endif
1454}
1455
1456
1457/**
1458 * Used by TB code to map unsigned 16-bit data for atomic read-write w/
1459 * segmentation.
1460 */
1461IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1462 RTGCPTR GCPtrMem, uint8_t iSegReg))
1463{
1464#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1465 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1466#else
1467 return iemMemMapDataU16AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1468#endif
1469}
1470
1471
1472/**
1473 * Used by TB code to map unsigned 16-bit data read-write w/ segmentation.
1474 */
1475IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1476 RTGCPTR GCPtrMem, uint8_t iSegReg))
1477{
1478#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1479 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1480#else
1481 return iemMemMapDataU16RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1482#endif
1483}
1484
1485
1486/**
1487 * Used by TB code to map unsigned 16-bit data writeonly w/ segmentation.
1488 */
1489IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1490 RTGCPTR GCPtrMem, uint8_t iSegReg))
1491{
1492#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1493 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1494#else
1495 return iemMemMapDataU16WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1496#endif
1497}
1498
1499
1500/**
1501 * Used by TB code to map unsigned 16-bit data readonly w/ segmentation.
1502 */
1503IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1504 RTGCPTR GCPtrMem, uint8_t iSegReg))
1505{
1506#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1507 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1508#else
1509 return iemMemMapDataU16RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1510#endif
1511}
1512
1513
1514/**
1515 * Used by TB code to map unsigned 32-bit data for atomic read-write w/
1516 * segmentation.
1517 */
1518IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1519 RTGCPTR GCPtrMem, uint8_t iSegReg))
1520{
1521#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1522 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1523#else
1524 return iemMemMapDataU32AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1525#endif
1526}
1527
1528
1529/**
1530 * Used by TB code to map unsigned 32-bit data read-write w/ segmentation.
1531 */
1532IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1533 RTGCPTR GCPtrMem, uint8_t iSegReg))
1534{
1535#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1536 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1537#else
1538 return iemMemMapDataU32RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1539#endif
1540}
1541
1542
1543/**
1544 * Used by TB code to map unsigned 32-bit data writeonly w/ segmentation.
1545 */
1546IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1547 RTGCPTR GCPtrMem, uint8_t iSegReg))
1548{
1549#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1550 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1551#else
1552 return iemMemMapDataU32WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1553#endif
1554}
1555
1556
1557/**
1558 * Used by TB code to map unsigned 32-bit data readonly w/ segmentation.
1559 */
1560IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1561 RTGCPTR GCPtrMem, uint8_t iSegReg))
1562{
1563#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1564 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1565#else
1566 return iemMemMapDataU32RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1567#endif
1568}
1569
1570
1571/**
1572 * Used by TB code to map unsigned 64-bit data for atomic read-write w/
1573 * segmentation.
1574 */
1575IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1576 RTGCPTR GCPtrMem, uint8_t iSegReg))
1577{
1578#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1579 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1580#else
1581 return iemMemMapDataU64AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1582#endif
1583}
1584
1585
1586/**
1587 * Used by TB code to map unsigned 64-bit data read-write w/ segmentation.
1588 */
1589IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1590 RTGCPTR GCPtrMem, uint8_t iSegReg))
1591{
1592#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1593 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1594#else
1595 return iemMemMapDataU64RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1596#endif
1597}
1598
1599
1600/**
1601 * Used by TB code to map unsigned 64-bit data writeonly w/ segmentation.
1602 */
1603IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1604 RTGCPTR GCPtrMem, uint8_t iSegReg))
1605{
1606#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1607 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1608#else
1609 return iemMemMapDataU64WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1610#endif
1611}
1612
1613
1614/**
1615 * Used by TB code to map unsigned 64-bit data readonly w/ segmentation.
1616 */
1617IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1618 RTGCPTR GCPtrMem, uint8_t iSegReg))
1619{
1620#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1621 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1622#else
1623 return iemMemMapDataU64RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1624#endif
1625}
1626
1627
1628/**
1629 * Used by TB code to map 80-bit float data writeonly w/ segmentation.
1630 */
1631IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1632 RTGCPTR GCPtrMem, uint8_t iSegReg))
1633{
1634#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1635 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1636#else
1637 return iemMemMapDataR80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1638#endif
1639}
1640
1641
1642/**
1643 * Used by TB code to map 80-bit BCD data writeonly w/ segmentation.
1644 */
1645IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1646 RTGCPTR GCPtrMem, uint8_t iSegReg))
1647{
1648#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1649 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1650#else
1651 return iemMemMapDataD80WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1652#endif
1653}
1654
1655
1656/**
1657 * Used by TB code to map unsigned 128-bit data for atomic read-write w/
1658 * segmentation.
1659 */
1660IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1661 RTGCPTR GCPtrMem, uint8_t iSegReg))
1662{
1663#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1664 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1665#else
1666 return iemMemMapDataU128AtJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1667#endif
1668}
1669
1670
1671/**
1672 * Used by TB code to map unsigned 128-bit data read-write w/ segmentation.
1673 */
1674IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1675 RTGCPTR GCPtrMem, uint8_t iSegReg))
1676{
1677#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1678 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1679#else
1680 return iemMemMapDataU128RwJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1681#endif
1682}
1683
1684
1685/**
1686 * Used by TB code to map unsigned 128-bit data writeonly w/ segmentation.
1687 */
1688IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1689 RTGCPTR GCPtrMem, uint8_t iSegReg))
1690{
1691#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1692 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1693#else
1694 return iemMemMapDataU128WoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1695#endif
1696}
1697
1698
1699/**
1700 * Used by TB code to map unsigned 128-bit data readonly w/ segmentation.
1701 */
1702IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo,
1703 RTGCPTR GCPtrMem, uint8_t iSegReg))
1704{
1705#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1706 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1707#else
1708 return iemMemMapDataU128RoJmp(pVCpu, pbUnmapInfo, iSegReg, GCPtrMem);
1709#endif
1710}
1711
1712
1713/*********************************************************************************************************************************
1714* Helpers: Flat memory mapping. *
1715*********************************************************************************************************************************/
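
/*
 * Note: The flat variants below take no explicit segment register.  When
 * IEMNATIVE_WITH_TLB_LOOKUP_MAPPED is defined they forward to the segmented
 * "Safe" workers passing UINT8_MAX as the segment index (meaning flat / no
 * segment); otherwise they call the dedicated iemMemFlatMapDataXxxJmp workers
 * directly.
 */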
1716
1717/**
1718 * Used by TB code to map unsigned 8-bit data for atomic read-write w/ flat
1719 * address.
1720 */
1721IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1722{
1723#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1724 return iemMemMapDataU8AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1725#else
1726 return iemMemFlatMapDataU8AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1727#endif
1728}
1729
1730
1731/**
1732 * Used by TB code to map unsigned 8-bit data read-write w/ flat address.
1733 */
1734IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1735{
1736#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1737 return iemMemMapDataU8RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1738#else
1739 return iemMemFlatMapDataU8RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1740#endif
1741}
1742
1743
1744/**
1745 * Used by TB code to map unsigned 8-bit data writeonly w/ flat address.
1746 */
1747IEM_DECL_NATIVE_HLP_DEF(uint8_t *, iemNativeHlpMemFlatMapDataU8Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1748{
1749#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1750 return iemMemMapDataU8WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1751#else
1752 return iemMemFlatMapDataU8WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1753#endif
1754}
1755
1756
1757/**
1758 * Used by TB code to map unsigned 8-bit data readonly w/ flat address.
1759 */
1760IEM_DECL_NATIVE_HLP_DEF(uint8_t const *, iemNativeHlpMemFlatMapDataU8Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1761{
1762#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1763 return iemMemMapDataU8RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1764#else
1765 return iemMemFlatMapDataU8RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1766#endif
1767}
1768
1769
1770/**
1771 * Used by TB code to map unsigned 16-bit data for atomic read-write w/ flat
1772 * address.
1773 */
1774IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1775{
1776#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1777 return iemMemMapDataU16AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1778#else
1779 return iemMemFlatMapDataU16AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1780#endif
1781}
1782
1783
1784/**
1785 * Used by TB code to map unsigned 16-bit data read-write w/ flat address.
1786 */
1787IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1788{
1789#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1790 return iemMemMapDataU16RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1791#else
1792 return iemMemFlatMapDataU16RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1793#endif
1794}
1795
1796
1797/**
1798 * Used by TB code to map unsigned 16-bit data writeonly w/ flat address.
1799 */
1800IEM_DECL_NATIVE_HLP_DEF(uint16_t *, iemNativeHlpMemFlatMapDataU16Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1801{
1802#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1803 return iemMemMapDataU16WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1804#else
1805 return iemMemFlatMapDataU16WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1806#endif
1807}
1808
1809
1810/**
1811 * Used by TB code to map unsigned 16-bit data readonly w/ flat address.
1812 */
1813IEM_DECL_NATIVE_HLP_DEF(uint16_t const *, iemNativeHlpMemFlatMapDataU16Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1814{
1815#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1816 return iemMemMapDataU16RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1817#else
1818 return iemMemFlatMapDataU16RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1819#endif
1820}
1821
1822
1823/**
1824 * Used by TB code to map unsigned 32-bit data for atomic read-write w/ flat
1825 * address.
1826 */
1827IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1828{
1829#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1830 return iemMemMapDataU32AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1831#else
1832 return iemMemFlatMapDataU32AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1833#endif
1834}
1835
1836
1837/**
1838 * Used by TB code to map unsigned 32-bit data read-write w/ flat address.
1839 */
1840IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1841{
1842#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1843 return iemMemMapDataU32RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1844#else
1845 return iemMemFlatMapDataU32RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1846#endif
1847}
1848
1849
1850/**
1851 * Used by TB code to map unsigned 32-bit data writeonly w/ flat address.
1852 */
1853IEM_DECL_NATIVE_HLP_DEF(uint32_t *, iemNativeHlpMemFlatMapDataU32Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1854{
1855#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1856 return iemMemMapDataU32WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1857#else
1858 return iemMemFlatMapDataU32WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1859#endif
1860}
1861
1862
1863/**
1864 * Used by TB code to map unsigned 32-bit data readonly w/ flat address.
1865 */
1866IEM_DECL_NATIVE_HLP_DEF(uint32_t const *, iemNativeHlpMemFlatMapDataU32Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1867{
1868#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1869 return iemMemMapDataU32RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1870#else
1871 return iemMemFlatMapDataU32RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1872#endif
1873}
1874
1875
1876/**
1877 * Used by TB code to map unsigned 64-bit data for atomic read-write w/ flat
1878 * address.
1879 */
1880IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1881{
1882#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1883 return iemMemMapDataU64AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1884#else
1885 return iemMemFlatMapDataU64AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1886#endif
1887}
1888
1889
1890/**
1891 * Used by TB code to map unsigned 64-bit data read-write w/ flat address.
1892 */
1893IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1894{
1895#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1896 return iemMemMapDataU64RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1897#else
1898 return iemMemFlatMapDataU64RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1899#endif
1900}
1901
1902
1903/**
1904 * Used by TB code to map unsigned 64-bit data writeonly w/ flat address.
1905 */
1906IEM_DECL_NATIVE_HLP_DEF(uint64_t *, iemNativeHlpMemFlatMapDataU64Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1907{
1908#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1909 return iemMemMapDataU64WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1910#else
1911 return iemMemFlatMapDataU64WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1912#endif
1913}
1914
1915
1916/**
1917 * Used by TB code to map unsigned 64-bit data readonly w/ flat address.
1918 */
1919IEM_DECL_NATIVE_HLP_DEF(uint64_t const *, iemNativeHlpMemFlatMapDataU64Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1920{
1921#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1922 return iemMemMapDataU64RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1923#else
1924 return iemMemFlatMapDataU64RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1925#endif
1926}
1927
1928
1929/**
1930 * Used by TB code to map 80-bit float data writeonly w/ flat address.
1931 */
1932IEM_DECL_NATIVE_HLP_DEF(RTFLOAT80U *, iemNativeHlpMemFlatMapDataR80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1933{
1934#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1935 return iemMemMapDataR80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1936#else
1937 return iemMemFlatMapDataR80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1938#endif
1939}
1940
1941
1942/**
1943 * Used by TB code to map 80-bit BCD data writeonly w/ flat address.
1944 */
1945IEM_DECL_NATIVE_HLP_DEF(RTPBCD80U *, iemNativeHlpMemFlatMapDataD80Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1946{
1947#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1948 return iemMemMapDataD80WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1949#else
1950 return iemMemFlatMapDataD80WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1951#endif
1952}
1953
1954
1955/**
1956 * Used by TB code to map unsigned 128-bit data for atomic read-write w/ flat
1957 * address.
1958 */
1959IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Atomic,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1960{
1961#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1962 return iemMemMapDataU128AtSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1963#else
1964 return iemMemFlatMapDataU128AtJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1965#endif
1966}
1967
1968
1969/**
1970 * Used by TB code to map unsigned 128-bit data read-write w/ flat address.
1971 */
1972IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Rw,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1973{
1974#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1975 return iemMemMapDataU128RwSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1976#else
1977 return iemMemFlatMapDataU128RwJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1978#endif
1979}
1980
1981
1982/**
1983 * Used by TB code to map unsigned 128-bit data writeonly w/ flat address.
1984 */
1985IEM_DECL_NATIVE_HLP_DEF(RTUINT128U *, iemNativeHlpMemFlatMapDataU128Wo,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1986{
1987#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
1988 return iemMemMapDataU128WoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
1989#else
1990 return iemMemFlatMapDataU128WoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
1991#endif
1992}
1993
1994
1995/**
1996 * Used by TB code to map unsigned 128-bit data readonly w/ flat address.
1997 */
1998IEM_DECL_NATIVE_HLP_DEF(RTUINT128U const *, iemNativeHlpMemFlatMapDataU128Ro,(PVMCPUCC pVCpu, uint8_t *pbUnmapInfo, RTGCPTR GCPtrMem))
1999{
2000#ifdef IEMNATIVE_WITH_TLB_LOOKUP_MAPPED
2001 return iemMemMapDataU128RoSafeJmp(pVCpu, pbUnmapInfo, UINT8_MAX, GCPtrMem);
2002#else
2003 return iemMemFlatMapDataU128RoJmp(pVCpu, pbUnmapInfo, GCPtrMem);
2004#endif
2005}
2006
2007
2008/*********************************************************************************************************************************
2009* Helpers: Commit, rollback & unmap *
2010*********************************************************************************************************************************/
2011
2012/**
2013 * Used by TB code to commit and unmap an atomic read-write memory mapping.
2014 */
2015IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapAtomic,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2016{
2017 return iemMemCommitAndUnmapAtSafeJmp(pVCpu, bUnmapInfo);
2018}
2019
2020
2021/**
2022 * Used by TB code to commit and unmap a read-write memory mapping.
2023 */
2024IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRw,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2025{
2026 return iemMemCommitAndUnmapRwSafeJmp(pVCpu, bUnmapInfo);
2027}
2028
2029
2030/**
2031 * Used by TB code to commit and unmap a write-only memory mapping.
2032 */
2033IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapWo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2034{
2035 return iemMemCommitAndUnmapWoSafeJmp(pVCpu, bUnmapInfo);
2036}
2037
2038
2039/**
2040 * Used by TB code to commit and unmap a read-only memory mapping.
2041 */
2042IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCommitAndUnmapRo,(PVMCPUCC pVCpu, uint8_t bUnmapInfo))
2043{
2044 return iemMemCommitAndUnmapRoSafeJmp(pVCpu, bUnmapInfo);
2045}
2046
2047
2048/**
2049 * Reinitializes the native recompiler state.
2050 *
2051 * Called before starting a new recompile job.
2052 */
2053static PIEMRECOMPILERSTATE iemNativeReInit(PIEMRECOMPILERSTATE pReNative, PCIEMTB pTb)
2054{
2055 pReNative->cLabels = 0;
2056 pReNative->bmLabelTypes = 0;
2057 pReNative->cFixups = 0;
2058#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2059 pReNative->cTbExitFixups = 0;
2060#endif
2061#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2062 pReNative->pDbgInfo->cEntries = 0;
2063 pReNative->pDbgInfo->offNativeLast = UINT32_MAX;
2064#endif
2065 pReNative->pTbOrg = pTb;
2066 pReNative->cCondDepth = 0;
2067 pReNative->uCondSeqNo = 0;
2068 pReNative->uCheckIrqSeqNo = 0;
2069 pReNative->uTlbSeqNo = 0;
2070
2071#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2072 pReNative->Core.offPc = 0;
2073# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(VBOX_WITH_STATISTICS)
2074 pReNative->idxInstrPlusOneOfLastPcUpdate = 0;
2075# endif
2076# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
2077 pReNative->Core.fDebugPcInitialized = false;
2078# endif
2079#endif
2080#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2081 pReNative->fSimdRaiseXcptChecksEmitted = 0;
2082#endif
2083 pReNative->Core.bmHstRegs = IEMNATIVE_REG_FIXED_MASK
2084#if IEMNATIVE_HST_GREG_COUNT < 32
2085 | ~(RT_BIT(IEMNATIVE_HST_GREG_COUNT) - 1U)
2086#endif
2087 ;
2088 pReNative->Core.bmHstRegsWithGstShadow = 0;
2089 pReNative->Core.bmGstRegShadows = 0;
2090#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2091 pReNative->Core.bmGstRegShadowDirty = 0;
2092#endif
2093 pReNative->Core.bmVars = 0;
2094 pReNative->Core.bmStack = 0;
2095 AssertCompile(sizeof(pReNative->Core.bmStack) * 8 == IEMNATIVE_FRAME_VAR_SLOTS); /* Must set reserved slots to 1 otherwise. */
2096 pReNative->Core.u64ArgVars = UINT64_MAX;
2097
2098 pReNative->aidxUniqueLabels[0] = UINT32_MAX;
2099 pReNative->aidxUniqueLabels[1] = UINT32_MAX;
2100 pReNative->aidxUniqueLabels[2] = UINT32_MAX;
2101 pReNative->aidxUniqueLabels[3] = UINT32_MAX;
2102 pReNative->aidxUniqueLabels[4] = UINT32_MAX;
2103 pReNative->aidxUniqueLabels[5] = UINT32_MAX;
2104 pReNative->aidxUniqueLabels[6] = UINT32_MAX;
2105 pReNative->aidxUniqueLabels[7] = UINT32_MAX;
2106 pReNative->aidxUniqueLabels[8] = UINT32_MAX;
2107 pReNative->aidxUniqueLabels[9] = UINT32_MAX;
2108 pReNative->aidxUniqueLabels[10] = UINT32_MAX;
2109 pReNative->aidxUniqueLabels[11] = UINT32_MAX;
2110 pReNative->aidxUniqueLabels[12] = UINT32_MAX;
2111 pReNative->aidxUniqueLabels[13] = UINT32_MAX;
2112 pReNative->aidxUniqueLabels[14] = UINT32_MAX;
2113 pReNative->aidxUniqueLabels[15] = UINT32_MAX;
2114 pReNative->aidxUniqueLabels[16] = UINT32_MAX;
2115 pReNative->aidxUniqueLabels[17] = UINT32_MAX;
2116 pReNative->aidxUniqueLabels[18] = UINT32_MAX;
2117 pReNative->aidxUniqueLabels[19] = UINT32_MAX;
2118 pReNative->aidxUniqueLabels[20] = UINT32_MAX;
2119 pReNative->aidxUniqueLabels[21] = UINT32_MAX;
2120 pReNative->aidxUniqueLabels[22] = UINT32_MAX;
2121#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2122 pReNative->aidxUniqueLabels[23] = UINT32_MAX;
2123 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 24);
2124#else
2125 AssertCompile(RT_ELEMENTS(pReNative->aidxUniqueLabels) == 23);
2126#endif
2127
2128 pReNative->idxLastCheckIrqCallNo = UINT32_MAX;
2129
2130 /* Full host register reinit: */
2131 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstRegs); i++)
2132 {
2133 pReNative->Core.aHstRegs[i].fGstRegShadows = 0;
2134 pReNative->Core.aHstRegs[i].enmWhat = kIemNativeWhat_Invalid;
2135 pReNative->Core.aHstRegs[i].idxVar = UINT8_MAX;
2136 }
2137
2138 uint32_t fRegs = IEMNATIVE_REG_FIXED_MASK
2139 & ~( RT_BIT_32(IEMNATIVE_REG_FIXED_PVMCPU)
2140#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2141 | RT_BIT_32(IEMNATIVE_REG_FIXED_PCPUMCTX)
2142#endif
2143#ifdef IEMNATIVE_REG_FIXED_TMP0
2144 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP0)
2145#endif
2146#ifdef IEMNATIVE_REG_FIXED_TMP1
2147 | RT_BIT_32(IEMNATIVE_REG_FIXED_TMP1)
2148#endif
2149#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2150 | RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
2151#endif
2152 );
2153 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2154 {
2155 fRegs &= ~RT_BIT_32(idxReg);
2156 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2157 }
2158
2159 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PVMCPU].enmWhat = kIemNativeWhat_pVCpuFixed;
2160#ifdef IEMNATIVE_REG_FIXED_PCPUMCTX
2161 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PCPUMCTX].enmWhat = kIemNativeWhat_pCtxFixed;
2162#endif
2163#ifdef IEMNATIVE_REG_FIXED_TMP0
2164 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2165#endif
2166#ifdef IEMNATIVE_REG_FIXED_TMP1
2167 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_TMP1].enmWhat = kIemNativeWhat_FixedTmp;
2168#endif
2169#ifdef IEMNATIVE_REG_FIXED_PC_DBG
2170 pReNative->Core.aHstRegs[IEMNATIVE_REG_FIXED_PC_DBG].enmWhat = kIemNativeWhat_PcShadow;
2171#endif
2172
2173#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2174 pReNative->Core.bmHstSimdRegs = IEMNATIVE_SIMD_REG_FIXED_MASK
2175# if IEMNATIVE_HST_SIMD_REG_COUNT < 32
2176 | ~(RT_BIT(IEMNATIVE_HST_SIMD_REG_COUNT) - 1U)
2177# endif
2178 ;
2179 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
2180 pReNative->Core.bmGstSimdRegShadows = 0;
2181 pReNative->Core.bmGstSimdRegShadowDirtyLo128 = 0;
2182 pReNative->Core.bmGstSimdRegShadowDirtyHi128 = 0;
2183
2184 /* Full host register reinit: */
2185 for (unsigned i = 0; i < RT_ELEMENTS(pReNative->Core.aHstSimdRegs); i++)
2186 {
2187 pReNative->Core.aHstSimdRegs[i].fGstRegShadows = 0;
2188 pReNative->Core.aHstSimdRegs[i].enmWhat = kIemNativeWhat_Invalid;
2189 pReNative->Core.aHstSimdRegs[i].idxVar = UINT8_MAX;
2190 pReNative->Core.aHstSimdRegs[i].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
2191 }
2192
2193 fRegs = IEMNATIVE_SIMD_REG_FIXED_MASK;
2194 for (uint32_t idxReg = ASMBitFirstSetU32(fRegs) - 1; fRegs != 0; idxReg = ASMBitFirstSetU32(fRegs) - 1)
2195 {
2196 fRegs &= ~RT_BIT_32(idxReg);
2197 pReNative->Core.aHstSimdRegs[idxReg].enmWhat = kIemNativeWhat_FixedReserved;
2198 }
2199
2200#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
2201 pReNative->Core.aHstSimdRegs[IEMNATIVE_SIMD_REG_FIXED_TMP0].enmWhat = kIemNativeWhat_FixedTmp;
2202#endif
2203
2204#endif
2205
2206 return pReNative;
2207}
2208
2209
2210/**
2211 * Used when done emitting the per-chunk code and for iemNativeInit bailout.
2212 */
2213static void iemNativeTerm(PIEMRECOMPILERSTATE pReNative)
2214{
2215 RTMemFree(pReNative->pInstrBuf);
2216 RTMemFree(pReNative->paLabels);
2217 RTMemFree(pReNative->paFixups);
2218#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2219 RTMemFree(pReNative->paTbExitFixups);
2220#endif
2221#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2222 RTMemFree(pReNative->pDbgInfo);
2223#endif
2224 RTMemFree(pReNative);
2225}
2226
2227
2228/**
2229 * Allocates and initializes the native recompiler state.
2230 *
2231 * This is called the first time an EMT wants to recompile something.
2232 *
2233 * @returns Pointer to the new recompiler state.
2234 * @param pVCpu The cross context virtual CPU structure of the calling
2235 * thread.
2236 * @param pTb The TB that's about to be recompiled. When this is NULL,
2237 * the recompiler state is for emitting the common per-chunk
2238 * code from iemNativeRecompileAttachExecMemChunkCtx.
2239 * @thread EMT(pVCpu)
2240 */
2241static PIEMRECOMPILERSTATE iemNativeInit(PVMCPUCC pVCpu, PCIEMTB pTb)
2242{
2243 VMCPU_ASSERT_EMT(pVCpu);
2244
2245 PIEMRECOMPILERSTATE pReNative = (PIEMRECOMPILERSTATE)RTMemAllocZ(sizeof(*pReNative));
2246 AssertReturn(pReNative, NULL);
2247
2248 /*
2249 * Try allocate all the buffers and stuff we need.
2250 */
2251 uint32_t const cFactor = pTb ? 1 : 32 /* per-chunk stuff doesn't really need anything but the code buffer */;
2252 pReNative->pInstrBuf = (PIEMNATIVEINSTR)RTMemAllocZ(_64K);
2253 pReNative->paLabels = (PIEMNATIVELABEL)RTMemAllocZ(sizeof(IEMNATIVELABEL) * _8K / cFactor);
2254 pReNative->paFixups = (PIEMNATIVEFIXUP)RTMemAllocZ(sizeof(IEMNATIVEFIXUP) * _16K / cFactor);
2255#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2256 pReNative->paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemAllocZ(sizeof(IEMNATIVEEXITFIXUP) * _8K / cFactor);
2257#endif
2258#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2259 pReNative->pDbgInfo = (PIEMTBDBG)RTMemAllocZ(RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[_16K / cFactor]));
2260#endif
2261 if (RT_LIKELY( pReNative->pInstrBuf
2262 && pReNative->paLabels
2263 && pReNative->paFixups)
2264#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2265 && pReNative->paTbExitFixups
2266#endif
2267#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2268 && pReNative->pDbgInfo
2269#endif
2270 )
2271 {
2272 /*
2273 * Set the buffer & array sizes on success.
2274 */
2275 pReNative->cInstrBufAlloc = _64K / sizeof(IEMNATIVEINSTR);
2276 pReNative->cLabelsAlloc = _8K / cFactor;
2277 pReNative->cFixupsAlloc = _16K / cFactor;
2278#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2279 pReNative->cTbExitFixupsAlloc = _8K / cFactor;
2280#endif
2281#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2282 pReNative->cDbgInfoAlloc = _16K / cFactor;
2283#endif
2284
2285 /* Other constant stuff: */
2286 pReNative->pVCpu = pVCpu;
2287
2288 /*
2289 * Done, just reinit it.
2290 */
2291 return iemNativeReInit(pReNative, pTb);
2292 }
2293
2294 /*
2295 * Failed. Cleanup and return.
2296 */
2297 AssertFailed();
2298 iemNativeTerm(pReNative);
2299 return NULL;
2300}
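
/*
 * Usage sketch (illustrative only): a recompiler entry point would typically
 * cache this state per EMT and only allocate it the first time around.  The
 * pNativeRecompilerStateR3 member name below is an assumption for the sake of
 * the example, not something defined in this file:
 * @code
 *      PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
 *      if (RT_LIKELY(pReNative))
 *          pReNative = iemNativeReInit(pReNative, pTb);
 *      else
 *      {
 *          pReNative = iemNativeInit(pVCpu, pTb);
 *          if (!pReNative)
 *              return pTb;     // out of memory - leave the TB threaded
 *      }
 * @endcode
 */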
2301
2302
2303/**
2304 * Creates a label
2305 *
2306 * If the label does not yet have a defined position,
2307 * call iemNativeLabelDefine() later to set it.
2308 *
2309 * @returns Label ID. Throws VBox status code on failure, so no need to check
2310 * the return value.
2311 * @param pReNative The native recompile state.
2312 * @param enmType The label type.
2313 * @param offWhere The instruction offset of the label. UINT32_MAX if the
2314 * label is not yet defined (default).
2315 * @param uData Data associated with the label. Only applicable to
2316 * certain types of labels. Default is zero.
2317 */
2318DECL_HIDDEN_THROW(uint32_t)
2319iemNativeLabelCreate(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2320 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/)
2321{
2322 Assert(uData == 0 || enmType >= kIemNativeLabelType_FirstWithMultipleInstances);
2323#if defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE) && defined(RT_ARCH_AMD64)
2324 Assert(enmType >= kIemNativeLabelType_LoopJumpTarget);
2325#endif
2326
2327 /*
2328 * Locate existing label definition.
2329 *
2330 * This is only allowed for forward declarations where offWhere=UINT32_MAX
2331 * and uData is zero.
2332 */
2333 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2334 uint32_t const cLabels = pReNative->cLabels;
2335 if ( pReNative->bmLabelTypes & RT_BIT_64(enmType)
2336#ifndef VBOX_STRICT
2337 && enmType < kIemNativeLabelType_FirstWithMultipleInstances
2338 && offWhere == UINT32_MAX
2339 && uData == 0
2340#endif
2341 )
2342 {
2343#ifndef VBOX_STRICT
2344 AssertStmt(enmType > kIemNativeLabelType_Invalid && enmType < kIemNativeLabelType_FirstWithMultipleInstances,
2345 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2346 uint32_t const idxLabel = pReNative->aidxUniqueLabels[enmType];
2347 if (idxLabel < pReNative->cLabels)
2348 return idxLabel;
2349#else
2350 for (uint32_t i = 0; i < cLabels; i++)
2351 if ( paLabels[i].enmType == enmType
2352 && paLabels[i].uData == uData)
2353 {
2354 AssertStmt(uData == 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2355 AssertStmt(offWhere == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2356 AssertStmt(paLabels[i].off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_2));
2357 AssertStmt(enmType < kIemNativeLabelType_FirstWithMultipleInstances && pReNative->aidxUniqueLabels[enmType] == i,
2358 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2359 return i;
2360 }
2361 AssertStmt( enmType >= kIemNativeLabelType_FirstWithMultipleInstances
2362 || pReNative->aidxUniqueLabels[enmType] == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_1));
2363#endif
2364 }
2365
2366 /*
2367 * Make sure we've got room for another label.
2368 */
2369 if (RT_LIKELY(cLabels < pReNative->cLabelsAlloc))
2370 { /* likely */ }
2371 else
2372 {
2373 uint32_t cNew = pReNative->cLabelsAlloc;
2374 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2375 AssertStmt(cLabels == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_3));
2376 cNew *= 2;
2377 AssertStmt(cNew <= _64K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_TOO_MANY)); /* The IEMNATIVEFIXUP::idxLabel type restricts this. */
2378 paLabels = (PIEMNATIVELABEL)RTMemRealloc(paLabels, cNew * sizeof(paLabels[0]));
2379 AssertStmt(paLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_OUT_OF_MEMORY));
2380 pReNative->paLabels = paLabels;
2381 pReNative->cLabelsAlloc = cNew;
2382 }
2383
2384 /*
2385 * Define a new label.
2386 */
2387 paLabels[cLabels].off = offWhere;
2388 paLabels[cLabels].enmType = enmType;
2389 paLabels[cLabels].uData = uData;
2390 pReNative->cLabels = cLabels + 1;
2391
2392 Assert((unsigned)enmType < 64);
2393 pReNative->bmLabelTypes |= RT_BIT_64(enmType);
2394
2395 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2396 {
2397 Assert(uData == 0);
2398 pReNative->aidxUniqueLabels[enmType] = cLabels;
2399 }
2400
2401 if (offWhere != UINT32_MAX)
2402 {
2403#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2404 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2405 iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
2406#endif
2407 }
2408 return cLabels;
2409}
2410
2411
2412/**
2413 * Defines the location of an existing label.
2414 *
2415 * @param pReNative The native recompile state.
2416 * @param idxLabel The label to define.
2417 * @param offWhere The position.
2418 */
2419DECL_HIDDEN_THROW(void) iemNativeLabelDefine(PIEMRECOMPILERSTATE pReNative, uint32_t idxLabel, uint32_t offWhere)
2420{
2421 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_4));
2422 PIEMNATIVELABEL const pLabel = &pReNative->paLabels[idxLabel];
2423 AssertStmt(pLabel->off == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_5));
2424 pLabel->off = offWhere;
2425#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2426 iemNativeDbgInfoAddNativeOffset(pReNative, offWhere);
2427 iemNativeDbgInfoAddLabel(pReNative, (IEMNATIVELABELTYPE)pLabel->enmType, pLabel->uData);
2428#endif
2429}
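
/*
 * Rough sketch of the two ways a label gets its position (illustrative only;
 * the label types and uData value used here are merely examples):
 * @code
 *      // Position known up front:
 *      uint32_t const idxLoop = iemNativeLabelCreate(pReNative, kIemNativeLabelType_LoopJumpTarget, off);
 *
 *      // Forward declared (offWhere = UINT32_MAX), position supplied once the
 *      // code generator reaches it:
 *      uint32_t const idxElse = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else,
 *                                                    UINT32_MAX, pReNative->uCondSeqNo);
 *      // ... emit the code in between ...
 *      iemNativeLabelDefine(pReNative, idxElse, off);
 * @endcode
 */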
2430
2431
2432/**
2433 * Looks up a label.
2434 *
2435 * @returns Label ID if found, UINT32_MAX if not.
2436 */
2437DECLHIDDEN(uint32_t) iemNativeLabelFind(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType,
2438 uint32_t offWhere /*= UINT32_MAX*/, uint16_t uData /*= 0*/) RT_NOEXCEPT
2439{
2440 Assert((unsigned)enmType < 64);
2441 if (RT_BIT_64(enmType) & pReNative->bmLabelTypes)
2442 {
2443 if (enmType < kIemNativeLabelType_FirstWithMultipleInstances)
2444 return pReNative->aidxUniqueLabels[enmType];
2445
2446 PIEMNATIVELABEL paLabels = pReNative->paLabels;
2447 uint32_t const cLabels = pReNative->cLabels;
2448 for (uint32_t i = 0; i < cLabels; i++)
2449 if ( paLabels[i].enmType == enmType
2450 && paLabels[i].uData == uData
2451 && ( paLabels[i].off == offWhere
2452 || offWhere == UINT32_MAX
2453 || paLabels[i].off == UINT32_MAX))
2454 return i;
2455 }
2456 return UINT32_MAX;
2457}
2458
2459
2460/**
2461 * Adds a fixup.
2462 *
2463 * @throws VBox status code (int) on failure.
2464 * @param pReNative The native recompile state.
2465 * @param offWhere The instruction offset of the fixup location.
2466 * @param idxLabel The target label ID for the fixup.
2467 * @param enmType The fixup type.
2468 * @param offAddend Fixup addend if applicable to the type. Default is 0.
2469 */
2470DECL_HIDDEN_THROW(void)
2471iemNativeAddFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, uint32_t idxLabel,
2472 IEMNATIVEFIXUPTYPE enmType, int8_t offAddend /*= 0*/)
2473{
2474 Assert(idxLabel <= UINT16_MAX);
2475 Assert((unsigned)enmType <= UINT8_MAX);
2476#ifdef RT_ARCH_ARM64
2477 AssertStmt( enmType != kIemNativeFixupType_RelImm14At5
2478 || pReNative->paLabels[idxLabel].enmType >= kIemNativeLabelType_LastWholeTbBranch,
2479 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_SHORT_JMP_TO_TAIL_LABEL));
2480#endif
2481
2482 /*
2483 * Make sure we've room.
2484 */
2485 PIEMNATIVEFIXUP paFixups = pReNative->paFixups;
2486 uint32_t const cFixups = pReNative->cFixups;
2487 if (RT_LIKELY(cFixups < pReNative->cFixupsAlloc))
2488 { /* likely */ }
2489 else
2490 {
2491 uint32_t cNew = pReNative->cFixupsAlloc;
2492 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2493 AssertStmt(cFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2494 cNew *= 2;
2495 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2496 paFixups = (PIEMNATIVEFIXUP)RTMemRealloc(paFixups, cNew * sizeof(paFixups[0]));
2497 AssertStmt(paFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2498 pReNative->paFixups = paFixups;
2499 pReNative->cFixupsAlloc = cNew;
2500 }
2501
2502 /*
2503 * Add the fixup.
2504 */
2505 paFixups[cFixups].off = offWhere;
2506 paFixups[cFixups].idxLabel = (uint16_t)idxLabel;
2507 paFixups[cFixups].enmType = enmType;
2508 paFixups[cFixups].offAddend = offAddend;
2509 pReNative->cFixups = cFixups + 1;
2510}
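
/*
 * Typical fixup pattern (hedged sketch; the -4 addend reflects the usual
 * AMD64 rel32 convention where the displacement is relative to the end of the
 * instruction, and is not meant as a definitive recipe):
 * @code
 *      uint32_t const idxLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Else,
 *                                                     UINT32_MAX, pReNative->uCondSeqNo);
 *      // ... emit a jcc/jmp opcode so that 'off' now points at the four
 *      //     displacement bytes that will be patched later ...
 *      iemNativeAddFixup(pReNative, off, idxLabel, kIemNativeFixupType_Rel32, -4);
 *      // ... later, when the jump target position is reached:
 *      iemNativeLabelDefine(pReNative, idxLabel, off);
 * @endcode
 */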
2511
2512
2513#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
2514/**
2515 * Adds a fixup to the per chunk tail code.
2516 *
2517 * @throws VBox status code (int) on failure.
2518 * @param pReNative The native recompile state.
2519 * @param offWhere The instruction offset of the fixup location.
2520 * @param enmExitReason The exit reason to jump to.
2521 */
2522DECL_HIDDEN_THROW(void)
2523iemNativeAddTbExitFixup(PIEMRECOMPILERSTATE pReNative, uint32_t offWhere, IEMNATIVELABELTYPE enmExitReason)
2524{
2525 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(enmExitReason));
2526
2527 /*
2528 * Make sure we've room.
2529 */
2530 PIEMNATIVEEXITFIXUP paTbExitFixups = pReNative->paTbExitFixups;
2531 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
2532 if (RT_LIKELY(cTbExitFixups < pReNative->cTbExitFixupsAlloc))
2533 { /* likely */ }
2534 else
2535 {
2536 uint32_t cNew = pReNative->cTbExitFixupsAlloc;
2537 AssertStmt(cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2538 AssertStmt(cTbExitFixups == cNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_IPE_1));
2539 cNew *= 2;
2540 AssertStmt(cNew <= _128K, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_TOO_MANY));
2541 paTbExitFixups = (PIEMNATIVEEXITFIXUP)RTMemRealloc(paTbExitFixups, cNew * sizeof(paTbExitFixups[0]));
2542 AssertStmt(paTbExitFixups, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_FIXUP_OUT_OF_MEMORY));
2543 pReNative->paTbExitFixups = paTbExitFixups;
2544 pReNative->cTbExitFixupsAlloc = cNew;
2545 }
2546
2547 /*
2548 * Add the fixup.
2549 */
2550 paTbExitFixups[cTbExitFixups].off = offWhere;
2551 paTbExitFixups[cTbExitFixups].enmExitReason = enmExitReason;
2552 pReNative->cTbExitFixups = cTbExitFixups + 1;
2553}
2554#endif
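
/*
 * With per-chunk tail code there is no per-TB label for the common exit paths;
 * the jump site is instead recorded here and later patched to point at the
 * tail code of the executable-memory chunk the TB lands in.  A hedged sketch
 * (the exit reason shown is just an example):
 * @code
 * #ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
 *      // 'off' points at the position that will receive the 32-bit displacement:
 *      iemNativeAddTbExitFixup(pReNative, off, kIemNativeLabelType_RaiseGp0);
 * #else
 *      // ... otherwise a regular label + iemNativeAddFixup() pair is used ...
 * #endif
 * @endcode
 */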
2555
2556
2557/**
2558 * Slow code path for iemNativeInstrBufEnsure.
2559 */
2560DECL_HIDDEN_THROW(PIEMNATIVEINSTR) iemNativeInstrBufEnsureSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t cInstrReq)
2561{
2562 /* Double the buffer size till we meet the request. */
2563 uint32_t cNew = pReNative->cInstrBufAlloc;
2564 AssertStmt(cNew > 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_INTERNAL_ERROR_5)); /* impossible */
2565 do
2566 cNew *= 2;
2567 while (cNew < off + cInstrReq);
2568
2569 uint32_t const cbNew = cNew * sizeof(IEMNATIVEINSTR);
2570#ifdef RT_ARCH_ARM64
2571 uint32_t const cbMaxInstrBuf = _1M; /* Limited by the branch instruction range (18+2 bits). */
2572#else
2573 uint32_t const cbMaxInstrBuf = _2M;
2574#endif
2575 AssertStmt(cbNew <= cbMaxInstrBuf, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_TOO_LARGE));
2576
2577 void *pvNew = RTMemRealloc(pReNative->pInstrBuf, cbNew);
2578 AssertStmt(pvNew, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_INSTR_BUF_OUT_OF_MEMORY));
2579
2580#ifdef VBOX_STRICT
2581 pReNative->offInstrBufChecked = off + cInstrReq;
2582#endif
2583 pReNative->cInstrBufAlloc = cNew;
2584 return pReNative->pInstrBuf = (PIEMNATIVEINSTR)pvNew;
2585}
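
/*
 * The fast path lives in iemNativeInstrBufEnsure() (see the header); emitters
 * typically request room for a worst-case number of instruction units up front
 * and then write through the returned pointer, e.g. (illustrative only):
 * @code
 *      PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
 *      pCodeBuf[off++] = ...;      // up to 8 instruction units may be written
 * @endcode
 */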
2586
2587#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
2588
2589/**
2590 * Grows the static debug info array used during recompilation.
2591 *
2592 * @returns Pointer to the new debug info block; throws VBox status code on
2593 * failure, so no need to check the return value.
2594 */
2595DECL_NO_INLINE(static, PIEMTBDBG) iemNativeDbgInfoGrow(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2596{
2597 uint32_t cNew = pReNative->cDbgInfoAlloc * 2;
2598 AssertStmt(cNew < _1M && cNew != 0, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_1));
2599 pDbgInfo = (PIEMTBDBG)RTMemRealloc(pDbgInfo, RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[cNew]));
2600 AssertStmt(pDbgInfo, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_OUT_OF_MEMORY));
2601 pReNative->pDbgInfo = pDbgInfo;
2602 pReNative->cDbgInfoAlloc = cNew;
2603 return pDbgInfo;
2604}
2605
2606
2607/**
2608 * Adds a new debug info uninitialized entry, returning the pointer to it.
2609 */
2610DECL_INLINE_THROW(PIEMTBDBGENTRY) iemNativeDbgInfoAddNewEntry(PIEMRECOMPILERSTATE pReNative, PIEMTBDBG pDbgInfo)
2611{
2612 if (RT_LIKELY(pDbgInfo->cEntries < pReNative->cDbgInfoAlloc))
2613 { /* likely */ }
2614 else
2615 pDbgInfo = iemNativeDbgInfoGrow(pReNative, pDbgInfo);
2616 return &pDbgInfo->aEntries[pDbgInfo->cEntries++];
2617}
2618
2619
2620/**
2621 * Debug Info: Adds a native offset record, if necessary.
2622 */
2623DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddNativeOffset(PIEMRECOMPILERSTATE pReNative, uint32_t off)
2624{
2625 PIEMTBDBG pDbgInfo = pReNative->pDbgInfo;
2626
2627 /*
2628 * Do we need this one?
2629 */
2630 uint32_t const offPrev = pDbgInfo->offNativeLast;
2631 if (offPrev == off)
2632 return;
2633 AssertStmt(offPrev < off || offPrev == UINT32_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_DBGINFO_IPE_2));
2634
2635 /*
2636 * Add it.
2637 */
2638 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pDbgInfo);
2639 pEntry->NativeOffset.uType = kIemTbDbgEntryType_NativeOffset;
2640 pEntry->NativeOffset.offNative = off;
2641 pDbgInfo->offNativeLast = off;
2642}
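
/*
 * Note: Callers add a native offset record first and then the entry describing
 * what lives at that offset, as seen with the label records elsewhere in this
 * file:
 * @code
 *      iemNativeDbgInfoAddNativeOffset(pReNative, off);
 *      iemNativeDbgInfoAddLabel(pReNative, enmType, uData);
 * @endcode
 */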
2643
2644
2645/**
2646 * Debug Info: Record info about a label.
2647 */
2648static void iemNativeDbgInfoAddLabel(PIEMRECOMPILERSTATE pReNative, IEMNATIVELABELTYPE enmType, uint16_t uData)
2649{
2650 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2651 pEntry->Label.uType = kIemTbDbgEntryType_Label;
2652 pEntry->Label.uUnused = 0;
2653 pEntry->Label.enmLabel = (uint8_t)enmType;
2654 pEntry->Label.uData = uData;
2655}
2656
2657
2658/**
2659 * Debug Info: Record info about a threaded call.
2660 */
2661static void iemNativeDbgInfoAddThreadedCall(PIEMRECOMPILERSTATE pReNative, IEMTHREADEDFUNCS enmCall, bool fRecompiled)
2662{
2663 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2664 pEntry->ThreadedCall.uType = kIemTbDbgEntryType_ThreadedCall;
2665 pEntry->ThreadedCall.fRecompiled = fRecompiled;
2666 pEntry->ThreadedCall.uUnused = 0;
2667 pEntry->ThreadedCall.enmCall = (uint16_t)enmCall;
2668}
2669
2670
2671/**
2672 * Debug Info: Record info about a new guest instruction.
2673 */
2674static void iemNativeDbgInfoAddGuestInstruction(PIEMRECOMPILERSTATE pReNative, uint32_t fExec)
2675{
2676 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2677 pEntry->GuestInstruction.uType = kIemTbDbgEntryType_GuestInstruction;
2678 pEntry->GuestInstruction.uUnused = 0;
2679 pEntry->GuestInstruction.fExec = fExec;
2680}
2681
2682
2683/**
2684 * Debug Info: Record info about guest register shadowing.
2685 */
2686DECL_HIDDEN_THROW(void)
2687iemNativeDbgInfoAddGuestRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTREG enmGstReg,
2688 uint8_t idxHstReg /*= UINT8_MAX*/, uint8_t idxHstRegPrev /*= UINT8_MAX*/)
2689{
2690 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2691 pEntry->GuestRegShadowing.uType = kIemTbDbgEntryType_GuestRegShadowing;
2692 pEntry->GuestRegShadowing.uUnused = 0;
2693 pEntry->GuestRegShadowing.idxGstReg = enmGstReg;
2694 pEntry->GuestRegShadowing.idxHstReg = idxHstReg;
2695 pEntry->GuestRegShadowing.idxHstRegPrev = idxHstRegPrev;
2696#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2697 Assert( idxHstReg != UINT8_MAX
2698 || !(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg)));
2699#endif
2700}
2701
2702
2703# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
2704/**
2705 * Debug Info: Record info about guest SIMD register shadowing.
2706 */
2707DECL_HIDDEN_THROW(void)
2708iemNativeDbgInfoAddGuestSimdRegShadowing(PIEMRECOMPILERSTATE pReNative, IEMNATIVEGSTSIMDREG enmGstSimdReg,
2709 uint8_t idxHstSimdReg /*= UINT8_MAX*/, uint8_t idxHstSimdRegPrev /*= UINT8_MAX*/)
2710{
2711 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2712 pEntry->GuestSimdRegShadowing.uType = kIemTbDbgEntryType_GuestSimdRegShadowing;
2713 pEntry->GuestSimdRegShadowing.uUnused = 0;
2714 pEntry->GuestSimdRegShadowing.idxGstSimdReg = enmGstSimdReg;
2715 pEntry->GuestSimdRegShadowing.idxHstSimdReg = idxHstSimdReg;
2716 pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev = idxHstSimdRegPrev;
2717}
2718# endif
2719
2720
2721# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
2722/**
2723 * Debug Info: Record info about delayed RIP updates.
2724 */
2725DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddDelayedPcUpdate(PIEMRECOMPILERSTATE pReNative, uint64_t offPc, uint32_t cInstrSkipped)
2726{
2727 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2728 pEntry->DelayedPcUpdate.uType = kIemTbDbgEntryType_DelayedPcUpdate;
2729 pEntry->DelayedPcUpdate.cInstrSkipped = cInstrSkipped;
2730 pEntry->DelayedPcUpdate.offPc = offPc; /** @todo support larger values */
2731}
2732# endif
2733
2734# if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR)
2735
2736/**
2737 * Debug Info: Record info about a dirty guest register.
2738 */
2739DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegDirty(PIEMRECOMPILERSTATE pReNative, bool fSimdReg,
2740 uint8_t idxGstReg, uint8_t idxHstReg)
2741{
2742 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2743 pEntry->GuestRegDirty.uType = kIemTbDbgEntryType_GuestRegDirty;
2744 pEntry->GuestRegDirty.fSimdReg = fSimdReg ? 1 : 0;
2745 pEntry->GuestRegDirty.idxGstReg = idxGstReg;
2746 pEntry->GuestRegDirty.idxHstReg = idxHstReg;
2747}
2748
2749
2750/**
2751 * Debug Info: Record info about a dirty guest register writeback operation.
2752 */
2753DECL_HIDDEN_THROW(void) iemNativeDbgInfoAddGuestRegWriteback(PIEMRECOMPILERSTATE pReNative, bool fSimdReg, uint64_t fGstReg)
2754{
2755 unsigned const cBitsGstRegMask = 25;
2756 uint32_t const fGstRegMask = RT_BIT_32(cBitsGstRegMask) - 1U;
2757
2758 /* The first block of 25 bits: */
2759 if (fGstReg & fGstRegMask)
2760 {
2761 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2762 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2763 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2764 pEntry->GuestRegWriteback.cShift = 0;
2765 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2766 fGstReg &= ~(uint64_t)fGstRegMask;
2767 if (!fGstReg)
2768 return;
2769 }
2770
2771 /* The second block of 25 bits: */
2772 fGstReg >>= cBitsGstRegMask;
2773 if (fGstReg & fGstRegMask)
2774 {
2775 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2776 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2777 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2778 pEntry->GuestRegWriteback.cShift = 1;
2779 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2780 fGstReg &= ~(uint64_t)fGstRegMask;
2781 if (!fGstReg)
2782 return;
2783 }
2784
2785 /* The last block with 14 bits: */
2786 fGstReg >>= cBitsGstRegMask;
2787 Assert(fGstReg & fGstRegMask);
2788 Assert((fGstReg & ~(uint64_t)fGstRegMask) == 0);
2789 PIEMTBDBGENTRY const pEntry = iemNativeDbgInfoAddNewEntry(pReNative, pReNative->pDbgInfo);
2790 pEntry->GuestRegWriteback.uType = kIemTbDbgEntryType_GuestRegWriteback;
2791 pEntry->GuestRegWriteback.fSimdReg = fSimdReg ? 1 : 0;
2792 pEntry->GuestRegWriteback.cShift = 2;
2793 pEntry->GuestRegWriteback.fGstReg = (uint32_t)(fGstReg & fGstRegMask);
2794}
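
/*
 * Worked example of the 25+25+14 bit splitting above (values chosen only for
 * illustration): for fGstReg = RT_BIT_64(3) | RT_BIT_64(30) the first entry is
 * written with cShift=0 and fGstReg=RT_BIT_32(3); the second with cShift=1 and
 * fGstReg=RT_BIT_32(30 - 25) = RT_BIT_32(5); no third entry is added since the
 * mask is exhausted after the second block.
 */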
2795
2796# endif /* defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK) || defined(IEMNATIVE_WITH_SIMD_REG_ALLOCATOR) */
2797
2798#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
2799
2800
2801/*********************************************************************************************************************************
2802* Register Allocator *
2803*********************************************************************************************************************************/
2804
2805/**
2806 * Register parameter indexes (indexed by argument number).
2807 */
2808DECL_HIDDEN_CONST(uint8_t) const g_aidxIemNativeCallRegs[] =
2809{
2810 IEMNATIVE_CALL_ARG0_GREG,
2811 IEMNATIVE_CALL_ARG1_GREG,
2812 IEMNATIVE_CALL_ARG2_GREG,
2813 IEMNATIVE_CALL_ARG3_GREG,
2814#if defined(IEMNATIVE_CALL_ARG4_GREG)
2815 IEMNATIVE_CALL_ARG4_GREG,
2816# if defined(IEMNATIVE_CALL_ARG5_GREG)
2817 IEMNATIVE_CALL_ARG5_GREG,
2818# if defined(IEMNATIVE_CALL_ARG6_GREG)
2819 IEMNATIVE_CALL_ARG6_GREG,
2820# if defined(IEMNATIVE_CALL_ARG7_GREG)
2821 IEMNATIVE_CALL_ARG7_GREG,
2822# endif
2823# endif
2824# endif
2825#endif
2826};
2827AssertCompile(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
2828
2829/**
2830 * Call register masks indexed by argument count.
2831 */
2832DECL_HIDDEN_CONST(uint32_t) const g_afIemNativeCallRegs[] =
2833{
2834 0,
2835 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG),
2836 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG),
2837 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG),
2838 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2839 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG),
2840#if defined(IEMNATIVE_CALL_ARG4_GREG)
2841 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2842 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG),
2843# if defined(IEMNATIVE_CALL_ARG5_GREG)
2844 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2845 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG),
2846# if defined(IEMNATIVE_CALL_ARG6_GREG)
2847 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2848 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2849 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG),
2850# if defined(IEMNATIVE_CALL_ARG7_GREG)
2851 RT_BIT_32(IEMNATIVE_CALL_ARG0_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG1_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG2_GREG)
2852 | RT_BIT_32(IEMNATIVE_CALL_ARG3_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG4_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG5_GREG)
2853 | RT_BIT_32(IEMNATIVE_CALL_ARG6_GREG) | RT_BIT_32(IEMNATIVE_CALL_ARG7_GREG),
2854# endif
2855# endif
2856# endif
2857#endif
2858};
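
/*
 * Example of how the two tables above are used together (illustrative only):
 * for a helper call taking three register arguments, g_aidxIemNativeCallRegs[2]
 * is the host register carrying argument #2 and g_afIemNativeCallRegs[3] is the
 * mask of all argument registers that must be freed up before making the call:
 * @code
 *      uint8_t const  idxRegArg2 = g_aidxIemNativeCallRegs[2];
 *      uint32_t const fArgRegs   = g_afIemNativeCallRegs[3];
 * @endcode
 */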
2859
2860#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
2861/**
2862 * BP offset of the stack argument slots.
2863 *
2864 * This array is indexed by \#argument - IEMNATIVE_CALL_ARG_GREG_COUNT and has
2865 * IEMNATIVE_FRAME_STACK_ARG_COUNT entries.
2866 */
2867DECL_HIDDEN_CONST(int32_t) const g_aoffIemNativeCallStackArgBpDisp[] =
2868{
2869 IEMNATIVE_FP_OFF_STACK_ARG0,
2870# ifdef IEMNATIVE_FP_OFF_STACK_ARG1
2871 IEMNATIVE_FP_OFF_STACK_ARG1,
2872# endif
2873# ifdef IEMNATIVE_FP_OFF_STACK_ARG2
2874 IEMNATIVE_FP_OFF_STACK_ARG2,
2875# endif
2876# ifdef IEMNATIVE_FP_OFF_STACK_ARG3
2877 IEMNATIVE_FP_OFF_STACK_ARG3,
2878# endif
2879};
2880AssertCompile(RT_ELEMENTS(g_aoffIemNativeCallStackArgBpDisp) == IEMNATIVE_FRAME_STACK_ARG_COUNT);
2881#endif /* IEMNATIVE_FP_OFF_STACK_ARG0 */
2882
2883/**
2884 * Info about shadowed guest register values.
2885 * @see IEMNATIVEGSTREG
2886 */
2887DECL_HIDDEN_CONST(IEMANTIVEGSTREGINFO const) g_aGstShadowInfo[] =
2888{
2889#define CPUMCTX_OFF_AND_SIZE(a_Reg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx. a_Reg), RT_SIZEOFMEMB(VMCPU, cpum.GstCtx. a_Reg)
2890 /* [kIemNativeGstReg_GprFirst + X86_GREG_xAX] = */ { CPUMCTX_OFF_AND_SIZE(rax), "rax", },
2891 /* [kIemNativeGstReg_GprFirst + X86_GREG_xCX] = */ { CPUMCTX_OFF_AND_SIZE(rcx), "rcx", },
2892 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDX] = */ { CPUMCTX_OFF_AND_SIZE(rdx), "rdx", },
2893 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBX] = */ { CPUMCTX_OFF_AND_SIZE(rbx), "rbx", },
2894 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSP] = */ { CPUMCTX_OFF_AND_SIZE(rsp), "rsp", },
2895 /* [kIemNativeGstReg_GprFirst + X86_GREG_xBP] = */ { CPUMCTX_OFF_AND_SIZE(rbp), "rbp", },
2896 /* [kIemNativeGstReg_GprFirst + X86_GREG_xSI] = */ { CPUMCTX_OFF_AND_SIZE(rsi), "rsi", },
2897 /* [kIemNativeGstReg_GprFirst + X86_GREG_xDI] = */ { CPUMCTX_OFF_AND_SIZE(rdi), "rdi", },
2898 /* [kIemNativeGstReg_GprFirst + X86_GREG_x8 ] = */ { CPUMCTX_OFF_AND_SIZE(r8), "r8", },
2899 /* [kIemNativeGstReg_GprFirst + X86_GREG_x9 ] = */ { CPUMCTX_OFF_AND_SIZE(r9), "r9", },
2900 /* [kIemNativeGstReg_GprFirst + X86_GREG_x10] = */ { CPUMCTX_OFF_AND_SIZE(r10), "r10", },
2901 /* [kIemNativeGstReg_GprFirst + X86_GREG_x11] = */ { CPUMCTX_OFF_AND_SIZE(r11), "r11", },
2902 /* [kIemNativeGstReg_GprFirst + X86_GREG_x12] = */ { CPUMCTX_OFF_AND_SIZE(r12), "r12", },
2903 /* [kIemNativeGstReg_GprFirst + X86_GREG_x13] = */ { CPUMCTX_OFF_AND_SIZE(r13), "r13", },
2904 /* [kIemNativeGstReg_GprFirst + X86_GREG_x14] = */ { CPUMCTX_OFF_AND_SIZE(r14), "r14", },
2905 /* [kIemNativeGstReg_GprFirst + X86_GREG_x15] = */ { CPUMCTX_OFF_AND_SIZE(r15), "r15", },
2906 /* [kIemNativeGstReg_Pc] = */ { CPUMCTX_OFF_AND_SIZE(rip), "rip", },
2907 /* [kIemNativeGstReg_Cr0] = */ { CPUMCTX_OFF_AND_SIZE(cr0), "cr0", },
2908 /* [kIemNativeGstReg_FpuFcw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FCW), "fcw", },
2909 /* [kIemNativeGstReg_FpuFsw] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.FSW), "fsw", },
2910 /* [kIemNativeGstReg_SegBaseFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u64Base), "es_base", },
2911 /* [kIemNativeGstReg_SegBaseFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u64Base), "cs_base", },
2912 /* [kIemNativeGstReg_SegBaseFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u64Base), "ss_base", },
2913 /* [kIemNativeGstReg_SegBaseFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u64Base), "ds_base", },
2914 /* [kIemNativeGstReg_SegBaseFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u64Base), "fs_base", },
2915 /* [kIemNativeGstReg_SegBaseFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u64Base), "gs_base", },
2916 /* [kIemNativeGstReg_SegAttribFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Attr.u), "es_attrib", },
2917 /* [kIemNativeGstReg_SegAttribFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Attr.u), "cs_attrib", },
2918 /* [kIemNativeGstReg_SegAttribFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Attr.u), "ss_attrib", },
2919 /* [kIemNativeGstReg_SegAttribFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Attr.u), "ds_attrib", },
2920 /* [kIemNativeGstReg_SegAttribFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Attr.u), "fs_attrib", },
2921 /* [kIemNativeGstReg_SegAttribFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Attr.u), "gs_attrib", },
2922 /* [kIemNativeGstReg_SegLimitFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].u32Limit), "es_limit", },
2923 /* [kIemNativeGstReg_SegLimitFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].u32Limit), "cs_limit", },
2924 /* [kIemNativeGstReg_SegLimitFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].u32Limit), "ss_limit", },
2925 /* [kIemNativeGstReg_SegLimitFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].u32Limit), "ds_limit", },
2926 /* [kIemNativeGstReg_SegLimitFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].u32Limit), "fs_limit", },
2927 /* [kIemNativeGstReg_SegLimitFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].u32Limit), "gs_limit", },
2928 /* [kIemNativeGstReg_SegSelFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[0].Sel), "es", },
2929 /* [kIemNativeGstReg_SegSelFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[1].Sel), "cs", },
2930 /* [kIemNativeGstReg_SegSelFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[2].Sel), "ss", },
2931 /* [kIemNativeGstReg_SegSelFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[3].Sel), "ds", },
2932 /* [kIemNativeGstReg_SegSelFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[4].Sel), "fs", },
2933 /* [kIemNativeGstReg_SegSelFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(aSRegs[5].Sel), "gs", },
2934 /* [kIemNativeGstReg_Cr4] = */ { CPUMCTX_OFF_AND_SIZE(cr4), "cr4", },
2935 /* [kIemNativeGstReg_Xcr0] = */ { CPUMCTX_OFF_AND_SIZE(aXcr[0]), "xcr0", },
2936 /* [kIemNativeGstReg_MxCsr] = */ { CPUMCTX_OFF_AND_SIZE(XState.x87.MXCSR), "mxcsr", },
2937 /* [kIemNativeGstReg_EFlags] = */ { CPUMCTX_OFF_AND_SIZE(eflags), "eflags", },
2938#undef CPUMCTX_OFF_AND_SIZE
2939};
2940AssertCompile(RT_ELEMENTS(g_aGstShadowInfo) == kIemNativeGstReg_End);
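
/*
 * The table is consumed by looking up the CPUMCTX offset, size and name of a
 * guest register via its IEMNATIVEGSTREG index, e.g. (illustrative only):
 * @code
 *      uint32_t const     offVCpu = g_aGstShadowInfo[kIemNativeGstReg_Pc].off;      // offset of rip within VMCPU
 *      unsigned const     cbReg   = g_aGstShadowInfo[kIemNativeGstReg_Pc].cb;       // 8 bytes
 *      const char * const pszName = g_aGstShadowInfo[kIemNativeGstReg_Pc].pszName;  // "rip"
 * @endcode
 */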
2941
2942
2943/** Host CPU general purpose register names. */
2944DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstRegNames[] =
2945{
2946#ifdef RT_ARCH_AMD64
2947 "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
2948#elif RT_ARCH_ARM64
2949 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
2950 "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", "x24", "x25", "x26", "x27", "x28", "bp", "lr", "sp/xzr",
2951#else
2952# error "port me"
2953#endif
2954};
2955
2956
2957#if 0 /* unused */
2958/**
2959 * Tries to locate a suitable register in the given register mask.
2960 *
2961 * This ASSUMES the caller has done the minimal/optimal allocation checks and
2962 * failed.
2963 *
2964 * @returns Host register number on success, returns UINT8_MAX on failure.
2965 */
2966static uint8_t iemNativeRegTryAllocFree(PIEMRECOMPILERSTATE pReNative, uint32_t fRegMask)
2967{
2968 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
2969 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
2970 if (fRegs)
2971 {
2972 /** @todo pick better here: */
2973 unsigned const idxReg = ASMBitFirstSetU32(fRegs) - 1;
2974
2975 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
2976 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
2977 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
2978 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
2979
2980 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
2981 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
2982 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
2983 return idxReg;
2984 }
2985 return UINT8_MAX;
2986}
2987#endif /* unused */
2988
2989#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
2990
2991/**
2992 * Stores the host reg @a idxHstReg into guest shadow register @a enmGstReg.
2993 *
2994 * @returns New code buffer offset on success, UINT32_MAX on failure.
2995 * @param pReNative The native recompile state.
2996 * @param off The current code buffer position.
2997 * @param enmGstReg The guest register to store to.
2998 * @param idxHstReg The host register to store from.
2999 */
3000DECL_FORCE_INLINE_THROW(uint32_t)
3001iemNativeEmitStoreGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg, uint8_t idxHstReg)
3002{
3003 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
3004 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
3005
3006 switch (g_aGstShadowInfo[enmGstReg].cb)
3007 {
3008 case sizeof(uint64_t):
3009 return iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3010 case sizeof(uint32_t):
3011 return iemNativeEmitStoreGprToVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3012 case sizeof(uint16_t):
3013 return iemNativeEmitStoreGprToVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3014# if 0 /* not present in the table. */
3015 case sizeof(uint8_t):
3016 return iemNativeEmitStoreGprToVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
3017# endif
3018 default:
3019 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
3020 }
3021}
3022
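/* Illustration (editor's sketch, not part of the original source): for a
 * 64-bit guest GPR shadow the size dispatch above boils down to a single
 * store of the host register into the corresponding CPUMCTX field, roughly:
 *
 *      off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxHstReg,
 *                                           g_aGstShadowInfo[kIemNativeGstReg_GprFirst].off);
 *
 * kIemNativeGstReg_GprFirst is used purely as an example index here; the
 * 32-bit and 16-bit table entries take the corresponding U32/U16 stores.
 */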
3023
3024/**
3025 * Emits code to flush a pending write of the given guest register,
3026 * version with alternative core state.
3027 *
3028 * @returns New code buffer offset.
3029 * @param pReNative The native recompile state.
3030 * @param off Current code buffer position.
3031 * @param pCore Alternative core state.
3032 * @param enmGstReg The guest register to flush.
3033 */
3034DECL_HIDDEN_THROW(uint32_t)
3035iemNativeRegFlushPendingWriteEx(PIEMRECOMPILERSTATE pReNative, uint32_t off, PIEMNATIVECORESTATE pCore, IEMNATIVEGSTREG enmGstReg)
3036{
3037 uint8_t const idxHstReg = pCore->aidxGstRegShadows[enmGstReg];
3038
3039 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
3040 && enmGstReg <= kIemNativeGstReg_GprLast)
3041 || enmGstReg == kIemNativeGstReg_MxCsr);
3042 Assert( idxHstReg != UINT8_MAX
3043 && pCore->bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3044 Log12(("iemNativeRegFlushPendingWriteEx: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3045 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3046
3047 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3048
3049 pCore->bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3050 return off;
3051}
3052
3053
3054/**
3055 * Emits code to flush a pending write of the given guest register.
3056 *
3057 * @returns New code buffer offset.
3058 * @param pReNative The native recompile state.
3059 * @param off Current code buffer position.
3060 * @param enmGstReg The guest register to flush.
3061 */
3062DECL_HIDDEN_THROW(uint32_t)
3063iemNativeRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTREG enmGstReg)
3064{
3065 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3066
3067 Assert( ( enmGstReg >= kIemNativeGstReg_GprFirst
3068 && enmGstReg <= kIemNativeGstReg_GprLast)
3069 || enmGstReg == kIemNativeGstReg_MxCsr);
3070 Assert( idxHstReg != UINT8_MAX
3071 && pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg));
3072 Log12(("iemNativeRegFlushPendingWrite: Clearing guest register %s shadowed by host %s (off=%#x)\n",
3073 g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxHstReg], off));
3074
3075 off = iemNativeEmitStoreGprWithGstShadowReg(pReNative, off, enmGstReg, idxHstReg);
3076
3077 pReNative->Core.bmGstRegShadowDirty &= ~RT_BIT_64(enmGstReg);
3078 return off;
3079}
3080
3081
3082/**
3083 * Flush the given set of guest registers if marked as dirty.
3084 *
3085 * @returns New code buffer offset.
3086 * @param pReNative The native recompile state.
3087 * @param off Current code buffer position.
3088 * @param fFlushGstReg The guest register set to flush (default is flush everything).
3089 */
3090DECL_HIDDEN_THROW(uint32_t)
3091iemNativeRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstReg /*= UINT64_MAX*/)
3092{
3093 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fFlushGstReg;
3094 if (bmGstRegShadowDirty)
3095 {
3096# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3097 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3098 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, bmGstRegShadowDirty);
3099# endif
3100 do
3101 {
3102 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3103 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3104 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3105 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3106 } while (bmGstRegShadowDirty);
3107 }
3108
3109 return off;
3110}
3111
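/* Usage sketch (editor's addition): with delayed writeback the CPUMCTX stores
 * accumulate as dirty shadow bits and are only emitted when needed, e.g.
 * before calling out of the recompiled code or when leaving the TB:
 *
 *      off = iemNativeRegFlushDirtyGuest(pReNative, off);    // flush all dirty shadows
 *      off = iemNativeRegFlushDirtyGuest(pReNative, off,     // ...or just a selected subset
 *                                        RT_BIT_64(kIemNativeGstReg_GprFirst));
 */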
3112
3113/**
3114 * Flush all shadowed guest registers marked as dirty for the given host register.
3115 *
3116 * @returns New code buffer offset.
3117 * @param pReNative The native recompile state.
3118 * @param off Current code buffer position.
3119 * @param idxHstReg The host register.
3120 *
3121 * @note This doesn't do any unshadowing of guest registers from the host register.
3122 */
3123DECL_HIDDEN_THROW(uint32_t) iemNativeRegFlushDirtyGuestByHostRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg)
3124{
3125 /* We need to flush any pending guest register writes this host register shadows. */
3126 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3127 if (pReNative->Core.bmGstRegShadowDirty & fGstRegShadows)
3128 {
3129# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3130 iemNativeDbgInfoAddNativeOffset(pReNative, off);
3131 iemNativeDbgInfoAddGuestRegWriteback(pReNative, false /*fSimdReg*/, pReNative->Core.bmGstRegShadowDirty & fGstRegShadows);
3132# endif
3133 uint64_t bmGstRegShadowDirty = pReNative->Core.bmGstRegShadowDirty & fGstRegShadows;
3134 do
3135 {
3136 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadowDirty) - 1;
3137 bmGstRegShadowDirty &= ~RT_BIT_64(idxGstReg);
3138 off = iemNativeRegFlushPendingWrite(pReNative, off, (IEMNATIVEGSTREG)idxGstReg);
3139 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
3140 } while (bmGstRegShadowDirty);
3141 }
3142
3143 return off;
3144}
3145
3146#endif /* IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK */
3147
3148
3149/**
3150 * Locate a register, possibly freeing one up.
3151 *
3152 * This ASSUMES the caller has done the minimal/optimal allocation checks and
3153 * failed.
3154 *
3155 * @returns Host register number on success. Returns UINT8_MAX if no register
3156 * was found; the caller is supposed to deal with this and raise an
3157 * allocation type specific status code (if desired).
3158 *
3159 * @throws VBox status code if we run into trouble spilling a variable or
3160 * recording debug info. Does NOT throw anything if we're out of
3161 * registers, though.
3162 */
3163static uint8_t iemNativeRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
3164 uint32_t fRegMask = IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK)
3165{
3166 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFree);
3167 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3168 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3169
3170 /*
3171 * Try a freed register that's shadowing a guest register.
3172 */
3173 uint32_t fRegs = ~pReNative->Core.bmHstRegs & fRegMask;
3174 if (fRegs)
3175 {
3176 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeNoVar);
3177
3178#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3179 /*
3180 * When we have liveness information, we use it to kick out all shadowed
3181 * guest registers that will not be needed any more in this TB. If we're
3182 * lucky, this may prevent us from ending up here again.
3183 *
3184 * Note! We must consider the previous entry here so we don't free
3185 * anything that the current threaded function requires (current
3186 * entry is produced by the next threaded function).
3187 */
3188 uint32_t const idxCurCall = pReNative->idxCurCall;
3189 if (idxCurCall > 0)
3190 {
3191 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
3192
3193# ifndef IEMLIVENESS_EXTENDED_LAYOUT
3194 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
3195 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
3196 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state. */
3197# else
3198 /* Construct a mask of the registers not in the read or write state.
3199 Note! We could skip writes, if they aren't from us, as this is just
3200 a hack to prevent trashing registers that have just been written
3201 or will be written when we retire the current instruction. */
3202 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
3203 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
3204 & IEMLIVENESSBIT_MASK;
3205# endif
3206 /* Merge EFLAGS. */
3207 uint64_t fTmp = fToFreeMask & (fToFreeMask >> 3); /* AF2,PF2,CF2,Other2 = AF,PF,CF,Other & OF,SF,ZF,AF */
3208 fTmp &= fTmp >> 2; /* CF3,Other3 = AF2,PF2 & CF2,Other2 */
3209 fTmp &= fTmp >> 1; /* Other4 = CF3 & Other3 */
3210 fToFreeMask &= RT_BIT_64(kIemNativeGstReg_EFlags) - 1;
3211 fToFreeMask |= fTmp & RT_BIT_64(kIemNativeGstReg_EFlags);
3212
3213 /* If it matches any shadowed registers. */
3214 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
3215 {
3216#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3217 /* Writeback any dirty shadow registers we are about to unshadow. */
3218 *poff = iemNativeRegFlushDirtyGuest(pReNative, *poff, fToFreeMask);
3219#endif
3220
3221 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessUnshadowed);
3222 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
3223 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
3224
3225 /* See if we've got any unshadowed registers we can return now. */
3226 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
3227 if (fUnshadowedRegs)
3228 {
3229 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeLivenessHelped);
3230 return (fPreferVolatile
3231 ? ASMBitFirstSetU32(fUnshadowedRegs)
3232 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3233 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
3234 - 1;
3235 }
3236 }
3237 }
3238#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
3239
3240 unsigned const idxReg = (fPreferVolatile
3241 ? ASMBitFirstSetU32(fRegs)
3242 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3243 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs))
3244 - 1;
3245
3246 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3247 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3248 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3249 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3250
3251#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3252 /* We need to flush any pending guest register writes this host register shadows. */
3253 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
3254#endif
3255
3256 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3257 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3258 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3259 return idxReg;
3260 }
3261
3262 /*
3263 * Try free up a variable that's in a register.
3264 *
3265 * We do two rounds here: first we evict variables that don't need to be
3266 * saved on the stack, then in the second round we spill things to the stack.
3267 */
3268 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeRegFindFreeVar);
3269 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
3270 {
3271 uint32_t fVars = pReNative->Core.bmVars;
3272 while (fVars)
3273 {
3274 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
3275 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
3276#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3277 if (pReNative->Core.aVars[idxVar].fSimdReg) /* Need to ignore SIMD variables here or we end up freeing random registers. */
3278 { fVars &= ~RT_BIT_32(idxVar); continue; } /* Clear the bit first so the loop cannot spin forever on a SIMD variable. */
3279#endif
3280
3281 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs)
3282 && (RT_BIT_32(idxReg) & fRegMask)
3283 && ( iLoop == 0
3284 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
3285 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3286 && !pReNative->Core.aVars[idxVar].fRegAcquired)
3287 {
3288 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg));
3289 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
3290 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3291 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3292 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3293 == RT_BOOL(pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
3294#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3295 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3296#endif
3297
3298 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
3299 {
3300 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
3301 *poff = iemNativeEmitStoreGprByBp(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
3302 }
3303
3304 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
3305 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxReg);
3306
3307 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3308 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3309 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3310 return idxReg;
3311 }
3312 fVars &= ~RT_BIT_32(idxVar);
3313 }
3314 }
3315
3316 return UINT8_MAX;
3317}
3318
3319
3320/**
3321 * Reassigns a variable to a different register specified by the caller.
3322 *
3323 * @returns The new code buffer position.
3324 * @param pReNative The native recompile state.
3325 * @param off The current code buffer position.
3326 * @param idxVar The variable index.
3327 * @param idxRegOld The old host register number.
3328 * @param idxRegNew The new host register number.
3329 * @param pszCaller The caller for logging.
3330 */
3331static uint32_t iemNativeRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3332 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
3333{
3334 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3335 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
3336#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3337 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3338#endif
3339 RT_NOREF(pszCaller);
3340
3341#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3342 Assert(!(pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3343#endif
3344 iemNativeRegClearGstRegShadowing(pReNative, idxRegNew, off);
3345
3346 uint64_t fGstRegShadows = pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3347#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3348 Assert(!(fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3349#endif
3350 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
3351 pszCaller, idxVar, g_apszIemNativeHstRegNames[idxRegOld], g_apszIemNativeHstRegNames[idxRegNew], fGstRegShadows));
3352 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegNew, idxRegOld);
3353
3354 pReNative->Core.aHstRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
3355 pReNative->Core.aHstRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
3356 pReNative->Core.aHstRegs[idxRegNew].idxVar = idxVar;
3357 if (fGstRegShadows)
3358 {
3359 pReNative->Core.bmHstRegsWithGstShadow = (pReNative->Core.bmHstRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
3360 | RT_BIT_32(idxRegNew);
3361 while (fGstRegShadows)
3362 {
3363 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
3364 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
3365
3366 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxRegOld);
3367 pReNative->Core.aidxGstRegShadows[idxGstReg] = idxRegNew;
3368 }
3369 }
3370
3371 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
3372 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3373 pReNative->Core.bmHstRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstRegs & ~RT_BIT_32(idxRegOld));
3374 return off;
3375}
3376
3377
3378/**
3379 * Moves a variable to a different register or spills it onto the stack.
3380 *
3381 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
3382 * kinds can easily be recreated if needed later.
3383 *
3384 * @returns The new code buffer position.
3385 * @param pReNative The native recompile state.
3386 * @param off The current code buffer position.
3387 * @param idxVar The variable index.
3388 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
3389 * call-volatile registers.
3390 */
3391DECL_HIDDEN_THROW(uint32_t) iemNativeRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
3392 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_GREG_MASK*/)
3393{
3394 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3395 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
3396 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
3397 Assert(!pVar->fRegAcquired);
3398
3399 uint8_t const idxRegOld = pVar->idxReg;
3400 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs));
3401 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxRegOld));
3402 Assert(pReNative->Core.aHstRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
3403 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows)
3404 == pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows);
3405 Assert(pReNative->Core.bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
3406 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxRegOld))
3407 == RT_BOOL(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows));
3408#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3409 Assert(!(pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3410#endif
3411
3412
3413 /** @todo Add statistics on this.*/
3414 /** @todo Implement basic variable liveness analysis (python) so variables
3415 * can be freed immediately once no longer used. This has the potential to
3416 * be trashing registers and stack for dead variables.
3417 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
3418
3419 /*
3420 * First try move it to a different register, as that's cheaper.
3421 */
3422 fForbiddenRegs |= RT_BIT_32(idxRegOld);
3423 fForbiddenRegs |= IEMNATIVE_REG_FIXED_MASK;
3424 uint32_t fRegs = ~pReNative->Core.bmHstRegs & ~fForbiddenRegs;
3425 if (fRegs)
3426 {
3427 /* Avoid using shadow registers, if possible. */
3428 if (fRegs & ~pReNative->Core.bmHstRegsWithGstShadow)
3429 fRegs &= ~pReNative->Core.bmHstRegsWithGstShadow;
3430 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
3431 return iemNativeRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeRegMoveOrSpillStackVar");
3432 }
3433
3434 /*
3435 * Otherwise we must spill the register onto the stack.
3436 */
3437 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
3438 Log12(("iemNativeRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
3439 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
3440 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
3441
3442 pVar->idxReg = UINT8_MAX;
3443 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
3444 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
3445 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
3446 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
3447 return off;
3448}
3449
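/* Editor's sketch: a typical caller evacuates a stack variable from a
 * soon-to-be-clobbered register using the default forbidden set (the
 * call-volatile GPRs), e.g. right before emitting a helper call:
 *
 *      off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
 *
 * If no acceptable register is free, the value ends up in the variable's
 * stack slot via iemNativeEmitStoreGprByBp above.
 */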
3450
3451/**
3452 * Allocates a temporary host general purpose register.
3453 *
3454 * This may emit code to save register content onto the stack in order to free
3455 * up a register.
3456 *
3457 * @returns The host register number; throws VBox status code on failure,
3458 * so no need to check the return value.
3459 * @param pReNative The native recompile state.
3460 * @param poff Pointer to the variable with the code buffer position.
3461 * This will be updated if we need to move a variable from
3462 * register to stack in order to satisfy the request.
3463 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3464 * registers (@c true, default) or the other way around
3465 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3466 */
3467DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
3468{
3469 /*
3470 * Try find a completely unused register, preferably a call-volatile one.
3471 */
3472 uint8_t idxReg;
3473 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3474 & ~pReNative->Core.bmHstRegsWithGstShadow
3475 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK);
3476 if (fRegs)
3477 {
3478 if (fPreferVolatile)
3479 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3480 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3481 else
3482 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3483 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3484 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3485 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3486 Log12(("iemNativeRegAllocTmp: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3487 }
3488 else
3489 {
3490 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile);
3491 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3492 Log12(("iemNativeRegAllocTmp: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3493 }
3494 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3495}
3496
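/* Editor's sketch of the usual allocate/use/free pattern for a scratch GPR;
 * the immediate loaded below is an arbitrary example value:
 *
 *      uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
 *      off = iemNativeEmitLoadGprImm64(pReNative, off, idxTmpReg, UINT64_C(0x42));
 *      ... emit code using idxTmpReg ...
 *      iemNativeRegFreeTmp(pReNative, idxTmpReg);
 */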
3497
3498/**
3499 * Alternative version of iemNativeRegAllocTmp that takes mask with acceptable
3500 * registers.
3501 *
3502 * @returns The host register number; throws VBox status code on failure,
3503 * so no need to check the return value.
3504 * @param pReNative The native recompile state.
3505 * @param poff Pointer to the variable with the code buffer position.
3506 * This will be updated if we need to move a variable from
3507 * register to stack in order to satisfy the request.
3508 * @param fRegMask Mask of acceptable registers.
3509 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3510 * registers (@c true, default) or the other way around
3511 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
3512 */
3513DECL_HIDDEN_THROW(uint8_t) iemNativeRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
3514 bool fPreferVolatile /*= true*/)
3515{
3516 Assert(!(fRegMask & ~IEMNATIVE_HST_GREG_MASK));
3517 Assert(!(fRegMask & IEMNATIVE_REG_FIXED_MASK));
3518
3519 /*
3520 * Try find a completely unused register, preferably a call-volatile one.
3521 */
3522 uint8_t idxReg;
3523 uint32_t fRegs = ~pReNative->Core.bmHstRegs
3524 & ~pReNative->Core.bmHstRegsWithGstShadow
3525 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
3526 & fRegMask;
3527 if (fRegs)
3528 {
3529 if (fPreferVolatile)
3530 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK
3531 ? fRegs & IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3532 else
3533 idxReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
3534 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
3535 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3536 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3537 Log12(("iemNativeRegAllocTmpEx: %s\n", g_apszIemNativeHstRegNames[idxReg]));
3538 }
3539 else
3540 {
3541 idxReg = iemNativeRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
3542 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
3543 Log12(("iemNativeRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstRegNames[idxReg]));
3544 }
3545 return iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Tmp);
3546}
3547
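/* Editor's sketch: the masked variant is what to use when the scratch
 * register must survive a helper call, i.e. only non-fixed, non-volatile
 * registers are acceptable (the same mask expression appears further down):
 *
 *      uint8_t const idxSafeReg = iemNativeRegAllocTmpEx(pReNative, &off,
 *                                                          IEMNATIVE_HST_GREG_MASK
 *                                                        & ~IEMNATIVE_REG_FIXED_MASK
 *                                                        & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK);
 */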
3548
3549/**
3550 * Allocates a temporary register for loading an immediate value into.
3551 *
3552 * This will emit code to load the immediate, unless there happens to be an
3553 * unused register with the value already loaded.
3554 *
3555 * The caller will not modify the returned register; it must be considered
3556 * read-only. Free using iemNativeRegFreeTmpImm.
3557 *
3558 * @returns The host register number; throws VBox status code on failure, so no
3559 * need to check the return value.
3560 * @param pReNative The native recompile state.
3561 * @param poff Pointer to the variable with the code buffer position.
3562 * @param uImm The immediate value that the register must hold upon
3563 * return.
3564 * @param fPreferVolatile Whether to prefer volatile over non-volatile
3565 * registers (@c true, default) or the other way around
3566 * (@c false).
3567 *
3568 * @note Reusing immediate values has not been implemented yet.
3569 */
3570DECL_HIDDEN_THROW(uint8_t)
3571iemNativeRegAllocTmpImm(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint64_t uImm, bool fPreferVolatile /*= true*/)
3572{
3573 uint8_t const idxReg = iemNativeRegAllocTmp(pReNative, poff, fPreferVolatile);
3574 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, uImm);
3575 return idxReg;
3576}
3577
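/* Editor's sketch: immediates are treated as read-only and released with the
 * matching free function, which keeps the door open for the value-reuse
 * optimization mentioned in the note above:
 *
 *      uint8_t const idxRegImm = iemNativeRegAllocTmpImm(pReNative, &off, UINT64_C(0xffff));
 *      ... emit code reading idxRegImm without modifying it ...
 *      iemNativeRegFreeTmpImm(pReNative, idxRegImm);
 */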
3578
3579/**
3580 * Allocates a temporary host general purpose register for keeping a guest
3581 * register value.
3582 *
3583 * Since we may already have a register holding the guest register value,
3584 * code will be emitted to do the loading if that's not the case. Code may also
3585 * be emitted if we have to free up a register to satisfy the request.
3586 *
3587 * @returns The host register number; throws VBox status code on failure, so no
3588 * need to check the return value.
3589 * @param pReNative The native recompile state.
3590 * @param poff Pointer to the variable with the code buffer
3591 * position. This will be updated if we need to move a
3592 * variable from register to stack in order to satisfy
3593 * the request.
3594 * @param enmGstReg The guest register that is to be updated.
3595 * @param enmIntendedUse How the caller will be using the host register.
3596 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
3597 * register is okay (default). The ASSUMPTION here is
3598 * that the caller has already flushed all volatile
3599 * registers, so this is only applied if we allocate a
3600 * new register.
3601 * @param fSkipLivenessAssert Hack for liveness input validation of EFLAGS.
3602 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
3603 */
3604DECL_HIDDEN_THROW(uint8_t)
3605iemNativeRegAllocTmpForGuestReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg,
3606 IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
3607 bool fNoVolatileRegs /*= false*/, bool fSkipLivenessAssert /*= false*/)
3608{
3609 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3610#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3611 AssertMsg( fSkipLivenessAssert
3612 || pReNative->idxCurCall == 0
3613 || enmGstReg == kIemNativeGstReg_Pc
3614 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3615 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3616 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
3617 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3618 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)) ),
3619 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3620#endif
3621 RT_NOREF(fSkipLivenessAssert);
3622#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
3623 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
3624#endif
3625 uint32_t const fRegMask = !fNoVolatileRegs
3626 ? IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK
3627 : IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK;
3628
3629 /*
3630 * First check if the guest register value is already in a host register.
3631 */
3632 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3633 {
3634 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3635 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3636 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3637 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3638
3639 /* It's not supposed to be allocated... */
3640 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3641 {
3642 /*
3643 * If the register will trash the guest shadow copy, try find a
3644 * completely unused register we can use instead. If that fails,
3645 * we need to disassociate the host reg from the guest reg.
3646 */
3647 /** @todo would be nice to know if preserving the register is in any way helpful. */
3648 /* If the purpose is calculations, try duplicate the register value as
3649 we'll be clobbering the shadow. */
3650 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
3651 && ( ~pReNative->Core.bmHstRegs
3652 & ~pReNative->Core.bmHstRegsWithGstShadow
3653 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)))
3654 {
3655 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask);
3656
3657 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3658
3659 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3660 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3661 g_apszIemNativeHstRegNames[idxRegNew]));
3662 idxReg = idxRegNew;
3663 }
3664 /* If the current register matches the restrictions, go ahead and allocate
3665 it for the caller. */
3666 else if (fRegMask & RT_BIT_32(idxReg))
3667 {
3668 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3669 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3670 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3671 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3672 Log12(("iemNativeRegAllocTmpForGuestReg: Reusing %s for guest %s %s\n",
3673 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3674 else
3675 {
3676 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
3677 Log12(("iemNativeRegAllocTmpForGuestReg: Grabbing %s for guest %s - destructive calc\n",
3678 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3679 }
3680 }
3681 /* Otherwise, allocate a register that satisfies the caller and transfer
3682 the shadowing if compatible with the intended use. (This basically
3683 means the caller wants a non-volatile register (RSP push/pop scenario).) */
3684 else
3685 {
3686 Assert(fNoVolatileRegs);
3687 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxReg),
3688 !fNoVolatileRegs
3689 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
3690 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3691 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3692 {
3693 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3694 Log12(("iemNativeRegAllocTmpForGuestReg: Transferring %s to %s for guest %s %s\n",
3695 g_apszIemNativeHstRegNames[idxReg], g_apszIemNativeHstRegNames[idxRegNew],
3696 g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3697 }
3698 else
3699 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for destructive calc\n",
3700 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3701 g_apszIemNativeHstRegNames[idxRegNew]));
3702 idxReg = idxRegNew;
3703 }
3704 }
3705 else
3706 {
3707 /*
3708 * Oops. Shadowed guest register already allocated!
3709 *
3710 * Allocate a new register, copy the value and, if updating, the
3711 * guest shadow copy assignment to the new register.
3712 */
3713 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3714 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
3715 ("This shouldn't happen: idxReg=%d enmGstReg=%d enmIntendedUse=%s\n",
3716 idxReg, enmGstReg, s_pszIntendedUse[enmIntendedUse]));
3717
3718 /** @todo share register for readonly access. */
3719 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask,
3720 enmIntendedUse == kIemNativeGstRegUse_Calculation);
3721
3722 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3723 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxRegNew, idxReg);
3724
3725 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
3726 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3727 Log12(("iemNativeRegAllocTmpForGuestReg: Duplicated %s for guest %s into %s for %s\n",
3728 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3729 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3730 else
3731 {
3732 iemNativeRegTransferGstRegShadowing(pReNative, idxReg, idxRegNew, enmGstReg, *poff);
3733 Log12(("iemNativeRegAllocTmpForGuestReg: Moved %s for guest %s into %s for %s\n",
3734 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName,
3735 g_apszIemNativeHstRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
3736 }
3737 idxReg = idxRegNew;
3738 }
3739 Assert(RT_BIT_32(idxReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
3740
3741#ifdef VBOX_STRICT
3742 /* Strict builds: Check that the value is correct. */
3743 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3744#endif
3745
3746#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3747 /** @todo r=aeichner Implement for registers other than GPR as well. */
3748 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3749 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3750 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3751 && enmGstReg <= kIemNativeGstReg_GprLast)
3752 || enmGstReg == kIemNativeGstReg_MxCsr))
3753 {
3754# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3755 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3756 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
3757# endif
3758 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3759 }
3760#endif
3761
3762 return idxReg;
3763 }
3764
3765 /*
3766 * Allocate a new register, load it with the guest value and designate it as a copy of the guest register.
3767 */
3768 uint8_t const idxRegNew = iemNativeRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
3769
3770 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
3771 *poff = iemNativeEmitLoadGprWithGstShadowReg(pReNative, *poff, idxRegNew, enmGstReg);
3772
3773 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
3774 iemNativeRegMarkAsGstRegShadow(pReNative, idxRegNew, enmGstReg, *poff);
3775 Log12(("iemNativeRegAllocTmpForGuestReg: Allocated %s for guest %s %s\n",
3776 g_apszIemNativeHstRegNames[idxRegNew], g_aGstShadowInfo[enmGstReg].pszName, s_pszIntendedUse[enmIntendedUse]));
3777
3778#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3779 /** @todo r=aeichner Implement for registers other than GPR as well. */
3780 if ( ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
3781 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
3782 && ( ( enmGstReg >= kIemNativeGstReg_GprFirst
3783 && enmGstReg <= kIemNativeGstReg_GprLast)
3784 || enmGstReg == kIemNativeGstReg_MxCsr))
3785 {
3786# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
3787 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
3788 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxRegNew);
3789# endif
3790 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
3791 }
3792#endif
3793
3794 return idxRegNew;
3795}
3796
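/* Editor's sketch: fetching a guest register for update reuses an existing
 * shadow copy or emits a load, and with delayed writeback enabled it marks
 * the shadow dirty so the CPUMCTX store happens at the next flush.
 * kIemNativeGstReg_GprFirst is just an example index:
 *
 *      uint8_t const idxGprReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off,
 *                                                                kIemNativeGstReg_GprFirst,
 *                                                                kIemNativeGstRegUse_ForUpdate);
 *      ... emit code modifying idxGprReg ...
 *      iemNativeRegFreeTmp(pReNative, idxGprReg);
 */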
3797
3798/**
3799 * Allocates a temporary host general purpose register that already holds the
3800 * given guest register value.
3801 *
3802 * The use case for this function is places where the shadowing state cannot be
3803 * modified due to branching and such. This will fail if we don't have a
3804 * current shadow copy handy or if it's incompatible. The only code that will
3805 * be emitted here is value checking code in strict builds.
3806 *
3807 * The intended use can only be readonly!
3808 *
3809 * @returns The host register number, UINT8_MAX if not present.
3810 * @param pReNative The native recompile state.
3811 * @param poff Pointer to the instruction buffer offset.
3812 * Will be updated in strict builds if a register is
3813 * found.
3814 * @param enmGstReg The guest register that is to be used.
3815 * @note In strict builds, this may throw instruction buffer growth failures.
3816 * Non-strict builds will not throw anything.
3817 * @sa iemNativeRegAllocTmpForGuestReg
3818 */
3819DECL_HIDDEN_THROW(uint8_t)
3820iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTREG enmGstReg)
3821{
3822 Assert(enmGstReg < kIemNativeGstReg_End && g_aGstShadowInfo[enmGstReg].cb != 0);
3823#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
3824 AssertMsg( pReNative->idxCurCall == 0
3825 || IEMLIVENESS_STATE_IS_INPUT_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg))
3826 || enmGstReg == kIemNativeGstReg_Pc,
3827 ("%s - %u\n", g_aGstShadowInfo[enmGstReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstReg)));
3828#endif
3829
3830 /*
3831 * First check if the guest register value is already in a host register.
3832 */
3833 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
3834 {
3835 uint8_t idxReg = pReNative->Core.aidxGstRegShadows[enmGstReg];
3836 Assert(idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3837 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & RT_BIT_64(enmGstReg));
3838 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg));
3839
3840 if (!(pReNative->Core.bmHstRegs & RT_BIT_32(idxReg)))
3841 {
3842 /*
3843 * We only do readonly use here, so easy compared to the other
3844 * variant of this code.
3845 */
3846 pReNative->Core.bmHstRegs |= RT_BIT_32(idxReg);
3847 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Tmp;
3848 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3849 Log12(("iemNativeRegAllocTmpForGuestRegIfAlreadyPresent: Reusing %s for guest %s readonly\n",
3850 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
3851
3852#ifdef VBOX_STRICT
3853 /* Strict builds: Check that the value is correct. */
3854 *poff = iemNativeEmitGuestRegValueCheck(pReNative, *poff, idxReg, enmGstReg);
3855#else
3856 RT_NOREF(poff);
3857#endif
3858 return idxReg;
3859 }
3860 }
3861
3862 return UINT8_MAX;
3863}
3864
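/* Editor's sketch: unlike iemNativeRegAllocTmpForGuestReg this variant never
 * emits a load, so the caller has to cope with a UINT8_MAX result:
 *
 *      uint8_t const idxReg = iemNativeRegAllocTmpForGuestRegIfAlreadyPresent(pReNative, &off,
 *                                                                             kIemNativeGstReg_EFlags);
 *      if (idxReg != UINT8_MAX)
 *      {
 *          ... read-only use of idxReg ...
 *          iemNativeRegFreeTmp(pReNative, idxReg);
 *      }
 *      else
 *          ... take a path that does not need the shadow copy ...
 */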
3865
3866/**
3867 * Allocates argument registers for a function call.
3868 *
3869 * @returns New code buffer offset on success; throws VBox status code on failure, so no
3870 * need to check the return value.
3871 * @param pReNative The native recompile state.
3872 * @param off The current code buffer offset.
3873 * @param cArgs The number of arguments the function call takes.
3874 */
3875DECL_HIDDEN_THROW(uint32_t) iemNativeRegAllocArgs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs)
3876{
3877 AssertStmt(cArgs <= IEMNATIVE_CALL_ARG_GREG_COUNT + IEMNATIVE_FRAME_STACK_ARG_COUNT,
3878 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_4));
3879 Assert(RT_ELEMENTS(g_aidxIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3880 Assert(RT_ELEMENTS(g_afIemNativeCallRegs) == IEMNATIVE_CALL_ARG_GREG_COUNT);
3881
3882 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
3883 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
3884 else if (cArgs == 0)
3885 return off;
3886
3887 /*
3888 * Do we get lucky and all registers are free and not shadowing anything?
3889 */
3890 if (((pReNative->Core.bmHstRegs | pReNative->Core.bmHstRegsWithGstShadow) & g_afIemNativeCallRegs[cArgs]) == 0)
3891 for (uint32_t i = 0; i < cArgs; i++)
3892 {
3893 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3894 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3895 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3896 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3897 }
3898 /*
3899 * Okay, not lucky so we have to free up the registers.
3900 */
3901 else
3902 for (uint32_t i = 0; i < cArgs; i++)
3903 {
3904 uint8_t const idxReg = g_aidxIemNativeCallRegs[i];
3905 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxReg))
3906 {
3907 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
3908 {
3909 case kIemNativeWhat_Var:
3910 {
3911 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
3912 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
3913 AssertStmt(IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars),
3914 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3915 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxReg);
3916#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
3917 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
3918#endif
3919
3920 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind != kIemNativeVarKind_Stack)
3921 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
3922 else
3923 {
3924 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
3925 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
3926 }
3927 break;
3928 }
3929
3930 case kIemNativeWhat_Tmp:
3931 case kIemNativeWhat_Arg:
3932 case kIemNativeWhat_rc:
3933 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_5));
3934 default:
3935 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_6));
3936 }
3937
3938 }
3939 if (pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg))
3940 {
3941 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0);
3942 Assert( (pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadows)
3943 == pReNative->Core.aHstRegs[idxReg].fGstRegShadows);
3944#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
3945 Assert(!(pReNative->Core.aHstRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstRegShadowDirty));
3946#endif
3947 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
3948 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
3949 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
3950 }
3951 else
3952 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
3953 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Arg;
3954 pReNative->Core.aHstRegs[idxReg].idxVar = UINT8_MAX;
3955 }
3956 pReNative->Core.bmHstRegs |= g_afIemNativeCallRegs[cArgs];
3957 return off;
3958}
3959
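/* Editor's sketch: before emitting a helper call the recompiler reserves the
 * host argument registers, loads them (g_aidxIemNativeCallRegs[0..cArgs-1])
 * and then emits the call itself, roughly:
 *
 *      off = iemNativeRegAllocArgs(pReNative, off, 2);
 *      ... load the two argument registers and emit the call ...
 */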
3960
3961DECL_HIDDEN_THROW(uint8_t) iemNativeRegAssignRc(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg);
3962
3963
3964#if 0
3965/**
3966 * Frees a register assignment of any type.
3967 *
3968 * @param pReNative The native recompile state.
3969 * @param idxHstReg The register to free.
3970 *
3971 * @note Does not update variables.
3972 */
3973DECLHIDDEN(void) iemNativeRegFree(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
3974{
3975 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
3976 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
3977 Assert(!(IEMNATIVE_REG_FIXED_MASK & RT_BIT_32(idxHstReg)));
3978 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var
3979 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp
3980 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Arg
3981 || pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_rc);
3982 Assert( pReNative->Core.aHstRegs[idxHstReg].enmWhat != kIemNativeWhat_Var
3983 || pReNative->Core.aVars[pReNative->Core.aHstRegs[idxHstReg].idxVar].idxReg == UINT8_MAX
3984 || (pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aHstRegs[idxHstReg].idxVar)));
3985 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
3986 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
3987 Assert( RT_BOOL(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg))
3988 == RT_BOOL(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
3989
3990 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
3991 /* no flushing, right:
3992 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
3993 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
3994 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
3995 */
3996}
3997#endif
3998
3999
4000/**
4001 * Frees a temporary register.
4002 *
4003 * Any shadow copies of guest registers assigned to the host register will not
4004 * be flushed by this operation.
4005 */
4006DECLHIDDEN(void) iemNativeRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4007{
4008 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4009 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Tmp);
4010 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4011 Log12(("iemNativeRegFreeTmp: %s (gst: %#RX64)\n",
4012 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4013}
4014
4015
4016/**
4017 * Frees a temporary immediate register.
4018 *
4019 * It is assumed that the caller has not modified the register, so it still holds
4020 * the same value as when it was allocated via iemNativeRegAllocTmpImm().
4021 */
4022DECLHIDDEN(void) iemNativeRegFreeTmpImm(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg) RT_NOEXCEPT
4023{
4024 iemNativeRegFreeTmp(pReNative, idxHstReg);
4025}
4026
4027
4028/**
4029 * Frees a register assigned to a variable.
4030 *
4031 * The register will be disassociated from the variable.
4032 */
4033DECLHIDDEN(void) iemNativeRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4034{
4035 Assert(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg));
4036 Assert(pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4037 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
4038 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4039 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4040#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4041 Assert(!pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4042#endif
4043
4044 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4045 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
4046 if (!fFlushShadows)
4047 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4048 g_apszIemNativeHstRegNames[idxHstReg], pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows, idxVar));
4049 else
4050 {
4051 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4052 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4053#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4054 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadowsOld));
4055#endif
4056 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4057 pReNative->Core.bmGstRegShadows &= ~fGstRegShadowsOld;
4058 uint64_t fGstRegShadows = fGstRegShadowsOld;
4059 while (fGstRegShadows)
4060 {
4061 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4062 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4063
4064 Assert(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg);
4065 pReNative->Core.aidxGstRegShadows[idxGstReg] = UINT8_MAX;
4066 }
4067 Log12(("iemNativeRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4068 g_apszIemNativeHstRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4069 }
4070}
4071
4072
4073#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4074# if defined(LOG_ENABLED) || defined(IEMNATIVE_WITH_TB_DEBUG_INFO)
4075/** Host CPU SIMD register names. */
4076DECL_HIDDEN_CONST(const char * const) g_apszIemNativeHstSimdRegNames[] =
4077{
4078# ifdef RT_ARCH_AMD64
4079 "ymm0", "ymm1", "ymm2", "ymm3", "ymm4", "ymm5", "ymm6", "ymm7", "ymm8", "ymm9", "ymm10", "ymm11", "ymm12", "ymm13", "ymm14", "ymm15"
4080# elif RT_ARCH_ARM64
4081 "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15",
4082 "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31",
4083# else
4084# error "port me"
4085# endif
4086};
4087# endif
4088
4089
4090/**
4091 * Frees a SIMD register assigned to a variable.
4092 *
4093 * The register will be disassociated from the variable.
4094 */
4095DECLHIDDEN(void) iemNativeSimdRegFreeVar(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstReg, bool fFlushShadows) RT_NOEXCEPT
4096{
4097 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstReg));
4098 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
4099 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
4100 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4101 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg);
4102 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4103
4104 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = UINT8_MAX;
4105 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
4106 if (!fFlushShadows)
4107 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64) idxVar=%#x\n",
4108 g_apszIemNativeHstSimdRegNames[idxHstReg], pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows, idxVar));
4109 else
4110 {
4111 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4112 uint64_t const fGstRegShadowsOld = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows;
4113 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
4114 pReNative->Core.bmGstSimdRegShadows &= ~fGstRegShadowsOld;
4115 uint64_t fGstRegShadows = fGstRegShadowsOld;
4116 while (fGstRegShadows)
4117 {
4118 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4119 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4120
4121 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxHstReg);
4122 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = UINT8_MAX;
4123 }
4124 Log12(("iemNativeSimdRegFreeVar: %s (gst: %#RX64 -> 0) idxVar=%#x\n",
4125 g_apszIemNativeHstSimdRegNames[idxHstReg], fGstRegShadowsOld, idxVar));
4126 }
4127}
4128
4129
4130/**
4131 * Reassigns a variable to a different SIMD register specified by the caller.
4132 *
4133 * @returns The new code buffer position.
4134 * @param pReNative The native recompile state.
4135 * @param off The current code buffer position.
4136 * @param idxVar The variable index.
4137 * @param idxRegOld The old host register number.
4138 * @param idxRegNew The new host register number.
4139 * @param pszCaller The caller for logging.
4140 */
4141static uint32_t iemNativeSimdRegMoveVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4142 uint8_t idxRegOld, uint8_t idxRegNew, const char *pszCaller)
4143{
4144 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4145 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxRegOld);
4146 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg);
4147 RT_NOREF(pszCaller);
4148
4149 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4150 & pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows));
4151 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxRegNew, off);
4152
4153 uint64_t fGstRegShadows = pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4154 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4155 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4156
4157 Log12(("%s: moving idxVar=%#x from %s to %s (fGstRegShadows=%RX64)\n",
4158 pszCaller, idxVar, g_apszIemNativeHstSimdRegNames[idxRegOld], g_apszIemNativeHstSimdRegNames[idxRegNew], fGstRegShadows));
4160
4161 if (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U))
4162 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxRegNew, idxRegOld);
4163 else
4164 {
4165 Assert(pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U));
4166 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxRegNew, idxRegOld);
4167 }
4168
4169 pReNative->Core.aHstSimdRegs[idxRegNew].fGstRegShadows = fGstRegShadows;
4170 pReNative->Core.aHstSimdRegs[idxRegNew].enmWhat = kIemNativeWhat_Var;
4171 pReNative->Core.aHstSimdRegs[idxRegNew].idxVar = idxVar;
4172 if (fGstRegShadows)
4173 {
4174 pReNative->Core.bmHstSimdRegsWithGstShadow = (pReNative->Core.bmHstSimdRegsWithGstShadow & ~RT_BIT_32(idxRegOld))
4175 | RT_BIT_32(idxRegNew);
4176 while (fGstRegShadows)
4177 {
4178 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4179 fGstRegShadows &= ~RT_BIT_64(idxGstReg);
4180
4181 Assert(pReNative->Core.aidxGstSimdRegShadows[idxGstReg] == idxRegOld);
4182 pReNative->Core.aidxGstSimdRegShadows[idxGstReg] = idxRegNew;
4183 }
4184 }
4185
4186 pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg = (uint8_t)idxRegNew;
4187 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4188 pReNative->Core.bmHstSimdRegs = RT_BIT_32(idxRegNew) | (pReNative->Core.bmHstSimdRegs & ~RT_BIT_32(idxRegOld));
4189 return off;
4190}
4191
4192
4193/**
4194 * Moves a variable to a different register or spills it onto the stack.
4195 *
4196 * This must be a stack variable (kIemNativeVarKind_Stack) because the other
4197 * kinds can easily be recreated if needed later.
4198 *
4199 * @returns The new code buffer position.
4200 * @param pReNative The native recompile state.
4201 * @param off The current code buffer position.
4202 * @param idxVar The variable index.
4203 * @param fForbiddenRegs Mask of the forbidden registers. Defaults to
4204 * call-volatile registers.
4205 */
4206DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegMoveOrSpillStackVar(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxVar,
4207 uint32_t fForbiddenRegs /*= IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK*/)
4208{
4209 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4210 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4211 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
4212 Assert(!pVar->fRegAcquired);
4213 Assert(!pVar->fSimdReg);
4214
4215 uint8_t const idxRegOld = pVar->idxReg;
4216 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
4217 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegOld));
4218 Assert(pReNative->Core.aHstSimdRegs[idxRegOld].enmWhat == kIemNativeWhat_Var);
4219 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows)
4220 == pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows);
4221 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstReg_End));
4222 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxRegOld))
4223 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4224 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4225 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
4226
4227 /** @todo Add statistics on this.*/
4228 /** @todo Implement basic variable liveness analysis (python) so variables
4229 * can be freed immediately once no longer used. This has the potential to
4230 * be trashing registers and stack for dead variables.
4231 * Update: This is mostly done. (Not IEMNATIVE_WITH_LIVENESS_ANALYSIS.) */
4232
4233 /*
4234 * First try move it to a different register, as that's cheaper.
4235 */
4236 fForbiddenRegs |= RT_BIT_32(idxRegOld);
4237 fForbiddenRegs |= IEMNATIVE_SIMD_REG_FIXED_MASK;
4238 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & ~fForbiddenRegs;
4239 if (fRegs)
4240 {
4241 /* Avoid using shadow registers, if possible. */
4242 if (fRegs & ~pReNative->Core.bmHstSimdRegsWithGstShadow)
4243 fRegs &= ~pReNative->Core.bmHstSimdRegsWithGstShadow;
4244 unsigned const idxRegNew = ASMBitFirstSetU32(fRegs) - 1;
4245 return iemNativeSimdRegMoveVar(pReNative, off, idxVar, idxRegOld, idxRegNew, "iemNativeSimdRegMoveOrSpillStackVar");
4246 }
4247
4248 /*
4249 * Otherwise we must spill the register onto the stack.
4250 */
4251 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
4252 Log12(("iemNativeSimdRegMoveOrSpillStackVar: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
4253 idxVar, idxRegOld, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
4254
4255 if (pVar->cbVar == sizeof(RTUINT128U))
4256 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4257 else
4258 {
4259 Assert(pVar->cbVar == sizeof(RTUINT256U));
4260 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
4261 }
4262
4263 pVar->idxReg = UINT8_MAX;
4264 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
4265 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
4266 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
4267 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
4268 return off;
4269}
4270
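/* Illustrative sketch (comment only, not compiled): a caller wanting the SIMD register of
 * a stack variable vacated while keeping the call-volatile SIMD registers off limits would
 * invoke the function above like this; idxVar stands for a packed variable index owned by
 * the caller:
 *
 *     off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar,
 *                                               IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
 */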
4271
4272/**
4273 * Called right before emitting a call instruction to move anything important
4274 * out of call-volatile SIMD registers, free and flush the call-volatile SIMD registers,
4275 * optionally freeing argument variables.
4276 *
4277 * @returns New code buffer offset, UINT32_MAX on failure.
4278 * @param pReNative The native recompile state.
4279 * @param off The code buffer offset.
4280 * @param cArgs The number of arguments the function call takes.
4281 * It is presumed that the host register part of these have
4282 * been allocated as such already and won't need moving,
4283 * just freeing.
4284 * @param fKeepVars Mask of variables that should keep their register
4285 * assignments. Caller must take care to handle these.
4286 */
4287DECL_HIDDEN_THROW(uint32_t)
4288iemNativeSimdRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4289{
4290 Assert(!cArgs); RT_NOREF(cArgs);
4291
4292 /* fKeepVars will reduce this mask. */
4293 uint32_t fSimdRegsToFree = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
4294
4295 /*
4296 * Move anything important out of volatile registers.
4297 */
4298 uint32_t fSimdRegsToMove = IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4299#ifdef IEMNATIVE_SIMD_REG_FIXED_TMP0
4300 & ~RT_BIT_32(IEMNATIVE_SIMD_REG_FIXED_TMP0)
4301#endif
4302 ;
4303
4304 fSimdRegsToMove &= pReNative->Core.bmHstSimdRegs;
4305 if (!fSimdRegsToMove)
4306 { /* likely */ }
4307 else
4308 {
4309 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: fSimdRegsToMove=%#x\n", fSimdRegsToMove));
4310 while (fSimdRegsToMove != 0)
4311 {
4312 unsigned const idxSimdReg = ASMBitFirstSetU32(fSimdRegsToMove) - 1;
4313 fSimdRegsToMove &= ~RT_BIT_32(idxSimdReg);
4314
4315 switch (pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat)
4316 {
4317 case kIemNativeWhat_Var:
4318 {
4319 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxSimdReg].idxVar;
4320 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4321 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4322 Assert(pVar->idxReg == idxSimdReg);
4323 Assert(pVar->fSimdReg);
4324 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4325 {
4326 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxSimdReg=%d\n",
4327 idxVar, pVar->enmKind, pVar->idxReg));
4328 if (pVar->enmKind != kIemNativeVarKind_Stack)
4329 pVar->idxReg = UINT8_MAX;
4330 else
4331 off = iemNativeSimdRegMoveOrSpillStackVar(pReNative, off, idxVar);
4332 }
4333 else
4334 fSimdRegsToFree &= ~RT_BIT_32(idxSimdReg);
4335 continue;
4336 }
4337
4338 case kIemNativeWhat_Arg:
4339 AssertMsgFailed(("What?!?: %u\n", idxSimdReg));
4340 continue;
4341
4342 case kIemNativeWhat_rc:
4343 case kIemNativeWhat_Tmp:
4344 AssertMsgFailed(("Missing free: %u\n", idxSimdReg));
4345 continue;
4346
4347 case kIemNativeWhat_FixedReserved:
4348#ifdef RT_ARCH_ARM64
4349 continue; /* On ARM the upper half of the virtual 256-bit register. */
4350#endif
4351
4352 case kIemNativeWhat_FixedTmp:
4353 case kIemNativeWhat_pVCpuFixed:
4354 case kIemNativeWhat_pCtxFixed:
4355 case kIemNativeWhat_PcShadow:
4356 case kIemNativeWhat_Invalid:
4357 case kIemNativeWhat_End:
4358 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4359 }
4360 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4361 }
4362 }
4363
4364 /*
4365 * Do the actual freeing.
4366 */
4367 if (pReNative->Core.bmHstSimdRegs & fSimdRegsToFree)
4368 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegs %#x -> %#x\n",
4369 pReNative->Core.bmHstSimdRegs, pReNative->Core.bmHstSimdRegs & ~fSimdRegsToFree));
4370 pReNative->Core.bmHstSimdRegs &= ~fSimdRegsToFree;
4371
4372 /* If there are guest register shadows in any call-volatile register, we
4373 have to clear the corresponding guest register masks for each register. */
4374 uint32_t fHstSimdRegsWithGstShadow = pReNative->Core.bmHstSimdRegsWithGstShadow & fSimdRegsToFree;
4375 if (fHstSimdRegsWithGstShadow)
4376 {
4377 Log12(("iemNativeSimdRegMoveAndFreeAndFlushAtCall: bmHstSimdRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4378 pReNative->Core.bmHstSimdRegsWithGstShadow, pReNative->Core.bmHstSimdRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK, fHstSimdRegsWithGstShadow));
4379 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~fHstSimdRegsWithGstShadow;
4380 do
4381 {
4382 unsigned const idxSimdReg = ASMBitFirstSetU32(fHstSimdRegsWithGstShadow) - 1;
4383 fHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxSimdReg);
4384
4385 AssertMsg(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows != 0, ("idxSimdReg=%#x\n", idxSimdReg));
4386
4387#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4388 /*
4389 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4390 * to call volatile registers).
4391 */
4392 if ( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4393 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows)
4394 off = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, off, idxSimdReg);
4395#endif
4396 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4397 & pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows));
4398
4399 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows;
4400 pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows = 0;
4401 } while (fHstSimdRegsWithGstShadow != 0);
4402 }
4403
4404 return off;
4405}
4406#endif
4407
4408
4409/**
4410 * Called right before emitting a call instruction to move anything important
4411 * out of call-volatile registers, free and flush the call-volatile registers,
4412 * optionally freeing argument variables.
4413 *
4414 * @returns New code buffer offset, UINT32_MAX on failure.
4415 * @param pReNative The native recompile state.
4416 * @param off The code buffer offset.
4417 * @param cArgs The number of arguments the function call takes.
4418 * It is presumed that the host register part of these have
4419 * been allocated as such already and won't need moving,
4420 * just freeing.
4421 * @param fKeepVars Mask of variables that should keep their register
4422 * assignments. Caller must take care to handle these.
4423 */
4424DECL_HIDDEN_THROW(uint32_t)
4425iemNativeRegMoveAndFreeAndFlushAtCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint32_t fKeepVars /*= 0*/)
4426{
4427 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
4428
4429 /* fKeepVars will reduce this mask. */
4430 uint32_t fRegsToFree = IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK;
4431
4432#ifdef RT_ARCH_ARM64
4433 AssertCompile(IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK == UINT32_C(0x37fff));
4434#endif
4435
4436 /*
4437 * Move anything important out of volatile registers.
4438 */
4439 if (cArgs > RT_ELEMENTS(g_aidxIemNativeCallRegs))
4440 cArgs = RT_ELEMENTS(g_aidxIemNativeCallRegs);
4441 uint32_t fRegsToMove = IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK
4442#ifdef IEMNATIVE_REG_FIXED_PC_DBG
4443 & ~RT_BIT_32(IEMNATIVE_REG_FIXED_PC_DBG)
4444#endif
4445 & ~g_afIemNativeCallRegs[cArgs];
4446
4447 fRegsToMove &= pReNative->Core.bmHstRegs;
4448 if (!fRegsToMove)
4449 { /* likely */ }
4450 else
4451 {
4452 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: fRegsToMove=%#x\n", fRegsToMove));
4453 while (fRegsToMove != 0)
4454 {
4455 unsigned const idxReg = ASMBitFirstSetU32(fRegsToMove) - 1;
4456 fRegsToMove &= ~RT_BIT_32(idxReg);
4457
4458 switch (pReNative->Core.aHstRegs[idxReg].enmWhat)
4459 {
4460 case kIemNativeWhat_Var:
4461 {
4462 uint8_t const idxVar = pReNative->Core.aHstRegs[idxReg].idxVar;
4463 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
4464 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
4465 Assert(pVar->idxReg == idxReg);
4466#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4467 Assert(!pVar->fSimdReg);
4468#endif
4469 if (!(RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)) & fKeepVars))
4470 {
4471 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: idxVar=%#x enmKind=%d idxReg=%d\n",
4472 idxVar, pVar->enmKind, pVar->idxReg));
4473 if (pVar->enmKind != kIemNativeVarKind_Stack)
4474 pVar->idxReg = UINT8_MAX;
4475 else
4476 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
4477 }
4478 else
4479 fRegsToFree &= ~RT_BIT_32(idxReg);
4480 continue;
4481 }
4482
4483 case kIemNativeWhat_Arg:
4484 AssertMsgFailed(("What?!?: %u\n", idxReg));
4485 continue;
4486
4487 case kIemNativeWhat_rc:
4488 case kIemNativeWhat_Tmp:
4489 AssertMsgFailed(("Missing free: %u\n", idxReg));
4490 continue;
4491
4492 case kIemNativeWhat_FixedTmp:
4493 case kIemNativeWhat_pVCpuFixed:
4494 case kIemNativeWhat_pCtxFixed:
4495 case kIemNativeWhat_PcShadow:
4496 case kIemNativeWhat_FixedReserved:
4497 case kIemNativeWhat_Invalid:
4498 case kIemNativeWhat_End:
4499 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_1));
4500 }
4501 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_2));
4502 }
4503 }
4504
4505 /*
4506 * Do the actual freeing.
4507 */
4508 if (pReNative->Core.bmHstRegs & fRegsToFree)
4509 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegs %#x -> %#x\n",
4510 pReNative->Core.bmHstRegs, pReNative->Core.bmHstRegs & ~fRegsToFree));
4511 pReNative->Core.bmHstRegs &= ~fRegsToFree;
4512
4513 /* If there are guest register shadows in any call-volatile register, we
4514 have to clear the corresponding guest register masks for each register. */
4515 uint32_t fHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow & fRegsToFree;
4516 if (fHstRegsWithGstShadow)
4517 {
4518 Log12(("iemNativeRegMoveAndFreeAndFlushAtCall: bmHstRegsWithGstShadow %#RX32 -> %#RX32; removed %#RX32\n",
4519 pReNative->Core.bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK,
4520 fHstRegsWithGstShadow));
4521 pReNative->Core.bmHstRegsWithGstShadow &= ~fHstRegsWithGstShadow;
4522 do
4523 {
4524 unsigned const idxReg = ASMBitFirstSetU32(fHstRegsWithGstShadow) - 1;
4525 fHstRegsWithGstShadow &= ~RT_BIT_32(idxReg);
4526
4527 AssertMsg(pReNative->Core.aHstRegs[idxReg].fGstRegShadows != 0, ("idxReg=%#x\n", idxReg));
4528
4529#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4530 /*
4531 * Flush any pending writes now (might have been skipped earlier in iemEmitCallCommon() but it doesn't apply
4532 * to call volatile registers).
4533 */
4534 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows)
4535 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxReg);
4536 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxReg].fGstRegShadows));
4537#endif
4538
4539 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxReg].fGstRegShadows;
4540 pReNative->Core.aHstRegs[idxReg].fGstRegShadows = 0;
4541 } while (fHstRegsWithGstShadow != 0);
4542 }
4543
4544#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4545 /* Now for the SIMD registers, no argument support for now. */
4546 off = iemNativeSimdRegMoveAndFreeAndFlushAtCall(pReNative, off, 0 /*cArgs*/, fKeepVars);
4547#endif
4548
4549 return off;
4550}
4551
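/* Illustrative sketch (comment only, not compiled): when emitting a call to a threaded
 * function or C helper, the function above is typically followed by the actual call
 * emission and, afterwards, by iemNativeRegFlushGuestShadows() further down for the guest
 * registers the callee may have modified.  The argument count (2) and the all-registers
 * mask are placeholders for this example:
 *
 *     off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 2);
 *     // ... load the argument registers and emit the actual call here ...
 *     iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX);
 */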
4552
4553/**
4554 * Flushes a set of guest register shadow copies.
4555 *
4556 * This is usually done after calling a threaded function or a C-implementation
4557 * of an instruction.
4558 *
4559 * @param pReNative The native recompile state.
4560 * @param fGstRegs Set of guest registers to flush.
4561 */
4562DECLHIDDEN(void) iemNativeRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstRegs) RT_NOEXCEPT
4563{
4564 /*
4565 * Reduce the mask by what's currently shadowed
4566 */
4567 uint64_t const bmGstRegShadowsOld = pReNative->Core.bmGstRegShadows;
4568 fGstRegs &= bmGstRegShadowsOld;
4569 if (fGstRegs)
4570 {
4571 uint64_t const bmGstRegShadowsNew = bmGstRegShadowsOld & ~fGstRegs;
4572 Log12(("iemNativeRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstRegs, bmGstRegShadowsOld, bmGstRegShadowsNew));
4573 pReNative->Core.bmGstRegShadows = bmGstRegShadowsNew;
4574 if (bmGstRegShadowsNew)
4575 {
4576 /*
4577 * Partial.
4578 */
4579 do
4580 {
4581 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4582 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4583 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4584 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4585 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4586#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4587 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4588#endif
4589
4590 uint64_t const fInThisHstReg = (pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & fGstRegs) | RT_BIT_64(idxGstReg);
4591 fGstRegs &= ~fInThisHstReg;
4592 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
4593 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
4594 if (!fGstRegShadowsNew)
4595 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
4596 } while (fGstRegs != 0);
4597 }
4598 else
4599 {
4600 /*
4601 * Clear all.
4602 */
4603 do
4604 {
4605 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegs) - 1;
4606 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
4607 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
4608 Assert(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxHstReg));
4609 Assert(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
4610#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4611 Assert(!(pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(idxGstReg)));
4612#endif
4613
4614 fGstRegs &= ~(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
4615 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4616 } while (fGstRegs != 0);
4617 pReNative->Core.bmHstRegsWithGstShadow = 0;
4618 }
4619 }
4620}
4621
4622
4623/**
4624 * Flushes guest register shadow copies held by a set of host registers.
4625 *
4626 * This is used with the TLB lookup code for ensuring that we don't carry on
4627 * with any guest shadows in volatile registers, as these will get corrupted by
4628 * a TLB miss.
4629 *
4630 * @param pReNative The native recompile state.
4631 * @param fHstRegs Set of host registers to flush guest shadows for.
4632 */
4633DECLHIDDEN(void) iemNativeRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstRegs) RT_NOEXCEPT
4634{
4635 /*
4636 * Reduce the mask by what's currently shadowed.
4637 */
4638 uint32_t const bmHstRegsWithGstShadowOld = pReNative->Core.bmHstRegsWithGstShadow;
4639 fHstRegs &= bmHstRegsWithGstShadowOld;
4640 if (fHstRegs)
4641 {
4642 uint32_t const bmHstRegsWithGstShadowNew = bmHstRegsWithGstShadowOld & ~fHstRegs;
4643 Log12(("iemNativeRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
4644 fHstRegs, bmHstRegsWithGstShadowOld, bmHstRegsWithGstShadowNew));
4645 pReNative->Core.bmHstRegsWithGstShadow = bmHstRegsWithGstShadowNew;
4646 if (bmHstRegsWithGstShadowNew)
4647 {
4648 /*
4649 * Partial (likely).
4650 */
4651 uint64_t fGstShadows = 0;
4652 do
4653 {
4654 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4655 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4656 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4657 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4658#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4659 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4660#endif
4661
4662 fGstShadows |= pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4663 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4664 fHstRegs &= ~RT_BIT_32(idxHstReg);
4665 } while (fHstRegs != 0);
4666 pReNative->Core.bmGstRegShadows &= ~fGstShadows;
4667 }
4668 else
4669 {
4670 /*
4671 * Clear all.
4672 */
4673 do
4674 {
4675 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4676 Assert(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxHstReg)));
4677 Assert( (pReNative->Core.bmGstRegShadows & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows)
4678 == pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows);
4679#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4680 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
4681#endif
4682
4683 pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows = 0;
4684 fHstRegs &= ~RT_BIT_32(idxHstReg);
4685 } while (fHstRegs != 0);
4686 pReNative->Core.bmGstRegShadows = 0;
4687 }
4688 }
4689}
4690
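/* Illustrative sketch (comment only, not compiled): dropping the guest shadow associations
 * for the call-volatile GPRs before a TLB lookup, so that no shadow is left in a register
 * a TLB miss would corrupt:
 *
 *     iemNativeRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_GREG_MASK);
 */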
4691
4692/**
4693 * Restores guest shadow copies in volatile registers.
4694 *
4695 * This is used after calling a helper function (think TLB miss) to restore the
4696 * register state of volatile registers.
4697 *
4698 * @param pReNative The native recompile state.
4699 * @param off The code buffer offset.
4700 * @param fHstRegsActiveShadows Set of host registers which are allowed to
4701 * be active (allocated) w/o asserting. Hack.
4702 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
4703 * iemNativeVarRestoreVolatileRegsPostHlpCall()
4704 */
4705DECL_HIDDEN_THROW(uint32_t)
4706iemNativeRegRestoreGuestShadowsInVolatileRegs(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsActiveShadows)
4707{
4708 uint32_t fHstRegs = pReNative->Core.bmHstRegsWithGstShadow & IEMNATIVE_CALL_VOLATILE_GREG_MASK;
4709 if (fHstRegs)
4710 {
4711 Log12(("iemNativeRegRestoreGuestShadowsInVolatileRegs: %#RX32\n", fHstRegs));
4712 do
4713 {
4714 unsigned const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
4715
4716 /* It's not fatal if a register is active holding a variable that
4717 shadows a guest register, ASSUMING all pending guest register
4718 writes were flushed prior to the helper call. However, we'll be
4719 emitting duplicate restores, so it wastes code space. */
4720 Assert(!(pReNative->Core.bmHstRegs & ~fHstRegsActiveShadows & RT_BIT_32(idxHstReg)));
4721 RT_NOREF(fHstRegsActiveShadows);
4722
4723 uint64_t const fGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
4724#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4725 Assert(!(pReNative->Core.bmGstRegShadowDirty & fGstRegShadows));
4726#endif
4727 Assert((pReNative->Core.bmGstRegShadows & fGstRegShadows) == fGstRegShadows);
4728 AssertStmt(fGstRegShadows != 0 && fGstRegShadows < RT_BIT_64(kIemNativeGstReg_End),
4729 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_12));
4730
4731 unsigned const idxGstReg = ASMBitFirstSetU64(fGstRegShadows) - 1;
4732 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, idxHstReg, (IEMNATIVEGSTREG)idxGstReg);
4733
4734 fHstRegs &= ~RT_BIT_32(idxHstReg);
4735 } while (fHstRegs != 0);
4736 }
4737 return off;
4738}
4739
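/* Illustrative sketch (comment only, not compiled): reloading the guest shadows that still
 * sit in call-volatile registers after a helper call (think TLB miss); passing 0 means no
 * host register is expected to be actively holding a variable:
 *
 *     off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, 0);
 */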
4740
4741
4742
4743/*********************************************************************************************************************************
4744* SIMD register allocator (largely code duplication of the GPR allocator for now but might diverge) *
4745*********************************************************************************************************************************/
4746#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
4747
4748/**
4749 * Info about shadowed guest SIMD register values.
4750 * @see IEMNATIVEGSTSIMDREG
4751 */
4752static struct
4753{
4754 /** Offset in VMCPU of XMM (low 128-bit) registers. */
4755 uint32_t offXmm;
4756 /** Offset in VMCPU of YmmHi (high 128-bit) registers. */
4757 uint32_t offYmm;
4758 /** Name (for logging). */
4759 const char *pszName;
4760} const g_aGstSimdShadowInfo[] =
4761{
4762#define CPUMCTX_OFF_AND_SIZE(a_iSimdReg) (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.x87.aXMM[a_iSimdReg]), \
4763 (uint32_t)RT_UOFFSETOF(VMCPU, cpum.GstCtx.XState.u.YmmHi.aYmmHi[a_iSimdReg])
4764 /* [kIemNativeGstSimdReg_SimdRegFirst + 0] = */ { CPUMCTX_OFF_AND_SIZE(0), "ymm0", },
4765 /* [kIemNativeGstSimdReg_SimdRegFirst + 1] = */ { CPUMCTX_OFF_AND_SIZE(1), "ymm1", },
4766 /* [kIemNativeGstSimdReg_SimdRegFirst + 2] = */ { CPUMCTX_OFF_AND_SIZE(2), "ymm2", },
4767 /* [kIemNativeGstSimdReg_SimdRegFirst + 3] = */ { CPUMCTX_OFF_AND_SIZE(3), "ymm3", },
4768 /* [kIemNativeGstSimdReg_SimdRegFirst + 4] = */ { CPUMCTX_OFF_AND_SIZE(4), "ymm4", },
4769 /* [kIemNativeGstSimdReg_SimdRegFirst + 5] = */ { CPUMCTX_OFF_AND_SIZE(5), "ymm5", },
4770 /* [kIemNativeGstSimdReg_SimdRegFirst + 6] = */ { CPUMCTX_OFF_AND_SIZE(6), "ymm6", },
4771 /* [kIemNativeGstSimdReg_SimdRegFirst + 7] = */ { CPUMCTX_OFF_AND_SIZE(7), "ymm7", },
4772 /* [kIemNativeGstSimdReg_SimdRegFirst + 8] = */ { CPUMCTX_OFF_AND_SIZE(8), "ymm8", },
4773 /* [kIemNativeGstSimdReg_SimdRegFirst + 9] = */ { CPUMCTX_OFF_AND_SIZE(9), "ymm9", },
4774 /* [kIemNativeGstSimdReg_SimdRegFirst + 10] = */ { CPUMCTX_OFF_AND_SIZE(10), "ymm10", },
4775 /* [kIemNativeGstSimdReg_SimdRegFirst + 11] = */ { CPUMCTX_OFF_AND_SIZE(11), "ymm11", },
4776 /* [kIemNativeGstSimdReg_SimdRegFirst + 12] = */ { CPUMCTX_OFF_AND_SIZE(12), "ymm12", },
4777 /* [kIemNativeGstSimdReg_SimdRegFirst + 13] = */ { CPUMCTX_OFF_AND_SIZE(13), "ymm13", },
4778 /* [kIemNativeGstSimdReg_SimdRegFirst + 14] = */ { CPUMCTX_OFF_AND_SIZE(14), "ymm14", },
4779 /* [kIemNativeGstSimdReg_SimdRegFirst + 15] = */ { CPUMCTX_OFF_AND_SIZE(15), "ymm15", },
4780#undef CPUMCTX_OFF_AND_SIZE
4781};
4782AssertCompile(RT_ELEMENTS(g_aGstSimdShadowInfo) == kIemNativeGstSimdReg_End);
4783
4784
4785/**
4786 * Frees a temporary SIMD register.
4787 *
4788 * Any shadow copies of guest registers assigned to the host register will not
4789 * be flushed by this operation.
4790 */
4791DECLHIDDEN(void) iemNativeSimdRegFreeTmp(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg) RT_NOEXCEPT
4792{
4793 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg));
4794 Assert(pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmWhat == kIemNativeWhat_Tmp);
4795 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
4796 Log12(("iemNativeSimdRegFreeTmp: %s (gst: %#RX64)\n",
4797 g_apszIemNativeHstSimdRegNames[idxHstSimdReg], pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
4798}
4799
4800
4801/**
4802 * Emits code to flush a pending write of the given SIMD register, if any, and also flushes the guest to host SIMD register association.
4803 *
4804 * @returns New code buffer offset.
4805 * @param pReNative The native recompile state.
4806 * @param off Current code buffer position.
4807 * @param enmGstSimdReg The guest SIMD register to flush.
4808 */
4809DECL_HIDDEN_THROW(uint32_t)
4810iemNativeSimdRegFlushPendingWrite(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdReg)
4811{
4812 uint8_t const idxHstSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
4813
4814 Log12(("iemNativeSimdRegFlushPendingWrite: Clearing guest register %s shadowed by host %s with state DirtyLo:%u DirtyHi:%u\n",
4815 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, g_apszIemNativeHstSimdRegNames[idxHstSimdReg],
4816 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg),
4817 IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)));
4818
4819 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
4820 {
4821 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4822 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128);
4823 off = iemNativeEmitSimdStoreVecRegToVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
4824 }
4825
4826 if (IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg))
4827 {
4828 Assert( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256
4829 || pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128);
4830 off = iemNativeEmitSimdStoreVecRegToVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
4831 }
4832
4833 IEMNATIVE_SIMD_REG_STATE_CLR_DIRTY(pReNative, enmGstSimdReg);
4834 return off;
4835}
4836
4837
4838/**
4839 * Flush the given set of guest SIMD registers if marked as dirty.
4840 *
4841 * @returns New code buffer offset.
4842 * @param pReNative The native recompile state.
4843 * @param off Current code buffer position.
4844 * @param fFlushGstSimdReg The guest SIMD register set to flush (default is flush everything).
4845 */
4846DECL_HIDDEN_THROW(uint32_t)
4847iemNativeSimdRegFlushDirtyGuest(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fFlushGstSimdReg /*= UINT64_MAX*/)
4848{
4849 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4850 & fFlushGstSimdReg;
4851 if (bmGstSimdRegShadowDirty)
4852 {
4853# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4854 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4855 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4856# endif
4857
4858 do
4859 {
4860 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4861 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4862 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4863 } while (bmGstSimdRegShadowDirty);
4864 }
4865
4866 return off;
4867}
4868
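/* Illustrative sketch (comment only, not compiled): writing all dirty guest SIMD shadows
 * back to CPUMCTX, e.g. before handing control to code that inspects the guest context;
 * UINT64_MAX is the documented flush-everything mask:
 *
 *     off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, UINT64_MAX);
 */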
4869
4870#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
4871/**
4872 * Flush all shadowed guest SIMD registers marked as dirty for the given host SIMD register.
4873 *
4874 * @returns New code buffer offset.
4875 * @param pReNative The native recompile state.
4876 * @param off Current code buffer position.
4877 * @param idxHstSimdReg The host SIMD register.
4878 *
4879 * @note This doesn't do any unshadowing of guest registers from the host register.
4880 */
4881DECL_HIDDEN_THROW(uint32_t) iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t const idxHstSimdReg)
4882{
4883 /* We need to flush any pending guest register writes this host register shadows. */
4884 uint64_t bmGstSimdRegShadowDirty = (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
4885 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
4886 if (bmGstSimdRegShadowDirty)
4887 {
4888# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
4889 iemNativeDbgInfoAddNativeOffset(pReNative, off);
4890 iemNativeDbgInfoAddGuestRegWriteback(pReNative, true /*fSimdReg*/, bmGstSimdRegShadowDirty);
4891# endif
4892
4893 do
4894 {
4895 unsigned const idxGstSimdReg = ASMBitFirstSetU64(bmGstSimdRegShadowDirty) - 1;
4896 bmGstSimdRegShadowDirty &= ~RT_BIT_64(idxGstSimdReg);
4897 off = iemNativeSimdRegFlushPendingWrite(pReNative, off, IEMNATIVEGSTSIMDREG_SIMD(idxGstSimdReg));
4898 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstSimdReg));
4899 } while (bmGstSimdRegShadowDirty);
4900 }
4901
4902 return off;
4903}
4904#endif
4905
4906
4907/**
4908 * Locate a register, possibly freeing one up.
4909 *
4910 * This ASSUMES the caller has done the minimal/optimal allocation checks and
4911 * failed.
4912 *
4913 * @returns Host register number on success. Returns UINT8_MAX if no register
4914 * was found; the caller is expected to deal with this and raise an
4915 * allocation type specific status code (if desired).
4916 *
4917 * @throws VBox status code if we run into trouble spilling a variable or
4918 * recording debug info. Does NOT throw anything if we're out of
4919 * registers, though.
4920 */
4921static uint8_t iemNativeSimdRegAllocFindFree(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile,
4922 uint32_t fRegMask = IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK)
4923{
4924 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFree);
4925 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
4926 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
4927
4928 /*
4929 * Try a freed register that's shadowing a guest register.
4930 */
4931 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs & fRegMask;
4932 if (fRegs)
4933 {
4934 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeNoVar);
4935
4936#if 0 /** @todo def IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4937 /*
4938 * When we have liveness information, we use it to kick out all shadowed
4939 * guest registers that will not be needed any more in this TB. If we're
4940 * lucky, this may prevent us from ending up here again.
4941 *
4942 * Note! We must consider the previous entry here so we don't free
4943 * anything that the current threaded function requires (current
4944 * entry is produced by the next threaded function).
4945 */
4946 uint32_t const idxCurCall = pReNative->idxCurCall;
4947 if (idxCurCall > 0)
4948 {
4949 PCIEMLIVENESSENTRY const pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall - 1];
4950
4951# ifndef IEMLIVENESS_EXTENDED_LAYOUT
4952 /* Construct a mask of the guest registers in the UNUSED and XCPT_OR_CALL state. */
4953 AssertCompile(IEMLIVENESS_STATE_UNUSED == 1 && IEMLIVENESS_STATE_XCPT_OR_CALL == 2);
4954 uint64_t fToFreeMask = pLivenessEntry->Bit0.bm64 ^ pLivenessEntry->Bit1.bm64; /* mask of regs in either UNUSED or XCPT_OR_CALL state */
4955#else
4956 /* Construct a mask of the registers not in the read or write state.
4957 Note! We could skip writes, if they aren't from us, as this is just
4958 a hack to prevent trashing registers that have just been written
4959 or will be written when we retire the current instruction. */
4960 uint64_t fToFreeMask = ~pLivenessEntry->aBits[IEMLIVENESS_BIT_READ].bm64
4961 & ~pLivenessEntry->aBits[IEMLIVENESS_BIT_WRITE].bm64
4962 & IEMLIVENESSBIT_MASK;
4963#endif
4964 /* If it matches any shadowed registers. */
4965 if (pReNative->Core.bmGstRegShadows & fToFreeMask)
4966 {
4967 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessUnshadowed);
4968 iemNativeRegFlushGuestShadows(pReNative, fToFreeMask);
4969 Assert(fRegs == (~pReNative->Core.bmHstRegs & fRegMask)); /* this shall not change. */
4970
4971 /* See if we've got any unshadowed registers we can return now. */
4972 uint32_t const fUnshadowedRegs = fRegs & ~pReNative->Core.bmHstRegsWithGstShadow;
4973 if (fUnshadowedRegs)
4974 {
4975 STAM_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeLivenessHelped);
4976 return (fPreferVolatile
4977 ? ASMBitFirstSetU32(fUnshadowedRegs)
4978 : ASMBitLastSetU32( fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
4979 ? fUnshadowedRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fUnshadowedRegs))
4980 - 1;
4981 }
4982 }
4983 }
4984#endif /* IEMNATIVE_WITH_LIVENESS_ANALYSIS */
4985
4986 unsigned const idxReg = (fPreferVolatile
4987 ? ASMBitFirstSetU32(fRegs)
4988 : ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
4989 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs))
4990 - 1;
4991
4992 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows != 0);
4993 Assert( (pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows & pReNative->Core.bmGstSimdRegShadows)
4994 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
4995 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg));
4996
4997 /* We need to flush any pending guest register writes this host SIMD register shadows. */
4998 *poff = iemNativeSimdRegFlushDirtyGuestByHostSimdRegShadow(pReNative, *poff, idxReg);
4999
5000 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5001 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5002 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5003 pReNative->Core.aHstSimdRegs[idxReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5004 return idxReg;
5005 }
5006
5007 AssertFailed(); /** @todo The following needs testing when it actually gets hit. */
5008
5009 /*
5010 * Try free up a variable that's in a register.
5011 *
5012 * We do two rounds here, first evacuating variables we don't need to be
5013 * saved on the stack, then in the second round move things to the stack.
5014 */
5015 STAM_REL_COUNTER_INC(&pReNative->pVCpu->iem.s.StatNativeSimdRegFindFreeVar);
5016 for (uint32_t iLoop = 0; iLoop < 2; iLoop++)
5017 {
5018 uint32_t fVars = pReNative->Core.bmVars;
5019 while (fVars)
5020 {
5021 uint32_t const idxVar = ASMBitFirstSetU32(fVars) - 1;
5022 uint8_t const idxReg = pReNative->Core.aVars[idxVar].idxReg;
5023 if (!pReNative->Core.aVars[idxVar].fSimdReg) /* Ignore non SIMD variables here. */
5024 { fVars &= ~RT_BIT_32(idxVar); continue; } /* (clear the mask bit so the loop cannot spin on this variable) */
5025
5026 if ( idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
5027 && (RT_BIT_32(idxReg) & fRegMask)
5028 && ( iLoop == 0
5029 ? pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_Stack
5030 : pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5031 && !pReNative->Core.aVars[idxVar].fRegAcquired)
5032 {
5033 Assert(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxReg));
5034 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows)
5035 == pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows);
5036 Assert(pReNative->Core.bmGstSimdRegShadows < RT_BIT_64(kIemNativeGstSimdReg_End));
5037 Assert( RT_BOOL(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg))
5038 == RT_BOOL(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows));
5039
5040 if (pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Stack)
5041 {
5042 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
5043 /* Spill the full SIMD value, selecting the store size by the variable size. */
5043 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
5043 *poff = iemNativeEmitStoreVecRegByBpU128(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5043 else
5043 *poff = iemNativeEmitStoreVecRegByBpU256(pReNative, *poff, iemNativeStackCalcBpDisp(idxStackSlot), idxReg);
5044 }
5045
5046 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
5047 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxReg);
5048
5049 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxReg);
5050 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows;
5051 pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows = 0;
5052 return idxReg;
5053 }
5054 fVars &= ~RT_BIT_32(idxVar);
5055 }
5056 }
5057
5058 AssertFailed();
5059 return UINT8_MAX;
5060}
5061
5062
5063/**
5064 * Flushes a set of guest register shadow copies.
5065 *
5066 * This is usually done after calling a threaded function or a C-implementation
5067 * of an instruction.
5068 *
5069 * @param pReNative The native recompile state.
5070 * @param fGstSimdRegs Set of guest SIMD registers to flush.
5071 */
5072DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadows(PIEMRECOMPILERSTATE pReNative, uint64_t fGstSimdRegs) RT_NOEXCEPT
5073{
5074 /*
5075 * Reduce the mask by what's currently shadowed
5076 */
5077 uint64_t const bmGstSimdRegShadows = pReNative->Core.bmGstSimdRegShadows;
5078 fGstSimdRegs &= bmGstSimdRegShadows;
5079 if (fGstSimdRegs)
5080 {
5081 uint64_t const bmGstSimdRegShadowsNew = bmGstSimdRegShadows & ~fGstSimdRegs;
5082 Log12(("iemNativeSimdRegFlushGuestShadows: flushing %#RX64 (%#RX64 -> %#RX64)\n", fGstSimdRegs, bmGstSimdRegShadows, bmGstSimdRegShadowsNew));
5083 pReNative->Core.bmGstSimdRegShadows = bmGstSimdRegShadowsNew;
5084 if (bmGstSimdRegShadowsNew)
5085 {
5086 /*
5087 * Partial.
5088 */
5089 do
5090 {
5091 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5092 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5093 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5094 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5095 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5096 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5097
5098 uint64_t const fInThisHstReg = (pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & fGstSimdRegs) | RT_BIT_64(idxGstReg);
5099 fGstSimdRegs &= ~fInThisHstReg;
5100 uint64_t const fGstRegShadowsNew = pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & ~fInThisHstReg;
5101 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = fGstRegShadowsNew;
5102 if (!fGstRegShadowsNew)
5103 {
5104 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5105 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5106 }
5107 } while (fGstSimdRegs != 0);
5108 }
5109 else
5110 {
5111 /*
5112 * Clear all.
5113 */
5114 do
5115 {
5116 unsigned const idxGstReg = ASMBitFirstSetU64(fGstSimdRegs) - 1;
5117 uint8_t const idxHstReg = pReNative->Core.aidxGstSimdRegShadows[idxGstReg];
5118 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aidxGstSimdRegShadows));
5119 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxHstReg));
5120 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg));
5121 Assert(!IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_U256(pReNative, idxGstReg));
5122
5123 fGstSimdRegs &= ~(pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows | RT_BIT_64(idxGstReg));
5124 pReNative->Core.aHstSimdRegs[idxHstReg].fGstRegShadows = 0;
5125 pReNative->Core.aHstSimdRegs[idxHstReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5126 } while (fGstSimdRegs != 0);
5127 pReNative->Core.bmHstSimdRegsWithGstShadow = 0;
5128 }
5129 }
5130}
5131
5132
5133/**
5134 * Allocates a temporary host SIMD register.
5135 *
5136 * This may emit code to save register content onto the stack in order to free
5137 * up a register.
5138 *
5139 * @returns The host register number; throws VBox status code on failure,
5140 * so no need to check the return value.
5141 * @param pReNative The native recompile state.
5142 * @param poff Pointer to the variable with the code buffer position.
5143 * This will be updated if we need to move a variable from
5144 * register to stack in order to satisfy the request.
5145 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5146 * registers (@c true, default) or the other way around
5147 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5148 */
5149DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmp(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, bool fPreferVolatile /*= true*/)
5150{
5151 /*
5152 * Try find a completely unused register, preferably a call-volatile one.
5153 */
5154 uint8_t idxSimdReg;
5155 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5156 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5157 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK);
5158 if (fRegs)
5159 {
5160 if (fPreferVolatile)
5161 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5162 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5163 else
5164 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5165 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5166 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5167 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5168
5169 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5170 Log12(("iemNativeSimdRegAllocTmp: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5171 }
5172 else
5173 {
5174 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile);
5175 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5176 Log12(("iemNativeSimdRegAllocTmp: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5177 }
5178
5179 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5180 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5181}
5182
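/* Illustrative sketch (comment only, not compiled): a temporary SIMD register is allocated,
 * used by some emitted code and then handed back; idxSimdTmp is just a local name for the
 * example:
 *
 *     uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmp(pReNative, &off);
 *     // ... emit code using idxSimdTmp here ...
 *     iemNativeSimdRegFreeTmp(pReNative, idxSimdTmp);
 */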
5183
5184/**
5185 * Alternative version of iemNativeSimdRegAllocTmp that takes mask with acceptable
5186 * registers.
5187 *
5188 * @returns The host register number; throws VBox status code on failure,
5189 * so no need to check the return value.
5190 * @param pReNative The native recompile state.
5191 * @param poff Pointer to the variable with the code buffer position.
5192 * This will be updated if we need to move a variable from
5193 * register to stack in order to satisfy the request.
5194 * @param fRegMask Mask of acceptable registers.
5195 * @param fPreferVolatile Whether to prefer volatile over non-volatile
5196 * registers (@c true, default) or the other way around
5197 * (@c false, for iemNativeRegAllocTmpForGuestReg()).
5198 */
5199DECL_HIDDEN_THROW(uint8_t) iemNativeSimdRegAllocTmpEx(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint32_t fRegMask,
5200 bool fPreferVolatile /*= true*/)
5201{
5202 Assert(!(fRegMask & ~IEMNATIVE_HST_SIMD_REG_MASK));
5203 Assert(!(fRegMask & IEMNATIVE_SIMD_REG_FIXED_MASK));
5204
5205 /*
5206 * Try find a completely unused register, preferably a call-volatile one.
5207 */
5208 uint8_t idxSimdReg;
5209 uint32_t fRegs = ~pReNative->Core.bmHstSimdRegs
5210 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5211 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
5212 & fRegMask;
5213 if (fRegs)
5214 {
5215 if (fPreferVolatile)
5216 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5217 ? fRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5218 else
5219 idxSimdReg = (uint8_t)ASMBitFirstSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
5220 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
5221 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows == 0);
5222 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg)));
5223
5224 pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_Invalid;
5225 Log12(("iemNativeSimdRegAllocTmpEx: %s\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5226 }
5227 else
5228 {
5229 idxSimdReg = iemNativeSimdRegAllocFindFree(pReNative, poff, fPreferVolatile, fRegMask);
5230 AssertStmt(idxSimdReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_TMP));
5231 Log12(("iemNativeSimdRegAllocTmpEx: %s (slow)\n", g_apszIemNativeHstSimdRegNames[idxSimdReg]));
5232 }
5233
5234 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid);
5235 return iemNativeSimdRegMarkAllocated(pReNative, idxSimdReg, kIemNativeWhat_Tmp);
5236}
5237
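/* Illustrative sketch (comment only, not compiled): requesting a temporary SIMD register
 * that must survive a helper call, i.e. excluding the fixed and call-volatile registers
 * from the acceptable set:
 *
 *     uint8_t const idxSimdTmp = iemNativeSimdRegAllocTmpEx(pReNative, &off,
 *                                                           IEMNATIVE_HST_SIMD_REG_MASK
 *                                                           & ~IEMNATIVE_SIMD_REG_FIXED_MASK
 *                                                           & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
 */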
5238
5239/**
5240 * Sets the indicator for which part of the given SIMD register has valid data loaded.
5241 *
5242 * @param pReNative The native recompile state.
5243 * @param idxHstSimdReg The host SIMD register to update the state for.
5244 * @param enmLoadSz The load size to set.
5245 */
5246DECL_FORCE_INLINE(void) iemNativeSimdRegSetValidLoadFlag(PIEMRECOMPILERSTATE pReNative, uint8_t idxHstSimdReg,
5247 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5248{
5249 /* Everything valid already? -> nothing to do. */
5250 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5251 return;
5252
5253 if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Invalid)
5254 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = enmLoadSz;
5255 else if (pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded != enmLoadSz)
5256 {
5257 Assert( ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_Low128
5258 && enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5259 || ( pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded == kIemNativeGstSimdRegLdStSz_High128
5260 && enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128));
5261 pReNative->Core.aHstSimdRegs[idxHstSimdReg].enmLoaded = kIemNativeGstSimdRegLdStSz_256;
5262 }
5263}
5264
5265
5266static uint32_t iemNativeSimdRegAllocLoadVecRegFromVecRegSz(PIEMRECOMPILERSTATE pReNative, uint32_t off, IEMNATIVEGSTSIMDREG enmGstSimdRegDst,
5267 uint8_t idxHstSimdRegDst, uint8_t idxHstSimdRegSrc, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSzDst)
5268{
5269 /* Easy case first: either the destination loads the same range as the source has already loaded, or the source has loaded everything. */
5270 if ( pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == enmLoadSzDst
5271 || pReNative->Core.aHstSimdRegs[idxHstSimdRegSrc].enmLoaded == kIemNativeGstSimdRegLdStSz_256)
5272 {
5273# ifdef RT_ARCH_ARM64
5274 /* ASSUMES that there are two adjacent 128-bit registers available for the 256-bit value. */
5275 Assert(!(idxHstSimdRegDst & 0x1)); Assert(!(idxHstSimdRegSrc & 0x1));
5276# endif
5277
5278 if (idxHstSimdRegDst != idxHstSimdRegSrc)
5279 {
5280 switch (enmLoadSzDst)
5281 {
5282 case kIemNativeGstSimdRegLdStSz_256:
5283 off = iemNativeEmitSimdLoadVecRegFromVecRegU256(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5284 break;
5285 case kIemNativeGstSimdRegLdStSz_Low128:
5286 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5287 break;
5288 case kIemNativeGstSimdRegLdStSz_High128:
5289 off = iemNativeEmitSimdLoadVecRegHighU128FromVecRegHighU128(pReNative, off, idxHstSimdRegDst, idxHstSimdRegSrc);
5290 break;
5291 default:
5292 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5293 }
5294
5295 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdRegDst, enmLoadSzDst);
5296 }
5297 }
5298 else
5299 {
5300 /* The source doesn't have the part loaded, so load the register from CPUMCTX. */
5301 Assert(enmLoadSzDst == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSzDst == kIemNativeGstSimdRegLdStSz_High128);
5302 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, idxHstSimdRegDst, enmGstSimdRegDst, enmLoadSzDst);
5303 }
5304
5305 return off;
5306}
5307
5308
5309/**
5310 * Allocates a temporary host SIMD register for keeping a guest
5311 * SIMD register value.
5312 *
5313 * Since we may already have a register holding the guest register value,
5314 * code will be emitted to do the loading if that's not the case. Code may also
5315 * be emitted if we have to free up a register to satisfy the request.
5316 *
5317 * @returns The host register number; throws VBox status code on failure, so no
5318 * need to check the return value.
5319 * @param pReNative The native recompile state.
5320 * @param poff Pointer to the variable with the code buffer
5321 * position. This will be updated if we need to move a
5322 * variable from register to stack in order to satisfy
5323 * the request.
5324 * @param enmGstSimdReg The guest SIMD register that is to be updated.
5325 * @param enmIntendedUse How the caller will be using the host register.
5326 * @param fNoVolatileRegs Set if no volatile register allowed, clear if any
5327 * register is okay (default). The ASSUMPTION here is
5328 * that the caller has already flushed all volatile
5329 * registers, so this is only applied if we allocate a
5330 * new register.
5331 * @sa iemNativeRegAllocTmpForGuestRegIfAlreadyPresent
5332 */
5333DECL_HIDDEN_THROW(uint8_t)
5334iemNativeSimdRegAllocTmpForGuestSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, IEMNATIVEGSTSIMDREG enmGstSimdReg,
5335 IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz, IEMNATIVEGSTREGUSE enmIntendedUse /*= kIemNativeGstRegUse_ReadOnly*/,
5336 bool fNoVolatileRegs /*= false*/)
5337{
5338 Assert(enmGstSimdReg < kIemNativeGstSimdReg_End);
5339#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) && 0 /** @todo r=aeichner */
5340 AssertMsg( pReNative->idxCurCall == 0
5341 || (enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5342 ? IEMLIVENESS_STATE_IS_CLOBBER_EXPECTED(iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5343 : enmIntendedUse == kIemNativeGstRegUse_ForUpdate
5344 ? IEMLIVENESS_STATE_IS_MODIFY_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg))
5345 : IEMLIVENESS_STATE_IS_INPUT_EXPECTED( iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)) ),
5346 ("%s - %u\n", g_aGstSimdShadowInfo[enmGstSimdReg].pszName, iemNativeLivenessGetPrevStateByGstReg(pReNative, enmGstSimdReg)));
5347#endif
5348#if defined(LOG_ENABLED) || defined(VBOX_STRICT)
5349 static const char * const s_pszIntendedUse[] = { "fetch", "update", "full write", "destructive calc" };
5350#endif
5351 uint32_t const fRegMask = !fNoVolatileRegs
5352 ? IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK
5353 : IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
5354
5355 /*
5356 * First check if the guest register value is already in a host register.
5357 */
5358 if (pReNative->Core.bmGstSimdRegShadows & RT_BIT_64(enmGstSimdReg))
5359 {
5360 uint8_t idxSimdReg = pReNative->Core.aidxGstSimdRegShadows[enmGstSimdReg];
5361 Assert(idxSimdReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
5362 Assert(pReNative->Core.aHstSimdRegs[idxSimdReg].fGstRegShadows & RT_BIT_64(enmGstSimdReg));
5363 Assert(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxSimdReg));
5364
5365 /* It's not supposed to be allocated... */
5366 if (!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxSimdReg)))
5367 {
5368 /*
5369 * If the register will trash the guest shadow copy, try find a
5370 * completely unused register we can use instead. If that fails,
5371 * we need to disassociate the host reg from the guest reg.
5372 */
5373 /** @todo would be nice to know if preserving the register is in any way helpful. */
5374 /* If the purpose is calculations, try to duplicate the register value as
5375 we'll be clobbering the shadow. */
5376 if ( enmIntendedUse == kIemNativeGstRegUse_Calculation
5377 && ( ~pReNative->Core.bmHstSimdRegs
5378 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
5379 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)))
5380 {
5381 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask);
5382
5383 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5384
5385 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5386 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5387 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5388 idxSimdReg = idxRegNew;
5389 }
5390 /* If the current register matches the restrictions, go ahead and allocate
5391 it for the caller. */
5392 else if (fRegMask & RT_BIT_32(idxSimdReg))
5393 {
5394 pReNative->Core.bmHstSimdRegs |= RT_BIT_32(idxSimdReg);
5395 pReNative->Core.aHstSimdRegs[idxSimdReg].enmWhat = kIemNativeWhat_Tmp;
5396 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5397 {
5398 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5399 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxSimdReg, idxSimdReg, enmLoadSz);
5400 else
5401 iemNativeSimdRegSetValidLoadFlag(pReNative, idxSimdReg, enmLoadSz);
5402 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Reusing %s for guest %s %s\n",
5403 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5404 }
5405 else
5406 {
5407 iemNativeSimdRegClearGstSimdRegShadowing(pReNative, idxSimdReg, *poff);
5408 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Grabbing %s for guest %s - destructive calc\n",
5409 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName));
5410 }
5411 }
5412 /* Otherwise, allocate a register that satisfies the caller and transfer
5413 the shadowing if compatible with the intended use. (This basically
5414 means the call wants a non-volatile register (RSP push/pop scenario).) */
5415 else
5416 {
5417 Assert(fNoVolatileRegs);
5418 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask & ~RT_BIT_32(idxSimdReg),
5419 !fNoVolatileRegs
5420 && enmIntendedUse == kIemNativeGstRegUse_Calculation);
5421 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5422 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5423 {
5424 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5425 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Transferring %s to %s for guest %s %s\n",
5426 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_apszIemNativeHstSimdRegNames[idxRegNew],
5427 g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5428 }
5429 else
5430 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for destructive calc\n",
5431 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5432 g_apszIemNativeHstSimdRegNames[idxRegNew]));
5433 idxSimdReg = idxRegNew;
5434 }
5435 }
5436 else
5437 {
5438 /*
5439 * Oops. Shadowed guest register already allocated!
5440 *
5441 * Allocate a new register, copy the value and, if updating, the
5442 * guest shadow copy assignment to the new register.
5443 */
5444 AssertMsg( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5445 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite,
5446 ("This shouldn't happen: idxSimdReg=%d enmGstSimdReg=%d enmIntendedUse=%s\n",
5447 idxSimdReg, enmGstSimdReg, s_pszIntendedUse[enmIntendedUse]));
5448
5449 /** @todo share register for readonly access. */
5450 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask,
5451 enmIntendedUse == kIemNativeGstRegUse_Calculation);
5452
5453 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5454 *poff = iemNativeSimdRegAllocLoadVecRegFromVecRegSz(pReNative, *poff, enmGstSimdReg, idxRegNew, idxSimdReg, enmLoadSz);
5455 else
5456 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5457
5458 if ( enmIntendedUse != kIemNativeGstRegUse_ForUpdate
5459 && enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5460 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Duplicated %s for guest %s into %s for %s\n",
5461 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5462 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5463 else
5464 {
5465 iemNativeSimdRegTransferGstSimdRegShadowing(pReNative, idxSimdReg, idxRegNew, enmGstSimdReg, *poff);
5466 Log12(("iemNativeSimdRegAllocTmpForGuestSimdReg: Moved %s for guest %s into %s for %s\n",
5467 g_apszIemNativeHstSimdRegNames[idxSimdReg], g_aGstSimdShadowInfo[enmGstSimdReg].pszName,
5468 g_apszIemNativeHstSimdRegNames[idxRegNew], s_pszIntendedUse[enmIntendedUse]));
5469 }
5470 idxSimdReg = idxRegNew;
5471 }
5472 Assert(RT_BIT_32(idxSimdReg) & fRegMask); /* See assumption in fNoVolatileRegs docs. */
5473
5474#ifdef VBOX_STRICT
5475 /* Strict builds: Check that the value is correct. */
5476 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5477 *poff = iemNativeEmitGuestSimdRegValueCheck(pReNative, *poff, idxSimdReg, enmGstSimdReg, enmLoadSz);
5478#endif
5479
5480 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5481 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5482 {
5483# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5484 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5485 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxSimdReg);
5486# endif
5487
5488 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5489 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5490 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5491 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5492 else
5493 {
5494 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5495 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5496 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5497 }
5498 }
5499
5500 return idxSimdReg;
5501 }
5502
5503 /*
5504 * Allocate a new register, load it with the guest value and designate it as a copy of the guest SIMD register.
5505 */
5506 uint8_t const idxRegNew = iemNativeSimdRegAllocTmpEx(pReNative, poff, fRegMask, enmIntendedUse == kIemNativeGstRegUse_Calculation);
5507
5508 if (enmIntendedUse != kIemNativeGstRegUse_ForFullWrite)
5509 *poff = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, *poff, idxRegNew, enmGstSimdReg, enmLoadSz);
5510 else
5511 iemNativeSimdRegSetValidLoadFlag(pReNative, idxRegNew, enmLoadSz);
5512
5513 if (enmIntendedUse != kIemNativeGstRegUse_Calculation)
5514 iemNativeSimdRegMarkAsGstSimdRegShadow(pReNative, idxRegNew, enmGstSimdReg, *poff);
5515
5516 if ( enmIntendedUse == kIemNativeGstRegUse_ForFullWrite
5517 || enmIntendedUse == kIemNativeGstRegUse_ForUpdate)
5518 {
5519# if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5520 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
5521 iemNativeDbgInfoAddGuestRegDirty(pReNative, true /*fSimdReg*/, enmGstSimdReg, idxRegNew);
5522# endif
5523
5524 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128)
5525 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5526 else if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128)
5527 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5528 else
5529 {
5530 Assert(enmLoadSz == kIemNativeGstSimdRegLdStSz_256);
5531 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_LO_U128(pReNative, enmGstSimdReg);
5532 IEMNATIVE_SIMD_REG_STATE_SET_DIRTY_HI_U128(pReNative, enmGstSimdReg);
5533 }
5534 }
5535
5536 Log12(("iemNativeRegAllocTmpForGuestSimdReg: Allocated %s for guest %s %s\n",
5537 g_apszIemNativeHstSimdRegNames[idxRegNew], g_aGstSimdShadowInfo[enmGstSimdReg].pszName, s_pszIntendedUse[enmIntendedUse]));
5538
5539 return idxRegNew;
5540}
5541
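/*
 * Summary of the allocation tail above: when the existing shadow register cannot be handed
 * out directly, it is either duplicated into, or its shadowing transferred to, a newly
 * allocated host register; when no shadow exists at all, a fresh host register is allocated
 * and, unless the intent is a full write, loaded from the guest context.  Calculation-only
 * allocations do not establish shadowing, and for the ForUpdate and ForFullWrite intents the
 * affected 128-bit halves are marked dirty so the delayed SIMD writeback stores them back to
 * CPUMCTX.
 */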
5542
5543/**
5544 * Flushes guest SIMD register shadow copies held by a set of host registers.
5545 *
5546 * This is used whenever calling an external helper, to ensure that we don't carry on
5547 * with any guest shadows in volatile registers, as these will get corrupted by the called helper.
5548 *
5549 * @param pReNative The native recompile state.
5550 * @param fHstSimdRegs Set of host SIMD registers to flush guest shadows for.
5551 */
5552DECLHIDDEN(void) iemNativeSimdRegFlushGuestShadowsByHostMask(PIEMRECOMPILERSTATE pReNative, uint32_t fHstSimdRegs) RT_NOEXCEPT
5553{
5554 /*
5555 * Reduce the mask by what's currently shadowed.
5556 */
5557 uint32_t const bmHstSimdRegsWithGstShadowOld = pReNative->Core.bmHstSimdRegsWithGstShadow;
5558 fHstSimdRegs &= bmHstSimdRegsWithGstShadowOld;
5559 if (fHstSimdRegs)
5560 {
5561 uint32_t const bmHstSimdRegsWithGstShadowNew = bmHstSimdRegsWithGstShadowOld & ~fHstSimdRegs;
5562 Log12(("iemNativeSimdRegFlushGuestShadowsByHostMask: flushing %#RX32 (%#RX32 -> %#RX32)\n",
5563 fHstSimdRegs, bmHstSimdRegsWithGstShadowOld, bmHstSimdRegsWithGstShadowNew));
5564 pReNative->Core.bmHstSimdRegsWithGstShadow = bmHstSimdRegsWithGstShadowNew;
5565 if (bmHstSimdRegsWithGstShadowNew)
5566 {
5567 /*
5568 * Partial (likely).
5569 */
5570 uint64_t fGstShadows = 0;
5571 do
5572 {
5573 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5574 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5575 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5576 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5577 Assert(!(( pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5578 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5579
5580 fGstShadows |= pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows;
5581 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5582 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5583 } while (fHstSimdRegs != 0);
5584 pReNative->Core.bmGstSimdRegShadows &= ~fGstShadows;
5585 }
5586 else
5587 {
5588 /*
5589 * Clear all.
5590 */
5591 do
5592 {
5593 unsigned const idxHstSimdReg = ASMBitFirstSetU32(fHstSimdRegs) - 1;
5594 Assert(!(pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxHstSimdReg)));
5595 Assert( (pReNative->Core.bmGstSimdRegShadows & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows)
5596 == pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows);
5597 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
5598 & pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows));
5599
5600 pReNative->Core.aHstSimdRegs[idxHstSimdReg].fGstRegShadows = 0;
5601 fHstSimdRegs &= ~RT_BIT_32(idxHstSimdReg);
5602 } while (fHstSimdRegs != 0);
5603 pReNative->Core.bmGstSimdRegShadows = 0;
5604 }
5605 }
5606}
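
/*
 * Typical call pattern (illustrative sketch; the exact mask name is an assumption): before
 * emitting a call to an external helper, flush the shadows held in the volatile host SIMD
 * registers of the calling convention, e.g.:
 *
 *     iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
 */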
5607#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5608
5609
5610
5611/*********************************************************************************************************************************
5612* Code emitters for flushing pending guest register writes and sanity checks *
5613*********************************************************************************************************************************/
5614
5615#ifdef VBOX_STRICT
5616/**
5617 * Does internal register allocator sanity checks.
5618 */
5619DECLHIDDEN(void) iemNativeRegAssertSanity(PIEMRECOMPILERSTATE pReNative)
5620{
5621 /*
5622 * Iterate host registers building a guest shadowing set.
5623 */
5624 uint64_t bmGstRegShadows = 0;
5625 uint32_t bmHstRegsWithGstShadow = pReNative->Core.bmHstRegsWithGstShadow;
5626 AssertMsg(!(bmHstRegsWithGstShadow & IEMNATIVE_REG_FIXED_MASK), ("%#RX32\n", bmHstRegsWithGstShadow));
5627 while (bmHstRegsWithGstShadow)
5628 {
5629 unsigned const idxHstReg = ASMBitFirstSetU32(bmHstRegsWithGstShadow) - 1;
5630 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs));
5631 bmHstRegsWithGstShadow &= ~RT_BIT_32(idxHstReg);
5632
5633 uint64_t fThisGstRegShadows = pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows;
5634 AssertMsg(fThisGstRegShadows != 0, ("idxHstReg=%d\n", idxHstReg));
5635 AssertMsg(fThisGstRegShadows < RT_BIT_64(kIemNativeGstReg_End), ("idxHstReg=%d %#RX64\n", idxHstReg, fThisGstRegShadows));
5636 bmGstRegShadows |= fThisGstRegShadows;
5637 while (fThisGstRegShadows)
5638 {
5639 unsigned const idxGstReg = ASMBitFirstSetU64(fThisGstRegShadows) - 1;
5640 fThisGstRegShadows &= ~RT_BIT_64(idxGstReg);
5641 AssertMsg(pReNative->Core.aidxGstRegShadows[idxGstReg] == idxHstReg,
5642 ("idxHstReg=%d aidxGstRegShadows[idxGstReg=%d]=%d\n",
5643 idxHstReg, idxGstReg, pReNative->Core.aidxGstRegShadows[idxGstReg]));
5644 }
5645 }
5646 AssertMsg(bmGstRegShadows == pReNative->Core.bmGstRegShadows,
5647 ("%RX64 vs %RX64; diff %RX64\n", bmGstRegShadows, pReNative->Core.bmGstRegShadows,
5648 bmGstRegShadows ^ pReNative->Core.bmGstRegShadows));
5649
5650 /*
5651 * Now the other way around, checking the guest to host index array.
5652 */
5653 bmHstRegsWithGstShadow = 0;
5654 bmGstRegShadows = pReNative->Core.bmGstRegShadows;
5655 Assert(bmGstRegShadows < RT_BIT_64(kIemNativeGstReg_End));
5656 while (bmGstRegShadows)
5657 {
5658 unsigned const idxGstReg = ASMBitFirstSetU64(bmGstRegShadows) - 1;
5659 Assert(idxGstReg < RT_ELEMENTS(pReNative->Core.aidxGstRegShadows));
5660 bmGstRegShadows &= ~RT_BIT_64(idxGstReg);
5661
5662 uint8_t const idxHstReg = pReNative->Core.aidxGstRegShadows[idxGstReg];
5663 AssertMsg(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs), ("aidxGstRegShadows[%d]=%d\n", idxGstReg, idxHstReg));
5664 AssertMsg(pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows & RT_BIT_64(idxGstReg),
5665 ("idxGstReg=%d idxHstReg=%d fGstRegShadows=%RX64\n",
5666 idxGstReg, idxHstReg, pReNative->Core.aHstRegs[idxHstReg].fGstRegShadows));
5667 bmHstRegsWithGstShadow |= RT_BIT_32(idxHstReg);
5668 }
5669 AssertMsg(bmHstRegsWithGstShadow == pReNative->Core.bmHstRegsWithGstShadow,
5670 ("%RX64 vs %RX64; diff %RX64\n", bmHstRegsWithGstShadow, pReNative->Core.bmHstRegsWithGstShadow,
5671 bmHstRegsWithGstShadow ^ pReNative->Core.bmHstRegsWithGstShadow));
5672}
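
/*
 * The above boils down to two invariants (pseudo-assertions, for illustration only):
 *
 *     forall idxHstReg in bmHstRegsWithGstShadow:
 *         aHstRegs[idxHstReg].fGstRegShadows != 0
 *         && forall idxGstReg in that mask: aidxGstRegShadows[idxGstReg] == idxHstReg
 *
 *     union of all aHstRegs[*].fGstRegShadows == bmGstRegShadows
 */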
5673#endif /* VBOX_STRICT */
5674
5675
5676/**
5677 * Flushes any delayed guest register writes.
5678 *
5679 * This must be called prior to calling CImpl functions and any helpers that use
5680 * the guest state (like raising exceptions) and such.
5681 *
5682 * @note This function does not flush any shadowing information for guest registers. This needs to be done by
5683 * the caller if it wishes to do so.
5684 */
5685DECL_HIDDEN_THROW(uint32_t)
5686iemNativeRegFlushPendingWritesSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint64_t fGstShwExcept, uint64_t fGstSimdShwExcept)
5687{
5688#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5689 if (!(fGstShwExcept & RT_BIT_64(kIemNativeGstReg_Pc)))
5690 off = iemNativeEmitPcWriteback(pReNative, off);
5691#else
5692 RT_NOREF(pReNative, fGstShwExcept);
5693#endif
5694
5695#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
5696 off = iemNativeRegFlushDirtyGuest(pReNative, off, ~fGstShwExcept);
5697#endif
5698
5699#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5700 off = iemNativeSimdRegFlushDirtyGuest(pReNative, off, ~fGstSimdShwExcept);
5701#endif
5702
5703 return off;
5704}
5705
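/*
 * Callers normally go through the iemNativeRegFlushPendingWrites() wrapper rather than this
 * slow path directly, as iemNativeEmitCImplCall() and iemNativeEmitThreadedCall() do further
 * down, e.g. (illustrative):
 *
 *     off = iemNativeRegFlushPendingWrites(pReNative, off);
 *     iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
 *
 * i.e. flushing the shadow bookkeeping remains a separate, explicit step as noted in the
 * function documentation above.
 */
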
5706#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
5707
5708# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
5709
5710/**
5711 * Checks if the value in @a idxPcReg matches IEMCPU::uPcUpdatingDebug.
5712 */
5713DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcDebugCheckWithReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxPcReg)
5714{
5715 Assert(idxPcReg != IEMNATIVE_REG_FIXED_TMP0);
5716 Assert(pReNative->Core.fDebugPcInitialized);
5717
5718 /* cmp [pVCpu->iem.s.uPcUpdatingDebug], pcreg */
5719# ifdef RT_ARCH_AMD64
5720 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5721 pCodeBuf[off++] = X86_OP_REX_W | (idxPcReg >= 8 ? X86_OP_REX_R : 0);
5722 pCodeBuf[off++] = 0x3b;
5723 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, idxPcReg & 7, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
5724# else
5725 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
5726 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, RT_UOFFSETOF(VMCPU, iem.s.uPcUpdatingDebug));
5727 off = iemNativeEmitCmpGprWithGprEx(pCodeBuf, off, IEMNATIVE_REG_FIXED_TMP0, idxPcReg);
5728# endif
5729
5730 uint32_t offFixup = off;
5731 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 1, kIemNativeInstrCond_e);
5732 off = iemNativeEmitBrkEx(pCodeBuf, off, UINT32_C(0x2200));
5733 iemNativeFixupFixedJump(pReNative, offFixup, off);
5734
5735 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5736 return off;
5737}
5738
5739
5740/**
5741 * Checks that the current RIP+offPc matches IEMCPU::uPcUpdatingDebug.
5742 */
5743DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcDebugCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5744{
5745 if (pReNative->Core.fDebugPcInitialized)
5746 {
5747 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc);
5748 if (pReNative->Core.offPc)
5749 {
5750 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
5751 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 32 : 8);
5752 off = iemNativeEmitGprEqGprPlusImmEx(pCodeBuf, off, idxTmpReg, idxPcReg, pReNative->Core.offPc);
5753 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5754 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxTmpReg);
5755 iemNativeRegFreeTmp(pReNative, idxTmpReg);
5756 }
5757 else
5758 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
5759 iemNativeRegFreeTmp(pReNative, idxPcReg);
5760 }
5761 return off;
5762}
5763
5764# endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG */
5765
5766/**
5767 * Emits code to update the guest RIP value by adding the current offset since the start of the last RIP update.
5768 */
5769DECL_HIDDEN_THROW(uint32_t) iemNativeEmitPcWritebackSlow(PIEMRECOMPILERSTATE pReNative, uint32_t off)
5770{
5771 Assert(pReNative->Core.offPc);
5772# if !defined(IEMNATIVE_WITH_TB_DEBUG_INFO) && !defined(VBOX_WITH_STATISTICS)
5773 Log4(("iemNativeEmitPcWritebackSlow: offPc=%#RX64 -> 0; off=%#x\n", pReNative->Core.offPc, off));
5774# else
5775 uint8_t const idxOldInstrPlusOne = pReNative->idxInstrPlusOneOfLastPcUpdate;
5776 uint8_t idxCurCall = pReNative->idxCurCall;
5777 uint8_t idxInstr = pReNative->pTbOrg->Thrd.paCalls[idxCurCall].idxInstr; /* unreliable */
5778 while (idxInstr == 0 && idxInstr + 1 < idxOldInstrPlusOne && idxCurCall > 0)
5779 idxInstr = pReNative->pTbOrg->Thrd.paCalls[--idxCurCall].idxInstr;
5780 pReNative->idxInstrPlusOneOfLastPcUpdate = RT_MAX(idxInstr + 1, idxOldInstrPlusOne);
5781 uint8_t const cInstrsSkipped = idxInstr <= idxOldInstrPlusOne ? 0 : idxInstr - idxOldInstrPlusOne;
5782 Log4(("iemNativeEmitPcWritebackSlow: offPc=%#RX64 -> 0; off=%#x; idxInstr=%u cInstrsSkipped=%u\n",
5783 pReNative->Core.offPc, off, idxInstr, cInstrsSkipped));
5784
5785 STAM_COUNTER_ADD(&pReNative->pVCpu->iem.s.StatNativePcUpdateDelayed, cInstrsSkipped);
5786
5787# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
5788 iemNativeDbgInfoAddNativeOffset(pReNative, off);
5789 iemNativeDbgInfoAddDelayedPcUpdate(pReNative, pReNative->Core.offPc, cInstrsSkipped);
5790# endif
5791# endif
5792
5793# ifndef IEMNATIVE_REG_FIXED_PC_DBG
5794 /* Allocate a temporary PC register. */
5795 uint8_t const idxPcReg = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc, kIemNativeGstRegUse_ForUpdate);
5796
5797 /* Perform the addition and store the result. */
5798 off = iemNativeEmitAddGprImm(pReNative, off, idxPcReg, pReNative->Core.offPc);
5799 off = iemNativeEmitStoreGprToVCpuU64(pReNative, off, idxPcReg, RT_UOFFSETOF(VMCPU, cpum.GstCtx.rip));
5800# ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
5801 off = iemNativeEmitPcDebugCheckWithReg(pReNative, off, idxPcReg);
5802# endif
5803
5804 /* Free but don't flush the PC register. */
5805 iemNativeRegFreeTmp(pReNative, idxPcReg);
5806# else
5807 /* Compare the shadow with the context value, they should match. */
5808 off = iemNativeEmitAddGprImm(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, pReNative->Core.offPc);
5809 off = iemNativeEmitGuestRegValueCheck(pReNative, off, IEMNATIVE_REG_FIXED_PC_DBG, kIemNativeGstReg_Pc);
5810# endif
5811
5812 pReNative->Core.offPc = 0;
5813
5814 return off;
5815}
5816
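/*
 * Example with made-up figures: three recompiled instructions of 2, 3 and 5 bytes merely
 * accumulate pReNative->Core.offPc = 10; only when a flush is forced does the code above
 * emit a single add-plus-store along the lines of
 *
 *     add  <pc-reg>, 10
 *     mov  [pVCpu + cpum.GstCtx.rip], <pc-reg>
 *
 * (AMD64 flavour), instead of one RIP update per recompiled instruction.
 */
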
5817#endif /* IEMNATIVE_WITH_DELAYED_PC_UPDATING */
5818
5819
5820/*********************************************************************************************************************************
5821* Code Emitters (larger snippets) *
5822*********************************************************************************************************************************/
5823
5824/**
5825 * Loads the guest shadow register @a enmGstReg into host reg @a idxHstReg, zero
5826 * extending to 64-bit width.
5827 *
5828 * @returns New code buffer offset on success, UINT32_MAX on failure.
5829 * @param pReNative The native recompile state.
5830 * @param off The current code buffer position.
5831 * @param idxHstReg The host register to load the guest register value into.
5832 * @param enmGstReg The guest register to load.
5833 *
5834 * @note This does not mark @a idxHstReg as having a shadow copy of @a enmGstReg,
5835 * that is something the caller needs to do if applicable.
5836 */
5837DECL_HIDDEN_THROW(uint32_t)
5838iemNativeEmitLoadGprWithGstShadowReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstReg, IEMNATIVEGSTREG enmGstReg)
5839{
5840 Assert((unsigned)enmGstReg < (unsigned)kIemNativeGstReg_End);
5841 Assert(g_aGstShadowInfo[enmGstReg].cb != 0);
5842
5843 switch (g_aGstShadowInfo[enmGstReg].cb)
5844 {
5845 case sizeof(uint64_t):
5846 return iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5847 case sizeof(uint32_t):
5848 return iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5849 case sizeof(uint16_t):
5850 return iemNativeEmitLoadGprFromVCpuU16(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5851#if 0 /* not present in the table. */
5852 case sizeof(uint8_t):
5853 return iemNativeEmitLoadGprFromVCpuU8(pReNative, off, idxHstReg, g_aGstShadowInfo[enmGstReg].off);
5854#endif
5855 default:
5856 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5857 }
5858}
5859
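/*
 * Example: the size recorded in g_aGstShadowInfo selects the loader above, so a 64-bit
 * register like kIemNativeGstReg_Pc goes through iemNativeEmitLoadGprFromVCpuU64, while a
 * 16-bit entry (segment selector style) goes through iemNativeEmitLoadGprFromVCpuU16; in
 * either case the value ends up zero extended to the full 64-bit host register.
 */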
5860
5861#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
5862/**
5863 * Loads the guest shadow SIMD register @a enmGstSimdReg into host SIMD reg @a idxHstSimdReg.
5864 *
5865 * @returns New code buffer offset on success, UINT32_MAX on failure.
5866 * @param pReNative The recompiler state.
5867 * @param off The current code buffer position.
5868 * @param idxHstSimdReg The host register to load the guest register value into.
5869 * @param enmGstSimdReg The guest register to load.
5870 * @param enmLoadSz The load size of the register.
5871 *
5872 * @note This does not mark @a idxHstSimdReg as having a shadow copy of @a enmGstSimdReg,
5873 * that is something the caller needs to do if applicable.
5874 */
5875DECL_HIDDEN_THROW(uint32_t)
5876iemNativeEmitLoadSimdRegWithGstShadowSimdReg(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxHstSimdReg,
5877 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
5878{
5879 Assert((unsigned)enmGstSimdReg < RT_ELEMENTS(g_aGstSimdShadowInfo));
5880
5881 iemNativeSimdRegSetValidLoadFlag(pReNative, idxHstSimdReg, enmLoadSz);
5882 switch (enmLoadSz)
5883 {
5884 case kIemNativeGstSimdRegLdStSz_256:
5885 off = iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5886 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5887 case kIemNativeGstSimdRegLdStSz_Low128:
5888 return iemNativeEmitSimdLoadVecRegFromVCpuLowU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
5889 case kIemNativeGstSimdRegLdStSz_High128:
5890 return iemNativeEmitSimdLoadVecRegFromVCpuHighU128(pReNative, off, idxHstSimdReg, g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
5891 default:
5892 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IPE_NOT_REACHED_DEFAULT_CASE));
5893 }
5894}
5895#endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
5896
5897#ifdef VBOX_STRICT
5898
5899/**
5900 * Emitting code that checks that the value of @a idxReg is UINT32_MAX or less.
5901 *
5902 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5903 * Trashes EFLAGS on AMD64.
5904 */
5905DECL_HIDDEN_THROW(uint32_t)
5906iemNativeEmitTop32BitsClearCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg)
5907{
5908# ifdef RT_ARCH_AMD64
5909 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
5910
5911 /* rol reg64, 32 */
5912 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5913 pbCodeBuf[off++] = 0xc1;
5914 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5915 pbCodeBuf[off++] = 32;
5916
5917 /* test reg32, ffffffffh */
5918 if (idxReg >= 8)
5919 pbCodeBuf[off++] = X86_OP_REX_B;
5920 pbCodeBuf[off++] = 0xf7;
5921 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5922 pbCodeBuf[off++] = 0xff;
5923 pbCodeBuf[off++] = 0xff;
5924 pbCodeBuf[off++] = 0xff;
5925 pbCodeBuf[off++] = 0xff;
5926
5927 /* je/jz +1 */
5928 pbCodeBuf[off++] = 0x74;
5929 pbCodeBuf[off++] = 0x01;
5930
5931 /* int3 */
5932 pbCodeBuf[off++] = 0xcc;
5933
5934 /* rol reg64, 32 */
5935 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
5936 pbCodeBuf[off++] = 0xc1;
5937 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
5938 pbCodeBuf[off++] = 32;
5939
5940# elif defined(RT_ARCH_ARM64)
5941 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
5942 /* lsr tmp0, reg64, #32 */
5943 pu32CodeBuf[off++] = Armv8A64MkInstrLsrImm(IEMNATIVE_REG_FIXED_TMP0, idxReg, 32);
5944 /* cbz tmp0, +1 */
5945 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
5946 /* brk #0x1100 */
5947 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x1100));
5948
5949# else
5950# error "Port me!"
5951# endif
5952 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
5953 return off;
5954}
5955
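/*
 * The emitted sequence, in short:
 *
 *   AMD64:  rol reg64,32 ; test reg32,0ffffffffh ; jz +1 ; int3 ; rol reg64,32
 *   ARM64:  lsr tmp0, reg64, #32 ; cbz tmp0, +1 ; brk #0x1100
 *
 * i.e. the upper 32 bits are brought into view, tested, and a breakpoint is raised if any of
 * them are set, leaving the checked register unchanged afterwards.
 */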
5956
5957/**
5958 * Emitting code that checks that the content of register @a idxReg is the same
5959 * as what's in the guest register @a enmGstReg, resulting in a breakpoint
5960 * instruction if that's not the case.
5961 *
5962 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
5963 * Trashes EFLAGS on AMD64.
5964 */
5965DECL_HIDDEN_THROW(uint32_t)
5966iemNativeEmitGuestRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxReg, IEMNATIVEGSTREG enmGstReg)
5967{
5968#if defined(IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK)
5969 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
5970 if (pReNative->Core.bmGstRegShadowDirty & RT_BIT_64(enmGstReg))
5971 return off;
5972#endif
5973
5974# ifdef RT_ARCH_AMD64
5975 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
5976
5977 /* cmp reg, [mem] */
5978 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint8_t))
5979 {
5980 if (idxReg >= 8)
5981 pbCodeBuf[off++] = X86_OP_REX_R;
5982 pbCodeBuf[off++] = 0x38;
5983 }
5984 else
5985 {
5986 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint64_t))
5987 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_R);
5988 else
5989 {
5990 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint16_t))
5991 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
5992 else
5993 AssertStmt(g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t),
5994 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6));
5995 if (idxReg >= 8)
5996 pbCodeBuf[off++] = X86_OP_REX_R;
5997 }
5998 pbCodeBuf[off++] = 0x39;
5999 }
6000 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxReg, g_aGstShadowInfo[enmGstReg].off);
6001
6002 /* je/jz +1 */
6003 pbCodeBuf[off++] = 0x74;
6004 pbCodeBuf[off++] = 0x01;
6005
6006 /* int3 */
6007 pbCodeBuf[off++] = 0xcc;
6008
6009 /* For values smaller than the register size, we must check that the rest
6010 of the register is all zeros. */
6011 if (g_aGstShadowInfo[enmGstReg].cb < sizeof(uint32_t))
6012 {
6013 /* test reg64, imm32 */
6014 pbCodeBuf[off++] = X86_OP_REX_W | (idxReg < 8 ? 0 : X86_OP_REX_B);
6015 pbCodeBuf[off++] = 0xf7;
6016 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 0, idxReg & 7);
6017 pbCodeBuf[off++] = 0;
6018 pbCodeBuf[off++] = g_aGstShadowInfo[enmGstReg].cb > sizeof(uint8_t) ? 0 : 0xff;
6019 pbCodeBuf[off++] = 0xff;
6020 pbCodeBuf[off++] = 0xff;
6021
6022 /* je/jz +1 */
6023 pbCodeBuf[off++] = 0x74;
6024 pbCodeBuf[off++] = 0x01;
6025
6026 /* int3 */
6027 pbCodeBuf[off++] = 0xcc;
6028 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6029 }
6030 else
6031 {
6032 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6033 if (g_aGstShadowInfo[enmGstReg].cb == sizeof(uint32_t))
6034 iemNativeEmitTop32BitsClearCheck(pReNative, off, idxReg);
6035 }
6036
6037# elif defined(RT_ARCH_ARM64)
6038 /* mov TMP0, [gstreg] */
6039 off = iemNativeEmitLoadGprWithGstShadowReg(pReNative, off, IEMNATIVE_REG_FIXED_TMP0, enmGstReg);
6040
6041 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6042 /* sub tmp0, tmp0, idxReg */
6043 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_REG_FIXED_TMP0, idxReg);
6044 /* cbz tmp0, +1 */
6045 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6046 /* brk #0x1000+enmGstReg */
6047 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstReg | UINT32_C(0x1000));
6048 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6049
6050# else
6051# error "Port me!"
6052# endif
6053 return off;
6054}
6055
6056
6057# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
6058# ifdef RT_ARCH_AMD64
6059/**
6060 * Helper for AMD64 to emit code which checks the low 128 bits of the given SIMD register against the given vCPU offset.
6061 */
6062DECL_FORCE_INLINE_THROW(uint32_t) iemNativeEmitGuestSimdRegValueCheckVCpuU128(uint8_t * const pbCodeBuf, uint32_t off, uint8_t idxSimdReg, uint32_t offVCpu)
6063{
6064 /* pcmpeqq vectmp0, [gstreg] (ASSUMES SSE4.1) */
6065 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6066 if (idxSimdReg >= 8)
6067 pbCodeBuf[off++] = X86_OP_REX_R;
6068 pbCodeBuf[off++] = 0x0f;
6069 pbCodeBuf[off++] = 0x38;
6070 pbCodeBuf[off++] = 0x29;
6071 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxSimdReg, offVCpu);
6072
6073 /* pextrq tmp0, vectmp0, #0 (ASSUMES SSE4.1). */
6074 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6075 pbCodeBuf[off++] = X86_OP_REX_W
6076 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6077 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6078 pbCodeBuf[off++] = 0x0f;
6079 pbCodeBuf[off++] = 0x3a;
6080 pbCodeBuf[off++] = 0x16;
6081 pbCodeBuf[off++] = 0xeb;
6082 pbCodeBuf[off++] = 0x00;
6083
6084 /* cmp tmp0, 0xffffffffffffffff. */
6085 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6086 pbCodeBuf[off++] = 0x83;
6087 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6088 pbCodeBuf[off++] = 0xff;
6089
6090 /* je/jz +1 */
6091 pbCodeBuf[off++] = 0x74;
6092 pbCodeBuf[off++] = 0x01;
6093
6094 /* int3 */
6095 pbCodeBuf[off++] = 0xcc;
6096
6097 /* pextrq tmp0, vectmp0, #1 (ASSUMES SSE4.1). */
6098 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP;
6099 pbCodeBuf[off++] = X86_OP_REX_W
6100 | (idxSimdReg < 8 ? 0 : X86_OP_REX_R)
6101 | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6102 pbCodeBuf[off++] = 0x0f;
6103 pbCodeBuf[off++] = 0x3a;
6104 pbCodeBuf[off++] = 0x16;
6105 pbCodeBuf[off++] = 0xeb;
6106 pbCodeBuf[off++] = 0x01;
6107
6108 /* cmp tmp0, 0xffffffffffffffff. */
6109 pbCodeBuf[off++] = X86_OP_REX_W | (IEMNATIVE_REG_FIXED_TMP0 < 8 ? 0 : X86_OP_REX_B);
6110 pbCodeBuf[off++] = 0x83;
6111 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 7, IEMNATIVE_REG_FIXED_TMP0 & 7);
6112 pbCodeBuf[off++] = 0xff;
6113
6114 /* je/jz +1 */
6115 pbCodeBuf[off++] = 0x74;
6116 pbCodeBuf[off++] = 0x01;
6117
6118 /* int3 */
6119 pbCodeBuf[off++] = 0xcc;
6120
6121 return off;
6122}
6123# endif
6124
6125
6126/**
6127 * Emitting code that checks that the content of SIMD register @a idxSimdReg is the same
6128 * as what's in the guest register @a enmGstSimdReg, resulting in a breakpoint
6129 * instruction if that's not the case.
6130 *
6131 * @note May of course trash IEMNATIVE_SIMD_REG_FIXED_TMP0 and IEMNATIVE_REG_FIXED_TMP0.
6132 * Trashes EFLAGS on AMD64.
6133 */
6134DECL_HIDDEN_THROW(uint32_t)
6135iemNativeEmitGuestSimdRegValueCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxSimdReg,
6136 IEMNATIVEGSTSIMDREG enmGstSimdReg, IEMNATIVEGSTSIMDREGLDSTSZ enmLoadSz)
6137{
6138 /* We can't check the value against what's in CPUMCTX if the register is already marked as dirty, so skip the check. */
6139 if ( ( enmLoadSz == kIemNativeGstSimdRegLdStSz_256
6140 && ( IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg)
6141 || IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6142 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128
6143 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_LO_U128(pReNative, enmGstSimdReg))
6144 || ( enmLoadSz == kIemNativeGstSimdRegLdStSz_High128
6145 && IEMNATIVE_SIMD_REG_STATE_IS_DIRTY_HI_U128(pReNative, enmGstSimdReg)))
6146 return off;
6147
6148# ifdef RT_ARCH_AMD64
6149 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6150 {
6151 /* movdqa vectmp0, idxSimdReg */
6152 off = iemNativeEmitSimdLoadVecRegFromVecRegU128(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6153
6154 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 44);
6155
6156 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6157 g_aGstSimdShadowInfo[enmGstSimdReg].offXmm);
6158 }
6159
6160 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6161 {
6162 /* Because CPUMCTX stores the high 128 bits separately, we need to do this all over again for the high part. */
6163 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 50);
6164
6165 /* vextracti128 vectmp0, idxSimdReg, 1 */
6166 pbCodeBuf[off++] = X86_OP_VEX3;
6167 pbCodeBuf[off++] = (idxSimdReg < 8 ? X86_OP_VEX3_BYTE1_R : 0)
6168 | X86_OP_VEX3_BYTE1_X
6169 | (IEMNATIVE_SIMD_REG_FIXED_TMP0 < 8 ? X86_OP_VEX3_BYTE1_B : 0)
6170 | 0x03; /* Opcode map */
6171 pbCodeBuf[off++] = X86_OP_VEX3_BYTE2_MAKE_NO_VVVV(false /*f64BitOpSz*/, true /*f256BitAvx*/, X86_OP_VEX3_BYTE2_P_066H);
6172 pbCodeBuf[off++] = 0x39;
6173 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxSimdReg & 7, IEMNATIVE_SIMD_REG_FIXED_TMP0 & 7);
6174 pbCodeBuf[off++] = 0x01;
6175
6176 off = iemNativeEmitGuestSimdRegValueCheckVCpuU128(pbCodeBuf, off, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6177 g_aGstSimdShadowInfo[enmGstSimdReg].offYmm);
6178 }
6179# elif defined(RT_ARCH_ARM64)
6180 /* mov vectmp0, [gstreg] */
6181 off = iemNativeEmitLoadSimdRegWithGstShadowSimdReg(pReNative, off, IEMNATIVE_SIMD_REG_FIXED_TMP0, enmGstSimdReg, enmLoadSz);
6182
6183 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_Low128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6184 {
6185 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6186 /* eor vectmp0, vectmp0, idxSimdReg */
6187 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, idxSimdReg);
6188 /* uaddlv vectmp0, vectmp0.16B */
6189 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0, kArmv8InstrUAddLVSz_16B);
6190 /* umov tmp0, vectmp0.H[0] */
6191 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0,
6192 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6193 /* cbz tmp0, +1 */
6194 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6195 /* brk #0x1000+enmGstReg */
6196 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6197 }
6198
6199 if (enmLoadSz == kIemNativeGstSimdRegLdStSz_High128 || enmLoadSz == kIemNativeGstSimdRegLdStSz_256)
6200 {
6201 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6);
6202 /* eor vectmp0 + 1, vectmp0 + 1, idxSimdReg */
6203 pu32CodeBuf[off++] = Armv8A64MkVecInstrEor(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, idxSimdReg + 1);
6204 /* uaddlv vectmp0 + 1, (vectmp0 + 1).16B */
6205 pu32CodeBuf[off++] = Armv8A64MkVecInstrUAddLV(IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1, kArmv8InstrUAddLVSz_16B);
6206 /* umov tmp0, (vectmp0 + 1).H[0] */
6207 pu32CodeBuf[off++] = Armv8A64MkVecInstrUmov(IEMNATIVE_REG_FIXED_TMP0, IEMNATIVE_SIMD_REG_FIXED_TMP0 + 1,
6208 0 /*idxElem*/, kArmv8InstrUmovInsSz_U16, false /*f64Bit*/);
6209 /* cbz tmp0, +1 */
6210 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, IEMNATIVE_REG_FIXED_TMP0);
6211 /* brk #0x1000+enmGstReg */
6212 pu32CodeBuf[off++] = Armv8A64MkInstrBrk((uint32_t)enmGstSimdReg | UINT32_C(0x1000));
6213 }
6214
6215# else
6216# error "Port me!"
6217# endif
6218
6219 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6220 return off;
6221}
6222# endif /* IEMNATIVE_WITH_SIMD_REG_ALLOCATOR */
6223
6224
6225/**
6226 * Emitting code that checks that IEMCPU::fExec matches @a fExec for all
6227 * important bits.
6228 *
6229 * @note May of course trash IEMNATIVE_REG_FIXED_TMP0.
6230 * Trashes EFLAGS on AMD64.
6231 */
6232DECL_HIDDEN_THROW(uint32_t)
6233iemNativeEmitExecFlagsCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fExec)
6234{
6235 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6236 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
6237 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp, IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK);
6238 off = iemNativeEmitCmpGpr32WithImm(pReNative, off, idxRegTmp, fExec & IEMTB_F_KEY_MASK);
6239
6240#ifdef RT_ARCH_AMD64
6241 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6242
6243 /* je/jz +1 */
6244 pbCodeBuf[off++] = 0x74;
6245 pbCodeBuf[off++] = 0x01;
6246
6247 /* int3 */
6248 pbCodeBuf[off++] = 0xcc;
6249
6250# elif defined(RT_ARCH_ARM64)
6251 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6252
6253 /* b.eq +1 */
6254 pu32CodeBuf[off++] = Armv8A64MkInstrBCond(kArmv8InstrCond_Eq, 2);
6255 /* brk #0x2000 */
6256 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(UINT32_C(0x2000));
6257
6258# else
6259# error "Port me!"
6260# endif
6261 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6262
6263 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6264 return off;
6265}
6266
6267#endif /* VBOX_STRICT */
6268
6269
6270#ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
6271/**
6272 * Worker for IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK.
6273 */
6274DECL_HIDDEN_THROW(uint32_t)
6275iemNativeEmitEFlagsSkippingCheck(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fEflNeeded)
6276{
6277 uint32_t const offVCpu = RT_UOFFSETOF(VMCPU, iem.s.fSkippingEFlags);
6278
6279 fEflNeeded &= X86_EFL_STATUS_BITS;
6280 if (fEflNeeded)
6281 {
6282# ifdef RT_ARCH_AMD64
6283 /* test dword [pVCpu + offVCpu], imm32 */
6284 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 13);
6285 if (fEflNeeded <= 0xff)
6286 {
6287 pCodeBuf[off++] = 0xf6;
6288 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6289 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6290 }
6291 else
6292 {
6293 pCodeBuf[off++] = 0xf7;
6294 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 0, offVCpu);
6295 pCodeBuf[off++] = RT_BYTE1(fEflNeeded);
6296 pCodeBuf[off++] = RT_BYTE2(fEflNeeded);
6297 pCodeBuf[off++] = RT_BYTE3(fEflNeeded);
6298 pCodeBuf[off++] = RT_BYTE4(fEflNeeded);
6299 }
6300
6301 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 3, kIemNativeInstrCond_e);
6302 pCodeBuf[off++] = 0xcc;
6303
6304 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6305
6306# else
6307 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
6308 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxRegTmp, offVCpu);
6309 off = iemNativeEmitTestAnyBitsInGpr(pReNative, off, idxRegTmp, fEflNeeded);
6310# ifdef RT_ARCH_ARM64
6311 off = iemNativeEmitJzToFixed(pReNative, off, off + 2);
6312 off = iemNativeEmitBrk(pReNative, off, 0x7777);
6313# else
6314# error "Port me!"
6315# endif
6316 iemNativeRegFreeTmp(pReNative, idxRegTmp);
6317# endif
6318 }
6319 return off;
6320}
6321#endif /* IEMNATIVE_STRICT_EFLAGS_SKIPPING */
6322
6323
6324/**
6325 * Emits a code for checking the return code of a call and rcPassUp, returning
6326 * from the code if either are non-zero.
6327 */
6328DECL_HIDDEN_THROW(uint32_t)
6329iemNativeEmitCheckCallRetAndPassUp(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr)
6330{
6331#ifdef RT_ARCH_AMD64
6332 /*
6333 * AMD64: eax = call status code.
6334 */
6335
6336 /* edx = rcPassUp */
6337 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, X86_GREG_xDX, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6338# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6339 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, idxInstr);
6340# endif
6341
6342 /* edx = eax | rcPassUp */
6343 uint8_t *pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
6344 pbCodeBuf[off++] = 0x0b; /* or edx, eax */
6345 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xDX, X86_GREG_xAX);
6346 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6347
6348 /* Jump to non-zero status return path. */
6349 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_NonZeroRetOrPassUp);
6350
6351 /* done. */
6352
6353#elif RT_ARCH_ARM64
6354 /*
6355 * ARM64: w0 = call status code.
6356 */
6357# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6358 off = iemNativeEmitLoadGprImm64(pReNative, off, ARMV8_A64_REG_X2, idxInstr);
6359# endif
6360 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, ARMV8_A64_REG_X3, RT_UOFFSETOF(VMCPUCC, iem.s.rcPassUp));
6361
6362 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 3);
6363
6364 pu32CodeBuf[off++] = Armv8A64MkInstrOrr(ARMV8_A64_REG_X4, ARMV8_A64_REG_X3, ARMV8_A64_REG_X0, false /*f64Bit*/);
6365
6366 off = iemNativeEmitTestIfGprIsNotZeroAndTbExitEx(pReNative, pu32CodeBuf, off, ARMV8_A64_REG_X4, true /*f64Bit*/,
6367 kIemNativeLabelType_NonZeroRetOrPassUp);
6368
6369#else
6370# error "port me"
6371#endif
6372 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6373 RT_NOREF_PV(idxInstr);
6374 return off;
6375}
6376
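/*
 * In effect this emits (illustrative pseudo-C):
 *
 *     if ((uint32_t)rcCall | (uint32_t)pVCpu->iem.s.rcPassUp)
 *         goto NonZeroRetOrPassUp;
 *
 * ORing the call status and rcPassUp together so a single conditional branch covers both
 * the "helper failed" and the "status to pass up" cases.
 */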
6377
6378/**
6379 * Emits a call to a CImpl function or something similar.
6380 */
6381DECL_HIDDEN_THROW(uint32_t)
6382iemNativeEmitCImplCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxInstr, uint64_t fGstShwFlush, uintptr_t pfnCImpl,
6383 uint8_t cbInstr, uint8_t cAddParams, uint64_t uParam0, uint64_t uParam1, uint64_t uParam2)
6384{
6385 /* Writeback everything. */
6386 off = iemNativeRegFlushPendingWrites(pReNative, off);
6387
6388 /*
6389 * Flush stuff. PC and EFlags are implicitly flushed, the latter because we
6390 * don't do with/without flags variants of defer-to-cimpl stuff at the moment.
6391 */
6392 fGstShwFlush = iemNativeCImplFlagsToGuestShadowFlushMask(pReNative->fCImpl,
6393 fGstShwFlush
6394 | RT_BIT_64(kIemNativeGstReg_Pc)
6395 | RT_BIT_64(kIemNativeGstReg_EFlags));
6396 iemNativeRegFlushGuestShadows(pReNative, fGstShwFlush);
6397
6398 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6399
6400 /*
6401 * Load the parameters.
6402 */
6403#if defined(RT_OS_WINDOWS) && defined(VBOXSTRICTRC_STRICT_ENABLED)
6404 /* Special handling for the hidden VBOXSTRICTRC return pointer. */
6405 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6406 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG2_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6407 if (cAddParams > 0)
6408 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam0);
6409 if (cAddParams > 1)
6410 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam1);
6411 if (cAddParams > 2)
6412 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG1, uParam2);
6413 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6414
6415#else
6416 AssertCompile(IEMNATIVE_CALL_ARG_GREG_COUNT >= 4);
6417 off = iemNativeEmitLoadGprFromGpr( pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6418 off = iemNativeEmitLoadGprImm64( pReNative, off, IEMNATIVE_CALL_ARG1_GREG, cbInstr); /** @todo 8-bit reg load opt for amd64 */
6419 if (cAddParams > 0)
6420 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, uParam0);
6421 if (cAddParams > 1)
6422 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, uParam1);
6423 if (cAddParams > 2)
6424# if IEMNATIVE_CALL_ARG_GREG_COUNT >= 5
6425 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG4_GREG, uParam2);
6426# else
6427 off = iemNativeEmitStoreImm64ByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, uParam2);
6428# endif
6429#endif
6430
6431 /*
6432 * Make the call.
6433 */
6434 off = iemNativeEmitCallImm(pReNative, off, pfnCImpl);
6435
6436#if defined(RT_ARCH_AMD64) && defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6437 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6438#endif
6439
6440#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6441 pReNative->Core.fDebugPcInitialized = false;
6442 Log4(("fDebugPcInitialized=false cimpl off=%#x (v2)\n", off));
6443#endif
6444
6445 /*
6446 * Check the status code.
6447 */
6448 return iemNativeEmitCheckCallRetAndPassUp(pReNative, off, idxInstr);
6449}
6450
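/*
 * Illustrative caller sketch; iemCImpl_SomeWorker, cbInstr and uSomeParam are placeholders:
 *
 *     off = iemNativeEmitCImplCall(pReNative, off, idxInstr, fGstShwFlush,
 *                                  (uintptr_t)iemCImpl_SomeWorker, cbInstr, 1, uSomeParam, 0, 0);
 *
 * where fGstShwFlush names the guest registers the worker modifies (PC and EFlags are added
 * implicitly) and the worker's status code is routed through iemNativeEmitCheckCallRetAndPassUp().
 */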
6451
6452/**
6453 * Emits a call to a threaded worker function.
6454 */
6455DECL_HIDDEN_THROW(uint32_t)
6456iemNativeEmitThreadedCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6457{
6458 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6459
6460 /* We don't know what the threaded function is doing so we must flush all pending writes. */
6461 off = iemNativeRegFlushPendingWrites(pReNative, off);
6462
6463 iemNativeRegFlushGuestShadows(pReNative, UINT64_MAX); /** @todo optimize this */
6464 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, 4);
6465
6466#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6467 /* The threaded function may throw / long jmp, so set current instruction
6468 number if we're counting. */
6469 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr));
6470#endif
6471
6472 uint8_t const cParams = g_acIemThreadedFunctionUsedArgs[pCallEntry->enmFunction];
6473
6474#ifdef RT_ARCH_AMD64
6475 /* Load the parameters and emit the call. */
6476# ifdef RT_OS_WINDOWS
6477# ifndef VBOXSTRICTRC_STRICT_ENABLED
6478 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6479 if (cParams > 0)
6480 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[0]);
6481 if (cParams > 1)
6482 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[1]);
6483 if (cParams > 2)
6484 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[2]);
6485# else /* VBOXSTRICTRC: Returned via hidden parameter. Sigh. */
6486 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, IEMNATIVE_REG_FIXED_PVMCPU);
6487 if (cParams > 0)
6488 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x8, pCallEntry->auParams[0]);
6489 if (cParams > 1)
6490 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x9, pCallEntry->auParams[1]);
6491 if (cParams > 2)
6492 {
6493 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_x10, pCallEntry->auParams[2]);
6494 off = iemNativeEmitStoreGprByBp(pReNative, off, IEMNATIVE_FP_OFF_STACK_ARG0, X86_GREG_x10);
6495 }
6496 off = iemNativeEmitLeaGprByBp(pReNative, off, X86_GREG_xCX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict */
6497# endif /* VBOXSTRICTRC_STRICT_ENABLED */
6498# else
6499 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6500 if (cParams > 0)
6501 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xSI, pCallEntry->auParams[0]);
6502 if (cParams > 1)
6503 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xDX, pCallEntry->auParams[1]);
6504 if (cParams > 2)
6505 off = iemNativeEmitLoadGprImm64(pReNative, off, X86_GREG_xCX, pCallEntry->auParams[2]);
6506# endif
6507
6508 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6509
6510# if defined(VBOXSTRICTRC_STRICT_ENABLED) && defined(RT_OS_WINDOWS)
6511 off = iemNativeEmitLoadGprByBpU32(pReNative, off, X86_GREG_xAX, IEMNATIVE_FP_OFF_IN_SHADOW_ARG0); /* rcStrict (see above) */
6512# endif
6513
6514#elif RT_ARCH_ARM64
6515 /*
6516 * ARM64:
6517 */
6518 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6519 if (cParams > 0)
6520 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, pCallEntry->auParams[0]);
6521 if (cParams > 1)
6522 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG2_GREG, pCallEntry->auParams[1]);
6523 if (cParams > 2)
6524 off = iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_ARG3_GREG, pCallEntry->auParams[2]);
6525
6526 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_apfnIemThreadedFunctions[pCallEntry->enmFunction]);
6527
6528#else
6529# error "port me"
6530#endif
6531
6532#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
6533 pReNative->Core.fDebugPcInitialized = false;
6534 Log4(("fDebugPcInitialized=false todo off=%#x (v2)\n", off));
6535#endif
6536
6537 /*
6538 * Check the status code.
6539 */
6540 off = iemNativeEmitCheckCallRetAndPassUp(pReNative, off, pCallEntry->idxInstr);
6541
6542 return off;
6543}
6544
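/*
 * In short, a threaded call always follows the same pattern: flush pending writes, flush all
 * guest shadows, load pVCpu and up to three auParams into the calling-convention registers,
 * call the g_apfnIemThreadedFunctions entry, and finally check the returned status via
 * iemNativeEmitCheckCallRetAndPassUp().  On Windows/AMD64 with VBOXSTRICTRC_STRICT_ENABLED the
 * status additionally travels through the hidden return-buffer argument.
 */
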
6545#ifdef VBOX_WITH_STATISTICS
6546
6547/**
6548 * Emits code to update the thread call statistics.
6549 */
6550DECL_INLINE_THROW(uint32_t)
6551iemNativeEmitThreadCallStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTHRDEDCALLENTRY pCallEntry)
6552{
6553 /*
6554 * Update threaded function stats.
6555 */
6556 uint32_t const offVCpu = RT_UOFFSETOF_DYN(VMCPUCC, iem.s.acThreadedFuncStats[pCallEntry->enmFunction]);
6557 AssertCompile(sizeof(pReNative->pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction]) == sizeof(uint32_t));
6558# if defined(RT_ARCH_ARM64)
6559 uint8_t const idxTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6560 uint8_t const idxTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6561 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, idxTmp1, idxTmp2, offVCpu);
6562 iemNativeRegFreeTmp(pReNative, idxTmp1);
6563 iemNativeRegFreeTmp(pReNative, idxTmp2);
6564# else
6565 off = iemNativeEmitIncU32CounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offVCpu);
6566# endif
6567 return off;
6568}
6569
6570
6571/**
6572 * Emits code to update the TB exit reason statistics.
6573 */
6574DECL_INLINE_THROW(uint32_t)
6575iemNativeEmitNativeTbExitStats(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t const offVCpu)
6576{
6577 uint8_t const idxStatsTmp1 = iemNativeRegAllocTmp(pReNative, &off);
6578 uint8_t const idxStatsTmp2 = iemNativeRegAllocTmp(pReNative, &off);
6579 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxStatsTmp1, idxStatsTmp2, offVCpu);
6580 iemNativeRegFreeTmp(pReNative, idxStatsTmp1);
6581 iemNativeRegFreeTmp(pReNative, idxStatsTmp2);
6582
6583 return off;
6584}
6585
6586#endif /* VBOX_WITH_STATISTICS */
6587
6588/**
6589 * Worker for iemNativeEmitViaLookupDoOne and iemNativeRecompileAttachExecMemChunkCtx.
6590 */
6591static uint32_t
6592iemNativeEmitCoreViaLookupDoOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offReturnBreak, uintptr_t pfnHelper)
6593{
6594 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6595 off = iemNativeEmitCallImm(pReNative, off, pfnHelper);
6596
6597 /* Jump to ReturnBreak if the return register is NULL. */
6598 off = iemNativeEmitTestIfGprIsZeroAndJmpToFixed(pReNative, off, IEMNATIVE_CALL_RET_GREG,
6599 true /*f64Bit*/, offReturnBreak);
6600
6601 /* Okay, continue executing the next TB. */
6602 off = iemNativeEmitJmpViaGpr(pReNative, off, IEMNATIVE_CALL_RET_GREG);
6603 return off;
6604}
6605
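/*
 * The helper called above is expected to return either the native entry point of the next TB,
 * which is then tail-jumped to via IEMNATIVE_CALL_RET_GREG, or NULL, in which case execution
 * continues at the ReturnBreak code at offReturnBreak.
 */
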
6606#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6607
6608/**
6609 * Worker for iemNativeEmitReturnBreakViaLookup.
6610 */
6611static uint32_t iemNativeEmitViaLookupDoOne(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t offReturnBreak,
6612 IEMNATIVELABELTYPE enmLabel, uintptr_t pfnHelper)
6613{
6614 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
6615 if (idxLabel != UINT32_MAX)
6616 {
6617 iemNativeLabelDefine(pReNative, idxLabel, off);
6618 off = iemNativeEmitCoreViaLookupDoOne(pReNative, off, offReturnBreak, pfnHelper);
6619 }
6620 return off;
6621}
6622
6623
6624/**
6625 * Emits the code at the ReturnBreakViaLookup, ReturnBreakViaLookupWithIrq,
6626 * ReturnBreakViaLookupWithTlb and ReturnBreakViaLookupWithTlbAndIrq labels
6627 * (returns VINF_IEM_REEXEC_BREAK or jumps to the next TB).
6628 */
6629static uint32_t iemNativeEmitReturnBreakViaLookup(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnBreakLabel)
6630{
6631 uint32_t const offReturnBreak = pReNative->paLabels[idxReturnBreakLabel].off;
6632 Assert(offReturnBreak < off);
6633
6634 /*
6635 * The lookup table index is in IEMNATIVE_CALL_ARG1_GREG for all.
6636 * The GCPhysPc is in IEMNATIVE_CALL_ARG2_GREG for ReturnBreakViaLookupWithPc.
6637 */
6638 off = iemNativeEmitViaLookupDoOne(pReNative, off, offReturnBreak, kIemNativeLabelType_ReturnBreakViaLookup,
6639 (uintptr_t)iemNativeHlpReturnBreakViaLookup<false /*a_fWithIrqCheck*/>);
6640 off = iemNativeEmitViaLookupDoOne(pReNative, off, offReturnBreak, kIemNativeLabelType_ReturnBreakViaLookupWithIrq,
6641 (uintptr_t)iemNativeHlpReturnBreakViaLookup<true /*a_fWithIrqCheck*/>);
6642 off = iemNativeEmitViaLookupDoOne(pReNative, off, offReturnBreak, kIemNativeLabelType_ReturnBreakViaLookupWithTlb,
6643 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<false /*a_fWithIrqCheck*/>);
6644 off = iemNativeEmitViaLookupDoOne(pReNative, off, offReturnBreak, kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq,
6645 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<true /*a_fWithIrqCheck*/>);
6646 return off;
6647}
6648
6649#endif /* !IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE */
6650
6651/**
6652 * Emits the code at the ReturnWithFlags label (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6653 */
6654static uint32_t iemNativeEmitCoreReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6655{
6656 /* set the return status */
6657 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_FINISH_WITH_FLAGS);
6658}
6659
6660
6661#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6662/**
6663 * Emits the code at the ReturnWithFlags label (returns VINF_IEM_REEXEC_FINISH_WITH_FLAGS).
6664 */
6665static uint32_t iemNativeEmitReturnWithFlags(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6666{
6667 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnWithFlags);
6668 if (idxLabel != UINT32_MAX)
6669 {
6670 iemNativeLabelDefine(pReNative, idxLabel, off);
6671 /* set the return status */
6672 off = iemNativeEmitCoreReturnWithFlags(pReNative, off);
6673 /* jump back to the return sequence. */
6674 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6675 }
6676 return off;
6677}
6678#endif
6679
6680
6681/**
6682 * Emits the code at the ReturnBreakFF label (returns VINF_IEM_REEXEC_BREAK_FF).
6683 */
6684static uint32_t iemNativeEmitCoreReturnBreakFF(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6685{
6686 /* set the return status */
6687 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK_FF);
6688}
6689
6690
6691#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6692/**
6693 * Emits the code at the ReturnBreakFF label (returns VINF_IEM_REEXEC_BREAK_FF).
6694 */
6695static uint32_t iemNativeEmitReturnBreakFF(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6696{
6697 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreakFF);
6698 if (idxLabel != UINT32_MAX)
6699 {
6700 iemNativeLabelDefine(pReNative, idxLabel, off);
6701 /* set the return status */
6702 off = iemNativeEmitCoreReturnBreakFF(pReNative, off);
6703 /* jump back to the return sequence. */
6704 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6705 }
6706 return off;
6707}
6708#endif
6709
6710
6711/**
6712 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6713 */
6714static uint32_t iemNativeEmitCoreReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6715{
6716 /* set the return status */
6717 return iemNativeEmitLoadGprImm64(pReNative, off, IEMNATIVE_CALL_RET_GREG, VINF_IEM_REEXEC_BREAK);
6718}
6719
6720
6721#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6722/**
6723 * Emits the code at the ReturnBreak label (returns VINF_IEM_REEXEC_BREAK).
6724 */
6725static uint32_t iemNativeEmitReturnBreak(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6726{
6727 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_ReturnBreak);
6728 if (idxLabel != UINT32_MAX)
6729 {
6730 iemNativeLabelDefine(pReNative, idxLabel, off);
6731 /* set the return status */
6732 off = iemNativeEmitCoreReturnBreak(pReNative, off);
6733 /* jump back to the return sequence. */
6734 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6735 }
6736 return off;
6737}
6738#endif
6739
6740
6741/**
6742 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6743 */
6744static uint32_t iemNativeEmitCoreRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6745{
6746 /*
6747 * Generate the rc + rcPassUp fiddling code.
6748 */
6749 /* iemNativeHlpExecStatusCodeFiddling(PVMCPUCC pVCpu, int rc, uint8_t idxInstr) */
6750#ifdef RT_ARCH_AMD64
6751# ifdef RT_OS_WINDOWS
6752# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6753 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_x8, X86_GREG_xCX); /* cl = instruction number */
6754# endif
6755 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xCX, IEMNATIVE_REG_FIXED_PVMCPU);
6756 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xAX);
6757# else
6758 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDI, IEMNATIVE_REG_FIXED_PVMCPU);
6759 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xSI, X86_GREG_xAX);
6760# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6761 off = iemNativeEmitLoadGprFromGpr(pReNative, off, X86_GREG_xDX, X86_GREG_xCX); /* cl = instruction number */
6762# endif
6763# endif
6764# ifndef IEMNATIVE_WITH_INSTRUCTION_COUNTING
6765 off = iemNativeEmitLoadGpr8Imm(pReNative, off, X86_GREG_xCX, 0);
6766# endif
6767
6768#else
6769 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, IEMNATIVE_CALL_RET_GREG);
6770 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
6771 /* IEMNATIVE_CALL_ARG2_GREG is already set. */
6772#endif
6773
6774 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpExecStatusCodeFiddling);
6775 return off;
6776}
6777
6778
6779#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6780/**
6781 * Emits the RC fiddling code for handling non-zero return code or rcPassUp.
6782 */
6783static uint32_t iemNativeEmitRcFiddling(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t idxReturnLabel)
6784{
6785 /*
6786 * Generate the rc + rcPassUp fiddling code if needed.
6787 */
6788 uint32_t const idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_NonZeroRetOrPassUp);
6789 if (idxLabel != UINT32_MAX)
6790 {
6791 iemNativeLabelDefine(pReNative, idxLabel, off);
6792 off = iemNativeEmitCoreRcFiddling(pReNative, off);
6793 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
6794 }
6795 return off;
6796}
6797#endif
6798
6799
6800/**
6801 * Emits a standard epilog.
6802 */
6803static uint32_t iemNativeEmitCoreEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6804{
6805 pReNative->Core.bmHstRegs |= RT_BIT_32(IEMNATIVE_CALL_RET_GREG); /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
6806
6807 IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK(pReNative, off, X86_EFL_STATUS_BITS);
6808
6809 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK (return register is already set to status code). */
6810 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
6811
6812 /*
6813 * Restore registers and return.
6814 */
6815#ifdef RT_ARCH_AMD64
6816 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
6817
6818 /* Reposition rsp at the r15 restore point. */
6819 pbCodeBuf[off++] = X86_OP_REX_W;
6820 pbCodeBuf[off++] = 0x8d; /* lea rsp, [rbp - (gcc ? 5 : 7) * 8] */
6821 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, X86_GREG_xSP, X86_GREG_xBP);
6822 pbCodeBuf[off++] = (uint8_t)IEMNATIVE_FP_OFF_LAST_PUSH;
6823
6824 /* Pop non-volatile registers and return */
6825 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r15 */
6826 pbCodeBuf[off++] = 0x58 + X86_GREG_x15 - 8;
6827 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r14 */
6828 pbCodeBuf[off++] = 0x58 + X86_GREG_x14 - 8;
6829 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r13 */
6830 pbCodeBuf[off++] = 0x58 + X86_GREG_x13 - 8;
6831 pbCodeBuf[off++] = X86_OP_REX_B; /* pop r12 */
6832 pbCodeBuf[off++] = 0x58 + X86_GREG_x12 - 8;
6833# ifdef RT_OS_WINDOWS
6834 pbCodeBuf[off++] = 0x58 + X86_GREG_xDI; /* pop rdi */
6835 pbCodeBuf[off++] = 0x58 + X86_GREG_xSI; /* pop rsi */
6836# endif
6837 pbCodeBuf[off++] = 0x58 + X86_GREG_xBX; /* pop rbx */
6838 pbCodeBuf[off++] = 0xc9; /* leave */
6839 pbCodeBuf[off++] = 0xc3; /* ret */
6840 pbCodeBuf[off++] = 0xcc; /* int3 poison */
6841
6842#elif RT_ARCH_ARM64
6843 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 10);
6844
6845 /* ldp x19, x20, [sp, #IEMNATIVE_FRAME_VAR_SIZE]! ; Deallocate the variable space and restore x19+x20. */
6846 AssertCompile(IEMNATIVE_FRAME_VAR_SIZE < 64*8);
6847 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6848 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6849 IEMNATIVE_FRAME_VAR_SIZE / 8);
6850 /* Restore x21 thru x28 + BP and LR (ret address) (SP remains unchanged in the kSigned variant). */
6851 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6852 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6853 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6854 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6855 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6856 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6857 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6858 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6859 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(true /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6860 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6861 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6862
6863 /* add sp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE ; */
6864 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 4096);
6865 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP,
6866 IEMNATIVE_FRAME_SAVE_REG_SIZE);
6867
6868 /* retab / ret */
6869# ifdef RT_OS_DARWIN /** @todo See todo on pacibsp in the prolog. */
6870 if (1)
6871 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RETAB;
6872 else
6873# endif
6874 pu32CodeBuf[off++] = ARMV8_A64_INSTR_RET;
6875
6876#else
6877# error "port me"
6878#endif
6879 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
6880
6881 /* HACK: For IEMNATIVE_STRICT_EFLAGS_SKIPPING_EMIT_CHECK. */
6882 pReNative->Core.bmHstRegs &= ~RT_BIT_32(IEMNATIVE_CALL_RET_GREG);
6883
6884 return off;
6885}
6886
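/*
 * For reference, the AMD64 branch above emits roughly the following sequence
 * (Windows hosts additionally pop rdi and rsi before rbx).  This is only an
 * illustrative sketch of the generated epilog, not code assembled from here:
 *
 *      lea     rsp, [rbp + IEMNATIVE_FP_OFF_LAST_PUSH]   ; reposition rsp at the r15 save slot
 *      pop     r15
 *      pop     r14
 *      pop     r13
 *      pop     r12
 *      pop     rbx
 *      leave
 *      ret
 *      int3                                              ; poison
 */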
6887
6888#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
6889/**
6890 * Emits a standard epilog, creating the common return label before emitting the core epilog code.
6891 */
6892static uint32_t iemNativeEmitEpilog(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t *pidxReturnLabel)
6893{
6894 /*
6895 * Define label for common return point.
6896 */
6897 *pidxReturnLabel = UINT32_MAX;
6898 uint32_t const idxReturn = iemNativeLabelCreate(pReNative, kIemNativeLabelType_Return, off);
6899 *pidxReturnLabel = idxReturn;
6900
6901 /*
6902 * Emit the code.
6903 */
6904 return iemNativeEmitCoreEpilog(pReNative, off);
6905}
6906#endif
6907
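/*
 * Usage sketch, illustrative only (the caller context is assumed): without
 * per-chunk tail code the TB finalization lays down the shared return point
 * via this function, and tail sequences such as the NonZeroRetOrPassUp code
 * above then jump to the label it hands back:
 *
 *      uint32_t idxReturnLabel = UINT32_MAX;
 *      off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
 */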
6908
6909#ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
6910/**
6911 * Emits a standard prolog.
6912 */
6913static uint32_t iemNativeEmitProlog(PIEMRECOMPILERSTATE pReNative, uint32_t off)
6914{
6915#ifdef RT_ARCH_AMD64
6916 /*
6917 * Set up a regular xBP stack frame, pushing all non-volatile GPRs,
6918 * reserving 64 bytes for stack variables plus 4 non-register argument
6919 * slots. Fixed register assignment: xBX = pVCpu;
6920 *
6921 * Since we always do the same register spilling, we can use the same
6922 * unwind description for all the code.
6923 */
6924 uint8_t *const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 32);
6925 pbCodeBuf[off++] = 0x50 + X86_GREG_xBP; /* push rbp */
6926 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbp, rsp */
6927 pbCodeBuf[off++] = 0x8b;
6928 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBP, X86_GREG_xSP);
6929 pbCodeBuf[off++] = 0x50 + X86_GREG_xBX; /* push rbx */
6930 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == X86_GREG_xBX);
6931# ifdef RT_OS_WINDOWS
6932 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rcx ; RBX = pVCpu */
6933 pbCodeBuf[off++] = 0x8b;
6934 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xCX);
6935 pbCodeBuf[off++] = 0x50 + X86_GREG_xSI; /* push rsi */
6936 pbCodeBuf[off++] = 0x50 + X86_GREG_xDI; /* push rdi */
6937# else
6938 pbCodeBuf[off++] = X86_OP_REX_W; /* mov rbx, rdi ; RBX = pVCpu */
6939 pbCodeBuf[off++] = 0x8b;
6940 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, X86_GREG_xBX, X86_GREG_xDI);
6941# endif
6942 pbCodeBuf[off++] = X86_OP_REX_B; /* push r12 */
6943 pbCodeBuf[off++] = 0x50 + X86_GREG_x12 - 8;
6944 pbCodeBuf[off++] = X86_OP_REX_B; /* push r13 */
6945 pbCodeBuf[off++] = 0x50 + X86_GREG_x13 - 8;
6946 pbCodeBuf[off++] = X86_OP_REX_B; /* push r14 */
6947 pbCodeBuf[off++] = 0x50 + X86_GREG_x14 - 8;
6948 pbCodeBuf[off++] = X86_OP_REX_B; /* push r15 */
6949 pbCodeBuf[off++] = 0x50 + X86_GREG_x15 - 8;
6950
6951# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
6952 /* Save the frame pointer. */
6953 off = iemNativeEmitStoreGprToVCpuU64Ex(pbCodeBuf, off, X86_GREG_xBP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3));
6954# endif
6955
6956 off = iemNativeEmitSubGprImm(pReNative, off, /* sub rsp, byte 28h */
6957 X86_GREG_xSP,
6958 IEMNATIVE_FRAME_ALIGN_SIZE
6959 + IEMNATIVE_FRAME_VAR_SIZE
6960 + IEMNATIVE_FRAME_STACK_ARG_COUNT * 8
6961 + IEMNATIVE_FRAME_SHADOW_ARG_COUNT * 8);
6962 AssertCompile(!(IEMNATIVE_FRAME_VAR_SIZE & 0xf));
6963 AssertCompile(!(IEMNATIVE_FRAME_STACK_ARG_COUNT & 0x1));
6964 AssertCompile(!(IEMNATIVE_FRAME_SHADOW_ARG_COUNT & 0x1));
6965
6966#elif RT_ARCH_ARM64
6967 /*
6968 * We set up a stack frame exactly like on x86, only we have to push the
6969 * return address ourselves here. We save all non-volatile registers.
6970 */
6971 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 16);
6972
6973# ifdef RT_OS_DARWIN /** @todo This seems to be a requirement by libunwind for JIT FDEs. Investigate further, as we've been
6974 * unable to figure out where the BRK following the AUTHB*+XPACB* stuff comes from in libunwind. It's
6975 * definitely the dwarf stepping code, but until that's found it's very tedious to figure out whether it's
6976 * in any way conditional, so just emit these instructions now and hope for the best... */
6977 /* pacibsp */
6978 pu32CodeBuf[off++] = ARMV8_A64_INSTR_PACIBSP;
6979# endif
6980
6981 /* stp x19, x20, [sp, #-IEMNATIVE_FRAME_SAVE_REG_SIZE] ; Allocate space for saving registers and place x19+x20 at the bottom. */
6982 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE < 64*8);
6983 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_PreIndex,
6984 ARMV8_A64_REG_X19, ARMV8_A64_REG_X20, ARMV8_A64_REG_SP,
6985 -IEMNATIVE_FRAME_SAVE_REG_SIZE / 8);
6986 /* Save x21 thru x28 (SP remains unchanged in the kSigned variant). */
6987 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6988 ARMV8_A64_REG_X21, ARMV8_A64_REG_X22, ARMV8_A64_REG_SP, 2);
6989 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6990 ARMV8_A64_REG_X23, ARMV8_A64_REG_X24, ARMV8_A64_REG_SP, 4);
6991 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6992 ARMV8_A64_REG_X25, ARMV8_A64_REG_X26, ARMV8_A64_REG_SP, 6);
6993 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6994 ARMV8_A64_REG_X27, ARMV8_A64_REG_X28, ARMV8_A64_REG_SP, 8);
6995 /* Save the BP and LR (ret address) registers at the top of the frame. */
6996 pu32CodeBuf[off++] = Armv8A64MkInstrStLdPair(false /*fLoad*/, 2 /*64-bit*/, kArm64InstrStLdPairType_Signed,
6997 ARMV8_A64_REG_BP, ARMV8_A64_REG_LR, ARMV8_A64_REG_SP, 10);
6998 AssertCompile(IEMNATIVE_FRAME_SAVE_REG_SIZE / 8 == 12);
6999 /* add bp, sp, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16 ; Set BP to point to the old BP stack address. */
7000 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, ARMV8_A64_REG_BP,
7001 ARMV8_A64_REG_SP, IEMNATIVE_FRAME_SAVE_REG_SIZE - 16);
7002
7003 /* sub sp, sp, IEMNATIVE_FRAME_VAR_SIZE ; Allocate the variable area from SP. */
7004 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(true /*fSub*/, ARMV8_A64_REG_SP, ARMV8_A64_REG_SP, IEMNATIVE_FRAME_VAR_SIZE);
7005
7006 /* mov r28, r0 */
7007 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PVMCPU, IEMNATIVE_CALL_ARG0_GREG);
7008 /* mov r27, r1 */
7009 off = iemNativeEmitLoadGprFromGprEx(pu32CodeBuf, off, IEMNATIVE_REG_FIXED_PCPUMCTX, IEMNATIVE_CALL_ARG1_GREG);
7010
7011# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
7012 /* Save the frame pointer. */
7013 off = iemNativeEmitStoreGprToVCpuU64Ex(pu32CodeBuf, off, ARMV8_A64_REG_BP, RT_UOFFSETOF(VMCPUCC, iem.s.pvTbFramePointerR3),
7014 ARMV8_A64_REG_X2);
7015# endif
7016
7017#else
7018# error "port me"
7019#endif
7020 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
7021 return off;
7022}
7023#endif
7024
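/*
 * For reference, the AMD64 branch above produces roughly this prolog on
 * Windows hosts (SysV hosts take pVCpu from rdi and skip the rsi/rdi pushes).
 * Purely an illustrative sketch of the generated code:
 *
 *      push    rbp
 *      mov     rbp, rsp
 *      push    rbx
 *      mov     rbx, rcx                ; rbx = pVCpu (fixed)
 *      push    rsi
 *      push    rdi
 *      push    r12
 *      push    r13
 *      push    r14
 *      push    r15
 *      sub     rsp, IEMNATIVE_FRAME_ALIGN_SIZE + IEMNATIVE_FRAME_VAR_SIZE
 *                    + (IEMNATIVE_FRAME_STACK_ARG_COUNT + IEMNATIVE_FRAME_SHADOW_ARG_COUNT) * 8
 */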
7025
7026/*********************************************************************************************************************************
7027* Emitters for IEM_MC_ARG_XXX, IEM_MC_LOCAL, IEM_MC_LOCAL_CONST, ++ *
7028*********************************************************************************************************************************/
7029
7030/**
7031 * Internal work that allocates a variable with kind set to
7032 * kIemNativeVarKind_Invalid and no current stack allocation.
7033 *
7034 * The kind will either be set by the caller or later when the variable is first
7035 * assigned a value.
7036 *
7037 * @returns Unpacked index.
7038 * @internal
7039 */
7040static uint8_t iemNativeVarAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7041{
7042 Assert(cbType > 0 && cbType <= 64);
7043 unsigned const idxVar = ASMBitFirstSetU32(~pReNative->Core.bmVars) - 1;
7044 AssertStmt(idxVar < RT_ELEMENTS(pReNative->Core.aVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_EXHAUSTED));
7045 pReNative->Core.bmVars |= RT_BIT_32(idxVar);
7046 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
7047 pReNative->Core.aVars[idxVar].cbVar = cbType;
7048 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
7049 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
7050 pReNative->Core.aVars[idxVar].uArgNo = UINT8_MAX;
7051 pReNative->Core.aVars[idxVar].idxReferrerVar = UINT8_MAX;
7052 pReNative->Core.aVars[idxVar].enmGstReg = kIemNativeGstReg_End;
7053 pReNative->Core.aVars[idxVar].fRegAcquired = false;
7054 pReNative->Core.aVars[idxVar].u.uValue = 0;
7055#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7056 pReNative->Core.aVars[idxVar].fSimdReg = false;
7057#endif
7058 return idxVar;
7059}
7060
7061
7062/**
7063 * Internal work that allocates an argument variable w/o setting enmKind.
7064 *
7065 * @returns Unpacked index.
7066 * @internal
7067 */
7068static uint8_t iemNativeArgAllocInt(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7069{
7070 iArgNo += iemNativeArgGetHiddenArgCount(pReNative);
7071 AssertStmt(iArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars), IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7072 AssertStmt(pReNative->Core.aidxArgVars[iArgNo] == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_DUP_ARG_NO));
7073
7074 uint8_t const idxVar = iemNativeVarAllocInt(pReNative, cbType);
7075 pReNative->Core.aidxArgVars[iArgNo] = idxVar; /* (unpacked) */
7076 pReNative->Core.aVars[idxVar].uArgNo = iArgNo;
7077 return idxVar;
7078}
7079
7080
7081/**
7082 * Gets the stack slot for a stack variable, allocating one if necessary.
7083 *
7084 * Calling this function implies that the stack slot will contain a valid
7085 * variable value. The caller deals with any register currently assigned to the
7086 * variable, typically by spilling it into the stack slot.
7087 *
7088 * @returns The stack slot number.
7089 * @param pReNative The recompiler state.
7090 * @param idxVar The variable.
7091 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS
7092 */
7093DECL_HIDDEN_THROW(uint8_t) iemNativeVarGetStackSlot(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7094{
7095 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7096 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7097 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
7098
7099 /* Already got a slot? */
7100 uint8_t const idxStackSlot = pVar->idxStackSlot;
7101 if (idxStackSlot != UINT8_MAX)
7102 {
7103 Assert(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS);
7104 return idxStackSlot;
7105 }
7106
7107 /*
7108 * A single slot is easy to allocate.
7109 * Allocate them from the top end, closest to BP, to reduce the displacement.
7110 */
7111 if (pVar->cbVar <= sizeof(uint64_t))
7112 {
7113 unsigned const iSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7114 AssertStmt(iSlot < IEMNATIVE_FRAME_VAR_SLOTS, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7115 pReNative->Core.bmStack |= RT_BIT_32(iSlot);
7116 pVar->idxStackSlot = (uint8_t)iSlot;
7117 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x\n", idxVar, iSlot));
7118 return (uint8_t)iSlot;
7119 }
7120
7121 /*
7122 * We need more than one stack slot.
7123 *
7124 * cbVar -> fBitAlignMask: 16 -> 1; 32 -> 3; 64 -> 7;
7125 */
7126 AssertCompile(RT_IS_POWER_OF_TWO(IEMNATIVE_FRAME_VAR_SLOTS)); /* If not we have to add an overflow check. */
7127 Assert(pVar->cbVar <= 64);
7128 uint32_t const fBitAlignMask = RT_BIT_32(ASMBitLastSetU32(pVar->cbVar) - 4) - 1;
7129 uint32_t fBitAllocMask = RT_BIT_32((pVar->cbVar + 7) >> 3) - 1;
7130 uint32_t bmStack = pReNative->Core.bmStack;
7131 while (bmStack != UINT32_MAX)
7132 {
7133 unsigned iSlot = ASMBitLastSetU32(~bmStack);
7134 AssertStmt(iSlot, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7135 iSlot = (iSlot - 1) & ~fBitAlignMask;
7136 if ((bmStack & ~(fBitAllocMask << iSlot)) == bmStack)
7137 {
7138 pReNative->Core.bmStack |= (fBitAllocMask << iSlot);
7139 pVar->idxStackSlot = (uint8_t)iSlot;
7140 Log11(("iemNativeVarGetStackSlot: idxVar=%#x iSlot=%#x/%#x (cbVar=%#x)\n",
7141 idxVar, iSlot, fBitAllocMask, pVar->cbVar));
7142 return (uint8_t)iSlot;
7143 }
7144
7145 bmStack |= (fBitAllocMask << iSlot);
7146 }
7147 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7148}
7149
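/*
 * Usage sketch, illustrative only (idxVar and idxHstReg are assumed to come
 * from the caller): spilling the register currently assigned to a stack
 * variable goes through the slot allocated here:
 *
 *      uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
 *      int32_t const offDispBp    = iemNativeStackCalcBpDisp(idxStackSlot);
 *      off = iemNativeEmitStoreGprByBp(pReNative, off, offDispBp, idxHstReg);
 */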
7150
7151/**
7152 * Changes the variable to a stack variable.
7153 *
7154 * Currently this is only possible to do the first time the variable is used;
7155 * switching later can be implemented but hasn't been done.
7156 *
7157 * @param pReNative The recompiler state.
7158 * @param idxVar The variable.
7159 * @throws VERR_IEM_VAR_IPE_2
7160 */
7161DECL_HIDDEN_THROW(void) iemNativeVarSetKindToStack(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
7162{
7163 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7164 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7165 if (pVar->enmKind != kIemNativeVarKind_Stack)
7166 {
7167 /* We could in theory transition from immediate to stack as well, but it
7168 would involve the caller doing the work of storing the value on the stack.
7169 So, until that's required we only allow transitioning from invalid. */
7170 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7171 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7172 pVar->enmKind = kIemNativeVarKind_Stack;
7173
7174 /* Note! We don't allocate a stack slot here, that's only done when a
7175 slot is actually needed to hold a variable value. */
7176 }
7177}
7178
7179
7180/**
7181 * Sets the variable to a constant (immediate) value.
7182 *
7183 * This does not require stack storage as we know the value and can always
7184 * reload it, unless of course it's referenced.
7185 *
7186 * @param pReNative The recompiler state.
7187 * @param idxVar The variable.
7188 * @param uValue The immediate value.
7189 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7190 */
7191DECL_HIDDEN_THROW(void) iemNativeVarSetKindToConst(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint64_t uValue)
7192{
7193 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7194 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7195 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7196 {
7197 /* Only simple transitions for now. */
7198 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7199 pVar->enmKind = kIemNativeVarKind_Immediate;
7200 }
7201 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7202
7203 pVar->u.uValue = uValue;
7204 AssertMsg( pVar->cbVar >= sizeof(uint64_t)
7205 || pVar->u.uValue < RT_BIT_64(pVar->cbVar * 8),
7206 ("idxVar=%d cbVar=%u uValue=%#RX64\n", idxVar, pVar->cbVar, uValue));
7207}
7208
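/*
 * Worked example of the size assertion above, illustrative only: for a 16-bit
 * variable (cbVar == 2) the value must satisfy uValue < RT_BIT_64(16), which
 * is why callers like iemNativeVarAllocConst/iemNativeArgAllocConst mask the
 * value before calling:
 *
 *      iemNativeVarSetKindToConst(pReNative, idxVar, uValue & UINT64_C(0xffff));
 */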
7209
7210/**
7211 * Sets the variable to a reference (pointer) to @a idxOtherVar.
7212 *
7213 * This does not require stack storage as we know the value and can always
7214 * reload it. Loading is postponed till needed.
7215 *
7216 * @param pReNative The recompiler state.
7217 * @param idxVar The variable. Unpacked.
7218 * @param idxOtherVar The variable to take the (stack) address of. Unpacked.
7219 *
7220 * @throws VERR_IEM_VAR_OUT_OF_STACK_SLOTS, VERR_IEM_VAR_IPE_2
7221 * @internal
7222 */
7223static void iemNativeVarSetKindToLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint8_t idxOtherVar)
7224{
7225 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxVar)));
7226 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars) && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar)));
7227
7228 if (pReNative->Core.aVars[idxVar].enmKind != kIemNativeVarKind_VarRef)
7229 {
7230 /* Only simple transitions for now. */
7231 AssertStmt(pReNative->Core.aVars[idxVar].enmKind == kIemNativeVarKind_Invalid,
7232 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7233 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_VarRef;
7234 }
7235 AssertStmt(pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7236
7237 pReNative->Core.aVars[idxVar].u.idxRefVar = idxOtherVar; /* unpacked */
7238
7239 /* Update the other variable, ensure it's a stack variable. */
7240 /** @todo handle variables with const values... that'll go boom now. */
7241 pReNative->Core.aVars[idxOtherVar].idxReferrerVar = idxVar;
7242 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
7243}
7244
7245
7246/**
7247 * Sets the variable to a reference (pointer) to a guest register reference.
7248 *
7249 * This does not require stack storage as we know the value and can always
7250 * reload it. Loading is postponed till needed.
7251 *
7252 * @param pReNative The recompiler state.
7253 * @param idxVar The variable.
7254 * @param enmRegClass The class guest registers to reference.
7255 * @param idxReg The register within @a enmRegClass to reference.
7256 *
7257 * @throws VERR_IEM_VAR_IPE_2
7258 */
7259DECL_HIDDEN_THROW(void) iemNativeVarSetKindToGstRegRef(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar,
7260 IEMNATIVEGSTREGREF enmRegClass, uint8_t idxReg)
7261{
7262 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7263 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7264
7265 if (pVar->enmKind != kIemNativeVarKind_GstRegRef)
7266 {
7267 /* Only simple transitions for now. */
7268 AssertStmt(pVar->enmKind == kIemNativeVarKind_Invalid, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7269 pVar->enmKind = kIemNativeVarKind_GstRegRef;
7270 }
7271 AssertStmt(pVar->idxReg == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_2));
7272
7273 pVar->u.GstRegRef.enmClass = enmRegClass;
7274 pVar->u.GstRegRef.idx = idxReg;
7275}
7276
7277
7278DECL_HIDDEN_THROW(uint8_t) iemNativeArgAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType)
7279{
7280 return IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7281}
7282
7283
7284DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t cbType, uint64_t uValue)
7285{
7286 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeArgAllocInt(pReNative, iArgNo, cbType));
7287
7288 /* Since we're using a generic uint64_t value type, we must truncate it if
7289 the variable is smaller, otherwise we may end up with too large a value
7290 when scaling up an imm8 w/ sign-extension.
7291
7292 This caused trouble with a "add bx, 0xffff" instruction (around f000:ac60
7293 in the bios, bx=1) when running on arm, because clang expects 16-bit
7294 register parameters to have bits 16 and up set to zero. Instead of
7295 setting x1 = 0xffff we ended up with x1 = 0xffffffffffffff and the wrong
7296 CF value in the result. */
7297 switch (cbType)
7298 {
7299 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7300 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7301 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7302 }
7303 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7304 return idxVar;
7305}
7306
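/*
 * Illustrative example with assumed values: a 64-bit sign-extended immediate,
 * like the 0xffff from the "add bx, 0xffff" case described in the comment
 * above, is safely truncated when allocated as a 16-bit constant argument:
 *
 *      // uParam == UINT64_C(0xffffffffffffffff) after sign-extension (assumed input)
 *      uint8_t const idxArg = iemNativeArgAllocConst(pReNative, 1 /*iArgNo*/, sizeof(uint16_t), uParam);
 *      // the argument's constant value is now 0xffff, so bits 16+ stay clear for the helper
 */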
7307
7308DECL_HIDDEN_THROW(uint8_t) iemNativeArgAllocLocalRef(PIEMRECOMPILERSTATE pReNative, uint8_t iArgNo, uint8_t idxOtherVar)
7309{
7310 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxOtherVar);
7311 idxOtherVar = IEMNATIVE_VAR_IDX_UNPACK(idxOtherVar);
7312 AssertStmt( idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars)
7313 && (pReNative->Core.bmVars & RT_BIT_32(idxOtherVar))
7314 && pReNative->Core.aVars[idxOtherVar].uArgNo == UINT8_MAX,
7315 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_1));
7316
7317 uint8_t const idxArgVar = iemNativeArgAlloc(pReNative, iArgNo, sizeof(uintptr_t));
7318 iemNativeVarSetKindToLocalRef(pReNative, IEMNATIVE_VAR_IDX_UNPACK(idxArgVar), idxOtherVar);
7319 return idxArgVar;
7320}
7321
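/*
 * Illustrative sketch with assumed variable indexes: an argument that is a
 * pointer to a local (IEM_MC_ARG_LOCAL_REF style) pairs a stack variable with
 * a reference argument along these lines:
 *
 *      uint8_t const idxVarLocal = iemNativeVarAlloc(pReNative, sizeof(uint64_t));
 *      uint8_t const idxArgRef   = iemNativeArgAllocLocalRef(pReNative, 2 /*iArgNo*/, idxVarLocal);
 */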
7322
7323DECL_HIDDEN_THROW(uint8_t) iemNativeVarAlloc(PIEMRECOMPILERSTATE pReNative, uint8_t cbType)
7324{
7325 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7326 /* Don't set to stack now, leave that to the first use as for instance
7327 IEM_MC_CALC_RM_EFF_ADDR may produce a const/immediate result (esp. in DOS). */
7328 return idxVar;
7329}
7330
7331
7332DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocConst(PIEMRECOMPILERSTATE pReNative, uint8_t cbType, uint64_t uValue)
7333{
7334 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7335
7336 /* Since we're using a generic uint64_t value type, we must truncate it if
7337 the variable is smaller, otherwise we may end up with too large a value
7338 when scaling up an imm8 w/ sign-extension. */
7339 switch (cbType)
7340 {
7341 case sizeof(uint8_t): uValue &= UINT64_C(0xff); break;
7342 case sizeof(uint16_t): uValue &= UINT64_C(0xffff); break;
7343 case sizeof(uint32_t): uValue &= UINT64_C(0xffffffff); break;
7344 }
7345 iemNativeVarSetKindToConst(pReNative, idxVar, uValue);
7346 return idxVar;
7347}
7348
7349
7350DECL_HIDDEN_THROW(uint8_t) iemNativeVarAllocAssign(PIEMRECOMPILERSTATE pReNative, uint32_t *poff, uint8_t cbType, uint8_t idxVarOther)
7351{
7352 uint8_t const idxVar = IEMNATIVE_VAR_IDX_PACK(iemNativeVarAllocInt(pReNative, cbType));
7353 iemNativeVarSetKindToStack(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
7354
7355 uint8_t const idxVarOtherReg = iemNativeVarRegisterAcquire(pReNative, idxVarOther, poff, true /*fInitialized*/);
7356 uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, poff);
7357
7358 *poff = iemNativeEmitLoadGprFromGpr(pReNative, *poff, idxVarReg, idxVarOtherReg);
7359
7360 /* Truncate the value to this variable's size. */
7361 switch (cbType)
7362 {
7363 case sizeof(uint8_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xff)); break;
7364 case sizeof(uint16_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffff)); break;
7365 case sizeof(uint32_t): *poff = iemNativeEmitAndGpr32ByImm(pReNative, *poff, idxVarReg, UINT64_C(0xffffffff)); break;
7366 }
7367
7368 iemNativeVarRegisterRelease(pReNative, idxVarOther);
7369 iemNativeVarRegisterRelease(pReNative, idxVar);
7370 return idxVar;
7371}
7372
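/*
 * Illustrative usage with assumed inputs: making a truncated working copy of
 * another variable, e.g. a 16-bit copy of a wider source variable:
 *
 *      uint8_t const idxVarCopy = iemNativeVarAllocAssign(pReNative, &off, sizeof(uint16_t), idxVarSrc);
 *      // idxVarCopy now holds the low 16 bits of idxVarSrc in its own register/stack slot
 */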
7373
7374/**
7375 * Makes sure variable @a idxVar has a register assigned to it and that it stays
7376 * fixed till we call iemNativeVarRegisterRelease.
7377 *
7378 * @returns The host register number.
7379 * @param pReNative The recompiler state.
7380 * @param idxVar The variable.
7381 * @param poff Pointer to the instruction buffer offset.
7382 * In case a register needs to be freed up or the value
7383 * loaded off the stack.
7384 * @param fInitialized Set if the variable must already have been initialized.
7385 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7386 * the case.
7387 * @param idxRegPref Preferred register number or UINT8_MAX.
7388 */
7389DECL_HIDDEN_THROW(uint8_t) iemNativeVarRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7390 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7391{
7392 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7393 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7394 Assert(pVar->cbVar <= 8);
7395 Assert(!pVar->fRegAcquired);
7396
7397 uint8_t idxReg = pVar->idxReg;
7398 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7399 {
7400 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7401 && pVar->enmKind < kIemNativeVarKind_End);
7402 pVar->fRegAcquired = true;
7403 return idxReg;
7404 }
7405
7406 /*
7407 * If the kind of variable has not yet been set, default to 'stack'.
7408 */
7409 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7410 && pVar->enmKind < kIemNativeVarKind_End);
7411 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7412 iemNativeVarSetKindToStack(pReNative, idxVar);
7413
7414 /*
7415 * We have to allocate a register for the variable, even if it's a stack one,
7416 * as we don't know if there are modifications being made to it before it's
7417 * finalized (todo: analyze and insert hints about that?).
7418 *
7419 * If we can, we try to get the correct register for argument variables. This
7420 * is assuming that most argument variables are fetched as close as possible
7421 * to the actual call, so that there aren't any interfering hidden calls
7422 * (memory accesses, etc) in between.
7423 *
7424 * If we cannot, or it's a local variable, we make sure no argument registers
7425 * that will be used by this MC block will be allocated here, and we always
7426 * prefer non-volatile registers to avoid needing to spill stuff for internal
7427 * calls.
7428 */
7429 /** @todo Detect too early argument value fetches and warn about hidden
7430 * calls causing less optimal code to be generated in the python script. */
7431
7432 uint8_t const uArgNo = pVar->uArgNo;
7433 if ( uArgNo < RT_ELEMENTS(g_aidxIemNativeCallRegs)
7434 && !(pReNative->Core.bmHstRegs & RT_BIT_32(g_aidxIemNativeCallRegs[uArgNo])))
7435 {
7436 idxReg = g_aidxIemNativeCallRegs[uArgNo];
7437
7438#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7439 /* Writeback any dirty shadow registers we are about to unshadow. */
7440 *poff = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, *poff, idxReg);
7441#endif
7442
7443 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7444 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (matching arg %u)\n", idxVar, idxReg, uArgNo));
7445 }
7446 else if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstRegs)
7447 || (pReNative->Core.bmHstRegs & RT_BIT_32(idxRegPref)))
7448 {
7449 /** @todo there must be a better way for this and boot cArgsX? */
7450 uint32_t const fNotArgsMask = ~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgsX, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7451 uint32_t const fRegs = ~pReNative->Core.bmHstRegs
7452 & ~pReNative->Core.bmHstRegsWithGstShadow
7453 & (~IEMNATIVE_REG_FIXED_MASK & IEMNATIVE_HST_GREG_MASK)
7454 & fNotArgsMask;
7455 if (fRegs)
7456 {
7457 /* Pick from the top as that both arm64 and amd64 have a block of non-volatile registers there. */
7458 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK
7459 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_GREG_MASK : fRegs) - 1;
7460 Assert(pReNative->Core.aHstRegs[idxReg].fGstRegShadows == 0);
7461 Assert(!(pReNative->Core.bmHstRegsWithGstShadow & RT_BIT_32(idxReg)));
7462 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7463 }
7464 else
7465 {
7466 idxReg = iemNativeRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7467 IEMNATIVE_HST_GREG_MASK & ~IEMNATIVE_REG_FIXED_MASK & fNotArgsMask);
7468 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7469 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7470 }
7471 }
7472 else
7473 {
7474 idxReg = idxRegPref;
7475 iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7476 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7477 }
7478 iemNativeRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7479 pVar->idxReg = idxReg;
7480
7481#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7482 pVar->fSimdReg = false;
7483#endif
7484
7485 /*
7486 * Load it off the stack if we've got a stack slot.
7487 */
7488 uint8_t const idxStackSlot = pVar->idxStackSlot;
7489 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7490 {
7491 Assert(fInitialized);
7492 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7493 switch (pVar->cbVar)
7494 {
7495 case 1: *poff = iemNativeEmitLoadGprByBpU8( pReNative, *poff, idxReg, offDispBp); break;
7496 case 2: *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp); break;
7497 case 3: AssertFailed(); RT_FALL_THRU();
7498 case 4: *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp); break;
7499 default: AssertFailed(); RT_FALL_THRU();
7500 case 8: *poff = iemNativeEmitLoadGprByBp( pReNative, *poff, idxReg, offDispBp); break;
7501 }
7502 }
7503 else
7504 {
7505 Assert(idxStackSlot == UINT8_MAX);
7506 if (pVar->enmKind != kIemNativeVarKind_Immediate)
7507 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7508 else
7509 {
7510 /*
7511 * Convert from immediate to stack/register. This is currently only
7512 * required by IEM_MC_ADD_LOCAL_S16_TO_EFF_ADDR, IEM_MC_ADD_LOCAL_S32_TO_EFF_ADDR
7513 * and IEM_MC_ADD_LOCAL_S64_TO_EFF_ADDR in connection with BT, BTS, BTR, and BTC.
7514 */
7515 AssertStmt(fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7516 Log11(("iemNativeVarRegisterAcquire: idxVar=%#x idxReg=%u uValue=%RX64 converting from immediate to stack\n",
7517 idxVar, idxReg, pVar->u.uValue));
7518 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7519 pVar->enmKind = kIemNativeVarKind_Stack;
7520 }
7521 }
7522
7523 pVar->fRegAcquired = true;
7524 return idxReg;
7525}
7526
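/*
 * Typical usage sketch, illustrative only (idxRegOther and the emitted
 * operation are assumed): acquire the host register, emit code that uses it,
 * then release it so the allocator may spill or reuse it:
 *
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquire(pReNative, idxVar, &off, true /*fInitialized*/);
 *      off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegOther, idxVarReg);
 *      iemNativeVarRegisterRelease(pReNative, idxVar);
 */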
7527
7528#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7529/**
7530 * Makes sure variable @a idxVar has a SIMD register assigned to it and that it stays
7531 * fixed till we call iemNativeVarRegisterRelease.
7532 *
7533 * @returns The host register number.
7534 * @param pReNative The recompiler state.
7535 * @param idxVar The variable.
7536 * @param poff Pointer to the instruction buffer offset.
7537 * In case a register needs to be freed up or the value
7538 * loaded off the stack.
7539 * @param fInitialized Set if the variable must already have been initialized.
7540 * Will throw VERR_IEM_VAR_NOT_INITIALIZED if this is not
7541 * the case.
7542 * @param idxRegPref Preferred SIMD register number or UINT8_MAX.
7543 */
7544DECL_HIDDEN_THROW(uint8_t) iemNativeVarSimdRegisterAcquire(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, uint32_t *poff,
7545 bool fInitialized /*= false*/, uint8_t idxRegPref /*= UINT8_MAX*/)
7546{
7547 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7548 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7549 Assert( pVar->cbVar == sizeof(RTUINT128U)
7550 || pVar->cbVar == sizeof(RTUINT256U));
7551 Assert(!pVar->fRegAcquired);
7552
7553 uint8_t idxReg = pVar->idxReg;
7554 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs))
7555 {
7556 Assert( pVar->enmKind > kIemNativeVarKind_Invalid
7557 && pVar->enmKind < kIemNativeVarKind_End);
7558 pVar->fRegAcquired = true;
7559 return idxReg;
7560 }
7561
7562 /*
7563 * If the kind of variable has not yet been set, default to 'stack'.
7564 */
7565 Assert( pVar->enmKind >= kIemNativeVarKind_Invalid
7566 && pVar->enmKind < kIemNativeVarKind_End);
7567 if (pVar->enmKind == kIemNativeVarKind_Invalid)
7568 iemNativeVarSetKindToStack(pReNative, idxVar);
7569
7570 /*
7571 * We have to allocate a register for the variable, even if it's a stack one,
7572 * as we don't know if there are modifications being made to it before it's
7573 * finalized (todo: analyze and insert hints about that?).
7574 *
7575 * If we can, we try to get the correct register for argument variables. This
7576 * is assuming that most argument variables are fetched as close as possible
7577 * to the actual call, so that there aren't any interfering hidden calls
7578 * (memory accesses, etc) in between.
7579 *
7580 * If we cannot, or it's a local variable, we make sure no argument registers
7581 * that will be used by this MC block will be allocated here, and we always
7582 * prefer non-volatile registers to avoid needing to spill stuff for internal
7583 * calls.
7584 */
7585 /** @todo Detect too early argument value fetches and warn about hidden
7586 * calls causing less optimal code to be generated in the python script. */
7587
7588 uint8_t const uArgNo = pVar->uArgNo;
7589 Assert(uArgNo == UINT8_MAX); RT_NOREF(uArgNo); /* No SIMD registers as arguments for now. */
7590
7591 /* SIMD is a bit simpler for now because there is no support for arguments. */
7592 if ( idxRegPref >= RT_ELEMENTS(pReNative->Core.aHstSimdRegs)
7593 || (pReNative->Core.bmHstSimdRegs & RT_BIT_32(idxRegPref)))
7594 {
7595 uint32_t const fNotArgsMask = UINT32_MAX; //~g_afIemNativeCallRegs[RT_MIN(pReNative->cArgs, IEMNATIVE_CALL_ARG_GREG_COUNT)];
7596 uint32_t const fRegs = ~pReNative->Core.bmHstSimdRegs
7597 & ~pReNative->Core.bmHstSimdRegsWithGstShadow
7598 & (~IEMNATIVE_SIMD_REG_FIXED_MASK & IEMNATIVE_HST_SIMD_REG_MASK)
7599 & fNotArgsMask;
7600 if (fRegs)
7601 {
7602 idxReg = (uint8_t)ASMBitLastSetU32( fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK
7603 ? fRegs & ~IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK : fRegs) - 1;
7604 Assert(pReNative->Core.aHstSimdRegs[idxReg].fGstRegShadows == 0);
7605 Assert(!(pReNative->Core.bmHstSimdRegsWithGstShadow & RT_BIT_32(idxReg)));
7606 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7607 }
7608 else
7609 {
7610 idxReg = iemNativeSimdRegAllocFindFree(pReNative, poff, false /*fPreferVolatile*/,
7611 IEMNATIVE_HST_SIMD_REG_MASK & ~IEMNATIVE_SIMD_REG_FIXED_MASK & fNotArgsMask);
7612 AssertStmt(idxReg != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_ALLOCATOR_NO_FREE_VAR));
7613 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (slow, uArgNo=%u)\n", idxVar, idxReg, uArgNo));
7614 }
7615 }
7616 else
7617 {
7618 idxReg = idxRegPref;
7619 AssertReleaseFailed(); //iemNativeRegClearGstRegShadowing(pReNative, idxReg, *poff);
7620 Log11(("iemNativeVarSimdRegisterAcquire: idxVar=%#x idxReg=%u (preferred)\n", idxVar, idxReg));
7621 }
7622 iemNativeSimdRegMarkAllocated(pReNative, idxReg, kIemNativeWhat_Var, idxVar);
7623
7624 pVar->fSimdReg = true;
7625 pVar->idxReg = idxReg;
7626
7627 /*
7628 * Load it off the stack if we've got a stack slot.
7629 */
7630 uint8_t const idxStackSlot = pVar->idxStackSlot;
7631 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
7632 {
7633 Assert(fInitialized);
7634 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7635 switch (pVar->cbVar)
7636 {
7637 case sizeof(RTUINT128U): *poff = iemNativeEmitLoadVecRegByBpU128(pReNative, *poff, idxReg, offDispBp); break;
7638 default: AssertFailed(); RT_FALL_THRU();
7639 case sizeof(RTUINT256U): *poff = iemNativeEmitLoadVecRegByBpU256(pReNative, *poff, idxReg, offDispBp); break;
7640 }
7641 }
7642 else
7643 {
7644 Assert(idxStackSlot == UINT8_MAX);
7645 AssertStmt(!fInitialized, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
7646 }
7647 pVar->fRegAcquired = true;
7648 return idxReg;
7649}
7650#endif
7651
7652
7653/**
7654 * The value of variable @a idxVar will be written in full to the @a enmGstReg
7655 * guest register.
7656 *
7657 * This function makes sure there is a register for it and sets it to be the
7658 * current shadow copy of @a enmGstReg.
7659 *
7660 * @returns The host register number.
7661 * @param pReNative The recompiler state.
7662 * @param idxVar The variable.
7663 * @param enmGstReg The guest register this variable will be written to
7664 * after this call.
7665 * @param poff Pointer to the instruction buffer offset.
7666 * In case a register needs to be freed up or if the
7667 * variable content needs to be loaded off the stack.
7668 *
7669 * @note We DO NOT expect @a idxVar to be an argument variable,
7670 * because we are only in the commit stage of an instruction when this
7671 * function is used.
7672 */
7673DECL_HIDDEN_THROW(uint8_t)
7674iemNativeVarRegisterAcquireForGuestReg(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar, IEMNATIVEGSTREG enmGstReg, uint32_t *poff)
7675{
7676 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7677 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
7678 Assert(!pVar->fRegAcquired);
7679 AssertMsgStmt( pVar->cbVar <= 8
7680 && ( pVar->enmKind == kIemNativeVarKind_Immediate
7681 || pVar->enmKind == kIemNativeVarKind_Stack),
7682 ("idxVar=%#x cbVar=%d enmKind=%d enmGstReg=%s\n", idxVar, pVar->cbVar,
7683 pVar->enmKind, g_aGstShadowInfo[enmGstReg].pszName),
7684 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7685
7686 /*
7687 * This shouldn't ever be used for arguments, unless it's in a weird else
7688 * branch that doesn't do any calling and even then it's questionable.
7689 *
7690 * However, in case someone writes crazy wrong MC code and does register
7691 * updates before making calls, just use the regular register allocator to
7692 * ensure we get a register suitable for the intended argument number.
7693 */
7694 AssertStmt(pVar->uArgNo == UINT8_MAX, iemNativeVarRegisterAcquire(pReNative, idxVar, poff));
7695
7696 /*
7697 * If there is already a register for the variable, we transfer/set the
7698 * guest shadow copy assignment to it.
7699 */
7700 uint8_t idxReg = pVar->idxReg;
7701 if (idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
7702 {
7703#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
7704 if (enmGstReg >= kIemNativeGstReg_GprFirst && enmGstReg <= kIemNativeGstReg_GprLast)
7705 {
7706# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
7707 iemNativeDbgInfoAddNativeOffset(pReNative, *poff);
7708 iemNativeDbgInfoAddGuestRegDirty(pReNative, false /*fSimdReg*/, enmGstReg, idxReg);
7709# endif
7710 pReNative->Core.bmGstRegShadowDirty |= RT_BIT_64(enmGstReg);
7711 }
7712#endif
7713
7714 if (pReNative->Core.bmGstRegShadows & RT_BIT_64(enmGstReg))
7715 {
7716 uint8_t const idxRegOld = pReNative->Core.aidxGstRegShadows[enmGstReg];
7717 iemNativeRegTransferGstRegShadowing(pReNative, idxRegOld, idxReg, enmGstReg, *poff);
7718 Log12(("iemNativeVarRegisterAcquireForGuestReg: Moved %s for guest %s into %s for full write\n",
7719 g_apszIemNativeHstRegNames[idxRegOld], g_aGstShadowInfo[enmGstReg].pszName, g_apszIemNativeHstRegNames[idxReg]));
7720 }
7721 else
7722 {
7723 iemNativeRegMarkAsGstRegShadow(pReNative, idxReg, enmGstReg, *poff);
7724 Log12(("iemNativeVarRegisterAcquireForGuestReg: Marking %s as copy of guest %s (full write)\n",
7725 g_apszIemNativeHstRegNames[idxReg], g_aGstShadowInfo[enmGstReg].pszName));
7726 }
7727 /** @todo figure this one out. We need some way of making sure the register isn't
7728 * modified after this point, just in case we start writing crappy MC code. */
7729 pVar->enmGstReg = enmGstReg;
7730 pVar->fRegAcquired = true;
7731 return idxReg;
7732 }
7733 Assert(pVar->uArgNo == UINT8_MAX);
7734
7735 /*
7736 * Because this is supposed to be the commit stage, we just tag along with the
7737 * temporary register allocator and upgrade it to a variable register.
7738 */
7739 idxReg = iemNativeRegAllocTmpForGuestReg(pReNative, poff, enmGstReg, kIemNativeGstRegUse_ForFullWrite);
7740 Assert(pReNative->Core.aHstRegs[idxReg].enmWhat == kIemNativeWhat_Tmp);
7741 Assert(pReNative->Core.aHstRegs[idxReg].idxVar == UINT8_MAX);
7742 pReNative->Core.aHstRegs[idxReg].enmWhat = kIemNativeWhat_Var;
7743 pReNative->Core.aHstRegs[idxReg].idxVar = idxVar;
7744 pVar->idxReg = idxReg;
7745
7746 /*
7747 * Now we need to load the register value.
7748 */
7749 if (pVar->enmKind == kIemNativeVarKind_Immediate)
7750 *poff = iemNativeEmitLoadGprImm64(pReNative, *poff, idxReg, pVar->u.uValue);
7751 else
7752 {
7753 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7754 int32_t const offDispBp = iemNativeStackCalcBpDisp(idxStackSlot);
7755 switch (pVar->cbVar)
7756 {
7757 case sizeof(uint64_t):
7758 *poff = iemNativeEmitLoadGprByBp(pReNative, *poff, idxReg, offDispBp);
7759 break;
7760 case sizeof(uint32_t):
7761 *poff = iemNativeEmitLoadGprByBpU32(pReNative, *poff, idxReg, offDispBp);
7762 break;
7763 case sizeof(uint16_t):
7764 *poff = iemNativeEmitLoadGprByBpU16(pReNative, *poff, idxReg, offDispBp);
7765 break;
7766 case sizeof(uint8_t):
7767 *poff = iemNativeEmitLoadGprByBpU8(pReNative, *poff, idxReg, offDispBp);
7768 break;
7769 default:
7770 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_6));
7771 }
7772 }
7773
7774 pVar->fRegAcquired = true;
7775 return idxReg;
7776}
7777
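/*
 * Rough commit-stage sketch; the store helper named here is an assumption for
 * illustration, not the exact MC emitter code.  The value ends up in a
 * register that now shadows the guest register, and the caller then emits the
 * actual write to the CPUMCTX member:
 *
 *      uint8_t const idxVarReg = iemNativeVarRegisterAcquireForGuestReg(pReNative, idxVar, enmGstReg, &off);
 *      off = iemNativeEmitStoreGprToGstRegT(pReNative, off, idxVarReg, enmGstReg);   // hypothetical helper
 *      iemNativeVarRegisterRelease(pReNative, idxVar);
 */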
7778
7779/**
7780 * Emit code to save volatile registers prior to a call to a helper (TLB miss).
7781 *
7782 * This is used together with iemNativeVarRestoreVolatileRegsPostHlpCall() and
7783 * optionally iemNativeRegRestoreGuestShadowsInVolatileRegs() to bypass the
7784 * requirement of flushing anything in volatile host registers when making a
7785 * call.
7786 *
7787 * @returns New @a off value.
7788 * @param pReNative The recompiler state.
7789 * @param off The code buffer position.
7790 * @param fHstRegsNotToSave Set of registers not to save & restore.
7791 */
7792DECL_HIDDEN_THROW(uint32_t)
7793iemNativeVarSaveVolatileRegsPreHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7794{
7795 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK & ~fHstRegsNotToSave;
7796 if (fHstRegs)
7797 {
7798 do
7799 {
7800 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7801 fHstRegs &= ~RT_BIT_32(idxHstReg);
7802
7803 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7804 {
7805 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7806 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7807 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7808 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7809 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7810 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7811 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7812 {
7813 case kIemNativeVarKind_Stack:
7814 {
7815 /* Temporarily spill the variable register. */
7816 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7817 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7818 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7819 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7820 continue;
7821 }
7822
7823 case kIemNativeVarKind_Immediate:
7824 case kIemNativeVarKind_VarRef:
7825 case kIemNativeVarKind_GstRegRef:
7826 /* It is weird to have any of these loaded at this point. */
7827 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7828 continue;
7829
7830 case kIemNativeVarKind_End:
7831 case kIemNativeVarKind_Invalid:
7832 break;
7833 }
7834 AssertFailed();
7835 }
7836 else
7837 {
7838 /*
7839 * Allocate a temporary stack slot and spill the register to it.
7840 */
7841 unsigned const idxStackSlot = ASMBitLastSetU32(~pReNative->Core.bmStack) - 1;
7842 AssertStmt(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS,
7843 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_OUT_OF_STACK_SLOTS));
7844 pReNative->Core.bmStack |= RT_BIT_32(idxStackSlot);
7845 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = (uint8_t)idxStackSlot;
7846 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7847 idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7848 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7849 }
7850 } while (fHstRegs);
7851 }
7852#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7853
7854 /*
7855 * Guest register shadows are flushed to CPUMCTX at the moment and don't need a stack slot allocated,
7856 * which would be more difficult anyway due to spanning multiple stack slots and different sizes
7857 * (besides, we only have a limited amount of slots at the moment).
7858 *
7859 * However, the shadows need to be flushed out as the guest SIMD registers might get corrupted by
7860 * the callee. This asserts that the registers were written back earlier and are not in the dirty state.
7861 */
7862 iemNativeSimdRegFlushGuestShadowsByHostMask(pReNative, IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK);
7863
7864 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7865 if (fHstRegs)
7866 {
7867 do
7868 {
7869 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7870 fHstRegs &= ~RT_BIT_32(idxHstReg);
7871
7872 /* Fixed reserved and temporary registers don't need saving. */
7873 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved
7874 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp)
7875 continue;
7876
7877 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
7878
7879 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
7880 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7881 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7882 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7883 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
7884 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
7885 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
7886 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
7887 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7888 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7889 {
7890 case kIemNativeVarKind_Stack:
7891 {
7892 /* Temporarily spill the variable register. */
7893 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
7894 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7895 Log12(("iemNativeVarSaveVolatileRegsPreHlpCall: spilling idxVar=%#x/idxReg=%d onto the stack (slot %#x bp+%d, off=%#x)\n",
7896 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7897 if (cbVar == sizeof(RTUINT128U))
7898 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7899 else
7900 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxHstReg);
7901 continue;
7902 }
7903
7904 case kIemNativeVarKind_Immediate:
7905 case kIemNativeVarKind_VarRef:
7906 case kIemNativeVarKind_GstRegRef:
7907 /* It is weird to have any of these loaded at this point. */
7908 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7909 continue;
7910
7911 case kIemNativeVarKind_End:
7912 case kIemNativeVarKind_Invalid:
7913 break;
7914 }
7915 AssertFailed();
7916 } while (fHstRegs);
7917 }
7918#endif
7919 return off;
7920}
7921
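/*
 * Usage pattern sketch, illustrative only (the helper-call emitter in the
 * middle is assumed context): bracket a TLB-miss style helper call so that
 * volatile host registers holding variables survive it:
 *
 *      off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
 *      // ... emit the actual helper call here ...
 *      off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
 */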
7922
7923/**
7924 * Emit code to restore volatile registers after a call to a helper.
7925 *
7926 * @returns New @a off value.
7927 * @param pReNative The recompiler state.
7928 * @param off The code buffer position.
7929 * @param fHstRegsNotToSave Set of registers not to save & restore.
7930 * @see iemNativeVarSaveVolatileRegsPreHlpCall(),
7931 * iemNativeRegRestoreGuestShadowsInVolatileRegs()
7932 */
7933DECL_HIDDEN_THROW(uint32_t)
7934iemNativeVarRestoreVolatileRegsPostHlpCall(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fHstRegsNotToSave)
7935{
7936 uint32_t fHstRegs = pReNative->Core.bmHstRegs & IEMNATIVE_CALL_VOLATILE_NOTMP_GREG_MASK & ~fHstRegsNotToSave;
7937 if (fHstRegs)
7938 {
7939 do
7940 {
7941 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7942 fHstRegs &= ~RT_BIT_32(idxHstReg);
7943
7944 if (pReNative->Core.aHstRegs[idxHstReg].enmWhat == kIemNativeWhat_Var)
7945 {
7946 uint8_t const idxVar = pReNative->Core.aHstRegs[idxHstReg].idxVar;
7947 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
7948 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
7949 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
7950 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg,
7951 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
7952 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
7953 {
7954 case kIemNativeVarKind_Stack:
7955 {
7956 /* Unspill the variable register. */
7957 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
7958 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
7959 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
7960 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7961 continue;
7962 }
7963
7964 case kIemNativeVarKind_Immediate:
7965 case kIemNativeVarKind_VarRef:
7966 case kIemNativeVarKind_GstRegRef:
7967 /* It is weird to have any of these loaded at this point. */
7968 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
7969 continue;
7970
7971 case kIemNativeVarKind_End:
7972 case kIemNativeVarKind_Invalid:
7973 break;
7974 }
7975 AssertFailed();
7976 }
7977 else
7978 {
7979 /*
7980 * Restore from temporary stack slot.
7981 */
7982 uint8_t const idxStackSlot = pReNative->Core.aHstRegs[idxHstReg].idxStackSlot;
7983 AssertContinue(idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS && (pReNative->Core.bmStack & RT_BIT_32(idxStackSlot)));
7984 pReNative->Core.bmStack &= ~RT_BIT_32(idxStackSlot);
7985 pReNative->Core.aHstRegs[idxHstReg].idxStackSlot = UINT8_MAX;
7986
7987 off = iemNativeEmitLoadGprByBp(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
7988 }
7989 } while (fHstRegs);
7990 }
7991#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
7992 fHstRegs = pReNative->Core.bmHstSimdRegs & IEMNATIVE_CALL_VOLATILE_SIMD_REG_MASK;
7993 if (fHstRegs)
7994 {
7995 do
7996 {
7997 unsigned int const idxHstReg = ASMBitFirstSetU32(fHstRegs) - 1;
7998 fHstRegs &= ~RT_BIT_32(idxHstReg);
7999
8000 if ( pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedTmp
8001 || pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_FixedReserved)
8002 continue;
8003 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].enmWhat == kIemNativeWhat_Var);
8004
8005 uint8_t const idxVar = pReNative->Core.aHstSimdRegs[idxHstReg].idxVar;
8006 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8007 AssertStmt( IEMNATIVE_VAR_IDX_UNPACK(idxVar) < RT_ELEMENTS(pReNative->Core.aVars)
8008 && (pReNative->Core.bmVars & RT_BIT_32(IEMNATIVE_VAR_IDX_UNPACK(idxVar)))
8009 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].idxReg == idxHstReg
8010 && pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].fSimdReg
8011 && ( pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT128U)
8012 || pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar == sizeof(RTUINT256U)),
8013 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_12));
8014 switch (pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].enmKind)
8015 {
8016 case kIemNativeVarKind_Stack:
8017 {
8018 /* Unspill the variable register. */
8019 uint8_t const cbVar = pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)].cbVar;
8020 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, idxVar);
8021 Log12(("iemNativeVarRestoreVolatileRegsPostHlpCall: unspilling idxVar=%#x/idxReg=%d (slot %#x bp+%d, off=%#x)\n",
8022 idxVar, idxHstReg, idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8023
8024 if (cbVar == sizeof(RTUINT128U))
8025 off = iemNativeEmitLoadVecRegByBpU128(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8026 else
8027 off = iemNativeEmitLoadVecRegByBpU256(pReNative, off, idxHstReg, iemNativeStackCalcBpDisp(idxStackSlot));
8028 continue;
8029 }
8030
8031 case kIemNativeVarKind_Immediate:
8032 case kIemNativeVarKind_VarRef:
8033 case kIemNativeVarKind_GstRegRef:
8034 /* It is weird to have any of these loaded at this point. */
8035 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_13));
8036 continue;
8037
8038 case kIemNativeVarKind_End:
8039 case kIemNativeVarKind_Invalid:
8040 break;
8041 }
8042 AssertFailed();
8043 } while (fHstRegs);
8044 }
8045#endif
8046 return off;
8047}
8048
8049
8050/**
8051 * Worker that frees the stack slots for variable @a idxVar if any allocated.
8052 *
8053 * This is used both by iemNativeVarFreeOneWorker and iemNativeEmitCallCommon.
8054 *
8055 * ASSUMES that @a idxVar is valid and unpacked.
8056 */
8057DECL_FORCE_INLINE(void) iemNativeVarFreeStackSlots(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8058{
8059 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars)); /* unpacked! */
8060 uint8_t const idxStackSlot = pReNative->Core.aVars[idxVar].idxStackSlot;
8061 if (idxStackSlot < IEMNATIVE_FRAME_VAR_SLOTS)
8062 {
8063 uint8_t const cbVar = pReNative->Core.aVars[idxVar].cbVar;
8064 uint8_t const cSlots = (cbVar + sizeof(uint64_t) - 1) / sizeof(uint64_t);
8065 uint32_t const fAllocMask = (uint32_t)(RT_BIT_32(cSlots) - 1U);
8066 Assert(cSlots > 0);
8067 Assert(((pReNative->Core.bmStack >> idxStackSlot) & fAllocMask) == fAllocMask);
8068 Log11(("iemNativeVarFreeStackSlots: idxVar=%d/%#x iSlot=%#x/%#x (cbVar=%#x)\n",
8069 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxStackSlot, fAllocMask, cbVar));
8070 pReNative->Core.bmStack &= ~(fAllocMask << idxStackSlot);
8071 pReNative->Core.aVars[idxVar].idxStackSlot = UINT8_MAX;
8072 }
8073 else
8074 Assert(idxStackSlot == UINT8_MAX);
8075}
8076
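/*
 * Worked example of the mask math above, illustrative only: a 16 byte variable
 * (cbVar == 16) occupies cSlots == 2, so fAllocMask == 0x3; if it lives at
 * stack slot 4, the bits cleared from bmStack are 0x3 << 4 == 0x30.
 */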
8077
8078/**
8079 * Worker that frees a single variable.
8080 *
8081 * ASSUMES that @a idxVar is valid and unpacked.
8082 */
8083DECLHIDDEN(void) iemNativeVarFreeOneWorker(PIEMRECOMPILERSTATE pReNative, uint8_t idxVar)
8084{
8085 Assert( pReNative->Core.aVars[idxVar].enmKind >= kIemNativeVarKind_Invalid /* Including invalid as we may have unused */
8086 && pReNative->Core.aVars[idxVar].enmKind < kIemNativeVarKind_End); /* variables in conditional branches. */
8087 Assert(!pReNative->Core.aVars[idxVar].fRegAcquired);
8088
8089 /* Free the host register first if any assigned. */
8090 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8091#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8092 if ( idxHstReg != UINT8_MAX
8093 && pReNative->Core.aVars[idxVar].fSimdReg)
8094 {
8095 Assert(idxHstReg < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8096 Assert(pReNative->Core.aHstSimdRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8097 pReNative->Core.aHstSimdRegs[idxHstReg].idxVar = UINT8_MAX;
8098 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxHstReg);
8099 }
8100 else
8101#endif
8102 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8103 {
8104 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8105 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8106 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8107 }
8108
8109 /* Free argument mapping. */
8110 uint8_t const uArgNo = pReNative->Core.aVars[idxVar].uArgNo;
8111 if (uArgNo < RT_ELEMENTS(pReNative->Core.aidxArgVars))
8112 pReNative->Core.aidxArgVars[uArgNo] = UINT8_MAX;
8113
8114 /* Free the stack slots. */
8115 iemNativeVarFreeStackSlots(pReNative, idxVar);
8116
8117 /* Free the actual variable. */
8118 pReNative->Core.aVars[idxVar].enmKind = kIemNativeVarKind_Invalid;
8119 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8120}
8121
8122
8123/**
8124 * Worker for iemNativeVarFreeAll that's called when there is anything to do.
8125 */
8126DECLHIDDEN(void) iemNativeVarFreeAllSlow(PIEMRECOMPILERSTATE pReNative, uint32_t bmVars)
8127{
8128 while (bmVars != 0)
8129 {
8130 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8131 bmVars &= ~RT_BIT_32(idxVar);
8132
8133#if 1 /** @todo optimize by simplifying this later... */
8134 iemNativeVarFreeOneWorker(pReNative, idxVar);
8135#else
8136 /* Only need to free the host register, the rest is done as bulk updates below. */
8137 uint8_t const idxHstReg = pReNative->Core.aVars[idxVar].idxReg;
8138 if (idxHstReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8139 {
8140 Assert(pReNative->Core.aHstRegs[idxHstReg].idxVar == IEMNATIVE_VAR_IDX_PACK(idxVar));
8141 pReNative->Core.aHstRegs[idxHstReg].idxVar = UINT8_MAX;
8142 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxHstReg);
8143 }
8144#endif
8145 }
8146#if 0 /** @todo optimize by simplifying this later... */
8147 pReNative->Core.bmVars = 0;
8148 pReNative->Core.bmStack = 0;
8149 pReNative->Core.u64ArgVars = UINT64_MAX;
8150#endif
8151}
8152
8153
8154
8155/*********************************************************************************************************************************
8156* Emitters for IEM_MC_CALL_CIMPL_XXX *
8157*********************************************************************************************************************************/
8158
8159/**
8160 * Emits code to load a reference to the given guest register into @a idxGprDst.
8161 */
8162DECL_HIDDEN_THROW(uint32_t)
8163iemNativeEmitLeaGprByGstRegRef(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t idxGprDst,
8164 IEMNATIVEGSTREGREF enmClass, uint8_t idxRegInClass)
8165{
8166#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
8167 /** @todo If we are ever going to allow referencing the RIP register, we need to update the guest value here. */
8168#endif
8169
8170 /*
8171 * Get the offset relative to the CPUMCTX structure.
8172 */
8173 uint32_t offCpumCtx;
8174 switch (enmClass)
8175 {
8176 case kIemNativeGstRegRef_Gpr:
8177 Assert(idxRegInClass < 16);
8178 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[idxRegInClass]);
8179 break;
8180
8181 case kIemNativeGstRegRef_GprHighByte: /**< AH, CH, DH, BH */
8182 Assert(idxRegInClass < 4);
8183 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, aGRegs[0].bHi) + idxRegInClass * sizeof(CPUMCTXGREG);
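/* The high byte registers alias the second byte (bHi) of GPRs 0..3, so the offset
   is that of aGRegs[0].bHi plus one whole CPUMCTXGREG per register index. */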
8184 break;
8185
8186 case kIemNativeGstRegRef_EFlags:
8187 Assert(idxRegInClass == 0);
8188 offCpumCtx = RT_UOFFSETOF(CPUMCTX, eflags);
8189 break;
8190
8191 case kIemNativeGstRegRef_MxCsr:
8192 Assert(idxRegInClass == 0);
8193 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87.MXCSR);
8194 break;
8195
8196 case kIemNativeGstRegRef_FpuReg:
8197 Assert(idxRegInClass < 8);
8198 AssertFailed(); /** @todo what kind of indexing? */
8199 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8200 break;
8201
8202 case kIemNativeGstRegRef_MReg:
8203 Assert(idxRegInClass < 8);
8204 AssertFailed(); /** @todo what kind of indexing? */
8205 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aRegs[idxRegInClass]);
8206 break;
8207
8208 case kIemNativeGstRegRef_XReg:
8209 Assert(idxRegInClass < 16);
8210 offCpumCtx = RT_UOFFSETOF_DYN(CPUMCTX, XState.x87.aXMM[idxRegInClass]);
8211 break;
8212
8213 case kIemNativeGstRegRef_X87: /* Not a register actually but we would just duplicate code otherwise. */
8214 Assert(idxRegInClass == 0);
8215 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState.x87);
8216 break;
8217
8218 case kIemNativeGstRegRef_XState: /* Not a register actually but we would just duplicate code otherwise. */
8219 Assert(idxRegInClass == 0);
8220 offCpumCtx = RT_UOFFSETOF(CPUMCTX, XState);
8221 break;
8222
8223 default:
8224 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_5));
8225 }
8226
8227 /*
8228 * Load the value into the destination register.
8229 */
8230#ifdef RT_ARCH_AMD64
8231 off = iemNativeEmitLeaGprByVCpu(pReNative, off, idxGprDst, offCpumCtx + RT_UOFFSETOF(VMCPUCC, cpum.GstCtx));
8232
8233#elif defined(RT_ARCH_ARM64)
8234 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
8235 Assert(offCpumCtx < 4096);
8236 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxGprDst, IEMNATIVE_REG_FIXED_PCPUMCTX, offCpumCtx);
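/* A single ADD (immediate) suffices here; the unshifted 12-bit immediate form
   limits offCpumCtx to values below 4096, which the assertion above guards. */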
8237
8238#else
8239# error "Port me!"
8240#endif
8241
8242 return off;
8243}
8244
8245
8246/**
8247 * Common code for CIMPL and AIMPL calls.
8248 *
8249 * These are calls that use argument variables and such. They should not be
8250 * confused with internal calls required to implement an MC operation,
8251 * like a TLB load and similar.
8252 *
8253 * Upon return all that is left to do is to load any hidden arguments and
8254 * perform the call. All argument variables are freed.
8255 *
8256 * @returns New code buffer offset; throws VBox status code on error.
8257 * @param pReNative The native recompile state.
8258 * @param off The code buffer offset.
8259 * @param cArgs The total number of arguments (includes hidden
8260 * count).
8261 * @param cHiddenArgs The number of hidden arguments. The hidden
8262 * arguments must not have any variable declared for
8263 * them, whereas all the regular arguments must
8264 * (tstIEMCheckMc ensures this).
8265 * @param fFlushPendingWrites Whether to flush pending writes (default: true).
8266 * Pending writes in call volatile registers are still flushed even when this is false.
8267 */
8268DECL_HIDDEN_THROW(uint32_t)
8269iemNativeEmitCallCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cArgs, uint8_t cHiddenArgs,
8270 bool fFlushPendingWrites /*= true*/)
8271{
8272#ifdef VBOX_STRICT
8273 /*
8274 * Assert sanity.
8275 */
8276 Assert(cArgs <= IEMNATIVE_CALL_MAX_ARG_COUNT);
8277 Assert(cHiddenArgs < IEMNATIVE_CALL_ARG_GREG_COUNT);
8278 for (unsigned i = 0; i < cHiddenArgs; i++)
8279 Assert(pReNative->Core.aidxArgVars[i] == UINT8_MAX);
8280 for (unsigned i = cHiddenArgs; i < cArgs; i++)
8281 {
8282 Assert(pReNative->Core.aidxArgVars[i] != UINT8_MAX); /* checked by tstIEMCheckMc.cpp */
8283 Assert(pReNative->Core.bmVars & RT_BIT_32(pReNative->Core.aidxArgVars[i]));
8284 }
8285 iemNativeRegAssertSanity(pReNative);
8286#endif
8287
8288 /* We don't know what the called function makes use of, so flush any pending register writes. */
8289 RT_NOREF(fFlushPendingWrites);
8290#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8291 if (fFlushPendingWrites)
8292#endif
8293 off = iemNativeRegFlushPendingWrites(pReNative, off);
8294
8295 /*
8296 * Before we do anything else, go over variables that are referenced and
8297 * make sure they are not in a register.
8298 */
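/* Rationale: the VarRef arguments handled below pass the address of the referenced
   variable's stack slot to the callee, so the current value must be spilled to that
   slot rather than sit in a host register that the call will clobber. */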
8299 uint32_t bmVars = pReNative->Core.bmVars;
8300 if (bmVars)
8301 {
8302 do
8303 {
8304 uint8_t const idxVar = ASMBitFirstSetU32(bmVars) - 1;
8305 bmVars &= ~RT_BIT_32(idxVar);
8306
8307 if (pReNative->Core.aVars[idxVar].idxReferrerVar != UINT8_MAX)
8308 {
8309 uint8_t const idxRegOld = pReNative->Core.aVars[idxVar].idxReg;
8310#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8311 if ( idxRegOld != UINT8_MAX
8312 && pReNative->Core.aVars[idxVar].fSimdReg)
8313 {
8314 Assert(idxRegOld < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8315 Assert(pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U) || pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT256U));
8316
8317 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8318 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8319 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8320 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8321 if (pReNative->Core.aVars[idxVar].cbVar == sizeof(RTUINT128U))
8322 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8323 else
8324 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8325
8326 Assert(!( (pReNative->Core.bmGstSimdRegShadowDirtyLo128 | pReNative->Core.bmGstSimdRegShadowDirtyHi128)
8327 & pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows));
8328
8329 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8330 pReNative->Core.bmHstSimdRegs &= ~RT_BIT_32(idxRegOld);
8331 pReNative->Core.bmHstSimdRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8332 pReNative->Core.bmGstSimdRegShadows &= ~pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows;
8333 pReNative->Core.aHstSimdRegs[idxRegOld].fGstRegShadows = 0;
8334 }
8335 else
8336#endif
8337 if (idxRegOld < RT_ELEMENTS(pReNative->Core.aHstRegs))
8338 {
8339 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxVar));
8340 Log12(("iemNativeEmitCallCommon: spilling idxVar=%d/%#x/idxReg=%d (referred to by %d) onto the stack (slot %#x bp+%d, off=%#x)\n",
8341 idxVar, IEMNATIVE_VAR_IDX_PACK(idxVar), idxRegOld, pReNative->Core.aVars[idxVar].idxReferrerVar,
8342 idxStackSlot, iemNativeStackCalcBpDisp(idxStackSlot), off));
8343 off = iemNativeEmitStoreGprByBp(pReNative, off, iemNativeStackCalcBpDisp(idxStackSlot), idxRegOld);
8344
8345 pReNative->Core.aVars[idxVar].idxReg = UINT8_MAX;
8346 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxRegOld);
8347 pReNative->Core.bmHstRegsWithGstShadow &= ~RT_BIT_32(idxRegOld);
8348 pReNative->Core.bmGstRegShadows &= ~pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows;
8349 pReNative->Core.aHstRegs[idxRegOld].fGstRegShadows = 0;
8350 }
8351 }
8352 } while (bmVars != 0);
8353#if 0 //def VBOX_STRICT
8354 iemNativeRegAssertSanity(pReNative);
8355#endif
8356 }
8357
8358 uint8_t const cRegArgs = RT_MIN(cArgs, RT_ELEMENTS(g_aidxIemNativeCallRegs));
8359
8360#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
8361 /*
8362 * At the very first step go over the host registers that will be used for arguments
8363 * don't shadow anything which needs writing back first.
8364 */
8365 for (uint32_t i = 0; i < cRegArgs; i++)
8366 {
8367 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8368
8369 /* Writeback any dirty guest shadows before using this register. */
8370 if (pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows)
8371 off = iemNativeRegFlushDirtyGuestByHostRegShadow(pReNative, off, idxArgReg);
8372 Assert(!(pReNative->Core.bmGstRegShadowDirty & pReNative->Core.aHstRegs[idxArgReg].fGstRegShadows));
8373 }
8374#endif
8375
8376 /*
8377 * First, go over the host registers that will be used for arguments and make
8378 * sure they either hold the desired argument or are free.
8379 */
8380 if (pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cRegArgs])
8381 {
8382 for (uint32_t i = 0; i < cRegArgs; i++)
8383 {
8384 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8385 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8386 {
8387 if (pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Var)
8388 {
8389 uint8_t const idxVar = pReNative->Core.aHstRegs[idxArgReg].idxVar;
8390 IEMNATIVE_ASSERT_VAR_IDX(pReNative, idxVar);
8391 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[IEMNATIVE_VAR_IDX_UNPACK(idxVar)];
8392 Assert(pVar->idxReg == idxArgReg);
8393 uint8_t const uArgNo = pVar->uArgNo;
8394 if (uArgNo == i)
8395 { /* perfect */ }
8396 /* The variable allocator logic should make sure this is impossible,
8397 except for when the return register is used as a parameter (ARM,
8398 but not x86). */
8399#if RT_BIT_32(IEMNATIVE_CALL_RET_GREG) & IEMNATIVE_CALL_ARGS_GREG_MASK
8400 else if (idxArgReg == IEMNATIVE_CALL_RET_GREG && uArgNo != UINT8_MAX)
8401 {
8402# ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8403# error "Implement this"
8404# endif
8405 Assert(uArgNo < IEMNATIVE_CALL_ARG_GREG_COUNT);
8406 uint8_t const idxFinalArgReg = g_aidxIemNativeCallRegs[uArgNo];
8407 AssertStmt(!(pReNative->Core.bmHstRegs & RT_BIT_32(idxFinalArgReg)),
8408 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8409 off = iemNativeRegMoveVar(pReNative, off, idxVar, idxArgReg, idxFinalArgReg, "iemNativeEmitCallCommon");
8410 }
8411#endif
8412 else
8413 {
8414 AssertStmt(uArgNo == UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_10));
8415
8416 if (pVar->enmKind == kIemNativeVarKind_Stack)
8417 off = iemNativeRegMoveOrSpillStackVar(pReNative, off, idxVar);
8418 else
8419 {
8420 /* just free it, can be reloaded if used again */
8421 pVar->idxReg = UINT8_MAX;
8422 pReNative->Core.bmHstRegs &= ~RT_BIT_32(idxArgReg);
8423 iemNativeRegClearGstRegShadowing(pReNative, idxArgReg, off);
8424 }
8425 }
8426 }
8427 else
8428 AssertStmt(pReNative->Core.aHstRegs[idxArgReg].enmWhat == kIemNativeWhat_Arg,
8429 IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_REG_IPE_8));
8430 }
8431 }
8432#if 0 //def VBOX_STRICT
8433 iemNativeRegAssertSanity(pReNative);
8434#endif
8435 }
8436
8437 Assert(!(pReNative->Core.bmHstRegs & g_afIemNativeCallRegs[cHiddenArgs])); /* No variables for hidden arguments. */
8438
8439#ifdef IEMNATIVE_FP_OFF_STACK_ARG0
8440 /*
8441 * If there are any stack arguments, make sure they are in their place as well.
8442 *
8443 * We can use IEMNATIVE_CALL_ARG0_GREG as temporary register since we'll (or
8444 * the caller) be loading it later and it must be free (see first loop).
8445 */
8446 if (cArgs > IEMNATIVE_CALL_ARG_GREG_COUNT)
8447 {
8448 for (unsigned i = IEMNATIVE_CALL_ARG_GREG_COUNT; i < cArgs; i++)
8449 {
8450 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8451 int32_t const offBpDisp = g_aoffIemNativeCallStackArgBpDisp[i - IEMNATIVE_CALL_ARG_GREG_COUNT];
8452 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8453 {
8454 Assert(pVar->enmKind == kIemNativeVarKind_Stack); /* Imm as well? */
8455 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, pVar->idxReg);
8456 pReNative->Core.bmHstRegs &= ~RT_BIT_32(pVar->idxReg);
8457 pVar->idxReg = UINT8_MAX;
8458 }
8459 else
8460 {
8461 /* Use ARG0 as temp for stuff we need registers for. */
8462 switch (pVar->enmKind)
8463 {
8464 case kIemNativeVarKind_Stack:
8465 {
8466 uint8_t const idxStackSlot = pVar->idxStackSlot;
8467 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8468 off = iemNativeEmitLoadGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG /* is free */,
8469 iemNativeStackCalcBpDisp(idxStackSlot));
8470 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8471 continue;
8472 }
8473
8474 case kIemNativeVarKind_Immediate:
8475 off = iemNativeEmitStoreImm64ByBp(pReNative, off, offBpDisp, pVar->u.uValue);
8476 continue;
8477
8478 case kIemNativeVarKind_VarRef:
8479 {
8480 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8481 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8482 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative, IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8483 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8484 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8485# ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8486 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8487 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8488 if ( fSimdReg
8489 && idxRegOther != UINT8_MAX)
8490 {
8491 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8492 if (cbVar == sizeof(RTUINT128U))
8493 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8494 else
8495 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8496 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8497 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8498 }
8499 else
8500# endif
8501 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8502 {
8503 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8504 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8505 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8506 }
8507 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8508 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8509 off = iemNativeEmitLeaGprByBp(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, offBpDispOther);
8510 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8511 continue;
8512 }
8513
8514 case kIemNativeVarKind_GstRegRef:
8515 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, IEMNATIVE_CALL_ARG0_GREG,
8516 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8517 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDisp, IEMNATIVE_CALL_ARG0_GREG);
8518 continue;
8519
8520 case kIemNativeVarKind_Invalid:
8521 case kIemNativeVarKind_End:
8522 break;
8523 }
8524 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8525 }
8526 }
8527# if 0 //def VBOX_STRICT
8528 iemNativeRegAssertSanity(pReNative);
8529# endif
8530 }
8531#else
8532 AssertCompile(IEMNATIVE_CALL_MAX_ARG_COUNT <= IEMNATIVE_CALL_ARG_GREG_COUNT);
8533#endif
8534
8535 /*
8536 * Make sure the argument variables are loaded into their respective registers.
8537 *
8538 * We can optimize this by ASSUMING that any register allocations are for
8539 * registers that have already been loaded and are ready. The previous step
8540 * saw to that.
8541 */
8542 if (~pReNative->Core.bmHstRegs & (g_afIemNativeCallRegs[cRegArgs] & ~g_afIemNativeCallRegs[cHiddenArgs]))
8543 {
8544 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8545 {
8546 uint8_t const idxArgReg = g_aidxIemNativeCallRegs[i];
8547 if (pReNative->Core.bmHstRegs & RT_BIT_32(idxArgReg))
8548 Assert( pReNative->Core.aHstRegs[idxArgReg].idxVar == IEMNATIVE_VAR_IDX_PACK(pReNative->Core.aidxArgVars[i])
8549 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i
8550 && pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == idxArgReg);
8551 else
8552 {
8553 PIEMNATIVEVAR const pVar = &pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]]; /* unpacked */
8554 if (pVar->idxReg < RT_ELEMENTS(pReNative->Core.aHstRegs))
8555 {
8556 Assert(pVar->enmKind == kIemNativeVarKind_Stack);
8557 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxArgReg, pVar->idxReg);
8558 pReNative->Core.bmHstRegs = (pReNative->Core.bmHstRegs & ~RT_BIT_32(pVar->idxReg))
8559 | RT_BIT_32(idxArgReg);
8560 pVar->idxReg = idxArgReg;
8561 }
8562 else
8563 {
8564 /* Use ARG0 as temp for stuff we need registers for. */
8565 switch (pVar->enmKind)
8566 {
8567 case kIemNativeVarKind_Stack:
8568 {
8569 uint8_t const idxStackSlot = pVar->idxStackSlot;
8570 AssertStmt(idxStackSlot != UINT8_MAX, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_NOT_INITIALIZED));
8571 off = iemNativeEmitLoadGprByBp(pReNative, off, idxArgReg, iemNativeStackCalcBpDisp(idxStackSlot));
8572 continue;
8573 }
8574
8575 case kIemNativeVarKind_Immediate:
8576 off = iemNativeEmitLoadGprImm64(pReNative, off, idxArgReg, pVar->u.uValue);
8577 continue;
8578
8579 case kIemNativeVarKind_VarRef:
8580 {
8581 uint8_t const idxOtherVar = pVar->u.idxRefVar; /* unpacked */
8582 Assert(idxOtherVar < RT_ELEMENTS(pReNative->Core.aVars));
8583 uint8_t const idxStackSlot = iemNativeVarGetStackSlot(pReNative,
8584 IEMNATIVE_VAR_IDX_PACK(idxOtherVar));
8585 int32_t const offBpDispOther = iemNativeStackCalcBpDisp(idxStackSlot);
8586 uint8_t const idxRegOther = pReNative->Core.aVars[idxOtherVar].idxReg;
8587#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8588 bool const fSimdReg = pReNative->Core.aVars[idxOtherVar].fSimdReg;
8589 uint8_t const cbVar = pReNative->Core.aVars[idxOtherVar].cbVar;
8590 if ( fSimdReg
8591 && idxRegOther != UINT8_MAX)
8592 {
8593 Assert(idxRegOther < RT_ELEMENTS(pReNative->Core.aHstSimdRegs));
8594 if (cbVar == sizeof(RTUINT128U))
8595 off = iemNativeEmitStoreVecRegByBpU128(pReNative, off, offBpDispOther, idxRegOther);
8596 else
8597 off = iemNativeEmitStoreVecRegByBpU256(pReNative, off, offBpDispOther, idxRegOther);
8598 iemNativeSimdRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8599 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8600 }
8601 else
8602#endif
8603 if (idxRegOther < RT_ELEMENTS(pReNative->Core.aHstRegs))
8604 {
8605 off = iemNativeEmitStoreGprByBp(pReNative, off, offBpDispOther, idxRegOther);
8606 iemNativeRegFreeVar(pReNative, idxRegOther, true); /** @todo const ref? */
8607 Assert(pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8608 }
8609 Assert( pReNative->Core.aVars[idxOtherVar].idxStackSlot != UINT8_MAX
8610 && pReNative->Core.aVars[idxOtherVar].idxReg == UINT8_MAX);
8611 off = iemNativeEmitLeaGprByBp(pReNative, off, idxArgReg, offBpDispOther);
8612 continue;
8613 }
8614
8615 case kIemNativeVarKind_GstRegRef:
8616 off = iemNativeEmitLeaGprByGstRegRef(pReNative, off, idxArgReg,
8617 pVar->u.GstRegRef.enmClass, pVar->u.GstRegRef.idx);
8618 continue;
8619
8620 case kIemNativeVarKind_Invalid:
8621 case kIemNativeVarKind_End:
8622 break;
8623 }
8624 AssertFailedStmt(IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_VAR_IPE_3));
8625 }
8626 }
8627 }
8628#if 0 //def VBOX_STRICT
8629 iemNativeRegAssertSanity(pReNative);
8630#endif
8631 }
8632#ifdef VBOX_STRICT
8633 else
8634 for (unsigned i = cHiddenArgs; i < cRegArgs; i++)
8635 {
8636 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].uArgNo == i);
8637 Assert(pReNative->Core.aVars[pReNative->Core.aidxArgVars[i]].idxReg == g_aidxIemNativeCallRegs[i]);
8638 }
8639#endif
8640
8641 /*
8642 * Free all argument variables (simplified).
8643 * Their lifetime always expires with the call they are for.
8644 */
8645 /** @todo Make the python script check that arguments aren't used after
8646 * IEM_MC_CALL_XXXX. */
8647 /** @todo There is a special case with IEM_MC_MEM_MAP_U16_RW and friends requiring
8648 * an IEM_MC_MEM_COMMIT_AND_UNMAP_RW after an AIMPL call, typically with
8649 * an argument value. There is also some FPU stuff. */
8650 for (uint32_t i = cHiddenArgs; i < cArgs; i++)
8651 {
8652 uint8_t const idxVar = pReNative->Core.aidxArgVars[i]; /* unpacked */
8653 Assert(idxVar < RT_ELEMENTS(pReNative->Core.aVars));
8654
8655 /* no need to free registers: */
8656 AssertMsg(i < IEMNATIVE_CALL_ARG_GREG_COUNT
8657 ? pReNative->Core.aVars[idxVar].idxReg == g_aidxIemNativeCallRegs[i]
8658 || pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX
8659 : pReNative->Core.aVars[idxVar].idxReg == UINT8_MAX,
8660 ("i=%d idxVar=%d idxReg=%d, expected %d\n", i, idxVar, pReNative->Core.aVars[idxVar].idxReg,
8661 i < IEMNATIVE_CALL_ARG_GREG_COUNT ? g_aidxIemNativeCallRegs[i] : UINT8_MAX));
8662
8663 pReNative->Core.aidxArgVars[i] = UINT8_MAX;
8664 pReNative->Core.bmVars &= ~RT_BIT_32(idxVar);
8665 iemNativeVarFreeStackSlots(pReNative, idxVar);
8666 }
8667 Assert(pReNative->Core.u64ArgVars == UINT64_MAX);
8668
8669 /*
8670 * Flush volatile registers as we make the call.
8671 */
8672 off = iemNativeRegMoveAndFreeAndFlushAtCall(pReNative, off, cRegArgs);
8673
8674 return off;
8675}
8676
8677
8678
8679/*********************************************************************************************************************************
8680* TLB Lookup. *
8681*********************************************************************************************************************************/
8682
8683/**
8684 * This is called via iemNativeHlpAsmSafeWrapCheckTlbLookup.
8685 */
8686DECLASM(void) iemNativeHlpCheckTlbLookup(PVMCPU pVCpu, uintptr_t uResult, uint64_t GCPtr, uint64_t uSegAndSizeAndAccessAndDisp)
8687{
8688 uint8_t const iSegReg = RT_BYTE1(uSegAndSizeAndAccessAndDisp);
8689 uint8_t const cbMem = RT_BYTE2(uSegAndSizeAndAccessAndDisp);
8690 uint32_t const fAccess = (uint32_t)uSegAndSizeAndAccessAndDisp >> 16;
8691 uint8_t const offDisp = RT_BYTE5(uSegAndSizeAndAccessAndDisp);
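/* Layout of uSegAndSizeAndAccessAndDisp as decoded above: bits 0..7 hold the segment
   register, bits 8..15 the access size, bits 16..31 the access flags and bits 32..39
   the displacement. */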
8692 Log(("iemNativeHlpCheckTlbLookup: %x:%#RX64+%#x LB %#x fAccess=%#x -> %#RX64\n", iSegReg, GCPtr, offDisp, cbMem, fAccess, uResult));
8693
8694 /* Do the lookup manually. */
8695 RTGCPTR const GCPtrFlat = (iSegReg == UINT8_MAX ? GCPtr : GCPtr + pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base) + offDisp;
8696 uint64_t const uTagNoRev = IEMTLB_CALC_TAG_NO_REV(GCPtrFlat);
8697 PCIEMTLBENTRY pTlbe = IEMTLB_TAG_TO_EVEN_ENTRY(&pVCpu->iem.s.DataTlb, uTagNoRev);
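/* The tag selects an even/odd entry pair: the even entry is matched against the
   regular data TLB revision and the odd one against the global revision. */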
8698 if (RT_LIKELY( pTlbe->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevision)
8699 || (pTlbe = pTlbe + 1)->uTag == (uTagNoRev | pVCpu->iem.s.DataTlb.uTlbRevisionGlobal)))
8700 {
8701 /*
8702 * Check TLB page table level access flags.
8703 */
8704 AssertCompile(IEMTLBE_F_PT_NO_USER == 4);
8705 uint64_t const fNoUser = (IEM_GET_CPL(pVCpu) + 1) & IEMTLBE_F_PT_NO_USER;
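/* IEMTLBE_F_PT_NO_USER is bit 2, so CPL + 1 yields 4 only for CPL 3; fNoUser thus
   flags supervisor-only pages exactly when the guest runs in user mode, while
   CPL 0..2 give 1..3 and leave the bit clear. */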
8706 uint64_t const fNoWriteNoDirty = !(fAccess & IEM_ACCESS_TYPE_WRITE) ? 0
8707 : IEMTLBE_F_PT_NO_WRITE | IEMTLBE_F_PT_NO_DIRTY | IEMTLBE_F_PG_NO_WRITE;
8708 uint64_t const fFlagsAndPhysRev = pTlbe->fFlagsAndPhysRev & ( IEMTLBE_F_PHYS_REV | IEMTLBE_F_NO_MAPPINGR3
8709 | IEMTLBE_F_PG_UNASSIGNED
8710 | IEMTLBE_F_PT_NO_ACCESSED
8711 | fNoWriteNoDirty | fNoUser);
8712 uint64_t const uTlbPhysRev = pVCpu->iem.s.DataTlb.uTlbPhysRev;
8713 if (RT_LIKELY(fFlagsAndPhysRev == uTlbPhysRev))
8714 {
8715 /*
8716 * Return the address.
8717 */
8718 uint8_t const * const pbAddr = &pTlbe->pbMappingR3[GCPtrFlat & GUEST_PAGE_OFFSET_MASK];
8719 if ((uintptr_t)pbAddr == uResult)
8720 return;
8721 RT_NOREF(cbMem);
8722 AssertFailed();
8723 }
8724 else
8725 AssertMsgFailed(("fFlagsAndPhysRev=%#RX64 vs uTlbPhysRev=%#RX64: %#RX64\n",
8726 fFlagsAndPhysRev, uTlbPhysRev, fFlagsAndPhysRev ^ uTlbPhysRev));
8727 }
8728 else
8729 AssertFailed();
8730 RT_BREAKPOINT();
8731}
8732
8733/* The rest of the code is in IEMN8veRecompilerTlbLookup.h. */
8734
8735
8736
8737/*********************************************************************************************************************************
8738* Recompiler Core. *
8739*********************************************************************************************************************************/
8740
8741/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
8742static DECLCALLBACK(int) iemNativeDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
8743{
8744 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
8745 pDis->cbCachedInstr += cbMaxRead;
8746 RT_NOREF(cbMinRead);
8747 return VERR_NO_DATA;
8748}
8749
8750
8751DECLHIDDEN(const char *) iemNativeDbgVCpuOffsetToName(uint32_t off)
8752{
8753 static struct { uint32_t off; const char *pszName; } const s_aMembers[] =
8754 {
8755#define ENTRY(a_Member) { (uint32_t)RT_UOFFSETOF(VMCPUCC, a_Member), #a_Member } /* cast is for stupid MSC */
8756 ENTRY(fLocalForcedActions),
8757 ENTRY(iem.s.rcPassUp),
8758 ENTRY(iem.s.fExec),
8759 ENTRY(iem.s.pbInstrBuf),
8760 ENTRY(iem.s.uInstrBufPc),
8761 ENTRY(iem.s.GCPhysInstrBuf),
8762 ENTRY(iem.s.cbInstrBufTotal),
8763 ENTRY(iem.s.idxTbCurInstr),
8764 ENTRY(iem.s.fSkippingEFlags),
8765#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING_DEBUG
8766 ENTRY(iem.s.uPcUpdatingDebug),
8767#endif
8768#ifdef VBOX_WITH_STATISTICS
8769 ENTRY(iem.s.StatNativeTlbHitsForFetch),
8770 ENTRY(iem.s.StatNativeTlbHitsForStore),
8771 ENTRY(iem.s.StatNativeTlbHitsForStack),
8772 ENTRY(iem.s.StatNativeTlbHitsForMapped),
8773 ENTRY(iem.s.StatNativeCodeTlbMissesNewPage),
8774 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPage),
8775 ENTRY(iem.s.StatNativeCodeTlbMissesNewPageWithOffset),
8776 ENTRY(iem.s.StatNativeCodeTlbHitsForNewPageWithOffset),
8777#endif
8778 ENTRY(iem.s.DataTlb.uTlbRevision),
8779 ENTRY(iem.s.DataTlb.uTlbPhysRev),
8780 ENTRY(iem.s.DataTlb.cTlbCoreHits),
8781 ENTRY(iem.s.DataTlb.cTlbInlineCodeHits),
8782 ENTRY(iem.s.DataTlb.cTlbNativeMissTag),
8783 ENTRY(iem.s.DataTlb.cTlbNativeMissFlagsAndPhysRev),
8784 ENTRY(iem.s.DataTlb.cTlbNativeMissAlignment),
8785 ENTRY(iem.s.DataTlb.cTlbNativeMissCrossPage),
8786 ENTRY(iem.s.DataTlb.cTlbNativeMissNonCanonical),
8787 ENTRY(iem.s.DataTlb.aEntries),
8788 ENTRY(iem.s.CodeTlb.uTlbRevision),
8789 ENTRY(iem.s.CodeTlb.uTlbPhysRev),
8790 ENTRY(iem.s.CodeTlb.cTlbCoreHits),
8791 ENTRY(iem.s.CodeTlb.cTlbNativeMissTag),
8792 ENTRY(iem.s.CodeTlb.cTlbNativeMissFlagsAndPhysRev),
8793 ENTRY(iem.s.CodeTlb.cTlbNativeMissAlignment),
8794 ENTRY(iem.s.CodeTlb.cTlbNativeMissCrossPage),
8795 ENTRY(iem.s.CodeTlb.cTlbNativeMissNonCanonical),
8796 ENTRY(iem.s.CodeTlb.aEntries),
8797 ENTRY(pVMR3),
8798 ENTRY(cpum.GstCtx.rax),
8799 ENTRY(cpum.GstCtx.ah),
8800 ENTRY(cpum.GstCtx.rcx),
8801 ENTRY(cpum.GstCtx.ch),
8802 ENTRY(cpum.GstCtx.rdx),
8803 ENTRY(cpum.GstCtx.dh),
8804 ENTRY(cpum.GstCtx.rbx),
8805 ENTRY(cpum.GstCtx.bh),
8806 ENTRY(cpum.GstCtx.rsp),
8807 ENTRY(cpum.GstCtx.rbp),
8808 ENTRY(cpum.GstCtx.rsi),
8809 ENTRY(cpum.GstCtx.rdi),
8810 ENTRY(cpum.GstCtx.r8),
8811 ENTRY(cpum.GstCtx.r9),
8812 ENTRY(cpum.GstCtx.r10),
8813 ENTRY(cpum.GstCtx.r11),
8814 ENTRY(cpum.GstCtx.r12),
8815 ENTRY(cpum.GstCtx.r13),
8816 ENTRY(cpum.GstCtx.r14),
8817 ENTRY(cpum.GstCtx.r15),
8818 ENTRY(cpum.GstCtx.es.Sel),
8819 ENTRY(cpum.GstCtx.es.u64Base),
8820 ENTRY(cpum.GstCtx.es.u32Limit),
8821 ENTRY(cpum.GstCtx.es.Attr),
8822 ENTRY(cpum.GstCtx.cs.Sel),
8823 ENTRY(cpum.GstCtx.cs.u64Base),
8824 ENTRY(cpum.GstCtx.cs.u32Limit),
8825 ENTRY(cpum.GstCtx.cs.Attr),
8826 ENTRY(cpum.GstCtx.ss.Sel),
8827 ENTRY(cpum.GstCtx.ss.u64Base),
8828 ENTRY(cpum.GstCtx.ss.u32Limit),
8829 ENTRY(cpum.GstCtx.ss.Attr),
8830 ENTRY(cpum.GstCtx.ds.Sel),
8831 ENTRY(cpum.GstCtx.ds.u64Base),
8832 ENTRY(cpum.GstCtx.ds.u32Limit),
8833 ENTRY(cpum.GstCtx.ds.Attr),
8834 ENTRY(cpum.GstCtx.fs.Sel),
8835 ENTRY(cpum.GstCtx.fs.u64Base),
8836 ENTRY(cpum.GstCtx.fs.u32Limit),
8837 ENTRY(cpum.GstCtx.fs.Attr),
8838 ENTRY(cpum.GstCtx.gs.Sel),
8839 ENTRY(cpum.GstCtx.gs.u64Base),
8840 ENTRY(cpum.GstCtx.gs.u32Limit),
8841 ENTRY(cpum.GstCtx.gs.Attr),
8842 ENTRY(cpum.GstCtx.rip),
8843 ENTRY(cpum.GstCtx.eflags),
8844 ENTRY(cpum.GstCtx.uRipInhibitInt),
8845 ENTRY(cpum.GstCtx.cr0),
8846 ENTRY(cpum.GstCtx.cr4),
8847 ENTRY(cpum.GstCtx.aXcr[0]),
8848 ENTRY(cpum.GstCtx.aXcr[1]),
8849#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
8850 ENTRY(cpum.GstCtx.XState.x87.MXCSR),
8851 ENTRY(cpum.GstCtx.XState.x87.aXMM[0]),
8852 ENTRY(cpum.GstCtx.XState.x87.aXMM[1]),
8853 ENTRY(cpum.GstCtx.XState.x87.aXMM[2]),
8854 ENTRY(cpum.GstCtx.XState.x87.aXMM[3]),
8855 ENTRY(cpum.GstCtx.XState.x87.aXMM[4]),
8856 ENTRY(cpum.GstCtx.XState.x87.aXMM[5]),
8857 ENTRY(cpum.GstCtx.XState.x87.aXMM[6]),
8858 ENTRY(cpum.GstCtx.XState.x87.aXMM[7]),
8859 ENTRY(cpum.GstCtx.XState.x87.aXMM[8]),
8860 ENTRY(cpum.GstCtx.XState.x87.aXMM[9]),
8861 ENTRY(cpum.GstCtx.XState.x87.aXMM[10]),
8862 ENTRY(cpum.GstCtx.XState.x87.aXMM[11]),
8863 ENTRY(cpum.GstCtx.XState.x87.aXMM[12]),
8864 ENTRY(cpum.GstCtx.XState.x87.aXMM[13]),
8865 ENTRY(cpum.GstCtx.XState.x87.aXMM[14]),
8866 ENTRY(cpum.GstCtx.XState.x87.aXMM[15]),
8867 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[0]),
8868 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[1]),
8869 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[2]),
8870 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[3]),
8871 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[4]),
8872 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[5]),
8873 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[6]),
8874 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[7]),
8875 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[8]),
8876 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[9]),
8877 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[10]),
8878 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[11]),
8879 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[12]),
8880 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[13]),
8881 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[14]),
8882 ENTRY(cpum.GstCtx.XState.u.YmmHi.aYmmHi[15])
8883#endif
8884#undef ENTRY
8885 };
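/* Note: the table must stay sorted by ascending offset for the binary lookup below;
   the VBOX_STRICT block that follows asserts this once at runtime. */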
8886#ifdef VBOX_STRICT
8887 static bool s_fOrderChecked = false;
8888 if (!s_fOrderChecked)
8889 {
8890 s_fOrderChecked = true;
8891 uint32_t offPrev = s_aMembers[0].off;
8892 for (unsigned i = 1; i < RT_ELEMENTS(s_aMembers); i++)
8893 {
8894 Assert(s_aMembers[i].off > offPrev);
8895 offPrev = s_aMembers[i].off;
8896 }
8897 }
8898#endif
8899
8900 /*
8901 * Binary lookup.
8902 */
8903 unsigned iStart = 0;
8904 unsigned iEnd = RT_ELEMENTS(s_aMembers);
8905 for (;;)
8906 {
8907 unsigned const iCur = iStart + (iEnd - iStart) / 2;
8908 uint32_t const offCur = s_aMembers[iCur].off;
8909 if (off < offCur)
8910 {
8911 if (iCur != iStart)
8912 iEnd = iCur;
8913 else
8914 break;
8915 }
8916 else if (off > offCur)
8917 {
8918 if (iCur + 1 < iEnd)
8919 iStart = iCur + 1;
8920 else
8921 break;
8922 }
8923 else
8924 return s_aMembers[iCur].pszName;
8925 }
8926#ifdef VBOX_WITH_STATISTICS
8927 if (off - RT_UOFFSETOF(VMCPUCC, iem.s.acThreadedFuncStats) < RT_SIZEOFMEMB(VMCPUCC, iem.s.acThreadedFuncStats))
8928 return "iem.s.acThreadedFuncStats[iFn]";
8929#endif
8930 return NULL;
8931}
8932
8933
8934/**
8935 * Translates a label to a name.
8936 */
8937static const char *iemNativeGetLabelName(IEMNATIVELABELTYPE enmLabel, bool fCommonCode /*= false*/)
8938{
8939 switch (enmLabel)
8940 {
8941#define STR_CASE_CMN(a_Label) case kIemNativeLabelType_ ## a_Label: return fCommonCode ? "Chunk_" #a_Label : #a_Label;
8942 STR_CASE_CMN(Invalid);
8943 STR_CASE_CMN(RaiseDe);
8944 STR_CASE_CMN(RaiseUd);
8945 STR_CASE_CMN(RaiseSseRelated);
8946 STR_CASE_CMN(RaiseAvxRelated);
8947 STR_CASE_CMN(RaiseSseAvxFpRelated);
8948 STR_CASE_CMN(RaiseNm);
8949 STR_CASE_CMN(RaiseGp0);
8950 STR_CASE_CMN(RaiseMf);
8951 STR_CASE_CMN(RaiseXf);
8952 STR_CASE_CMN(ObsoleteTb);
8953 STR_CASE_CMN(NeedCsLimChecking);
8954 STR_CASE_CMN(CheckBranchMiss);
8955 STR_CASE_CMN(Return);
8956#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
8957 STR_CASE_CMN(ReturnZero);
8958#endif
8959 STR_CASE_CMN(ReturnBreak);
8960 STR_CASE_CMN(ReturnBreakFF);
8961 STR_CASE_CMN(ReturnWithFlags);
8962 STR_CASE_CMN(ReturnBreakViaLookup);
8963 STR_CASE_CMN(ReturnBreakViaLookupWithIrq);
8964 STR_CASE_CMN(ReturnBreakViaLookupWithTlb);
8965 STR_CASE_CMN(ReturnBreakViaLookupWithTlbAndIrq);
8966 STR_CASE_CMN(NonZeroRetOrPassUp);
8967#undef STR_CASE_CMN
8968#define STR_CASE_LBL(a_Label) case kIemNativeLabelType_ ## a_Label: return #a_Label;
8969 STR_CASE_LBL(LoopJumpTarget);
8970 STR_CASE_LBL(If);
8971 STR_CASE_LBL(Else);
8972 STR_CASE_LBL(Endif);
8973 STR_CASE_LBL(CheckIrq);
8974 STR_CASE_LBL(TlbLookup);
8975 STR_CASE_LBL(TlbMiss);
8976 STR_CASE_LBL(TlbDone);
8977 case kIemNativeLabelType_End: break;
8978 }
8979 return NULL;
8980}
8981
8982
8983/** Info for the symbols resolver used when disassembling. */
8984typedef struct IEMNATIVDISASMSYMCTX
8985{
8986 PVMCPU pVCpu;
8987 PCIEMTB pTb;
8988# ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
8989 PCIEMNATIVEPERCHUNKCTX pCtx;
8990# endif
8991# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
8992 PCIEMTBDBG pDbgInfo;
8993# endif
8994} IEMNATIVDISASMSYMCTX;
8995typedef IEMNATIVDISASMSYMCTX *PIEMNATIVDISASMSYMCTX;
8996
8997
8998/**
8999 * Resolve address to symbol, if we can.
9000 */
9001static const char *iemNativeDisasmGetSymbol(PIEMNATIVDISASMSYMCTX pSymCtx, uintptr_t uAddress, char *pszBuf, size_t cbBuf)
9002{
9003#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || defined(IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE)
9004 PCIEMTB const pTb = pSymCtx->pTb;
9005 uintptr_t const offNative = (uAddress - (uintptr_t)pTb->Native.paInstructions) / sizeof(IEMNATIVEINSTR);
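/* Addresses inside the TB body are resolved via the debug info labels below, while
   anything beyond the end is matched against the per-chunk tail code exit labels
   further down (when that feature is compiled in). */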
9006 if (offNative <= pTb->Native.cInstructions)
9007 {
9008# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9009 /*
9010 * Scan debug info for a matching label.
9011 * Since the debug info should be 100% linear, we can do a binary search here.
9012 */
9013 PCIEMTBDBG const pDbgInfo = pSymCtx->pDbgInfo;
9014 if (pDbgInfo)
9015 {
9016 uint32_t const cEntries = pDbgInfo->cEntries;
9017 uint32_t idxEnd = cEntries;
9018 uint32_t idxStart = 0;
9019 for (;;)
9020 {
9021 /* Find a NativeOffset record close to the midpoint. */
9022 uint32_t idx = idxStart + (idxEnd - idxStart) / 2;
9023 while (idx > idxStart && pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9024 idx--;
9025 if (pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9026 {
9027 idx = idxStart + (idxEnd - idxStart) / 2 + 1;
9028 while (idx < idxEnd && pDbgInfo->aEntries[idx].Gen.uType != kIemTbDbgEntryType_NativeOffset)
9029 idx++;
9030 if (idx >= idxEnd)
9031 break;
9032 }
9033
9034 /* Do the binary searching thing. */
9035 if (offNative < pDbgInfo->aEntries[idx].NativeOffset.offNative)
9036 {
9037 if (idx > idxStart)
9038 idxEnd = idx;
9039 else
9040 break;
9041 }
9042 else if (offNative > pDbgInfo->aEntries[idx].NativeOffset.offNative)
9043 {
9044 idx += 1;
9045 if (idx < idxEnd)
9046 idxStart = idx;
9047 else
9048 break;
9049 }
9050 else
9051 {
9052 /* Got a matching offset, scan forward till we hit a label, but
9053 stop when the native offset changes. */
9054 while (++idx < cEntries)
9055 switch (pDbgInfo->aEntries[idx].Gen.uType)
9056 {
9057 case kIemTbDbgEntryType_Label:
9058 {
9059 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)pDbgInfo->aEntries[idx].Label.enmLabel;
9060 const char * const pszName = iemNativeGetLabelName(enmLabel);
9061 if (enmLabel < kIemNativeLabelType_FirstWithMultipleInstances)
9062 return pszName;
9063 RTStrPrintf(pszBuf, cbBuf, "%s_%u", pszName, pDbgInfo->aEntries[idx].Label.uData);
9064 return pszBuf;
9065 }
9066
9067 case kIemTbDbgEntryType_NativeOffset:
9068 if (pDbgInfo->aEntries[idx].NativeOffset.offNative != offNative)
9069 return NULL;
9070 break;
9071 }
9072 break;
9073 }
9074 }
9075 }
9076# endif
9077 }
9078# ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
9079 else
9080 {
9081 PCIEMNATIVEPERCHUNKCTX const pChunkCtx = pSymCtx->pCtx;
9082 if (pChunkCtx)
9083 for (uint32_t i = 1; i < RT_ELEMENTS(pChunkCtx->apExitLabels); i++)
9084 if ((PIEMNATIVEINSTR)uAddress == pChunkCtx->apExitLabels[i])
9085 return iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true /*fCommonCode*/);
9086 }
9087# endif
9088#endif
9089 RT_NOREF(pSymCtx, uAddress, pszBuf, cbBuf);
9090 return NULL;
9091}
9092
9093#ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9094
9095/**
9096 * @callback_method_impl{FNDISGETSYMBOL}
9097 */
9098static DECLCALLBACK(int) iemNativeDisasmGetSymbolCb(PCDISSTATE pDis, uint32_t u32Sel, RTUINTPTR uAddress,
9099 char *pszBuf, size_t cchBuf, RTINTPTR *poff, void *pvUser)
9100{
9101 const char * const pszSym = iemNativeDisasmGetSymbol((PIEMNATIVDISASMSYMCTX)pvUser, uAddress, pszBuf, cchBuf);
9102 if (pszSym)
9103 {
9104 *poff = 0;
9105 if (pszSym != pszBuf)
9106 return RTStrCopy(pszBuf, cchBuf, pszSym);
9107 return VINF_SUCCESS;
9108 }
9109 RT_NOREF(pDis, u32Sel);
9110 return VERR_SYMBOL_NOT_FOUND;
9111}
9112
9113#else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9114
9115/**
9116 * Annotates an instruction decoded by the capstone disassembler.
9117 */
9118static const char *
9119iemNativeDisasmAnnotateCapstone(PIEMNATIVDISASMSYMCTX pSymCtx, cs_insn const *pInstr, char *pszBuf, size_t cchBuf)
9120{
9121# if defined(RT_ARCH_ARM64)
9122 if ( (pInstr->id >= ARM64_INS_LD1 && pInstr->id < ARM64_INS_LSL)
9123 || (pInstr->id >= ARM64_INS_ST1 && pInstr->id < ARM64_INS_SUB))
9124 {
9125 /* This is a bit crappy, but the disassembler provides incomplete addressing details. */
9126 AssertCompile(IEMNATIVE_REG_FIXED_PVMCPU == 28 && IEMNATIVE_REG_FIXED_PCPUMCTX == 27);
9127 char const *psz = strchr(pInstr->op_str, '[');
9128 if (psz && psz[1] == 'x' && psz[2] == '2' && (psz[3] == '7' || psz[3] == '8'))
9129 {
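/* x28 is the fixed VMCPU pointer (offsets relative to the start of the structure)
   while x27 points at cpum.GstCtx, hence the rebasing of x27 offsets here. */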
9130 uint32_t const offVCpu = psz[3] == '8' ? 0 : RT_UOFFSETOF(VMCPU, cpum.GstCtx);
9131 int32_t off = -1;
9132 psz += 4;
9133 if (*psz == ']')
9134 off = 0;
9135 else if (*psz == ',')
9136 {
9137 psz = RTStrStripL(psz + 1);
9138 if (*psz == '#')
9139 off = RTStrToInt32(&psz[1]);
9140 /** @todo deal with index registers and LSL as well... */
9141 }
9142 if (off >= 0)
9143 return iemNativeDbgVCpuOffsetToName(offVCpu + (uint32_t)off);
9144 }
9145 }
9146 else if (pInstr->id == ARM64_INS_B || pInstr->id == ARM64_INS_BL)
9147 {
9148 const char *pszAddr = strchr(pInstr->op_str, '#');
9149 if (pszAddr)
9150 {
9151 uint64_t uAddr = RTStrToUInt64(pszAddr + 1);
9152 if (uAddr != 0)
9153 return iemNativeDisasmGetSymbol(pSymCtx, uAddr, pszBuf, cchBuf);
9154 }
9155 }
9156# endif
9157 RT_NOREF(pSymCtx, pInstr, pszBuf, cchBuf);
9158 return NULL;
9159}
9160#endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9161
9162
9163DECLHIDDEN(void) iemNativeDisassembleTb(PVMCPU pVCpu, PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
9164{
9165 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
9166#if defined(RT_ARCH_AMD64)
9167 static const char * const a_apszMarkers[] =
9168 {
9169 /*[0]=*/ "unknown0", "CheckCsLim", "ConsiderLimChecking", "CheckOpcodes",
9170 /*[4]=*/ "PcAfterBranch", "LoadTlbForNewPage", "LoadTlbAfterBranch"
9171 };
9172#endif
9173
9174 char szDisBuf[512];
9175 DISSTATE Dis;
9176 PCIEMNATIVEINSTR const paNative = pTb->Native.paInstructions;
9177 uint32_t const cNative = pTb->Native.cInstructions;
9178 uint32_t offNative = 0;
9179#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9180 PCIEMTBDBG const pDbgInfo = pTb->pDbgInfo;
9181#endif
9182 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9183 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9184 : DISCPUMODE_64BIT;
9185#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
9186# ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9187 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, iemExecMemGetTbChunkCtx(pVCpu, pTb), pDbgInfo };
9188# else
9189 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, iemExecMemGetTbChunkCtx(pVCpu, pTb) };
9190# endif
9191#elif defined(IEMNATIVE_WITH_TB_DEBUG_INFO)
9192 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb, pDbgInfo };
9193#else
9194 IEMNATIVDISASMSYMCTX SymCtx = { pVCpu, pTb };
9195#endif
9196#if defined(RT_ARCH_AMD64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9197 DISCPUMODE const enmHstCpuMode = DISCPUMODE_64BIT;
9198#elif defined(RT_ARCH_ARM64) && !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9199 DISCPUMODE const enmHstCpuMode = DISCPUMODE_ARMV8_A64;
9200#elif !defined(VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER)
9201# error "Port me"
9202#else
9203 csh hDisasm = ~(size_t)0;
9204# if defined(RT_ARCH_AMD64)
9205 cs_err rcCs = cs_open(CS_ARCH_X86, CS_MODE_LITTLE_ENDIAN | CS_MODE_64, &hDisasm);
9206# elif defined(RT_ARCH_ARM64)
9207 cs_err rcCs = cs_open(CS_ARCH_ARM64, CS_MODE_LITTLE_ENDIAN, &hDisasm);
9208# else
9209# error "Port me"
9210# endif
9211 AssertMsgReturnVoid(rcCs == CS_ERR_OK, ("%d (%#x)\n", rcCs, rcCs));
9212
9213 //rcCs = cs_option(hDisasm, CS_OPT_DETAIL, CS_OPT_ON); - not needed as pInstr->detail doesn't provide full memory detail.
9214 //Assert(rcCs == CS_ERR_OK);
9215#endif
9216
9217 /*
9218 * Print TB info.
9219 */
9220 pHlp->pfnPrintf(pHlp,
9221 "pTb=%p: GCPhysPc=%RGp (%%%RGv) cInstructions=%u LB %#x cRanges=%u\n"
9222 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
9223 pTb, pTb->GCPhysPc,
9224#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9225 pTb->pDbgInfo ? pTb->pDbgInfo->FlatPc : RTGCPTR_MAX,
9226#else
9227 pTb->FlatPc,
9228#endif
9229 pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges,
9230 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
9231#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
9232 if (pDbgInfo && pDbgInfo->cEntries > 1)
9233 {
9234 Assert(pDbgInfo->aEntries[0].Gen.uType == kIemTbDbgEntryType_NativeOffset);
9235
9236 /*
9237 * This disassembly is driven by the debug info which follows the native
9238 * code and indicates when it starts with the next guest instructions,
9239 * where labels are and such things.
9240 */
9241 uint32_t idxThreadedCall = 0;
9242 uint32_t idxGuestInstr = 0;
9243 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
9244 uint8_t idxRange = UINT8_MAX;
9245 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
9246 uint32_t offRange = 0;
9247 uint32_t offOpcodes = 0;
9248 uint32_t const cbOpcodes = pTb->cbOpcodes;
9249 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
9250 uint32_t const cDbgEntries = pDbgInfo->cEntries;
9251 uint32_t iDbgEntry = 1;
9252 uint32_t offDbgNativeNext = pDbgInfo->aEntries[0].NativeOffset.offNative;
9253
9254 while (offNative < cNative)
9255 {
9256 /* If we're at or have passed the point where the next chunk of debug
9257 info starts, process it. */
9258 if (offDbgNativeNext <= offNative)
9259 {
9260 offDbgNativeNext = UINT32_MAX;
9261 for (; iDbgEntry < cDbgEntries; iDbgEntry++)
9262 {
9263 switch (pDbgInfo->aEntries[iDbgEntry].Gen.uType)
9264 {
9265 case kIemTbDbgEntryType_GuestInstruction:
9266 {
9267 /* Did the exec flag change? */
9268 if (fExec != pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec)
9269 {
9270 pHlp->pfnPrintf(pHlp,
9271 " fExec change %#08x -> %#08x %s\n",
9272 fExec, pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9273 iemTbFlagsToString(pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec,
9274 szDisBuf, sizeof(szDisBuf)));
9275 fExec = pDbgInfo->aEntries[iDbgEntry].GuestInstruction.fExec;
9276 enmGstCpuMode = (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
9277 : (fExec & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
9278 : DISCPUMODE_64BIT;
9279 }
9280
9281 /* New opcode range? We need to fend off a spurious debug info entry here for cases
9282 where the compilation was aborted before the opcode was recorded and the actual
9283 instruction was translated to a threaded call. This may happen when we run out
9284 of ranges, or when some complicated interrupts/FFs are found to be pending or
9285 similar. So, we just deal with it here rather than in the compiler code as it
9286 is a lot simpler to do here. */
9287 if ( idxRange == UINT8_MAX
9288 || idxRange >= cRanges
9289 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
9290 {
9291 idxRange += 1;
9292 if (idxRange < cRanges)
9293 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
9294 else
9295 continue;
9296 Assert(offOpcodes == pTb->aRanges[idxRange].offOpcodes + offRange);
9297 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
9298 + (pTb->aRanges[idxRange].idxPhysPage == 0
9299 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9300 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
9301 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9302 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
9303 pTb->aRanges[idxRange].idxPhysPage);
9304 GCPhysPc += offRange;
9305 }
9306
9307 /* Disassemble the instruction. */
9308 //uint8_t const cbInstrMax = RT_MIN(pTb->aRanges[idxRange].cbOpcodes - offRange, 15);
9309 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
9310 uint32_t cbInstr = 1;
9311 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9312 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
9313 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9314 if (RT_SUCCESS(rc))
9315 {
9316 size_t cch = DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9317 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9318 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9319 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9320
9321 static unsigned const s_offMarker = 55;
9322 static char const s_szMarker[] = " ; <--- guest";
9323 if (cch < s_offMarker)
9324 {
9325 memset(&szDisBuf[cch], ' ', s_offMarker - cch);
9326 cch = s_offMarker;
9327 }
9328 if (cch + sizeof(s_szMarker) <= sizeof(szDisBuf))
9329 memcpy(&szDisBuf[cch], s_szMarker, sizeof(s_szMarker));
9330
9331 pHlp->pfnPrintf(pHlp, "\n %%%%%RGp: %s #%u\n", GCPhysPc, szDisBuf, idxGuestInstr);
9332 }
9333 else
9334 {
9335 pHlp->pfnPrintf(pHlp, "\n %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
9336 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
9337 cbInstr = 1;
9338 }
9339 idxGuestInstr++;
9340 GCPhysPc += cbInstr;
9341 offOpcodes += cbInstr;
9342 offRange += cbInstr;
9343 continue;
9344 }
9345
9346 case kIemTbDbgEntryType_ThreadedCall:
9347 pHlp->pfnPrintf(pHlp,
9348 " Call #%u to %s (%u args) - %s\n",
9349 idxThreadedCall,
9350 g_apszIemThreadedFunctions[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9351 g_acIemThreadedFunctionUsedArgs[pDbgInfo->aEntries[iDbgEntry].ThreadedCall.enmCall],
9352 pDbgInfo->aEntries[iDbgEntry].ThreadedCall.fRecompiled ? "recompiled" : "todo");
9353 idxThreadedCall++;
9354 continue;
9355
9356 case kIemTbDbgEntryType_GuestRegShadowing:
9357 {
9358 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9359 const char * const pszGstReg = g_aGstShadowInfo[pEntry->GuestRegShadowing.idxGstReg].pszName;
9360 if (pEntry->GuestRegShadowing.idxHstReg == UINT8_MAX)
9361 pHlp->pfnPrintf(pHlp, " Guest register %s != host register %s\n", pszGstReg,
9362 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9363 else if (pEntry->GuestRegShadowing.idxHstRegPrev == UINT8_MAX)
9364 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s \n", pszGstReg,
9365 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg]);
9366 else
9367 pHlp->pfnPrintf(pHlp, " Guest register %s == host register %s (previously in %s)\n", pszGstReg,
9368 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstReg],
9369 g_apszIemNativeHstRegNames[pEntry->GuestRegShadowing.idxHstRegPrev]);
9370 continue;
9371 }
9372
9373#ifdef IEMNATIVE_WITH_SIMD_REG_ALLOCATOR
9374 case kIemTbDbgEntryType_GuestSimdRegShadowing:
9375 {
9376 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9377 const char * const pszGstReg = g_aGstSimdShadowInfo[pEntry->GuestSimdRegShadowing.idxGstSimdReg].pszName;
9378 if (pEntry->GuestSimdRegShadowing.idxHstSimdReg == UINT8_MAX)
9379 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s != host SIMD register %s\n", pszGstReg,
9380 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9381 else if (pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev == UINT8_MAX)
9382 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s\n", pszGstReg,
9383 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg]);
9384 else
9385 pHlp->pfnPrintf(pHlp, " Guest SIMD register %s == host SIMD register %s (previously in %s)\n", pszGstReg,
9386 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdReg],
9387 g_apszIemNativeHstSimdRegNames[pEntry->GuestSimdRegShadowing.idxHstSimdRegPrev]);
9388 continue;
9389 }
9390#endif
9391
9392 case kIemTbDbgEntryType_Label:
9393 {
9394 const char *pszName = iemNativeGetLabelName((IEMNATIVELABELTYPE)pDbgInfo->aEntries[iDbgEntry].Label.enmLabel);
9395 if (pDbgInfo->aEntries[iDbgEntry].Label.enmLabel >= kIemNativeLabelType_FirstWithMultipleInstances)
9396 {
9397 const char *pszComment = pDbgInfo->aEntries[iDbgEntry].Label.enmLabel == kIemNativeLabelType_Else
9398 ? " ; regs state restored pre-if-block" : "";
9399 pHlp->pfnPrintf(pHlp, " %s_%u:%s\n", pszName, pDbgInfo->aEntries[iDbgEntry].Label.uData, pszComment);
9400 }
9401 else
9402 pHlp->pfnPrintf(pHlp, " %s:\n", pszName);
9403 continue;
9404 }
9405
9406 case kIemTbDbgEntryType_NativeOffset:
9407 offDbgNativeNext = pDbgInfo->aEntries[iDbgEntry].NativeOffset.offNative;
9408 Assert(offDbgNativeNext >= offNative);
9409 break;
9410
9411#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
9412 case kIemTbDbgEntryType_DelayedPcUpdate:
9413 pHlp->pfnPrintf(pHlp, " Updating guest PC value by %u (cInstrSkipped=%u)\n",
9414 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.offPc,
9415 pDbgInfo->aEntries[iDbgEntry].DelayedPcUpdate.cInstrSkipped);
9416 continue;
9417#endif
9418
9419#ifdef IEMNATIVE_WITH_DELAYED_REGISTER_WRITEBACK
9420 case kIemTbDbgEntryType_GuestRegDirty:
9421 {
9422 PCIEMTBDBGENTRY const pEntry = &pDbgInfo->aEntries[iDbgEntry];
9423 const char * const pszGstReg = pEntry->GuestRegDirty.fSimdReg
9424 ? g_aGstSimdShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName
9425 : g_aGstShadowInfo[pEntry->GuestRegDirty.idxGstReg].pszName;
9426 const char * const pszHstReg = pEntry->GuestRegDirty.fSimdReg
9427 ? g_apszIemNativeHstSimdRegNames[pEntry->GuestRegDirty.idxHstReg]
9428 : g_apszIemNativeHstRegNames[pEntry->GuestRegDirty.idxHstReg];
9429 pHlp->pfnPrintf(pHlp, " Guest register %s (shadowed by %s) is now marked dirty (intent)\n",
9430 pszGstReg, pszHstReg);
9431 continue;
9432 }
9433
9434 case kIemTbDbgEntryType_GuestRegWriteback:
9435 pHlp->pfnPrintf(pHlp, " Writing dirty %s registers (gst %#RX64)\n",
9436 pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fSimdReg ? "SIMD" : "general",
9437 (uint64_t)pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.fGstReg
9438 << (pDbgInfo->aEntries[iDbgEntry].GuestRegWriteback.cShift * 25));
9439 continue;
9440#endif
9441
9442 default:
9443 AssertFailed();
9444 }
9445 iDbgEntry++;
9446 break;
9447 }
9448 }
9449
9450 /*
9451 * Disassemble the next native instruction.
9452 */
9453 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9454# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9455 uint32_t cbInstr = sizeof(paNative[0]);
9456 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9457 if (RT_SUCCESS(rc))
9458 {
9459# if defined(RT_ARCH_AMD64)
9460 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9461 {
9462 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9463 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9464 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9465 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9466 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9467 uInfo & 0x8000 ? "recompiled" : "todo");
9468 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9469 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9470 else
9471 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9472 }
9473 else
9474# endif
9475 {
9476 const char *pszAnnotation = NULL;
9477# ifdef RT_ARCH_AMD64
9478 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9479 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9480 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9481 iemNativeDisasmGetSymbolCb, &SymCtx);
9482 PCDISOPPARAM pMemOp;
9483 if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[0].fUse))
9484 pMemOp = &Dis.aParams[0];
9485 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[1].fUse))
9486 pMemOp = &Dis.aParams[1];
9487 else if (DISUSE_IS_EFFECTIVE_ADDR(Dis.aParams[2].fUse))
9488 pMemOp = &Dis.aParams[2];
9489 else
9490 pMemOp = NULL;
9491 if ( pMemOp
9492 && pMemOp->x86.Base.idxGenReg == IEMNATIVE_REG_FIXED_PVMCPU
9493 && (pMemOp->fUse & (DISUSE_BASE | DISUSE_REG_GEN64)) == (DISUSE_BASE | DISUSE_REG_GEN64))
9494 pszAnnotation = iemNativeDbgVCpuOffsetToName(pMemOp->fUse & DISUSE_DISPLACEMENT32
9495 ? pMemOp->x86.uDisp.u32 : pMemOp->x86.uDisp.u8);
9496
9497# elif defined(RT_ARCH_ARM64)
9498 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9499 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9500 iemNativeDisasmGetSymbolCb, &SymCtx);
9501# else
9502# error "Port me"
9503# endif
9504 if (pszAnnotation)
9505 {
9506 static unsigned const s_offAnnotation = 55;
9507 size_t const cchAnnotation = strlen(pszAnnotation);
9508 size_t cchDis = strlen(szDisBuf);
9509 if (RT_MAX(cchDis, s_offAnnotation) + sizeof(" ; ") + cchAnnotation <= sizeof(szDisBuf))
9510 {
9511 if (cchDis < s_offAnnotation)
9512 {
9513 memset(&szDisBuf[cchDis], ' ', s_offAnnotation - cchDis);
9514 cchDis = s_offAnnotation;
9515 }
9516 szDisBuf[cchDis++] = ' ';
9517 szDisBuf[cchDis++] = ';';
9518 szDisBuf[cchDis++] = ' ';
9519 memcpy(&szDisBuf[cchDis], pszAnnotation, cchAnnotation + 1);
9520 }
9521 }
9522 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9523 }
9524 }
9525 else
9526 {
9527# if defined(RT_ARCH_AMD64)
9528 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9529 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9530# elif defined(RT_ARCH_ARM64)
9531 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9532# else
9533# error "Port me"
9534# endif
9535 cbInstr = sizeof(paNative[0]);
9536 }
9537 offNative += cbInstr / sizeof(paNative[0]);
9538
9539# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9540 cs_insn *pInstr;
9541 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9542 (uintptr_t)pNativeCur, 1, &pInstr);
9543 if (cInstrs > 0)
9544 {
9545 Assert(cInstrs == 1);
9546 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9547 size_t const cchOp = strlen(pInstr->op_str);
9548# if defined(RT_ARCH_AMD64)
9549 if (pszAnnotation)
9550 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9551 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9552 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9553 else
9554 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9555 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9556
9557# else
9558 if (pszAnnotation)
9559 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9560 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9561 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9562 else
9563 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9564 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9565# endif
9566 offNative += pInstr->size / sizeof(*pNativeCur);
9567 cs_free(pInstr, cInstrs);
9568 }
9569 else
9570 {
9571# if defined(RT_ARCH_AMD64)
9572 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9573 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9574# else
9575 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9576# endif
9577 offNative++;
9578 }
9579# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9580 }
9581 }
9582 else
9583#endif /* IEMNATIVE_WITH_TB_DEBUG_INFO */
9584 {
9585 /*
9586 * No debug info, just disassemble the x86 code and then the native code.
9587 *
9588 * First the guest code:
9589 */
9590 for (unsigned i = 0; i < pTb->cRanges; i++)
9591 {
9592 RTGCPHYS GCPhysPc = pTb->aRanges[i].offPhysPage
9593 + (pTb->aRanges[i].idxPhysPage == 0
9594 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
9595 : pTb->aGCPhysPages[pTb->aRanges[i].idxPhysPage - 1]);
9596 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
9597 i, GCPhysPc, pTb->aRanges[i].cbOpcodes, pTb->aRanges[i].idxPhysPage);
9598 unsigned off = pTb->aRanges[i].offOpcodes;
9599 /** @todo this ain't working when crossing pages! */
9600 unsigned const cbOpcodes = pTb->aRanges[i].cbOpcodes + off;
9601 while (off < cbOpcodes)
9602 {
9603 uint32_t cbInstr = 1;
9604 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
9605 &pTb->pabOpcodes[off], cbOpcodes - off,
9606 iemNativeDisasReadBytesDummy, NULL, &Dis, &cbInstr);
9607 if (RT_SUCCESS(rc))
9608 {
9609 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9610 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9611 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9612 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
9613 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
9614 GCPhysPc += cbInstr;
9615 off += cbInstr;
9616 }
9617 else
9618 {
9619 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - disassembly failure %Rrc\n",
9620 GCPhysPc, cbOpcodes - off, &pTb->pabOpcodes[off], rc);
9621 break;
9622 }
9623 }
9624 }
9625
9626 /*
9627 * Then the native code:
9628 */
9629 pHlp->pfnPrintf(pHlp, " Native code %p L %#x\n", paNative, cNative);
9630 while (offNative < cNative)
9631 {
9632 PCIEMNATIVEINSTR const pNativeCur = &paNative[offNative];
9633# ifndef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9634 uint32_t cbInstr = sizeof(paNative[0]);
9635 int const rc = DISInstr(pNativeCur, enmHstCpuMode, &Dis, &cbInstr);
9636 if (RT_SUCCESS(rc))
9637 {
9638# if defined(RT_ARCH_AMD64)
9639 if (Dis.pCurInstr->uOpcode == OP_NOP && cbInstr == 7) /* iemNativeEmitMarker */
9640 {
9641 uint32_t const uInfo = *(uint32_t const *)&Dis.Instr.ab[3];
9642 if (RT_HIWORD(uInfo) < kIemThreadedFunc_End)
9643 pHlp->pfnPrintf(pHlp, "\n %p: nop ; marker: call #%u to %s (%u args) - %s\n",
9644 pNativeCur, uInfo & 0x7fff, g_apszIemThreadedFunctions[RT_HIWORD(uInfo)],
9645 g_acIemThreadedFunctionUsedArgs[RT_HIWORD(uInfo)],
9646 uInfo & 0x8000 ? "recompiled" : "todo");
9647 else if ((uInfo & ~RT_BIT_32(31)) < RT_ELEMENTS(a_apszMarkers))
9648 pHlp->pfnPrintf(pHlp, " %p: nop ; marker: %s\n", pNativeCur, a_apszMarkers[uInfo & ~RT_BIT_32(31)]);
9649 else
9650 pHlp->pfnPrintf(pHlp, " %p: nop ; unknown marker: %#x (%d)\n", pNativeCur, uInfo, uInfo);
9651 }
9652 else
9653# endif
9654 {
9655# ifdef RT_ARCH_AMD64
9656 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
9657 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
9658 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9659 iemNativeDisasmGetSymbolCb, &SymCtx);
9660# elif defined(RT_ARCH_ARM64)
9661 DISFormatArmV8Ex(&Dis, szDisBuf, sizeof(szDisBuf),
9662 DIS_FMT_FLAGS_BYTES_LEFT | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
9663 iemNativeDisasmGetSymbolCb, &SymCtx);
9664# else
9665# error "Port me"
9666# endif
9667 pHlp->pfnPrintf(pHlp, " %p: %s\n", pNativeCur, szDisBuf);
9668 }
9669 }
9670 else
9671 {
9672# if defined(RT_ARCH_AMD64)
9673 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %Rrc\n",
9674 pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, rc);
9675# else
9676 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %Rrc\n", pNativeCur, *pNativeCur, rc);
9677# endif
9678 cbInstr = sizeof(paNative[0]);
9679 }
9680 offNative += cbInstr / sizeof(paNative[0]);
9681
9682# else /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9683 cs_insn *pInstr;
9684 size_t cInstrs = cs_disasm(hDisasm, (const uint8_t *)pNativeCur, (cNative - offNative) * sizeof(*pNativeCur),
9685 (uintptr_t)pNativeCur, 1, &pInstr);
9686 if (cInstrs > 0)
9687 {
9688 Assert(cInstrs == 1);
9689 const char * const pszAnnotation = iemNativeDisasmAnnotateCapstone(&SymCtx, pInstr, szDisBuf, sizeof(szDisBuf));
9690 size_t const cchOp = strlen(pInstr->op_str);
9691# if defined(RT_ARCH_AMD64)
9692 if (pszAnnotation)
9693 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s%*s ; %s\n",
9694 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str,
9695 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9696 else
9697 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs %-7s %s\n",
9698 pNativeCur, pInstr->size, pNativeCur, pInstr->mnemonic, pInstr->op_str);
9699
9700# else
9701 if (pszAnnotation)
9702 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s%*s ; %s\n",
9703 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str,
9704 cchOp < 55 ? 55 - cchOp : 0, "", pszAnnotation);
9705 else
9706 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 %-7s %s\n",
9707 pNativeCur, *pNativeCur, pInstr->mnemonic, pInstr->op_str);
9708# endif
9709 offNative += pInstr->size / sizeof(*pNativeCur);
9710 cs_free(pInstr, cInstrs);
9711 }
9712 else
9713 {
9714# if defined(RT_ARCH_AMD64)
9715 pHlp->pfnPrintf(pHlp, " %p: %.*Rhxs - disassembly failure %d\n",
9716                            pNativeCur, RT_MIN(cNative - offNative, 16), pNativeCur, cs_errno(hDisasm));
9717# else
9718 pHlp->pfnPrintf(pHlp, " %p: %#010RX32 - disassembly failure %d\n", pNativeCur, *pNativeCur, cs_errno(hDisasm));
9719# endif
9720 offNative++;
9721 }
9722# endif /* VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER */
9723 }
9724 }
9725
9726#ifdef VBOX_WITH_IEM_USING_CAPSTONE_DISASSEMBLER
9727 /* Cleanup. */
9728 cs_close(&hDisasm);
9729#endif
9730}
9731
9732
9733#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
9734
9735/** Emit alignment padding between labels / functions. */
9736DECL_INLINE_THROW(uint32_t)
9737iemNativeRecompileEmitAlignmentPadding(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint32_t fAlignMask)
9738{
9739 if (off & fAlignMask)
9740 {
9741 PIEMNATIVEINSTR pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, fAlignMask + 1);
9742 while (off & fAlignMask)
9743# if defined(RT_ARCH_AMD64)
9744 pCodeBuf[off++] = 0xcc;
9745# elif defined(RT_ARCH_ARM64)
9746 pCodeBuf[off++] = Armv8A64MkInstrBrk(0xcccc);
9747# else
9748# error "port me"
9749# endif
9750 }
9751 return off;
9752}
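
/* Editor's note: a minimal sketch (not part of the build) of the padding arithmetic used
   above.  The offset is counted in code buffer units (bytes on AMD64, 32-bit instructions
   on ARM64) and fAlignMask is the alignment in such units minus one, so the number of
   filler units required is simply the distance to the next aligned boundary.  The helper
   name is hypothetical. */
#if 0
static uint32_t iemNativeSketchPaddingUnits(uint32_t off, uint32_t fAlignMask)
{
    /* Zero when already aligned, otherwise fAlignMask + 1 - (off & fAlignMask). */
    return (fAlignMask + 1 - (off & fAlignMask)) & fAlignMask;
}
#endif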
9753
9754
9755/**
9756 * Called when a new chunk is allocated to emit the common per-chunk code.
9757 *
9758 * Allocates a per-chunk context directly from the chunk itself and places the
9759 * common code there.
9760 *
9761 * @returns Pointer to the chunk context start.
9762 * @param pVCpu The cross context virtual CPU structure of the calling
9763 * thread.
9764 * @param idxChunk The index of the chunk being added and requiring a
9765 * common code context.
9766 */
9767DECLHIDDEN(PCIEMNATIVEPERCHUNKCTX) iemNativeRecompileAttachExecMemChunkCtx(PVMCPU pVCpu, uint32_t idxChunk)
9768{
9769 /*
9770 * Allocate a new recompiler state (since we're likely to be called while
9771 * the default one is fully loaded already with a recompiled TB).
9772 *
9773 * This is a bit of overkill, but this isn't a frequently used code path.
9774 */
9775 PIEMRECOMPILERSTATE pReNative = iemNativeInit(pVCpu, NULL);
9776 AssertReturn(pReNative, NULL);
9777
9778# if defined(RT_ARCH_AMD64)
9779 uint32_t const fAlignMask = 15;
9780# elif defined(RT_ARCH_ARM64)
9781 uint32_t const fAlignMask = 31 / 4;
9782# else
9783# error "port me"
9784# endif
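    /* Note that 'off' is counted in bytes on AMD64 but in 32-bit instruction words on
       ARM64, so the masks above yield a 16 byte and a 32 byte boundary respectively. */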
9785 uint32_t aoffLabels[kIemNativeLabelType_LastTbExit + 1] = {0};
9786 int rc = VINF_SUCCESS;
9787 uint32_t off = 0;
9788
9789 IEMNATIVE_TRY_SETJMP(pReNative, rc)
9790 {
9791 /*
9792 * Emit the epilog code.
9793 */
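        /* The ReturnZero label merely clears the return register and then falls through
           into the common return/epilog sequence, so a TB exiting with a zero status can
           jump straight here instead of clearing the register itself first. */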
9794 aoffLabels[kIemNativeLabelType_ReturnZero] = off;
9795 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
9796
9797 aoffLabels[kIemNativeLabelType_Return] = off;
9798 off = iemNativeEmitCoreEpilog(pReNative, off);
9799
9800 /*
9801         * Generate special jump labels.  All of these get a copy of the epilog code.
9802 */
9803 static struct
9804 {
9805 IEMNATIVELABELTYPE enmExitReason;
9806 uint32_t (*pfnEmitCore)(PIEMRECOMPILERSTATE pReNative, uint32_t off);
9807 } const s_aSpecialWithEpilogs[] =
9808 {
9809 { kIemNativeLabelType_NonZeroRetOrPassUp, iemNativeEmitCoreRcFiddling },
9810 { kIemNativeLabelType_ReturnBreak, iemNativeEmitCoreReturnBreak },
9811 { kIemNativeLabelType_ReturnBreakFF, iemNativeEmitCoreReturnBreakFF },
9812 { kIemNativeLabelType_ReturnWithFlags, iemNativeEmitCoreReturnWithFlags },
9813 };
9814 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSpecialWithEpilogs); i++)
9815 {
9816 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9817 Assert(aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] == 0);
9818 aoffLabels[s_aSpecialWithEpilogs[i].enmExitReason] = off;
9819 off = s_aSpecialWithEpilogs[i].pfnEmitCore(pReNative, off);
9820 off = iemNativeEmitCoreEpilog(pReNative, off);
9821 }
9822
9823 /*
9824 * Do what iemNativeEmitReturnBreakViaLookup does.
9825 */
9826 static struct
9827 {
9828 IEMNATIVELABELTYPE enmExitReason;
9829 uintptr_t pfnHelper;
9830 } const s_aViaLookup[] =
9831 {
9832 { kIemNativeLabelType_ReturnBreakViaLookup,
9833 (uintptr_t)iemNativeHlpReturnBreakViaLookup<false /*a_fWithIrqCheck*/> },
9834 { kIemNativeLabelType_ReturnBreakViaLookupWithIrq,
9835 (uintptr_t)iemNativeHlpReturnBreakViaLookup<true /*a_fWithIrqCheck*/> },
9836 { kIemNativeLabelType_ReturnBreakViaLookupWithTlb,
9837 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<false /*a_fWithIrqCheck*/> },
9838 { kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq,
9839 (uintptr_t)iemNativeHlpReturnBreakViaLookupWithTlb<true /*a_fWithIrqCheck*/> },
9840 };
9841 uint32_t const offReturnBreak = aoffLabels[kIemNativeLabelType_ReturnBreak]; Assert(offReturnBreak != 0);
9842 for (uint32_t i = 0; i < RT_ELEMENTS(s_aViaLookup); i++)
9843 {
9844 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9845 Assert(aoffLabels[s_aViaLookup[i].enmExitReason] == 0);
9846 aoffLabels[s_aViaLookup[i].enmExitReason] = off;
9847 off = iemNativeEmitCoreViaLookupDoOne(pReNative, off, offReturnBreak, s_aViaLookup[i].pfnHelper);
9848 }
9849
9850 /*
9851         * Generate simple TB tail labels that just call a helper with a pVCpu
9852         * arg and either return or longjmp/throw a non-zero status.
9853 */
9854 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
9855 static struct
9856 {
9857 IEMNATIVELABELTYPE enmExitReason;
9858 bool fWithEpilog;
9859 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
9860 } const s_aSimpleTailLabels[] =
9861 {
9862 { kIemNativeLabelType_RaiseDe, false, iemNativeHlpExecRaiseDe },
9863 { kIemNativeLabelType_RaiseUd, false, iemNativeHlpExecRaiseUd },
9864 { kIemNativeLabelType_RaiseSseRelated, false, iemNativeHlpExecRaiseSseRelated },
9865 { kIemNativeLabelType_RaiseAvxRelated, false, iemNativeHlpExecRaiseAvxRelated },
9866 { kIemNativeLabelType_RaiseSseAvxFpRelated, false, iemNativeHlpExecRaiseSseAvxFpRelated },
9867 { kIemNativeLabelType_RaiseNm, false, iemNativeHlpExecRaiseNm },
9868 { kIemNativeLabelType_RaiseGp0, false, iemNativeHlpExecRaiseGp0 },
9869 { kIemNativeLabelType_RaiseMf, false, iemNativeHlpExecRaiseMf },
9870 { kIemNativeLabelType_RaiseXf, false, iemNativeHlpExecRaiseXf },
9871 { kIemNativeLabelType_ObsoleteTb, true, iemNativeHlpObsoleteTb },
9872 { kIemNativeLabelType_NeedCsLimChecking, true, iemNativeHlpNeedCsLimChecking },
9873 { kIemNativeLabelType_CheckBranchMiss, true, iemNativeHlpCheckBranchMiss },
9874 };
9875 for (uint32_t i = 0; i < RT_ELEMENTS(s_aSimpleTailLabels); i++)
9876 {
9877 off = iemNativeRecompileEmitAlignmentPadding(pReNative, off, fAlignMask);
9878 Assert(!aoffLabels[s_aSimpleTailLabels[i].enmExitReason]);
9879 aoffLabels[s_aSimpleTailLabels[i].enmExitReason] = off;
9880
9881 /* int pfnCallback(PVMCPUCC pVCpu) */
9882 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
9883 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)s_aSimpleTailLabels[i].pfnCallback);
9884
9885 /* jump back to the return sequence / generate a return sequence. */
9886 if (!s_aSimpleTailLabels[i].fWithEpilog)
9887 off = iemNativeEmitJmpToFixed(pReNative, off, aoffLabels[kIemNativeLabelType_Return]);
9888 else
9889 off = iemNativeEmitCoreEpilog(pReNative, off);
9890 }
9891
9892
9893# ifdef VBOX_STRICT
9894         /* Make sure we've generated code for all labels. */
9895 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(aoffLabels); i++)
9896 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_ReturnZero);
9897# endif
9898 }
9899 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
9900 {
9901 Log(("iemNativeRecompileAttachExecMemChunkCtx: Caught %Rrc while recompiling!\n", rc));
9902 iemNativeTerm(pReNative);
9903 return NULL;
9904 }
9905 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
9906
9907 /*
9908 * Allocate memory for the context (first) and the common code (last).
9909 */
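    /* The context structure goes at the front of the allocation, padded up to a 64 byte
       boundary, and the common tail code is copied in right after it; the aoffLabels
       offsets recorded above are turned into RX pointers into that code further down. */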
9910 PIEMNATIVEPERCHUNKCTX pCtx;
9911 uint32_t const cbCtx = RT_ALIGN_32(sizeof(*pCtx), 64);
9912 uint32_t const cbCode = off * sizeof(IEMNATIVEINSTR);
9913 PIEMNATIVEINSTR paFinalCommonCodeRx = NULL;
9914 pCtx = (PIEMNATIVEPERCHUNKCTX)iemExecMemAllocatorAllocFromChunk(pVCpu, idxChunk, cbCtx + cbCode, &paFinalCommonCodeRx);
9915 AssertLogRelMsgReturn(pCtx, ("cbCtx=%#x cbCode=%#x idxChunk=%#x\n", cbCtx, cbCode, idxChunk), NULL);
9916
9917 /*
9918 * Copy over the generated code.
9919 * There should be no fixups or labels defined here.
9920 */
9921 paFinalCommonCodeRx = (PIEMNATIVEINSTR)((uintptr_t)paFinalCommonCodeRx + cbCtx);
9922 memcpy((PIEMNATIVEINSTR)((uintptr_t)pCtx + cbCtx), pReNative->pInstrBuf, cbCode);
9923
9924 Assert(pReNative->cFixups == 0);
9925 Assert(pReNative->cLabels == 0);
9926
9927 /*
9928 * Initialize the context.
9929 */
9930 AssertCompile(kIemNativeLabelType_Invalid == 0);
9931 AssertCompile(RT_ELEMENTS(pCtx->apExitLabels) == RT_ELEMENTS(aoffLabels));
9932 pCtx->apExitLabels[kIemNativeLabelType_Invalid] = 0;
9933 for (uint32_t i = kIemNativeLabelType_Invalid + 1; i < RT_ELEMENTS(pCtx->apExitLabels); i++)
9934 {
9935 Assert(aoffLabels[i] != 0 || i == kIemNativeLabelType_ReturnZero);
9936 pCtx->apExitLabels[i] = &paFinalCommonCodeRx[aoffLabels[i]];
9937 Log10((" apExitLabels[%u]=%p %s\n", i, pCtx->apExitLabels[i], iemNativeGetLabelName((IEMNATIVELABELTYPE)i, true)));
9938 }
9939
9940 iemExecMemAllocatorReadyForUse(pVCpu, pCtx, cbCtx + cbCode);
9941
9942 iemNativeTerm(pReNative);
9943 return pCtx;
9944}
9945
9946#endif /* IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE */
9947
9948/**
9949 * Recompiles the given threaded TB into a native one.
9950 *
9951 * In case of failure the translation block will be returned as-is.
9952 *
9953 * @returns pTb.
9954 * @param pVCpu The cross context virtual CPU structure of the calling
9955 * thread.
9956 * @param       pTb         The threaded translation block to recompile to native.
9957 */
9958DECLHIDDEN(PIEMTB) iemNativeRecompile(PVMCPUCC pVCpu, PIEMTB pTb) RT_NOEXCEPT
9959{
9960#if 0 /* For profiling the native recompiler code. */
9961l_profile_again:
9962#endif
9963 STAM_REL_PROFILE_START(&pVCpu->iem.s.StatNativeRecompilation, a);
9964
9965 /*
9966 * The first time thru, we allocate the recompiler state and save it;
9967 * all the other times we'll just reuse the saved one after a quick reset.
9968 */
9969 PIEMRECOMPILERSTATE pReNative = pVCpu->iem.s.pNativeRecompilerStateR3;
9970 if (RT_LIKELY(pReNative))
9971 iemNativeReInit(pReNative, pTb);
9972 else
9973 {
9974 pReNative = iemNativeInit(pVCpu, pTb);
9975 AssertReturn(pReNative, pTb);
9976 pVCpu->iem.s.pNativeRecompilerStateR3 = pReNative; /* save it */
9977 }
9978
9979#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
9980 /*
9981 * First do liveness analysis. This is done backwards.
9982 */
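    /* Liveness is a backward data flow problem: whether call N treats a guest register as
       an input depends on what the calls after it do with that register, which is why the
       loop below starts at the final call entry and walks towards the first one. */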
9983 {
9984 uint32_t idxCall = pTb->Thrd.cCalls;
9985 if (idxCall <= pReNative->cLivenessEntriesAlloc)
9986 { /* likely */ }
9987 else
9988 {
9989 uint32_t cAlloc = RT_MAX(pReNative->cLivenessEntriesAlloc, _4K);
9990 while (idxCall > cAlloc)
9991 cAlloc *= 2;
9992 void *pvNew = RTMemRealloc(pReNative->paLivenessEntries, sizeof(pReNative->paLivenessEntries[0]) * cAlloc);
9993 AssertReturn(pvNew, pTb);
9994 pReNative->paLivenessEntries = (PIEMLIVENESSENTRY)pvNew;
9995 pReNative->cLivenessEntriesAlloc = cAlloc;
9996 }
9997 AssertReturn(idxCall > 0, pTb);
9998 PIEMLIVENESSENTRY const paLivenessEntries = pReNative->paLivenessEntries;
9999
10000 /* The initial (final) entry. */
10001 idxCall--;
10002 IEM_LIVENESS_RAW_INIT_AS_UNUSED(&paLivenessEntries[idxCall]);
10003
10004 /* Loop backwards thru the calls and fill in the other entries. */
10005 PCIEMTHRDEDCALLENTRY pCallEntry = &pTb->Thrd.paCalls[idxCall];
10006 while (idxCall > 0)
10007 {
10008 PFNIEMNATIVELIVENESSFUNC const pfnLiveness = g_apfnIemNativeLivenessFunctions[pCallEntry->enmFunction];
10009 if (pfnLiveness)
10010 pfnLiveness(pCallEntry, &paLivenessEntries[idxCall], &paLivenessEntries[idxCall - 1]);
10011 else
10012 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(&paLivenessEntries[idxCall - 1], &paLivenessEntries[idxCall]);
10013 pCallEntry--;
10014 idxCall--;
10015 }
10016
10017# ifdef VBOX_WITH_STATISTICS
10018         /* Check if there are any EFLAGS optimizations to be had here.  This requires someone
10019            setting them to 'clobbered' rather than 'input'. */
10020 /** @todo */
10021# endif
10022 }
10023#endif
10024
10025 /*
10026 * Recompiling and emitting code is done using try/throw/catch or setjmp/longjmp
10027 * for aborting if an error happens.
10028 */
10029 uint32_t cCallsLeft = pTb->Thrd.cCalls;
10030#ifdef LOG_ENABLED
10031 uint32_t const cCallsOrg = cCallsLeft;
10032#endif
10033 uint32_t off = 0;
10034 int rc = VINF_SUCCESS;
10035 IEMNATIVE_TRY_SETJMP(pReNative, rc)
10036 {
10037#ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
10038 /*
10039 * Emit prolog code (fixed).
10040 */
10041 off = iemNativeEmitProlog(pReNative, off);
10042#endif
10043
10044 /*
10045 * Convert the calls to native code.
10046 */
10047#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10048 int32_t iGstInstr = -1;
10049#endif
10050#ifndef VBOX_WITHOUT_RELEASE_STATISTICS
10051 uint32_t cThreadedCalls = 0;
10052 uint32_t cRecompiledCalls = 0;
10053#endif
10054#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(IEM_WITH_INTRA_TB_JUMPS) || defined(VBOX_STRICT) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10055 uint32_t idxCurCall = 0;
10056#endif
10057 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
10058 pReNative->fExec = pTb->fFlags & IEMTB_F_IEM_F_MASK;
10059 while (cCallsLeft-- > 0)
10060 {
10061 PFNIEMNATIVERECOMPFUNC const pfnRecom = g_apfnIemNativeRecompileFunctions[pCallEntry->enmFunction];
10062#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10063 pReNative->idxCurCall = idxCurCall;
10064#endif
10065
10066#ifdef IEM_WITH_INTRA_TB_JUMPS
10067 /*
10068 * Define label for jump targets (currently only the first entry).
10069 */
10070 if (!(pCallEntry->fFlags & IEMTHREADEDCALLENTRY_F_JUMP_TARGET))
10071 { /* likely */ }
10072 else
10073 {
10074 iemNativeLabelCreate(pReNative, kIemNativeLabelType_LoopJumpTarget, off);
10075 Assert(idxCurCall == 0); /** @todo when jumping elsewhere, we have to save the register state. */
10076 }
10077#endif
10078
10079 /*
10080 * Debug info, assembly markup and statistics.
10081 */
10082#if defined(IEMNATIVE_WITH_TB_DEBUG_INFO) || !defined(IEMNATIVE_WITH_BLTIN_CHECKMODE)
10083 if (pCallEntry->enmFunction == kIemThreadedFunc_BltIn_CheckMode)
10084 pReNative->fExec = pCallEntry->auParams[0] & IEMTB_F_IEM_F_MASK;
10085#endif
10086#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10087 iemNativeDbgInfoAddNativeOffset(pReNative, off);
10088 if (iGstInstr < (int32_t)pCallEntry->idxInstr)
10089 {
10090 if (iGstInstr < (int32_t)pTb->cInstructions)
10091 iemNativeDbgInfoAddGuestInstruction(pReNative, pReNative->fExec);
10092 else
10093 Assert(iGstInstr == pTb->cInstructions);
10094 iGstInstr = pCallEntry->idxInstr;
10095 }
10096 iemNativeDbgInfoAddThreadedCall(pReNative, (IEMTHREADEDFUNCS)pCallEntry->enmFunction, pfnRecom != NULL);
10097#endif
10098#if defined(VBOX_STRICT)
10099 off = iemNativeEmitMarker(pReNative, off,
10100 RT_MAKE_U32(idxCurCall | (pfnRecom ? 0x8000 : 0), pCallEntry->enmFunction));
10101#endif
10102#if defined(VBOX_STRICT)
10103 iemNativeRegAssertSanity(pReNative);
10104#endif
10105#ifdef VBOX_WITH_STATISTICS
10106 off = iemNativeEmitThreadCallStats(pReNative, off, pCallEntry);
10107#endif
10108
10109#if 0
10110 if ( pTb->GCPhysPc == 0x00000000000c1240
10111 && idxCurCall == 67)
10112 off = iemNativeEmitBrk(pReNative, off, 0xf000);
10113#endif
10114
10115 /*
10116 * Actual work.
10117 */
10118 Log2(("%u[%u]: %s%s (off=%#x)\n", idxCurCall, pCallEntry->idxInstr,
10119 g_apszIemThreadedFunctions[pCallEntry->enmFunction], pfnRecom ? "(recompiled)" : "(todo)", off));
10120 if (pfnRecom) /** @todo stats on this. */
10121 {
10122 off = pfnRecom(pReNative, off, pCallEntry);
10123 STAM_REL_STATS({cRecompiledCalls++;});
10124 }
10125 else
10126 {
10127 off = iemNativeEmitThreadedCall(pReNative, off, pCallEntry);
10128 STAM_REL_STATS({cThreadedCalls++;});
10129 }
10130 Assert(off <= pReNative->cInstrBufAlloc);
10131 Assert(pReNative->cCondDepth == 0);
10132
10133#if defined(LOG_ENABLED) && defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS)
10134 if (LogIs2Enabled())
10135 {
10136 PCIEMLIVENESSENTRY pLivenessEntry = &pReNative->paLivenessEntries[idxCurCall];
10137# ifndef IEMLIVENESS_EXTENDED_LAYOUT
10138 static const char s_achState[] = "CUXI";
10139# else
10140 static const char s_achState[] = "UxRrWwMmCcQqKkNn";
10141# endif
10142
10143 char szGpr[17];
10144 for (unsigned i = 0; i < 16; i++)
10145 szGpr[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_GprFirst)];
10146 szGpr[16] = '\0';
10147
10148 char szSegBase[X86_SREG_COUNT + 1];
10149 char szSegLimit[X86_SREG_COUNT + 1];
10150 char szSegAttrib[X86_SREG_COUNT + 1];
10151 char szSegSel[X86_SREG_COUNT + 1];
10152 for (unsigned i = 0; i < X86_SREG_COUNT; i++)
10153 {
10154 szSegBase[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegBaseFirst)];
10155 szSegAttrib[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegAttribFirst)];
10156 szSegLimit[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegLimitFirst)];
10157 szSegSel[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_SegSelFirst)];
10158 }
10159 szSegBase[X86_SREG_COUNT] = szSegAttrib[X86_SREG_COUNT] = szSegLimit[X86_SREG_COUNT]
10160 = szSegSel[X86_SREG_COUNT] = '\0';
10161
10162 char szEFlags[8];
10163 for (unsigned i = 0; i < 7; i++)
10164 szEFlags[i] = s_achState[iemNativeLivenessGetStateByGstRegEx(pLivenessEntry, i + kIemNativeGstReg_EFlags)];
10165 szEFlags[7] = '\0';
10166
10167 Log2(("liveness: grp=%s segbase=%s segattr=%s seglim=%s segsel=%s efl=%s\n",
10168 szGpr, szSegBase, szSegAttrib, szSegLimit, szSegSel, szEFlags));
10169 }
10170#endif
10171
10172 /*
10173 * Advance.
10174 */
10175 pCallEntry++;
10176#if defined(IEMNATIVE_WITH_LIVENESS_ANALYSIS) || defined(IEM_WITH_INTRA_TB_JUMPS) || defined(VBOX_STRICT) || defined(LOG_ENABLED) || defined(VBOX_WITH_STATISTICS) || defined(IEMNATIVE_WITH_DELAYED_PC_UPDATING)
10177 idxCurCall++;
10178#endif
10179 }
10180
10181 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsRecompiled, cRecompiledCalls);
10182 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatNativeCallsThreaded, cThreadedCalls);
10183 if (!cThreadedCalls)
10184 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeFullyRecompiledTbs);
10185
10186#ifdef VBOX_WITH_STATISTICS
10187 off = iemNativeEmitNativeTbExitStats(pReNative, off, RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeTbFinished));
10188#endif
10189
10190 /* Flush any pending writes before returning from the last instruction (RIP updates, etc.). */
10191 off = iemNativeRegFlushPendingWrites(pReNative, off);
10192
10193#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
10194 /*
10195 * Successful return, so clear the return register (eax, w0).
10196 */
10197 off = iemNativeEmitGprZero(pReNative, off, IEMNATIVE_CALL_RET_GREG);
10198
10199 /*
10200 * Emit the epilog code.
10201 */
10202 uint32_t idxReturnLabel;
10203 off = iemNativeEmitEpilog(pReNative, off, &idxReturnLabel);
10204#else
10205 /*
10206 * Jump to the common per-chunk epilog code.
10207 */
10208 //off = iemNativeEmitBrk(pReNative, off, 0x1227);
10209 off = iemNativeEmitTbExit(pReNative, off, kIemNativeLabelType_ReturnZero);
10210#endif
10211
10212#ifndef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
10213 /*
10214 * Generate special jump labels.
10215 */
10216 off = iemNativeEmitRcFiddling(pReNative, off, idxReturnLabel);
10217
10218 bool const fReturnBreakViaLookup = RT_BOOL( pReNative->bmLabelTypes
10219 & ( RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookup)
10220 | RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookupWithIrq)
10221 | RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookupWithTlb)
10222 | RT_BIT_64(kIemNativeLabelType_ReturnBreakViaLookupWithTlbAndIrq)));
10223 if (fReturnBreakViaLookup)
10224 {
10225 uint32_t const idxReturnBreakLabel = iemNativeLabelCreate(pReNative, kIemNativeLabelType_ReturnBreak);
10226 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
10227 off = iemNativeEmitReturnBreakViaLookup(pReNative, off, idxReturnBreakLabel);
10228 }
10229 else if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreak))
10230 off = iemNativeEmitReturnBreak(pReNative, off, idxReturnLabel);
10231
10232 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnBreakFF))
10233 off = iemNativeEmitReturnBreakFF(pReNative, off, idxReturnLabel);
10234
10235 if (pReNative->bmLabelTypes & RT_BIT_64(kIemNativeLabelType_ReturnWithFlags))
10236 off = iemNativeEmitReturnWithFlags(pReNative, off, idxReturnLabel);
10237
10238 /*
10239      * Generate simple TB tail labels that just call a helper with a pVCpu
10240      * arg and either return or longjmp/throw a non-zero status.
10241 *
10242 * The array entries must be ordered by enmLabel value so we can index
10243 * using fTailLabels bit numbers.
10244 */
10245 typedef IEM_DECL_NATIVE_HLP_PTR(int, PFNIEMNATIVESIMPLETAILLABELCALL,(PVMCPUCC pVCpu));
10246 static struct
10247 {
10248 IEMNATIVELABELTYPE enmLabel;
10249 PFNIEMNATIVESIMPLETAILLABELCALL pfnCallback;
10250 } const g_aSimpleTailLabels[] =
10251 {
10252 { kIemNativeLabelType_Invalid, NULL },
10253 { kIemNativeLabelType_RaiseDe, iemNativeHlpExecRaiseDe },
10254 { kIemNativeLabelType_RaiseUd, iemNativeHlpExecRaiseUd },
10255 { kIemNativeLabelType_RaiseSseRelated, iemNativeHlpExecRaiseSseRelated },
10256 { kIemNativeLabelType_RaiseAvxRelated, iemNativeHlpExecRaiseAvxRelated },
10257 { kIemNativeLabelType_RaiseSseAvxFpRelated, iemNativeHlpExecRaiseSseAvxFpRelated },
10258 { kIemNativeLabelType_RaiseNm, iemNativeHlpExecRaiseNm },
10259 { kIemNativeLabelType_RaiseGp0, iemNativeHlpExecRaiseGp0 },
10260 { kIemNativeLabelType_RaiseMf, iemNativeHlpExecRaiseMf },
10261 { kIemNativeLabelType_RaiseXf, iemNativeHlpExecRaiseXf },
10262 { kIemNativeLabelType_ObsoleteTb, iemNativeHlpObsoleteTb },
10263 { kIemNativeLabelType_NeedCsLimChecking, iemNativeHlpNeedCsLimChecking },
10264 { kIemNativeLabelType_CheckBranchMiss, iemNativeHlpCheckBranchMiss },
10265 };
10266
10267 AssertCompile(RT_ELEMENTS(g_aSimpleTailLabels) == (unsigned)kIemNativeLabelType_LastSimple + 1U);
10268 AssertCompile(kIemNativeLabelType_Invalid == 0);
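        /* RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U sets bits 1 thru LastSimple,
           i.e. it selects every simple tail label while leaving out the Invalid entry in
           bit 0. */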
10269 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastSimple + 1U) - 2U);
10270 if (fTailLabels)
10271 {
10272 do
10273 {
10274 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10275 fTailLabels &= ~RT_BIT_64(enmLabel);
10276 Assert(g_aSimpleTailLabels[enmLabel].enmLabel == enmLabel);
10277
10278 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10279 Assert(idxLabel != UINT32_MAX);
10280 if (idxLabel != UINT32_MAX)
10281 {
10282 iemNativeLabelDefine(pReNative, idxLabel, off);
10283
10284 /* int pfnCallback(PVMCPUCC pVCpu) */
10285 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
10286 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)g_aSimpleTailLabels[enmLabel].pfnCallback);
10287
10288 /* jump back to the return sequence. */
10289 off = iemNativeEmitJmpToLabel(pReNative, off, idxReturnLabel);
10290 }
10291
10292 } while (fTailLabels);
10293 }
10294
10295#else /* IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE */
10296 /*
10297 * Generate tail labels with jumps to the common per-chunk code.
10298 */
10299# ifndef RT_ARCH_AMD64
10300 Assert(!(pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_Return) | RT_BIT_64(kIemNativeLabelType_Invalid))));
10301 AssertCompile(kIemNativeLabelType_Invalid == 0);
10302 uint64_t fTailLabels = pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1U) - 2U);
10303 if (fTailLabels)
10304 {
10305 do
10306 {
10307 IEMNATIVELABELTYPE const enmLabel = (IEMNATIVELABELTYPE)(ASMBitFirstSetU64(fTailLabels) - 1U);
10308 fTailLabels &= ~RT_BIT_64(enmLabel);
10309
10310 uint32_t const idxLabel = iemNativeLabelFind(pReNative, enmLabel);
10311 AssertContinue(idxLabel != UINT32_MAX);
10312 iemNativeLabelDefine(pReNative, idxLabel, off);
10313 off = iemNativeEmitTbExit(pReNative, off, enmLabel);
10314 } while (fTailLabels);
10315 }
10316# else
10317 Assert(!(pReNative->bmLabelTypes & (RT_BIT_64(kIemNativeLabelType_LastTbExit + 1) - 1U))); /* Should not be used! */
10318# endif
10319#endif /* IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE */
10320 }
10321 IEMNATIVE_CATCH_LONGJMP_BEGIN(pReNative, rc);
10322 {
10323 Log(("iemNativeRecompile: Caught %Rrc while recompiling!\n", rc));
10324 return pTb;
10325 }
10326 IEMNATIVE_CATCH_LONGJMP_END(pReNative);
10327 Assert(off <= pReNative->cInstrBufAlloc);
10328
10329 /*
10330      * Make sure all labels have been defined.
10331 */
10332 PIEMNATIVELABEL const paLabels = pReNative->paLabels;
10333#ifdef VBOX_STRICT
10334 uint32_t const cLabels = pReNative->cLabels;
10335 for (uint32_t i = 0; i < cLabels; i++)
10336 AssertMsgReturn(paLabels[i].off < off, ("i=%d enmType=%d\n", i, paLabels[i].enmType), pTb);
10337#endif
10338
10339#if 0 /* For profiling the native recompiler code. */
10340 if (pTb->Thrd.cCalls >= 136)
10341 {
10342 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10343 goto l_profile_again;
10344 }
10345#endif
10346
10347 /*
10348 * Allocate executable memory, copy over the code we've generated.
10349 */
10350 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
10351 if (pTbAllocator->pDelayedFreeHead)
10352 iemTbAllocatorProcessDelayedFrees(pVCpu, pVCpu->iem.s.pTbAllocatorR3);
10353
10354 PIEMNATIVEINSTR paFinalInstrBufRx = NULL;
10355#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
10356 PCIEMNATIVEPERCHUNKCTX pCtx = NULL;
10357 PIEMNATIVEINSTR const paFinalInstrBuf = iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb,
10358 &paFinalInstrBufRx, &pCtx);
10359
10360#else
10361 PIEMNATIVEINSTR const paFinalInstrBuf = iemExecMemAllocatorAlloc(pVCpu, off * sizeof(IEMNATIVEINSTR), pTb,
10362 &paFinalInstrBufRx, NULL);
10363#endif
10364 AssertReturn(paFinalInstrBuf, pTb);
10365 memcpy(paFinalInstrBuf, pReNative->pInstrBuf, off * sizeof(paFinalInstrBuf[0]));
10366
10367 /*
10368 * Apply fixups.
10369 */
10370 PIEMNATIVEFIXUP const paFixups = pReNative->paFixups;
10371 uint32_t const cFixups = pReNative->cFixups;
10372 for (uint32_t i = 0; i < cFixups; i++)
10373 {
10374 Assert(paFixups[i].off < off);
10375 Assert(paFixups[i].idxLabel < cLabels);
10376 AssertMsg(paLabels[paFixups[i].idxLabel].off < off,
10377 ("idxLabel=%d enmType=%d off=%#x (max %#x)\n", paFixups[i].idxLabel,
10378 paLabels[paFixups[i].idxLabel].enmType, paLabels[paFixups[i].idxLabel].off, off));
10379 RTPTRUNION const Ptr = { &paFinalInstrBuf[paFixups[i].off] };
10380 switch (paFixups[i].enmType)
10381 {
10382#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10383 case kIemNativeFixupType_Rel32:
10384 Assert(paFixups[i].off + 4 <= off);
10385 *Ptr.pi32 = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10386 continue;
10387
10388#elif defined(RT_ARCH_ARM64)
10389 case kIemNativeFixupType_RelImm26At0:
10390 {
10391 Assert(paFixups[i].off < off);
10392 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10393 Assert(offDisp >= -33554432 && offDisp < 33554432);
10394 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10395 continue;
10396 }
10397
10398 case kIemNativeFixupType_RelImm19At5:
10399 {
10400 Assert(paFixups[i].off < off);
10401 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10402 Assert(offDisp >= -262144 && offDisp < 262144);
10403 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xff00001f)) | (((uint32_t)offDisp & UINT32_C(0x0007ffff)) << 5);
10404 continue;
10405 }
10406
10407 case kIemNativeFixupType_RelImm14At5:
10408 {
10409 Assert(paFixups[i].off < off);
10410 int32_t const offDisp = paLabels[paFixups[i].idxLabel].off - paFixups[i].off + paFixups[i].offAddend;
10411 Assert(offDisp >= -8192 && offDisp < 8192);
10412 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfff8001f)) | (((uint32_t)offDisp & UINT32_C(0x00003fff)) << 5);
10413 continue;
10414 }
10415
10416#endif
10417 case kIemNativeFixupType_Invalid:
10418 case kIemNativeFixupType_End:
10419 break;
10420 }
10421 AssertFailed();
10422 }
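
    /* Editor's note: a minimal sketch (outside the build) of the B/BL imm26 patching done
       above for kIemNativeFixupType_RelImm26At0 on ARM64.  The displacement is counted in
       32-bit instructions and truncated into the low 26 bits of the branch instruction;
       the helper name is hypothetical. */
#if 0
    static uint32_t iemNativeSketchPatchBranchImm26(uint32_t uInstr, int32_t cInstrsDisp)
    {
        Assert(cInstrsDisp >= -33554432 && cInstrsDisp < 33554432); /* signed 26-bit range */
        return (uInstr & UINT32_C(0xfc000000)) | ((uint32_t)cInstrsDisp & UINT32_C(0x03ffffff));
    }
#endif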
10423
10424#ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
10425 /*
10426 * Apply TB exit fixups.
10427 */
10428 PIEMNATIVEEXITFIXUP const paTbExitFixups = pReNative->paTbExitFixups;
10429 uint32_t const cTbExitFixups = pReNative->cTbExitFixups;
10430 for (uint32_t i = 0; i < cTbExitFixups; i++)
10431 {
10432 Assert(paTbExitFixups[i].off < off);
10433 Assert(IEMNATIVELABELTYPE_IS_EXIT_REASON(paTbExitFixups[i].enmExitReason));
10434 RTPTRUNION const Ptr = { &paFinalInstrBuf[paTbExitFixups[i].off] };
10435
10436# if defined(RT_ARCH_AMD64) || defined(RT_ARCH_X86)
10437 Assert(paTbExitFixups[i].off + 4 <= off);
10438 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off + 4];
10439 Assert(offDisp >= INT32_MIN && offDisp <= INT32_MAX);
10440 *Ptr.pi32 = (int32_t)offDisp;
10441
10442# elif defined(RT_ARCH_ARM64)
10443 intptr_t const offDisp = pCtx->apExitLabels[paTbExitFixups[i].enmExitReason] - &paFinalInstrBufRx[paTbExitFixups[i].off];
10444 Assert(offDisp >= -33554432 && offDisp < 33554432);
10445 *Ptr.pu32 = (*Ptr.pu32 & UINT32_C(0xfc000000)) | ((uint32_t)offDisp & UINT32_C(0x03ffffff));
10446
10447# else
10448# error "Port me!"
10449# endif
10450 }
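    /* Unlike the regular fixups above, these target the shared per-chunk tail code, so the
       displacement is calculated against the final RX addresses (paFinalInstrBufRx and
       pCtx->apExitLabels) rather than against label offsets inside this TB. */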
10451#endif
10452
10453 iemExecMemAllocatorReadyForUse(pVCpu, paFinalInstrBufRx, off * sizeof(IEMNATIVEINSTR));
10454 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbNativeCode, off * sizeof(IEMNATIVEINSTR));
10455
10456 /*
10457 * Convert the translation block.
10458 */
10459 RTMemFree(pTb->Thrd.paCalls);
10460 pTb->Native.paInstructions = paFinalInstrBufRx;
10461 pTb->Native.cInstructions = off;
10462 pTb->fFlags = (pTb->fFlags & ~IEMTB_F_TYPE_MASK) | IEMTB_F_TYPE_NATIVE;
10463#ifdef IEMNATIVE_WITH_TB_DEBUG_INFO
10464 pReNative->pDbgInfo->FlatPc = pTb->FlatPc;
10465     pTb->pDbgInfo = (PIEMTBDBG)RTMemDup(pReNative->pDbgInfo, /* non-fatal, so no return check. */
10466 RT_UOFFSETOF_DYN(IEMTBDBG, aEntries[pReNative->pDbgInfo->cEntries]));
10467#endif
10468
10469 Assert(pTbAllocator->cThreadedTbs > 0);
10470 pTbAllocator->cThreadedTbs -= 1;
10471 pTbAllocator->cNativeTbs += 1;
10472 Assert(pTbAllocator->cNativeTbs <= pTbAllocator->cTotalTbs);
10473
10474#ifdef LOG_ENABLED
10475 /*
10476 * Disassemble to the log if enabled.
10477 */
10478 if (LogIs3Enabled())
10479 {
10480 Log3(("----------------------------------------- %d calls ---------------------------------------\n", cCallsOrg));
10481 iemNativeDisassembleTb(pVCpu, pTb, DBGFR3InfoLogHlp());
10482# if defined(DEBUG_bird) || defined(DEBUG_aeichner)
10483 RTLogFlush(NULL);
10484# endif
10485 }
10486#endif
10487 /*iemNativeDisassembleTb(pTb, DBGFR3InfoLogRelHlp());*/
10488
10489 STAM_REL_PROFILE_STOP(&pVCpu->iem.s.StatNativeRecompilation, a);
10490 return pTb;
10491}
10492