VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllN8veRecompBltIn.cpp @ r105673

Last change on this file: r105673, checked in by vboxsync, 4 months ago

VMM/IEM,TM: Do full-TB looping. Redid timer polling in the recompiler. Rewrote the Blt_CheckIrq code, eliminating a conditional. Fixed some TLB related assertions. Moved some IEMCPU members around in hope of better cache-locality. bugref:10656

1/* $Id: IEMAllN8veRecompBltIn.cpp 105673 2024-08-14 13:57:57Z vboxsync $ */
2/** @file
3 * IEM - Native Recompiler, Emitters for Built-In Threaded Functions.
4 */
5
6/*
7 * Copyright (C) 2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_IEM_RE_NATIVE
33#define IEM_WITH_OPAQUE_DECODER_STATE
34#define VMCPU_INCL_CPUM_GST_CTX
35#define VMM_INCLUDED_SRC_include_IEMMc_h /* block IEMMc.h inclusion. */
36#include <VBox/vmm/iem.h>
37#include <VBox/vmm/cpum.h>
38#include <VBox/vmm/dbgf.h>
39#include "IEMInternal.h"
40#include <VBox/vmm/vmcc.h>
41#include <VBox/log.h>
42#include <VBox/err.h>
43#include <VBox/param.h>
44#include <iprt/assert.h>
45#include <iprt/string.h>
46#if defined(RT_ARCH_AMD64)
47# include <iprt/x86.h>
48#elif defined(RT_ARCH_ARM64)
49# include <iprt/armv8.h>
50#endif
51
52
53#include "IEMInline.h"
54#include "IEMThreadedFunctions.h"
55#include "IEMN8veRecompiler.h"
56#include "IEMN8veRecompilerEmit.h"
57#include "IEMN8veRecompilerTlbLookup.h"
58
59
60
61/*********************************************************************************************************************************
62* TB Helper Functions *
63*********************************************************************************************************************************/
64#if defined(RT_ARCH_AMD64) || defined(RT_ARCH_ARM64)
65DECLASM(void) iemNativeHlpAsmSafeWrapLogCpuState(void);
66#endif
67
68
69/**
70 * Used by TB code to deal with a TLB miss for a new page.
71 */
72IEM_DECL_NATIVE_HLP_DEF(void, iemNativeHlpMemCodeNewPageTlbMiss,(PVMCPUCC pVCpu))
73{
74#ifdef IEM_WITH_TLB_STATISTICS
75 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeCodeTlbMissesNewPage);
76#endif
77 pVCpu->iem.s.pbInstrBuf = NULL;
78 pVCpu->iem.s.offCurInstrStart = GUEST_PAGE_SIZE;
79 pVCpu->iem.s.offInstrNextByte = GUEST_PAGE_SIZE;
80 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
81 if (pVCpu->iem.s.pbInstrBuf)
82 { /* likely */ }
83 else
84 {
85 AssertMsgFailed(("cs:rip=%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
86 IEM_DO_LONGJMP(pVCpu, VINF_SUCCESS);
87 }
88}
89
90
91/**
92 * Used by TB code to deal with a TLB miss for a new page, with instruction offset.
93 */
94IEM_DECL_NATIVE_HLP_DEF(RTGCPHYS, iemNativeHlpMemCodeNewPageTlbMissWithOff,(PVMCPUCC pVCpu, uint8_t offInstr))
95{
96#ifdef IEM_WITH_TLB_STATISTICS
97 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeCodeTlbMissesNewPageWithOffset);
98#endif
99 pVCpu->iem.s.pbInstrBuf = NULL;
100 pVCpu->iem.s.offCurInstrStart = GUEST_PAGE_SIZE - offInstr;
101 pVCpu->iem.s.offInstrNextByte = GUEST_PAGE_SIZE;
102 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
103 AssertMsg(pVCpu->iem.s.pbInstrBuf, ("cs:rip=%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
104 return pVCpu->iem.s.pbInstrBuf ? pVCpu->iem.s.GCPhysInstrBuf : NIL_RTGCPHYS;
105}
106
107
108/*********************************************************************************************************************************
109* Builtin functions *
110*********************************************************************************************************************************/
111
112/**
113 * Built-in function that does nothing.
114 *
115 * Whether this is called or not can be controlled by the entry in the
116 * IEMThreadedGenerator.katBltIns table. This can be useful to determine
117 * why behaviour changes when enabling the LogCpuState builtins. I.e.
118 * whether it's the reduced call count in the TBs or the threaded calls flushing
119 * register state.
120 */
121IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_Nop)
122{
123 RT_NOREF(pReNative, pCallEntry);
124 return off;
125}
126
127IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_Nop)
128{
129 *pOutgoing = *pIncoming;
130 RT_NOREF(pCallEntry);
131}
132
133
134/**
135 * Emits code for LogCpuState.
136 *
137 * This shouldn't have any relevant impact on the recompiler state.
138 */
139IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_LogCpuState)
140{
141#ifdef RT_ARCH_AMD64
142 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 20);
143 /* push rax */
144 pbCodeBuf[off++] = 0x50 + X86_GREG_xAX;
145 /* push imm32 */
146 pbCodeBuf[off++] = 0x68;
147 pbCodeBuf[off++] = RT_BYTE1(pCallEntry->auParams[0]);
148 pbCodeBuf[off++] = RT_BYTE2(pCallEntry->auParams[0]);
149 pbCodeBuf[off++] = RT_BYTE3(pCallEntry->auParams[0]);
150 pbCodeBuf[off++] = RT_BYTE4(pCallEntry->auParams[0]);
151 /* mov rax, iemNativeHlpAsmSafeWrapLogCpuState */
152 pbCodeBuf[off++] = X86_OP_REX_W;
153 pbCodeBuf[off++] = 0xb8 + X86_GREG_xAX;
154 *(uint64_t *)&pbCodeBuf[off] = (uintptr_t)iemNativeHlpAsmSafeWrapLogCpuState;
155 off += sizeof(uint64_t);
156 /* call rax */
157 pbCodeBuf[off++] = 0xff;
158 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 2, X86_GREG_xAX);
159 /* pop rax */
160 pbCodeBuf[off++] = 0x58 + X86_GREG_xAX;
161 /* pop rax */
162 pbCodeBuf[off++] = 0x58 + X86_GREG_xAX;
163#else
164 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpAsmSafeWrapLogCpuState);
165 RT_NOREF(pCallEntry);
166#endif
167
168 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
169 return off;
170}
171
172IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_LogCpuState)
173{
174 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
175 RT_NOREF(pCallEntry);
176}
177
178
179/**
180 * Built-in function that calls a C-implementation function taking zero arguments.
181 */
182IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_DeferToCImpl0)
183{
184 PFNIEMCIMPL0 const pfnCImpl = (PFNIEMCIMPL0)(uintptr_t)pCallEntry->auParams[0];
185 uint8_t const cbInstr = (uint8_t)pCallEntry->auParams[1];
186 uint64_t const fGstShwFlush = pCallEntry->auParams[2];
187 return iemNativeEmitCImplCall(pReNative, off, pCallEntry->idxInstr, fGstShwFlush, (uintptr_t)pfnCImpl, cbInstr, 0, 0, 0, 0);
188}
189
190IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_DeferToCImpl0)
191{
192 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
193 RT_NOREF(pCallEntry);
194}
195
196
197/**
198 * Flushes pending writes in preparation of raising an exception or aborting the TB.
199 */
200#define BODY_FLUSH_PENDING_WRITES() \
201 off = iemNativeRegFlushPendingWrites(pReNative, off);
202
203
204/**
205 * Worker for the CheckIrq, CheckTimers and CheckTimersAndIrq builtins below.
206 */
207template<bool const a_fCheckTimers, bool const a_fCheckIrqs>
208DECL_FORCE_INLINE(uint32_t) iemNativeRecompFunc_BltIn_CheckTimersAndIrqsCommon(PIEMRECOMPILERSTATE pReNative, uint32_t off)
209{
210 uint8_t const idxEflReg = !a_fCheckIrqs ? UINT8_MAX
211 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_EFlags,
212 kIemNativeGstRegUse_ReadOnly);
213 uint8_t const idxTmpReg1 = iemNativeRegAllocTmp(pReNative, &off);
214 uint8_t const idxTmpReg2 = a_fCheckIrqs ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
215 PIEMNATIVEINSTR const pCodeBuf = iemNativeInstrBufEnsure(pReNative, off, RT_ARCH_VAL == RT_ARCH_VAL_AMD64 ? 72 : 32);
216
217 /*
218 * First we decrement the timer poll counter, if so desired.
219 */
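    /* Illustrative sketch only (not the emitted code verbatim): the instructions
       emitted below behave like
           if (--pVCpu->iem.s.cIrqChecksTillNextPoll == 0)
               exit the TB via the ReturnBreakFF path;
       so the outer execution loop gets a chance to service timers. */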
220 if (a_fCheckTimers)
221 {
222# ifdef RT_ARCH_AMD64
223 /* dec [rbx + cIrqChecksTillNextPoll] */
224 pCodeBuf[off++] = 0xff;
225 off = iemNativeEmitGprByVCpuDisp(pCodeBuf, off, 1, RT_UOFFSETOF(VMCPU, iem.s.cIrqChecksTillNextPoll));
226
227 /* jz ReturnBreakFF */
228 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_ReturnBreakFF, kIemNativeInstrCond_e);
229
230# elif defined(RT_ARCH_ARM64)
231 AssertCompile(RTASSERT_OFFSET_OF(VMCPU, iem.s.cIrqChecksTillNextPoll) < _4K * sizeof(uint32_t));
232 off = iemNativeEmitLoadGprFromVCpuU32Ex(pCodeBuf, off, idxTmpReg1, RT_UOFFSETOF(VMCPU, iem.s.cIrqChecksTillNextPoll));
233 pCodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxTmpReg1, idxTmpReg1, 1, false /*f64Bit*/);
234 off = iemNativeEmitStoreGprToVCpuU32Ex(pCodeBuf, off, idxTmpReg1, RT_UOFFSETOF(VMCPU, iem.s.cIrqChecksTillNextPoll));
235
236 /* cbz reg1, ReturnBreakFF */
237 off = iemNativeEmitTestIfGprIsZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg1, false /*f64Bit*/,
238 kIemNativeLabelType_ReturnBreakFF);
239
240# else
241# error "port me"
242# endif
243 }
244
245 /*
246 * Second, check forced flags, if so desired.
247 *
248 * We OR them together to save a conditional. A trick here is that the
249 * two IRQ flags are unused in the global flags, so we can still use the
250 * resulting value to check for suppressed interrupts.
251 */
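    /* Illustrative sketch only -- roughly what the emitted code below checks:
           uint64_t fFlags  = pVCpu->fLocalForcedActions & ~fUnwantedCpuFFs;
           fFlags          |= pVM->fGlobalForcedActions;
           if (fFlags)
               either exit via ReturnBreakFF, or, when only the IRQ FFs are set,
               fall through to the EFLAGS.IF / interrupt-shadow test further down. */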
252 if (a_fCheckIrqs)
253 {
254 /* Load VMCPU::fLocalForcedActions first and mask it. We can simplify the
255 masking by ASSUMING none of the unwanted flags are located above bit 30. */
256 uint64_t const fUnwantedCpuFFs = VMCPU_FF_PGM_SYNC_CR3
257 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
258 | VMCPU_FF_TLB_FLUSH
259 | VMCPU_FF_UNHALT;
260 AssertCompile(fUnwantedCpuFFs < RT_BIT_64(31));
261 off = iemNativeEmitLoadGprFromVCpuU64Ex(pCodeBuf, off, idxTmpReg1, RT_UOFFSETOF(VMCPUCC, fLocalForcedActions));
262# if defined(RT_ARCH_AMD64)
263 /* and reg1, ~fUnwantedCpuFFs */
264 pCodeBuf[off++] = idxTmpReg1 >= 8 ? X86_OP_REX_B | X86_OP_REX_W : X86_OP_REX_W;
265 pCodeBuf[off++] = 0x81;
266 pCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, 4, idxTmpReg1 & 7);
267 *(uint32_t *)&pCodeBuf[off] = ~(uint32_t)fUnwantedCpuFFs;
268 off += 4;
269
270# else
271 off = iemNativeEmitLoadGprImmEx(pCodeBuf, off, idxTmpReg2, ~fUnwantedCpuFFs);
272 off = iemNativeEmitAndGprByGprEx(pCodeBuf, off, idxTmpReg1, idxTmpReg2);
273# endif
274
275 /* OR in VM::fGlobalForcedActions. We access the member via pVCpu.
276 No need to mask anything here. Unfortunately, it's a 32-bit
277 variable, so we can't OR it directly on x86. */
278 AssertCompile(VM_FF_ALL_MASK == UINT32_MAX);
279 intptr_t const offGlobalForcedActions = (intptr_t)&pReNative->pVCpu->CTX_SUFF(pVM)->fGlobalForcedActions
280 - (intptr_t)pReNative->pVCpu;
281 Assert((int32_t)offGlobalForcedActions == offGlobalForcedActions);
282
283# ifdef RT_ARCH_AMD64
284 if (idxTmpReg2 >= 8)
285 pCodeBuf[off++] = X86_OP_REX_R;
286 pCodeBuf[off++] = 0x8b; /* mov */
287 off = iemNativeEmitGprByVCpuSignedDisp(pCodeBuf, off, idxTmpReg2, (int32_t)offGlobalForcedActions);
288
289 /* or reg1, reg2 */
290 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, idxTmpReg1, idxTmpReg2);
291
292 /* jz nothing_pending */
293 uint32_t const offFixup1 = off;
294 off = iemNativeEmitJccToFixedEx(pCodeBuf, off, off + 64, kIemNativeInstrCond_e);
295
296# elif defined(RT_ARCH_ARM64)
297 off = iemNativeEmitGprBySignedVCpuLdStEx(pCodeBuf, off, idxTmpReg2, (int32_t)offGlobalForcedActions,
298 kArmv8A64InstrLdStType_Ld_Word, sizeof(uint32_t));
299 off = iemNativeEmitOrGprByGprEx(pCodeBuf, off, idxTmpReg1, idxTmpReg2);
300
301 /* cbz nothing_pending */
302 uint32_t const offFixup1 = off;
303 off = iemNativeEmitTestIfGprIsZeroOrNotZeroAndJmpToFixedEx(pCodeBuf, off, idxTmpReg1, true /*f64Bit*/,
304 false /*fJmpIfNotZero*/, off + 16);
305# else
306# error "port me"
307# endif
308
309 /* More than just IRQ FFs pending? */
310 AssertCompile((VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC) == 3);
311 /* cmp reg1, 3 */
312 off = iemNativeEmitCmpGprWithImmEx(pCodeBuf, off, idxTmpReg1, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC);
313 /* ja ReturnBreakFF */
314 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_ReturnBreakFF, kIemNativeInstrCond_nbe);
315
316 /*
317 * Okay, we've only got pending IRQ related FFs: Can we dispatch IRQs?
318 *
319 * ASSUME that the shadow flags are cleared when they ought to be cleared,
320 * so we can skip the RIP check.
321 */
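    /* Illustrative note: the two emitter calls below compute
           reg1 = (efl & (X86_EFL_IF | CPUMCTX_INHIBIT_SHADOW)) ^ X86_EFL_IF;
       which is zero exactly when IF=1 and no interrupt shadow is active,
       i.e. when the pending IRQ is deliverable and we must exit the TB. */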
322 AssertCompile(CPUMCTX_INHIBIT_SHADOW < RT_BIT_32(31));
323 /* reg1 = efl & (IF | INHIBIT_SHADOW) */
324 off = iemNativeEmitGpr32EqGprAndImmEx(pCodeBuf, off, idxTmpReg1, idxEflReg, X86_EFL_IF | CPUMCTX_INHIBIT_SHADOW);
325 /* reg1 ^= IF */
326 off = iemNativeEmitXorGpr32ByImmEx(pCodeBuf, off, idxTmpReg1, X86_EFL_IF);
327
328# ifdef RT_ARCH_AMD64
329 /* jz ReturnBreakFF */
330 off = iemNativeEmitJccTbExitEx(pReNative, pCodeBuf, off, kIemNativeLabelType_ReturnBreakFF, kIemNativeInstrCond_e);
331
332# elif defined(RT_ARCH_ARM64)
333 /* cbz reg1, ReturnBreakFF */
334 off = iemNativeEmitTestIfGprIsZeroAndTbExitEx(pReNative, pCodeBuf, off, idxTmpReg1, false /*f64Bit*/,
335 kIemNativeLabelType_ReturnBreakFF);
336# else
337# error "port me"
338# endif
339 /*
340 * nothing_pending:
341 */
342 iemNativeFixupFixedJump(pReNative, offFixup1, off);
343 }
344
345 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
346
347 /*
348 * Cleanup.
349 */
350 iemNativeRegFreeTmp(pReNative, idxTmpReg1);
351 if (a_fCheckIrqs)
352 {
353 iemNativeRegFreeTmp(pReNative, idxTmpReg2);
354 iemNativeRegFreeTmp(pReNative, idxEflReg);
355 }
356 else
357 {
358 Assert(idxTmpReg2 == UINT8_MAX);
359 Assert(idxEflReg == UINT8_MAX);
360 }
361
362 return off;
363}
364
365
366/**
367 * Built-in function that checks for pending interrupts that can be delivered or
368 * forced action flags.
369 *
370 * This triggers after the completion of an instruction, so EIP is already at
371 * the next instruction. If an IRQ or important FF is pending, this will return
372 * a non-zero status that stops TB execution.
373 */
374IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckIrq)
375{
376 BODY_FLUSH_PENDING_WRITES();
377 off = iemNativeRecompFunc_BltIn_CheckTimersAndIrqsCommon<false, true>(pReNative, off);
378
379 /* Note down that we've been here, so we can skip FFs + IRQ checks when
380 doing direct linking. */
381#ifdef IEMNATIVE_WITH_LIVENESS_ANALYSIS
382 pReNative->idxLastCheckIrqCallNo = pReNative->idxCurCall;
383 RT_NOREF(pCallEntry);
384#else
385 pReNative->idxLastCheckIrqCallNo = pCallEntry - pReNative->pTbOrg->Thrd.paCalls;
386#endif
387
388 return off;
389}
390
391IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckIrq)
392{
393 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
394 IEM_LIVENESS_RAW_EFLAGS_ONE_INPUT(pOutgoing, fEflOther);
395 RT_NOREF(pCallEntry);
396}
397
398
399/**
400 * Built-in function that works the cIrqChecksTillNextPoll counter on direct TB
401 * linking, like loop-jumps.
402 */
403IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckTimers)
404{
405 BODY_FLUSH_PENDING_WRITES();
406 RT_NOREF(pCallEntry);
407 return iemNativeRecompFunc_BltIn_CheckTimersAndIrqsCommon<true, false>(pReNative, off);
408}
409
410IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckTimers)
411{
412 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
413 RT_NOREF(pCallEntry);
414}
415
416
417/**
418 * Combined BltIn_CheckTimers + BltIn_CheckIrq for direct linking.
419 */
420IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckTimersAndIrq)
421{
422 BODY_FLUSH_PENDING_WRITES();
423 RT_NOREF(pCallEntry);
424 return iemNativeRecompFunc_BltIn_CheckTimersAndIrqsCommon<true, true>(pReNative, off);
425}
426
427IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckTimersAndIrq)
428{
429 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
430 IEM_LIVENESS_RAW_EFLAGS_ONE_INPUT(pOutgoing, fEflOther);
431 RT_NOREF(pCallEntry);
432}
433
434
435/**
436 * Built-in function checks if IEMCPU::fExec has the expected value.
437 */
438IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckMode)
439{
440 uint32_t const fExpectedExec = (uint32_t)pCallEntry->auParams[0];
441 uint8_t const idxTmpReg = iemNativeRegAllocTmp(pReNative, &off);
442
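    /* Illustrative sketch only -- the emitted check is roughly:
           if ((pVCpu->iem.s.fExec & IEMTB_F_KEY_MASK) != (fExpectedExec & IEMTB_F_KEY_MASK))
               exit the TB via the ReturnBreak path; */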
443 off = iemNativeEmitLoadGprFromVCpuU32(pReNative, off, idxTmpReg, RT_UOFFSETOF(VMCPUCC, iem.s.fExec));
444 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxTmpReg, IEMTB_F_KEY_MASK);
445 off = iemNativeEmitTestIfGpr32NotEqualImmAndTbExit(pReNative, off, idxTmpReg, fExpectedExec & IEMTB_F_KEY_MASK,
446 kIemNativeLabelType_ReturnBreak);
447 iemNativeRegFreeTmp(pReNative, idxTmpReg);
448
449 /* Maintain the recompiler fExec state. */
450 pReNative->fExec = fExpectedExec & IEMTB_F_IEM_F_MASK;
451 return off;
452}
453
454IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckMode)
455{
456 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
457 RT_NOREF(pCallEntry);
458}
459
460
461/**
462 * Sets idxTbCurInstr in preparation of raising an exception or aborting the TB.
463 */
464/** @todo Optimize this, so we don't set the same value more than once. Just
465 * needs some tracking. */
466#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
467# define BODY_SET_CUR_INSTR() \
468 off = iemNativeEmitStoreImmToVCpuU8(pReNative, off, pCallEntry->idxInstr, RT_UOFFSETOF(VMCPUCC, iem.s.idxTbCurInstr))
469#else
470# define BODY_SET_CUR_INSTR() ((void)0)
471#endif
472
473
474/**
475 * Macro that emits the 16/32-bit CS.LIM check.
476 */
477#define BODY_CHECK_CS_LIM(a_cbInstr) \
478 off = iemNativeEmitBltInCheckCsLim(pReNative, off, (a_cbInstr))
479
480#define LIVENESS_CHECK_CS_LIM(a_pOutgoing) \
481 IEM_LIVENESS_RAW_SEG_LIMIT_INPUT(a_pOutgoing, X86_SREG_CS)
482
483DECL_FORCE_INLINE(uint32_t)
484iemNativeEmitBltInCheckCsLim(PIEMRECOMPILERSTATE pReNative, uint32_t off, uint8_t cbInstr)
485{
486 Assert(cbInstr > 0);
487 Assert(cbInstr < 16);
488#ifdef VBOX_STRICT
489 off = iemNativeEmitMarker(pReNative, off, 0x80000001);
490#endif
491
492#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
493 Assert(pReNative->Core.offPc == 0);
494#endif
495
496 /*
497 * We need CS.LIM and RIP here. When cbInstr is larger than 1, we also need
498 * a temporary register for calculating the last address of the instruction.
499 *
500 * The calculation and comparisons are 32-bit. We ASSUME that the incoming
501 * RIP isn't totally invalid, i.e. that any jump/call/ret/iret instruction
502 * that last updated EIP here checked it already, and that we're therefore
503 * safe in the 32-bit wrap-around scenario to only check that the last byte
504 * is within CS.LIM. In the case of instruction-by-instruction advancing
505 * up to an EIP wrap-around, we know that CS.LIM is 4G-1 because the limit
506 * must be using 4KB granularity and the previous instruction was fine.
507 */
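    /* Illustrative sketch only -- the check emitted below amounts to (32-bit arithmetic):
           if ((uint32_t)(pc + cbInstr - 1) > cs.lim)
               raise #GP(0); */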
508 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
509 kIemNativeGstRegUse_ReadOnly);
510 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_LIMIT(X86_SREG_CS),
511 kIemNativeGstRegUse_ReadOnly);
512#ifdef RT_ARCH_AMD64
513 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8);
514#elif defined(RT_ARCH_ARM64)
515 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
516#else
517# error "Port me"
518#endif
519
520 if (cbInstr != 1)
521 {
522 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
523
524 /*
525 * 1. idxRegTmp = idxRegPc + cbInstr;
526 * 2. if idxRegTmp > idxRegCsLim then raise #GP(0).
527 */
528#ifdef RT_ARCH_AMD64
529 /* 1. lea tmp32, [Pc + cbInstr - 1] */
530 if (idxRegTmp >= 8 || idxRegPc >= 8)
531 pbCodeBuf[off++] = (idxRegTmp < 8 ? 0 : X86_OP_REX_R) | (idxRegPc < 8 ? 0 : X86_OP_REX_B);
532 pbCodeBuf[off++] = 0x8d;
533 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, idxRegTmp & 7, idxRegPc & 7);
534 if ((idxRegPc & 7) == X86_GREG_xSP)
535 pbCodeBuf[off++] = X86_SIB_MAKE(idxRegPc & 7, 4 /*no index*/, 0);
536 pbCodeBuf[off++] = cbInstr - 1;
537
538 /* 2. cmp tmp32(r), CsLim(r/m). */
539 if (idxRegTmp >= 8 || idxRegCsLim >= 8)
540 pbCodeBuf[off++] = (idxRegTmp < 8 ? 0 : X86_OP_REX_R) | (idxRegCsLim < 8 ? 0 : X86_OP_REX_B);
541 pbCodeBuf[off++] = 0x3b;
542 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegTmp & 7, idxRegCsLim & 7);
543
544#elif defined(RT_ARCH_ARM64)
545 /* 1. add tmp32, Pc, #cbInstr-1 */
546 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubUImm12(false /*fSub*/, idxRegTmp, idxRegPc, cbInstr - 1, false /*f64Bit*/);
547 /* 2. cmp tmp32, CsLim */
548 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR, idxRegTmp, idxRegCsLim,
549 false /*f64Bit*/, true /*fSetFlags*/);
550
551#endif
552 iemNativeRegFreeTmp(pReNative, idxRegTmp);
553 }
554 else
555 {
556 /*
557 * Here we can skip step 1 and compare PC and CS.LIM directly.
558 */
559#ifdef RT_ARCH_AMD64
560 /* 2. cmp eip(r), CsLim(r/m). */
561 if (idxRegPc >= 8 || idxRegCsLim >= 8)
562 pbCodeBuf[off++] = (idxRegPc < 8 ? 0 : X86_OP_REX_R) | (idxRegCsLim < 8 ? 0 : X86_OP_REX_B);
563 pbCodeBuf[off++] = 0x3b;
564 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_REG, idxRegPc & 7, idxRegCsLim & 7);
565
566#elif defined(RT_ARCH_ARM64)
567 /* 2. cmp Pc, CsLim */
568 pu32CodeBuf[off++] = Armv8A64MkInstrAddSubReg(true /*fSub*/, ARMV8_A64_REG_XZR, idxRegPc, idxRegCsLim,
569 false /*f64Bit*/, true /*fSetFlags*/);
570
571#endif
572 }
573
574 /* 3. Jump if greater. */
575 off = iemNativeEmitJaTbExit(pReNative, off, kIemNativeLabelType_RaiseGp0);
576
577 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
578 iemNativeRegFreeTmp(pReNative, idxRegPc);
579 return off;
580}
581
582
583/**
584 * Macro that considers whether we need CS.LIM checking after a branch or
585 * crossing over to a new page.
586 */
587#define BODY_CONSIDER_CS_LIM_CHECKING(a_pTb, a_cbInstr) \
588 RT_NOREF(a_cbInstr); \
589 off = iemNativeEmitBltInConsiderLimChecking(pReNative, off)
590
591#define LIVENESS_CONSIDER_CS_LIM_CHECKING(a_pOutgoing) \
592 IEM_LIVENESS_RAW_SEG_LIMIT_INPUT(a_pOutgoing, X86_SREG_CS); \
593 IEM_LIVENESS_RAW_SEG_BASE_INPUT(a_pOutgoing, X86_SREG_CS)
594
595DECL_FORCE_INLINE(uint32_t)
596iemNativeEmitBltInConsiderLimChecking(PIEMRECOMPILERSTATE pReNative, uint32_t off)
597{
598#ifdef VBOX_STRICT
599 off = iemNativeEmitMarker(pReNative, off, 0x80000002);
600#endif
601
602#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
603 Assert(pReNative->Core.offPc == 0);
604#endif
605
606 /*
607 * This check must match the one in iemGetTbFlagsForCurrentPc
608 * exactly:
609 *
610 * int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
611 * if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
612 * return fRet;
613 * return fRet | IEMTB_F_CS_LIM_CHECKS;
614 *
615 *
616 * We need EIP, CS.LIM and CS.BASE here.
617 */
618
619 /* Calculate the offFromLim first: */
620 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
621 kIemNativeGstRegUse_ReadOnly);
622 uint8_t const idxRegCsLim = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_LIMIT(X86_SREG_CS),
623 kIemNativeGstRegUse_ReadOnly);
624 uint8_t const idxRegLeft = iemNativeRegAllocTmp(pReNative, &off);
625
626#ifdef RT_ARCH_ARM64
627 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
628 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegLeft, idxRegCsLim, idxRegPc);
629 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
630#else
631 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegLeft, idxRegCsLim);
632 off = iemNativeEmitSubTwoGprs(pReNative, off, idxRegLeft, idxRegPc);
633#endif
634
635 iemNativeRegFreeTmp(pReNative, idxRegCsLim);
636 iemNativeRegFreeTmp(pReNative, idxRegPc);
637
638 /* Calculate the threshold level (right side). */
639 uint8_t const idxRegCsBase = iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_BASE(X86_SREG_CS),
640 kIemNativeGstRegUse_ReadOnly);
641 uint8_t const idxRegRight = iemNativeRegAllocTmp(pReNative, &off);
642
643#ifdef RT_ARCH_ARM64
644 pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 4);
645 Assert(Armv8A64ConvertImmRImmS2Mask32(11, 0) == GUEST_PAGE_OFFSET_MASK);
646 pu32CodeBuf[off++] = Armv8A64MkInstrAndImm(idxRegRight, idxRegCsBase, 11, 0, false /*f64Bit*/);
647 pu32CodeBuf[off++] = Armv8A64MkInstrNeg(idxRegRight);
648 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegRight, idxRegRight, (X86_PAGE_SIZE + 16) / 2);
649 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegRight, idxRegRight, (X86_PAGE_SIZE + 16) / 2);
650 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
651
652#else
653 off = iemNativeEmitLoadGprImm32(pReNative, off, idxRegRight, GUEST_PAGE_OFFSET_MASK);
654 off = iemNativeEmitAndGpr32ByGpr32(pReNative, off, idxRegRight, idxRegCsBase);
655 off = iemNativeEmitNegGpr(pReNative, off, idxRegRight);
656 off = iemNativeEmitAddGprImm(pReNative, off, idxRegRight, X86_PAGE_SIZE + 16);
657#endif
658
659 iemNativeRegFreeTmp(pReNative, idxRegCsBase);
660
661 /* Compare the two and jump out if we're too close to the limit. */
662 off = iemNativeEmitCmpGprWithGpr(pReNative, off, idxRegLeft, idxRegRight);
663 off = iemNativeEmitJlTbExit(pReNative, off, kIemNativeLabelType_NeedCsLimChecking);
664
665 iemNativeRegFreeTmp(pReNative, idxRegRight);
666 iemNativeRegFreeTmp(pReNative, idxRegLeft);
667 return off;
668}
669
670
671
672/**
673 * Macro that implements opcode (re-)checking.
674 */
675#define BODY_CHECK_OPCODES(a_pTb, a_idxRange, a_offRange, a_cbInstr) \
676 RT_NOREF(a_cbInstr); \
677 off = iemNativeEmitBltInCheckOpcodes(pReNative, off, (a_pTb), (a_idxRange), (a_offRange))
678
679#define LIVENESS_CHECK_OPCODES(a_pOutgoing) ((void)0)
680
681#if 0 /* debugging aid */
682bool g_fBpOnObsoletion = false;
683# define BP_ON_OBSOLETION g_fBpOnObsoletion
684#else
685# define BP_ON_OBSOLETION 0
686#endif
687
688DECL_FORCE_INLINE(uint32_t)
689iemNativeEmitBltInCheckOpcodes(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb, uint8_t idxRange, uint16_t offRange)
690{
691 Assert(idxRange < pTb->cRanges && pTb->cRanges <= RT_ELEMENTS(pTb->aRanges));
692 Assert(offRange < pTb->aRanges[idxRange].cbOpcodes);
693#ifdef VBOX_STRICT
694 off = iemNativeEmitMarker(pReNative, off, 0x80000003);
695#endif
696
697 /*
698 * Where to start and how much to compare.
699 *
700 * Looking at the ranges produced when r160746 was running a DOS VM with TB
701 * logging, the ranges can be anything from 1 byte to at least 0x197 bytes,
702 * with the 6, 5, 4, 7, 8, 40, 3, 2, 9 and 10 being the top 10 in the sample.
703 *
704 * The top 10 for the early boot phase of a 64-bit debian 9.4 VM: 5, 9, 8,
705 * 12, 10, 11, 6, 13, 15 and 16. Max 0x359 bytes. Same revision as above.
706 */
707 uint16_t offPage = pTb->aRanges[idxRange].offPhysPage + offRange;
708 uint16_t cbLeft = pTb->aRanges[idxRange].cbOpcodes - offRange;
709 Assert(cbLeft > 0);
710 uint8_t const *pbOpcodes = &pTb->pabOpcodes[pTb->aRanges[idxRange].offOpcodes + offRange];
711 uint32_t offConsolidatedJump = UINT32_MAX;
712
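    /* Illustrative sketch only -- on either host architecture the emitted code is
       functionally equivalent to
           if (memcmp(&pVCpu->iem.s.pbInstrBuf[offPage], pbOpcodes, cbLeft) != 0)
               exit the TB via the ObsoleteTb path;
       just unrolled and specialised according to cbLeft and alignment. */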
713#ifdef RT_ARCH_AMD64
714 /* AMD64/x86 offers a bunch of options. Smaller stuff can be
715 completely inlined, while for larger blocks we use REPE CMPS. */
716# define CHECK_OPCODES_CMP_IMMXX(a_idxReg, a_bOpcode) /* cost: 3 bytes */ do { \
717 pbCodeBuf[off++] = a_bOpcode; \
718 Assert(offPage < 127); \
719 pbCodeBuf[off++] = X86_MODRM_MAKE(X86_MOD_MEM1, 7, a_idxReg); \
720 pbCodeBuf[off++] = RT_BYTE1(offPage); \
721 } while (0)
722
723# ifdef IEMNATIVE_WITH_RECOMPILER_PER_CHUNK_TAIL_CODE
724# define NEAR_JMP_SIZE 5
725# else
726# define NEAR_JMP_SIZE 6
727# endif
728
729# define CHECK_OPCODES_CMP_JMP() /* cost: 7 bytes first time, then 2 bytes */ do { \
730 if (offConsolidatedJump != UINT32_MAX) \
731 { \
732 int32_t const offDisp = (int32_t)offConsolidatedJump - (int32_t)(off + 2); \
733 Assert(offDisp >= -128); \
734 pbCodeBuf[off++] = 0x75; /* jnz near */ \
735 pbCodeBuf[off++] = (uint8_t)offDisp; \
736 } \
737 else \
738 { \
739 pbCodeBuf[off++] = 0x74; /* jz near +NEAR_JMP_SIZE */ \
740 pbCodeBuf[off++] = NEAR_JMP_SIZE + BP_ON_OBSOLETION; \
741 offConsolidatedJump = off; \
742 if (BP_ON_OBSOLETION) pbCodeBuf[off++] = 0xcc; \
743 off = iemNativeEmitTbExitEx(pReNative, pbCodeBuf, off, kIemNativeLabelType_ObsoleteTb); \
744 } \
745 } while (0)
746
747# define CHECK_OPCODES_CMP_IMM32(a_idxReg) /* cost: 3+4+2 = 9 */ do { \
748 CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x81); \
749 pbCodeBuf[off++] = *pbOpcodes++; \
750 pbCodeBuf[off++] = *pbOpcodes++; \
751 pbCodeBuf[off++] = *pbOpcodes++; \
752 pbCodeBuf[off++] = *pbOpcodes++; \
753 cbLeft -= 4; \
754 offPage += 4; \
755 CHECK_OPCODES_CMP_JMP(); \
756 } while (0)
757
758# define CHECK_OPCODES_CMP_IMM16(a_idxReg) /* cost: 1+3+2+2 = 8 */ do { \
759 pbCodeBuf[off++] = X86_OP_PRF_SIZE_OP; \
760 CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x81); \
761 pbCodeBuf[off++] = *pbOpcodes++; \
762 pbCodeBuf[off++] = *pbOpcodes++; \
763 cbLeft -= 2; \
764 offPage += 2; \
765 CHECK_OPCODES_CMP_JMP(); \
766 } while (0)
767
768# define CHECK_OPCODES_CMP_IMM8(a_idxReg) /* cost: 3+1+2 = 6 */ do { \
769 CHECK_OPCODES_CMP_IMMXX(a_idxReg, 0x80); \
770 pbCodeBuf[off++] = *pbOpcodes++; \
771 cbLeft -= 1; \
772 offPage += 1; \
773 CHECK_OPCODES_CMP_JMP(); \
774 } while (0)
775
776# define CHECK_OPCODES_CMPSX(a_bOpcode, a_cbToSubtract, a_bPrefix) /* cost: 2+2 = 4 */ do { \
777 if (a_bPrefix) \
778 pbCodeBuf[off++] = (a_bPrefix); \
779 pbCodeBuf[off++] = (a_bOpcode); \
780 CHECK_OPCODES_CMP_JMP(); \
781 cbLeft -= (a_cbToSubtract); \
782 } while (0)
783
784# define CHECK_OPCODES_ECX_IMM(a_uValue) /* cost: 5 */ do { \
785 pbCodeBuf[off++] = 0xb8 + X86_GREG_xCX; \
786 pbCodeBuf[off++] = RT_BYTE1(a_uValue); \
787 pbCodeBuf[off++] = RT_BYTE2(a_uValue); \
788 pbCodeBuf[off++] = RT_BYTE3(a_uValue); \
789 pbCodeBuf[off++] = RT_BYTE4(a_uValue); \
790 } while (0)
791
792 if (cbLeft <= 24)
793 {
794 uint8_t const idxRegTmp = iemNativeRegAllocTmpEx(pReNative, &off,
795 ( RT_BIT_32(X86_GREG_xAX)
796 | RT_BIT_32(X86_GREG_xCX)
797 | RT_BIT_32(X86_GREG_xDX)
798 | RT_BIT_32(X86_GREG_xBX)
799 | RT_BIT_32(X86_GREG_xSI)
800 | RT_BIT_32(X86_GREG_xDI))
801 & ~IEMNATIVE_REG_FIXED_MASK); /* pick reg not requiring rex prefix */
802 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.pbInstrBuf));
803 if (offPage >= 128 - cbLeft)
804 {
805 off = iemNativeEmitAddGprImm(pReNative, off, idxRegTmp, offPage & ~(uint16_t)3);
806 offPage &= 3;
807 }
808
809 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6 + 14 + 54 + 8 + 6 + BP_ON_OBSOLETION /* = 88 */);
810
811 if (cbLeft > 8)
812 switch (offPage & 3)
813 {
814 case 0:
815 break;
816 case 1: /* cost: 6 + 8 = 14 */
817 CHECK_OPCODES_CMP_IMM8(idxRegTmp);
818 RT_FALL_THRU();
819 case 2: /* cost: 8 */
820 CHECK_OPCODES_CMP_IMM16(idxRegTmp);
821 break;
822 case 3: /* cost: 6 */
823 CHECK_OPCODES_CMP_IMM8(idxRegTmp);
824 break;
825 }
826
827 while (cbLeft >= 4)
828 CHECK_OPCODES_CMP_IMM32(idxRegTmp); /* max iteration: 24/4 = 6; --> cost: 6 * 9 = 54 */
829
830 if (cbLeft >= 2)
831 CHECK_OPCODES_CMP_IMM16(idxRegTmp); /* cost: 8 */
832 if (cbLeft)
833 CHECK_OPCODES_CMP_IMM8(idxRegTmp); /* cost: 6 */
834
835 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
836 iemNativeRegFreeTmp(pReNative, idxRegTmp);
837 }
838 else
839 {
840 /* RDI = &pbInstrBuf[offPage] */
841 uint8_t const idxRegDi = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xDI));
842 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegDi, RT_UOFFSETOF(VMCPU, iem.s.pbInstrBuf));
843 if (offPage != 0)
844 off = iemNativeEmitAddGprImm(pReNative, off, idxRegDi, offPage);
845
846 /* RSI = pbOpcodes */
847 uint8_t const idxRegSi = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xSI));
848 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegSi, (uintptr_t)pbOpcodes);
849
850 /* RCX = counts. */
851 uint8_t const idxRegCx = iemNativeRegAllocTmpEx(pReNative, &off, RT_BIT_32(X86_GREG_xCX));
852
853 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 6 + 10 + 5 + 5 + 3 + 4 + 3 + BP_ON_OBSOLETION /*= 36*/);
854
855 /** @todo profile and optimize this further. Maybe an idea to align by
856 * offPage if the two cannot be reconciled. */
857 /* Align by the page offset, so that at least one of the accesses is naturally aligned. */
858 switch (offPage & 7) /* max cost: 10 */
859 {
860 case 0:
861 break;
862 case 1: /* cost: 3+4+3 = 10 */
863 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
864 RT_FALL_THRU();
865 case 2: /* cost: 4+3 = 7 */
866 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP);
867 CHECK_OPCODES_CMPSX(0xa7, 4, 0);
868 break;
869 case 3: /* cost: 3+3 = 6 */
870 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
871 RT_FALL_THRU();
872 case 4: /* cost: 3 */
873 CHECK_OPCODES_CMPSX(0xa7, 4, 0);
874 break;
875 case 5: /* cost: 3+4 = 7 */
876 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
877 RT_FALL_THRU();
878 case 6: /* cost: 4 */
879 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP);
880 break;
881 case 7: /* cost: 3 */
882 CHECK_OPCODES_CMPSX(0xa6, 1, 0);
883 break;
884 }
885
886 /* Compare qwords: */
887 uint32_t const cQWords = cbLeft >> 3;
888 CHECK_OPCODES_ECX_IMM(cQWords); /* cost: 5 */
889
890 pbCodeBuf[off++] = X86_OP_PRF_REPZ; /* cost: 5 */
891 CHECK_OPCODES_CMPSX(0xa7, 0, X86_OP_REX_W);
892 cbLeft &= 7;
893
894 if (cbLeft & 4)
895 CHECK_OPCODES_CMPSX(0xa7, 4, 0); /* cost: 3 */
896 if (cbLeft & 2)
897 CHECK_OPCODES_CMPSX(0xa7, 2, X86_OP_PRF_SIZE_OP); /* cost: 4 */
898 if (cbLeft & 1)
899 CHECK_OPCODES_CMPSX(0xa6, 1, 0); /* cost: 3 */
900
901 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
902 iemNativeRegFreeTmp(pReNative, idxRegCx);
903 iemNativeRegFreeTmp(pReNative, idxRegSi);
904 iemNativeRegFreeTmp(pReNative, idxRegDi);
905 }
906
907#elif defined(RT_ARCH_ARM64)
908 /* We need pbInstrBuf in a register, whatever we do. */
909 uint8_t const idxRegSrc1Ptr = iemNativeRegAllocTmp(pReNative, &off);
910 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegSrc1Ptr, RT_UOFFSETOF(VMCPU, iem.s.pbInstrBuf));
911
912 /* We also need at least one more register for holding bytes & words we
913 load via pbInstrBuf. */
914 uint8_t const idxRegSrc1Val = iemNativeRegAllocTmp(pReNative, &off);
915
916 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 64);
917
918 /* One byte compare can be done with the opcode byte as an immediate. We'll
919 do this to uint16_t align src1. */
920 bool fPendingJmp = RT_BOOL(offPage & 1);
921 if (fPendingJmp)
922 {
923 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Byte, idxRegSrc1Val, idxRegSrc1Ptr, offPage);
924 pu32CodeBuf[off++] = Armv8A64MkInstrCmpUImm12(idxRegSrc1Val, *pbOpcodes++, false /*f64Bit*/);
925 offPage += 1;
926 cbLeft -= 1;
927 }
928
929 if (cbLeft > 0)
930 {
931 /* We need a register for holding the opcode bytes we're comparing with,
932 as CCMP only has a 5-bit immediate form and thus cannot hold bytes. */
933 uint8_t const idxRegSrc2Val = iemNativeRegAllocTmp(pReNative, &off);
934
935 /* Word (uint32_t) aligning the src1 pointer is best done using a 16-bit constant load. */
936 if ((offPage & 3) && cbLeft >= 2)
937 {
938 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Half, idxRegSrc1Val, idxRegSrc1Ptr, offPage / 2);
939 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, RT_MAKE_U16(pbOpcodes[0], pbOpcodes[1]));
940 if (fPendingJmp)
941 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
942 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
943 else
944 {
945 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
946 fPendingJmp = true;
947 }
948 pbOpcodes += 2;
949 offPage += 2;
950 cbLeft -= 2;
951 }
952
953 /* DWord (uint64_t) aligning the src1 pointer. We use a 32-bit constant here for simplicity. */
954 if ((offPage & 7) && cbLeft >= 4)
955 {
956 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxRegSrc1Val, idxRegSrc1Ptr, offPage / 4);
957 off = iemNativeEmitLoadGpr32ImmEx(pu32CodeBuf, off, idxRegSrc2Val,
958 RT_MAKE_U32_FROM_MSB_U8(pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
959 if (fPendingJmp)
960 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
961 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
962 else
963 {
964 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
965 fPendingJmp = true;
966 }
967 pbOpcodes += 4;
968 offPage += 4;
969 cbLeft -= 4;
970 }
971
972 /*
973 * If we've got 16 bytes or more left, switch to memcmp-style.
974 */
975 if (cbLeft >= 16)
976 {
977 /* We need a pointer to the copy of the original opcode bytes. */
978 uint8_t const idxRegSrc2Ptr = iemNativeRegAllocTmp(pReNative, &off);
979 off = iemNativeEmitLoadGprImmEx(pu32CodeBuf, off, idxRegSrc2Ptr, (uintptr_t)pbOpcodes);
980
981 /* If there are more than 32 bytes to compare we create a loop, for
982 which we'll need a loop register. */
983 if (cbLeft >= 64)
984 {
985 if (fPendingJmp)
986 {
987 off = iemNativeEmitJccTbExitEx(pReNative, pu32CodeBuf, off, kIemNativeLabelType_ObsoleteTb,
988 kArmv8InstrCond_Ne);
989 fPendingJmp = false;
990 }
991
992 uint8_t const idxRegLoop = iemNativeRegAllocTmp(pReNative, &off);
993 uint16_t const cLoops = cbLeft / 32;
994 cbLeft = cbLeft % 32;
995 pbOpcodes += cLoops * 32;
996 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegLoop, cLoops);
997
998 if (offPage != 0) /** @todo optimize out this instruction. */
999 {
1000 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc1Ptr, idxRegSrc1Ptr, offPage);
1001 offPage = 0;
1002 }
1003
1004 uint32_t const offLoopStart = off;
1005 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 0);
1006 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 0);
1007 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val);
1008
1009 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 1);
1010 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 1);
1011 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
1012 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
1013
1014 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 2);
1015 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 2);
1016 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
1017 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
1018
1019 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr, 3);
1020 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val, idxRegSrc2Ptr, 3);
1021 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
1022 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
1023
1024 off = iemNativeEmitJccTbExitEx(pReNative, pu32CodeBuf, off, kIemNativeLabelType_ObsoleteTb,
1025 kArmv8InstrCond_Ne);
1026
1027 /* Advance and loop. */
1028 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc1Ptr, idxRegSrc1Ptr, 0x20);
1029 pu32CodeBuf[off++] = Armv8A64MkInstrAddUImm12(idxRegSrc2Ptr, idxRegSrc2Ptr, 0x20);
1030 pu32CodeBuf[off++] = Armv8A64MkInstrSubUImm12(idxRegLoop, idxRegLoop, 1, false /*f64Bit*/, true /*fSetFlags*/);
1031 pu32CodeBuf[off] = Armv8A64MkInstrBCond(kArmv8InstrCond_Ne, (int32_t)offLoopStart - (int32_t)off);
1032 off++;
1033
1034 iemNativeRegFreeTmp(pReNative, idxRegLoop);
1035 }
1036
1037 /* Deal with any remaining dwords (uint64_t). There can be up to
1038 three if we looped and four if we didn't. */
1039 uint32_t offSrc2 = 0;
1040 while (cbLeft >= 8)
1041 {
1042 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val,
1043 idxRegSrc1Ptr, offPage / 8);
1044 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc2Val,
1045 idxRegSrc2Ptr, offSrc2 / 8);
1046 if (fPendingJmp)
1047 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
1048 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq);
1049 else
1050 {
1051 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val);
1052 fPendingJmp = true;
1053 }
1054 pbOpcodes += 8;
1055 offPage += 8;
1056 offSrc2 += 8;
1057 cbLeft -= 8;
1058 }
1059
1060 iemNativeRegFreeTmp(pReNative, idxRegSrc2Ptr);
1061 /* max cost thus far: memcmp-loop=43 vs memcmp-no-loop=30 */
1062 }
1063 /*
1064 * Otherwise, we compare with constants and merge with the general mop-up.
1065 */
1066 else
1067 {
1068 while (cbLeft >= 8)
1069 {
1070 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Dword, idxRegSrc1Val, idxRegSrc1Ptr,
1071 offPage / 8);
1072 off = iemNativeEmitLoadGprImmEx(pu32CodeBuf, off, idxRegSrc2Val,
1073 RT_MAKE_U64_FROM_MSB_U8(pbOpcodes[7], pbOpcodes[6], pbOpcodes[5], pbOpcodes[4],
1074 pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
1075 if (fPendingJmp)
1076 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
1077 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, true /*f64Bit*/);
1078 else
1079 {
1080 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, true /*f64Bit*/);
1081 fPendingJmp = true;
1082 }
1083 pbOpcodes += 8;
1084 offPage += 8;
1085 cbLeft -= 8;
1086 }
1087 /* max cost thus far: 21 */
1088 }
1089
1090 /* Deal with any remaining bytes (7 or less). */
1091 Assert(cbLeft < 8);
1092 if (cbLeft >= 4)
1093 {
1094 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Word, idxRegSrc1Val, idxRegSrc1Ptr,
1095 offPage / 4);
1096 off = iemNativeEmitLoadGpr32ImmEx(pu32CodeBuf, off, idxRegSrc2Val,
1097 RT_MAKE_U32_FROM_MSB_U8(pbOpcodes[3], pbOpcodes[2], pbOpcodes[1], pbOpcodes[0]));
1098 if (fPendingJmp)
1099 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
1100 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
1101 else
1102 {
1103 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
1104 fPendingJmp = true;
1105 }
1106 pbOpcodes += 4;
1107 offPage += 4;
1108 cbLeft -= 4;
1109
1110 }
1111
1112 if (cbLeft >= 2)
1113 {
1114 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Half, idxRegSrc1Val, idxRegSrc1Ptr,
1115 offPage / 2);
1116 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, RT_MAKE_U16(pbOpcodes[0], pbOpcodes[1]));
1117 if (fPendingJmp)
1118 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
1119 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
1120 else
1121 {
1122 pu32CodeBuf[off++] = Armv8A64MkInstrCmpReg(idxRegSrc1Val, idxRegSrc2Val, false /*f64Bit*/);
1123 fPendingJmp = true;
1124 }
1125 pbOpcodes += 2;
1126 offPage += 2;
1127 cbLeft -= 2;
1128 }
1129
1130 if (cbLeft > 0)
1131 {
1132 Assert(cbLeft == 1);
1133 pu32CodeBuf[off++] = Armv8A64MkInstrStLdRUOff(kArmv8A64InstrLdStType_Ld_Byte, idxRegSrc1Val, idxRegSrc1Ptr, offPage);
1134 if (fPendingJmp)
1135 {
1136 pu32CodeBuf[off++] = Armv8A64MkInstrMovZ(idxRegSrc2Val, pbOpcodes[0]);
1137 pu32CodeBuf[off++] = Armv8A64MkInstrCCmpReg(idxRegSrc1Val, idxRegSrc2Val,
1138 ARMA64_NZCV_F_N0_Z0_C0_V0, kArmv8InstrCond_Eq, false /*f64Bit*/);
1139 }
1140 else
1141 {
1142 pu32CodeBuf[off++] = Armv8A64MkInstrCmpUImm12(idxRegSrc1Val, pbOpcodes[0], false /*f64Bit*/);
1143 fPendingJmp = true;
1144 }
1145 pbOpcodes += 1;
1146 offPage += 1;
1147 cbLeft -= 1;
1148 }
1149
1150 iemNativeRegFreeTmp(pReNative, idxRegSrc2Val);
1151 }
1152 Assert(cbLeft == 0);
1153
1154 /*
1155 * Finally, the branch on difference.
1156 */
1157 if (fPendingJmp)
1158 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_ObsoleteTb);
1159
1160 RT_NOREF(pu32CodeBuf, cbLeft, offPage, pbOpcodes, offConsolidatedJump);
1161
1162 /* max costs: memcmp-loop=54; memcmp-no-loop=41; only-src1-ptr=32 */
1163 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1164 iemNativeRegFreeTmp(pReNative, idxRegSrc1Val);
1165 iemNativeRegFreeTmp(pReNative, idxRegSrc1Ptr);
1166
1167#else
1168# error "Port me"
1169#endif
1170 return off;
1171}
1172
1173
1174
1175/**
1176 * Macro that implements PC check after a conditional branch.
1177 */
1178#define BODY_CHECK_PC_AFTER_BRANCH(a_pTb, a_idxRange, a_offRange, a_cbInstr) \
1179 RT_NOREF(a_cbInstr); \
1180 off = iemNativeEmitBltInCheckPcAfterBranch(pReNative, off, a_pTb, a_idxRange, a_offRange)
1181
1182#define LIVENESS_CHECK_PC_AFTER_BRANCH(a_pOutgoing, a_pCallEntry) \
1183 if (!IEM_F_MODE_X86_IS_FLAT((uint32_t)(a_pCallEntry)->auParams[0] >> 8)) \
1184 IEM_LIVENESS_RAW_SEG_BASE_INPUT(a_pOutgoing, X86_SREG_CS); \
1185 else do { } while (0)
1186
1187DECL_FORCE_INLINE(uint32_t)
1188iemNativeEmitBltInCheckPcAfterBranch(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb,
1189 uint8_t idxRange, uint16_t offRange)
1190{
1191#ifdef VBOX_STRICT
1192 off = iemNativeEmitMarker(pReNative, off, 0x80000004);
1193#endif
1194
1195#ifdef IEMNATIVE_WITH_DELAYED_PC_UPDATING
1196 Assert(pReNative->Core.offPc == 0);
1197#endif
1198
1199 /*
1200 * The GCPhysRangePageWithOffset value in the threaded function is a fixed
1201 * constant for us here.
1202 *
1203 * We can pretend that iem.s.cbInstrBufTotal is X86_PAGE_SIZE here, because
1204 * it serves no purpose as a CS.LIM check (if that's needed we've just done
1205 * it), and as long as we don't implement code TLB reload here there is no
1206 * point in checking that the TLB data we're using is still valid.
1207 *
1208 * What we do is:
1209 * 1. Calculate the FLAT PC (RIP + CS.BASE).
1210 * 2. Subtract iem.s.uInstrBufPc from it, getting 'off'.
1211 * 3. The 'off' must be less than X86_PAGE_SIZE/cbInstrBufTotal or
1212 * we're in the wrong spot and need to find a new TB.
1213 * 4. Add 'off' to iem.s.GCPhysInstrBuf and compare with the
1214 * GCPhysRangePageWithOffset constant mentioned above.
1215 *
1216 * The adding of CS.BASE to RIP can be skipped in the first step if we're
1217 * in 64-bit code or flat 32-bit.
1218 */
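    /* Illustrative sketch only -- roughly what steps 1-4 above amount to
       (names here are illustrative, not actual variables):
           uint64_t const uFlatPc = rip + (fFlatMode ? 0 : cs.base);
           uint64_t const offBuf  = uFlatPc - pVCpu->iem.s.uInstrBufPc;
           if (   offBuf >= X86_PAGE_SIZE
               || pVCpu->iem.s.GCPhysInstrBuf + offBuf != GCPhysRangePageWithOffset)
               exit the TB via the CheckBranchMiss path; */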
1219
1220 /* Allocate registers for step 1. Get the shadowed stuff before allocating
1221 the temp register, so we don't accidentally clobber something we'll be
1222 needing again immediately. This is why we get idxRegCsBase here. */
1223 uint8_t const idxRegPc = iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1224 kIemNativeGstRegUse_ReadOnly);
1225 uint8_t const idxRegCsBase = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) ? UINT8_MAX
1226 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_BASE(X86_SREG_CS),
1227 kIemNativeGstRegUse_ReadOnly);
1228
1229 uint8_t const idxRegTmp = iemNativeRegAllocTmp(pReNative, &off);
1230
1231#ifdef VBOX_STRICT
1232 /* Do assertions before idxRegTmp contains anything. */
1233 Assert(RT_SIZEOFMEMB(VMCPUCC, iem.s.cbInstrBufTotal) == sizeof(uint16_t));
1234# ifdef RT_ARCH_AMD64
1235 {
1236 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8+2+1 + 11+2+1);
1237 /* Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)); */
1238 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1239 {
1240 /* cmp r/m64, imm8 */
1241 pbCodeBuf[off++] = X86_OP_REX_W;
1242 pbCodeBuf[off++] = 0x83;
1243 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 7, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cs.u64Base));
1244 pbCodeBuf[off++] = 0;
1245 /* je rel8 */
1246 pbCodeBuf[off++] = 0x74;
1247 pbCodeBuf[off++] = 1;
1248 /* int3 */
1249 pbCodeBuf[off++] = 0xcc;
1250
1251 }
1252
1253 /* Assert(!(pVCpu->iem.s.GCPhysInstrBuf & X86_PAGE_OFFSET_MASK)); - done later by the non-x86 code */
1254 /* test r/m64, imm32 */
1255 pbCodeBuf[off++] = X86_OP_REX_W;
1256 pbCodeBuf[off++] = 0xf7;
1257 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1258 pbCodeBuf[off++] = RT_BYTE1(X86_PAGE_OFFSET_MASK);
1259 pbCodeBuf[off++] = RT_BYTE2(X86_PAGE_OFFSET_MASK);
1260 pbCodeBuf[off++] = RT_BYTE3(X86_PAGE_OFFSET_MASK);
1261 pbCodeBuf[off++] = RT_BYTE4(X86_PAGE_OFFSET_MASK);
1262 /* jz rel8 */
1263 pbCodeBuf[off++] = 0x74;
1264 pbCodeBuf[off++] = 1;
1265 /* int3 */
1266 pbCodeBuf[off++] = 0xcc;
1267 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1268 }
1269# else
1270
1271 /* Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)); */
1272 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1273 {
1274 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cs.u64Base));
1275# ifdef RT_ARCH_ARM64
1276 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1277 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, idxRegTmp);
1278 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(0x2004);
1279 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1280# else
1281# error "Port me!"
1282# endif
1283 }
1284# endif
1285
1286#endif /* VBOX_STRICT */
1287
1288 /* 1+2. Calculate 'off' first (into idxRegTmp). */
1289 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.uInstrBufPc));
1290 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1291 {
1292#ifdef RT_ARCH_ARM64
1293 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1294 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegPc, idxRegTmp);
1295 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1296#else
1297 off = iemNativeEmitNegGpr(pReNative, off, idxRegTmp);
1298 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegPc);
1299#endif
1300 }
1301 else
1302 {
1303#ifdef RT_ARCH_ARM64
1304 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1305 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegCsBase, idxRegTmp);
1306 pu32CodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegTmp, idxRegTmp, idxRegPc);
1307 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1308#else
1309 off = iemNativeEmitNegGpr(pReNative, off, idxRegTmp);
1310 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegCsBase);
1311 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegPc);
1312#endif
1313 iemNativeRegFreeTmp(pReNative, idxRegCsBase);
1314 }
1315 iemNativeRegFreeTmp(pReNative, idxRegPc);
1316
1317 /* 3. Check that off is less than X86_PAGE_SIZE/cbInstrBufTotal. */
1318 off = iemNativeEmitCmpGprWithImm(pReNative, off, idxRegTmp, X86_PAGE_SIZE - 1);
1319 off = iemNativeEmitJaTbExit(pReNative, off, kIemNativeLabelType_CheckBranchMiss);
1320
1321 /* 4. Add iem.s.GCPhysInstrBuf and compare with GCPhysRangePageWithOffset. */
1322#ifdef RT_ARCH_AMD64
1323 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1324 pbCodeBuf[off++] = idxRegTmp < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R;
1325 pbCodeBuf[off++] = 0x03; /* add r64, r/m64 */
1326 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1327 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1328
1329#elif defined(RT_ARCH_ARM64)
1330 uint8_t const idxRegTmp2 = iemNativeRegAllocTmp(pReNative, &off);
1331
1332 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp2, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1333 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1334 pu32CodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegTmp, idxRegTmp, idxRegTmp2);
1335 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1336
1337# ifdef VBOX_STRICT /* Assert(!(pVCpu->iem.s.GCPhysInstrBuf & X86_PAGE_OFFSET_MASK)); */
1338 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp2, X86_PAGE_OFFSET_MASK, true /*fSetFlags*/);
1339 off = iemNativeEmitJzToFixed(pReNative, off, off + 2 /* correct for ARM64 */);
1340 off = iemNativeEmitBrk(pReNative, off, 0x2005);
1341# endif
1342 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
1343#else
1344# error "Port me"
1345#endif
1346
1347 RTGCPHYS const GCPhysRangePageWithOffset = ( iemTbGetRangePhysPageAddr(pTb, idxRange)
1348 | pTb->aRanges[idxRange].offPhysPage)
1349 + offRange;
1350 off = iemNativeEmitTestIfGprNotEqualImmAndTbExit(pReNative, off, idxRegTmp, GCPhysRangePageWithOffset,
1351 kIemNativeLabelType_CheckBranchMiss);
1352
1353 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1354 return off;
1355}
1356
1357
1358/**
1359 * Macro that implements TLB loading and pbInstrBuf updating for an
1360 * instruction crossing into a new page.
1361 *
1362 * This may long jump if we're raising a \#PF, \#GP or similar trouble.
1363 */
1364#define BODY_LOAD_TLB_FOR_NEW_PAGE(a_pTb, a_offInstr, a_idxRange, a_cbInstr) \
1365 RT_NOREF(a_cbInstr); \
1366 off = iemNativeEmitBltLoadTlbForNewPage(pReNative, off, pTb, a_idxRange, a_offInstr)
1367
1368#define LIVENESS_LOAD_TLB_FOR_NEW_PAGE(a_pOutgoing, a_pCallEntry) \
1369 if (!IEM_F_MODE_X86_IS_FLAT((uint32_t)(a_pCallEntry)->auParams[0] >> 8)) \
1370 IEM_LIVENESS_RAW_SEG_BASE_INPUT(a_pOutgoing, X86_SREG_CS); \
1371 else do { } while (0)
1372
1373DECL_FORCE_INLINE(uint32_t)
1374iemNativeEmitBltLoadTlbForNewPage(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb, uint8_t idxRange, uint8_t offInstr)
1375{
1376#ifdef VBOX_STRICT
1377 off = iemNativeEmitMarker(pReNative, off, 0x80000005);
1378#endif
1379
1380 /*
1381 * Define labels and allocate the register for holding the GCPhys of the new page.
1382 */
1383 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1384 uint32_t const idxRegGCPhys = iemNativeRegAllocTmp(pReNative, &off);
1385 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, IEM_F_MODE_X86_IS_FLAT(pReNative->fExec), &off);
1386 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1387 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1388 : UINT32_MAX;
1389
1390 //off = iemNativeEmitBrk(pReNative, off, 0x1111);
1391
1392 /*
1393 * Jump to the TLB lookup code.
1394 */
1395 if (!TlbState.fSkip)
1396 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1397
1398 /*
1399 * TlbMiss:
1400 *
1401 * Call iemNativeHlpMemCodeNewPageTlbMissWithOff to do the work.
1402 */
1403 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
1404
1405 /* Save variables in volatile registers. */
1406 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegGCPhys);
1407 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1408
1409 /* IEMNATIVE_CALL_ARG1_GREG = offInstr */
1410 off = iemNativeEmitLoadGpr8Imm(pReNative, off, IEMNATIVE_CALL_ARG1_GREG, offInstr);
1411
1412 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1413 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1414
1415 /* Done setting up parameters, make the call. */
1416 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpMemCodeNewPageTlbMissWithOff);
1417
1418 /* Move the result to the right register. */
1419 if (idxRegGCPhys != IEMNATIVE_CALL_RET_GREG)
1420 off = iemNativeEmitLoadGprFromGpr(pReNative, off, idxRegGCPhys, IEMNATIVE_CALL_RET_GREG);
1421
1422 /* Restore variables and guest shadow registers to volatile registers. */
1423 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1424 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off, TlbState.getActiveRegsWithShadows(true /*fCode*/));
1425
1426#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1427 if (!TlbState.fSkip)
1428 {
1429 /* end of TlbMiss - Jump to the done label. */
1430 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1431 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1432
1433 /*
1434 * TlbLookup:
1435 */
1436 off = iemNativeEmitTlbLookup<false>(pReNative, off, &TlbState,
1437 IEM_F_MODE_X86_IS_FLAT(pReNative->fExec) ? UINT8_MAX : X86_SREG_CS,
1438 1 /*cbMem*/, 0 /*fAlignMask*/, IEM_ACCESS_TYPE_EXEC,
1439 idxLabelTlbLookup, idxLabelTlbMiss, idxRegGCPhys, offInstr);
1440
1441# ifdef IEM_WITH_TLB_STATISTICS
1442 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
1443 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeCodeTlbHitsForNewPageWithOffset));
1444# endif
1445
1446 /*
1447 * TlbDone:
1448 */
1449 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1450 TlbState.freeRegsAndReleaseVars(pReNative, UINT8_MAX /*idxVarGCPtrMem*/, true /*fIsCode*/);
1451 }
1452#else
1453 RT_NOREF(idxLabelTlbMiss);
1454#endif
1455
1456 /*
1457 * Now check the physical address of the page matches the expected one.
1458 */
1459 RTGCPHYS const GCPhysNewPage = iemTbGetRangePhysPageAddr(pTb, idxRange);
1460 off = iemNativeEmitTestIfGprNotEqualImmAndTbExit(pReNative, off, idxRegGCPhys, GCPhysNewPage,
1461 kIemNativeLabelType_ObsoleteTb);
1462
1463 iemNativeRegFreeTmp(pReNative, idxRegGCPhys);
1464 return off;
1465}
1466
1467
1468/**
1469 * Macro that implements TLB loading and pbInstrBuf updating when
1470 * branching or when crossing a page on an instruction boundary.
1471 *
1472 * This differs from BODY_LOAD_TLB_FOR_NEW_PAGE in that it will first check if
1473 * it is an inter-page branch and also check the page offset.
1474 *
1475 * This may long jump if we're raising a \#PF, \#GP or similar trouble.
1476 */
1477#define BODY_LOAD_TLB_AFTER_BRANCH(a_pTb, a_idxRange, a_cbInstr) \
1478 RT_NOREF(a_cbInstr); \
1479 off = iemNativeEmitBltLoadTlbAfterBranch(pReNative, off, pTb, a_idxRange)
1480
1481#define LIVENESS_LOAD_TLB_AFTER_BRANCH(a_pOutgoing, a_pCallEntry) \
1482 if (!IEM_F_MODE_X86_IS_FLAT((uint32_t)(a_pCallEntry)->auParams[0] >> 8)) \
1483 IEM_LIVENESS_RAW_SEG_BASE_INPUT(a_pOutgoing, X86_SREG_CS); \
1484 else do { } while (0)
1485
1486DECL_FORCE_INLINE(uint32_t)
1487iemNativeEmitBltLoadTlbAfterBranch(PIEMRECOMPILERSTATE pReNative, uint32_t off, PCIEMTB pTb, uint8_t idxRange)
1488{
1489#ifdef VBOX_STRICT
1490 off = iemNativeEmitMarker(pReNative, off, 0x80000006);
1491#endif
1492
1493 BODY_FLUSH_PENDING_WRITES();
1494
1495 /*
1496 * Define labels and allocate the register for holding the GCPhys of the new page.
1497 */
1498 uint16_t const uTlbSeqNo = pReNative->uTlbSeqNo++;
1499 RTGCPHYS const GCPhysRangePageWithOffset = iemTbGetRangePhysPageAddr(pTb, idxRange)
1500 | pTb->aRanges[idxRange].offPhysPage;
1501
1502 /*
1503 *
1504 * First check if RIP is within the current code.
1505 *
1506 * This is very similar to iemNativeEmitBltInCheckPcAfterBranch; the only
1507 * difference is what we do when things don't match up.
1508 *
1509 * What we do is:
1510 * 1. Calculate the FLAT PC (RIP + CS.BASE).
1511 * 2. Subtract iem.s.uInstrBufPc from it, giving 'off'.
1512 * 3. The 'off' must be less than X86_PAGE_SIZE/cbInstrBufTotal,
1513 * otherwise we need to retranslate RIP via the TLB.
1514 * 4. Add 'off' to iem.s.GCPhysInstrBuf and compare with the
1515 * GCPhysRangePageWithOffset constant mentioned above.
1516 *
1517 * Adding CS.BASE to RIP can be skipped in the first step if we're in
1518 * 64-bit or flat 32-bit code.
1519 *
1520 */
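 /* A rough plain-C sketch of steps 1-4 above (illustrative only; 'offNew' is a
 made-up local name, the fields are the VMCPU members used by the code below):
 uint64_t const uFlatPc = pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base;
 uint64_t const offNew = uFlatPc - pVCpu->iem.s.uInstrBufPc;
 if (offNew >= X86_PAGE_SIZE)   // or cbInstrBufTotal
 -> retranslate via the TLB (TlbLoad below);
 else if (pVCpu->iem.s.GCPhysInstrBuf + offNew != GCPhysRangePageWithOffset)
 -> check-branch-miss, exit the TB;
 else
 -> still within the expected page and offset, nothing more to do. */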
1521
1522 /* Allocate registers for step 1. Get the shadowed stuff before allocating
1523 the temp register, so we don't accidentally clobber something we'll be
1524 needing again immediately. This is why we get idxRegCsBase here.
1525 Update: We share registers with the TlbState, as the TLB code path has
1526 little in common with the rest of the code. */
1527 bool const fIsFlat = IEM_F_MODE_X86_IS_FLAT(pReNative->fExec);
1528 IEMNATIVEEMITTLBSTATE const TlbState(pReNative, fIsFlat, &off);
1529 uint8_t const idxRegPc = !TlbState.fSkip ? TlbState.idxRegPtr
1530 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, kIemNativeGstReg_Pc,
1531 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1532 uint8_t const idxRegCsBase = !TlbState.fSkip || fIsFlat ? TlbState.idxRegSegBase
1533 : iemNativeRegAllocTmpForGuestReg(pReNative, &off, IEMNATIVEGSTREG_SEG_BASE(X86_SREG_CS),
1534 kIemNativeGstRegUse_ReadOnly, true /*fNoVolatileRegs*/);
1535
1536 uint8_t const idxRegTmp = !TlbState.fSkip ? TlbState.idxReg1 : iemNativeRegAllocTmp(pReNative, &off);
1537 uint8_t const idxRegTmp2 = !TlbState.fSkip ? TlbState.idxReg2 : iemNativeRegAllocTmp(pReNative, &off);
1538 uint8_t const idxRegDummy = !TlbState.fSkip ? iemNativeRegAllocTmp(pReNative, &off) : UINT8_MAX;
1539
1540#ifdef VBOX_STRICT
1541 /* Do assertions before idxRegTmp contains anything. */
1542 Assert(RT_SIZEOFMEMB(VMCPUCC, iem.s.cbInstrBufTotal) == sizeof(uint16_t));
1543# ifdef RT_ARCH_AMD64
1544 {
1545 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 8+2+1 + 11+2+1);
1546 /* Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)); */
1547 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1548 {
1549 /* cmp r/m64, imm8 */
1550 pbCodeBuf[off++] = X86_OP_REX_W;
1551 pbCodeBuf[off++] = 0x83;
1552 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 7, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cs.u64Base));
1553 pbCodeBuf[off++] = 0;
1554 /* je rel8 */
1555 pbCodeBuf[off++] = 0x74;
1556 pbCodeBuf[off++] = 1;
1557 /* int3 */
1558 pbCodeBuf[off++] = 0xcc;
1559
1560 }
1561
1562 /* Assert(!(pVCpu->iem.s.GCPhysInstrBuf & X86_PAGE_OFFSET_MASK)); - done later by the non-x86 code */
1563 /* test r/m64, imm32 */
1564 pbCodeBuf[off++] = X86_OP_REX_W;
1565 pbCodeBuf[off++] = 0xf7;
1566 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, 0, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1567 pbCodeBuf[off++] = RT_BYTE1(X86_PAGE_OFFSET_MASK);
1568 pbCodeBuf[off++] = RT_BYTE2(X86_PAGE_OFFSET_MASK);
1569 pbCodeBuf[off++] = RT_BYTE3(X86_PAGE_OFFSET_MASK);
1570 pbCodeBuf[off++] = RT_BYTE4(X86_PAGE_OFFSET_MASK);
1571 /* jz rel8 */
1572 pbCodeBuf[off++] = 0x74;
1573 pbCodeBuf[off++] = 1;
1574 /* int3 */
1575 pbCodeBuf[off++] = 0xcc;
1576 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1577 }
1578# else
1579
1580 /* Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_F_MODE_X86_IS_FLAT(pReNative->fExec)); */
1581 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1582 {
1583 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, cpum.GstCtx.cs.u64Base));
1584# ifdef RT_ARCH_ARM64
1585 uint32_t * const pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1586 pu32CodeBuf[off++] = Armv8A64MkInstrCbzCbnz(false /*fJmpIfNotZero*/, 2, idxRegTmp);
1587 pu32CodeBuf[off++] = Armv8A64MkInstrBrk(0x2006);
1588 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1589# else
1590# error "Port me!"
1591# endif
1592 }
1593# endif
1594
1595#endif /* VBOX_STRICT */
1596
1597 /* Because we're lazy, we'll jump back here to recalc 'off' and share the
1598 GCPhysRangePageWithOffset check. This is a little risky, so we use the
1599 2nd register to check if we've looped more than once already. */
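 /* Illustrative shape of the retry logic emitted here: idxRegTmp2 starts out
 zero; if the range check below fails we jump forward to TlbLoad, reload the
 TLB, set idxRegTmp2 to 1 and jump back to offLabelRedoChecks; should the
 check fail a second time (idxRegTmp2 != 0) we exit the TB with
 CheckBranchMiss instead of looping forever. */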
1600 off = iemNativeEmitGprZero(pReNative, off, idxRegTmp2);
1601
1602 uint32_t const offLabelRedoChecks = off;
1603
1604 /* 1+2. Calculate 'off' first (into idxRegTmp). */
1605 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.uInstrBufPc));
1606 if (IEM_F_MODE_X86_IS_FLAT(pReNative->fExec))
1607 {
1608#ifdef RT_ARCH_ARM64
1609 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1610 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegPc, idxRegTmp);
1611 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1612#else
1613 off = iemNativeEmitNegGpr(pReNative, off, idxRegTmp);
1614 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegPc);
1615#endif
1616 }
1617 else
1618 {
1619#ifdef RT_ARCH_ARM64
1620 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 2);
1621 pu32CodeBuf[off++] = Armv8A64MkInstrSubReg(idxRegTmp, idxRegCsBase, idxRegTmp);
1622 pu32CodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegTmp, idxRegTmp, idxRegPc);
1623 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1624#else
1625 off = iemNativeEmitNegGpr(pReNative, off, idxRegTmp);
1626 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegCsBase);
1627 off = iemNativeEmitAddTwoGprs(pReNative, off, idxRegTmp, idxRegPc);
1628#endif
1629 }
1630
1631 /* 3. Check that off is less than X86_PAGE_SIZE/cbInstrBufTotal.
1632 Unlike iemNativeEmitBltInCheckPcAfterBranch we'll jump to the TLB loading if this fails. */
1633 off = iemNativeEmitCmpGprWithImm(pReNative, off, idxRegTmp, X86_PAGE_SIZE - 1);
1634 uint32_t const offFixedJumpToTlbLoad = off;
1635 off = iemNativeEmitJaToFixed(pReNative, off, off /* (ASSUME ja rel8 suffices) */);
1636
1637 /* 4a. Add iem.s.GCPhysInstrBuf to off ... */
1638#ifdef RT_ARCH_AMD64
1639 uint8_t * const pbCodeBuf = iemNativeInstrBufEnsure(pReNative, off, 7);
1640 pbCodeBuf[off++] = idxRegTmp < 8 ? X86_OP_REX_W : X86_OP_REX_W | X86_OP_REX_R;
1641 pbCodeBuf[off++] = 0x03; /* add r64, r/m64 */
1642 off = iemNativeEmitGprByVCpuDisp(pbCodeBuf, off, idxRegTmp, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1643 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1644
1645#elif defined(RT_ARCH_ARM64)
1646
1647 off = iemNativeEmitLoadGprFromVCpuU64(pReNative, off, idxRegTmp2, RT_UOFFSETOF(VMCPUCC, iem.s.GCPhysInstrBuf));
1648 uint32_t *pu32CodeBuf = iemNativeInstrBufEnsure(pReNative, off, 1);
1649 pu32CodeBuf[off++] = Armv8A64MkInstrAddReg(idxRegTmp, idxRegTmp, idxRegTmp2);
1650 IEMNATIVE_ASSERT_INSTR_BUF_ENSURE(pReNative, off);
1651
1652# ifdef VBOX_STRICT /* Assert(!(pVCpu->iem.s.GCPhysInstrBuf & X86_PAGE_OFFSET_MASK)); */
1653 off = iemNativeEmitAndGpr32ByImm(pReNative, off, idxRegTmp2, X86_PAGE_OFFSET_MASK, true /*fSetFlags*/);
1654 off = iemNativeEmitJzToFixed(pReNative, off, off + 2 /* correct for ARM64 */);
1655 off = iemNativeEmitBrk(pReNative, off, 0x2005);
1656# endif
1657#else
1658# error "Port me"
1659#endif
1660
1661 /* 4b. ... and compare with GCPhysRangePageWithOffset.
1662
1663 Unlike iemNativeEmitBltInCheckPcAfterBranch we'll have to be more
1664 careful and avoid implicit temporary register usage here.
1665
1666 Unlike the threaded version of this code, we do not obsolete TBs here to
1667 reduce the code size and because indirect calls may legally end at the
1668 same offset in two different pages depending on the program state. */
1669 /** @todo synch the threaded BODY_LOAD_TLB_AFTER_BRANCH version with this. */
1670 off = iemNativeEmitLoadGprImm64(pReNative, off, idxRegTmp2, GCPhysRangePageWithOffset);
1671 off = iemNativeEmitCmpGprWithGpr(pReNative, off, idxRegTmp, idxRegTmp2);
1672 off = iemNativeEmitJnzTbExit(pReNative, off, kIemNativeLabelType_CheckBranchMiss);
1673 uint32_t const offFixedJumpToEnd = off;
1674 off = iemNativeEmitJmpToFixed(pReNative, off, off + 512 /* force rel32 */);
1675
1676 /*
1677 * TlbLoad:
1678 *
1679 * First we try to go via the TLB.
1680 */
1681 iemNativeFixupFixedJump(pReNative, offFixedJumpToTlbLoad, off);
1682
1683 /* Check that we haven't been here before. */
1684 off = iemNativeEmitTestIfGprIsNotZeroAndTbExit(pReNative, off, idxRegTmp2, false /*f64Bit*/,
1685 kIemNativeLabelType_CheckBranchMiss);
1686
1687 /* Jump to the TLB lookup code. */
1688 uint32_t const idxLabelTlbLookup = !TlbState.fSkip
1689 ? iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbLookup, UINT32_MAX, uTlbSeqNo)
1690 : UINT32_MAX;
1691//off = iemNativeEmitBrk(pReNative, off, 0x1234);
1692 if (!TlbState.fSkip)
1693 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbLookup); /** @todo short jump */
1694
1695 /*
1696 * TlbMiss:
1697 *
1698 * Call iemNativeHlpMemCodeNewPageTlbMiss to do the work.
1699 */
1700 uint32_t const idxLabelTlbMiss = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbMiss, off, uTlbSeqNo);
1701 RT_NOREF(idxLabelTlbMiss);
1702
1703 /* Save variables in volatile registers. */
1704 uint32_t const fHstRegsNotToSave = TlbState.getRegsNotToSave() | RT_BIT_32(idxRegTmp) | RT_BIT_32(idxRegTmp2)
1705 | (idxRegDummy != UINT8_MAX ? RT_BIT_32(idxRegDummy) : 0);
1706 off = iemNativeVarSaveVolatileRegsPreHlpCall(pReNative, off, fHstRegsNotToSave);
1707
1708 /* IEMNATIVE_CALL_ARG0_GREG = pVCpu */
1709 off = iemNativeEmitLoadGprFromGpr(pReNative, off, IEMNATIVE_CALL_ARG0_GREG, IEMNATIVE_REG_FIXED_PVMCPU);
1710
1711 /* Done setting up parameters, make the call. */
1712 off = iemNativeEmitCallImm(pReNative, off, (uintptr_t)iemNativeHlpMemCodeNewPageTlbMiss);
1713
1714 /* Restore variables and guest shadow registers to volatile registers. */
1715 off = iemNativeVarRestoreVolatileRegsPostHlpCall(pReNative, off, fHstRegsNotToSave);
1716 off = iemNativeRegRestoreGuestShadowsInVolatileRegs(pReNative, off,
1717 TlbState.getActiveRegsWithShadows()
1718 | RT_BIT_32(idxRegPc)
1719 | (idxRegCsBase != UINT8_MAX ? RT_BIT_32(idxRegCsBase) : 0));
1720
1721#ifdef IEMNATIVE_WITH_TLB_LOOKUP
1722 if (!TlbState.fSkip)
1723 {
1724 /* end of TlbMiss - Jump to the done label. */
1725 uint32_t const idxLabelTlbDone = iemNativeLabelCreate(pReNative, kIemNativeLabelType_TlbDone, UINT32_MAX, uTlbSeqNo);
1726 off = iemNativeEmitJmpToLabel(pReNative, off, idxLabelTlbDone);
1727
1728 /*
1729 * TlbLookup:
1730 */
1731 off = iemNativeEmitTlbLookup<false, true>(pReNative, off, &TlbState, fIsFlat ? UINT8_MAX : X86_SREG_CS,
1732 1 /*cbMem*/, 0 /*fAlignMask*/, IEM_ACCESS_TYPE_EXEC,
1733 idxLabelTlbLookup, idxLabelTlbMiss, idxRegDummy);
1734
1735# ifdef IEM_WITH_TLB_STATISTICS
1736 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, TlbState.idxReg1, TlbState.idxReg2,
1737 RT_UOFFSETOF(VMCPUCC, iem.s.StatNativeCodeTlbHitsForNewPage));
1738# endif
1739
1740 /*
1741 * TlbDone:
1742 */
1743 iemNativeLabelDefine(pReNative, idxLabelTlbDone, off);
1744 TlbState.freeRegsAndReleaseVars(pReNative, UINT8_MAX /*idxVarGCPtrMem*/, true /*fIsCode*/);
1745 }
1746#else
1747 RT_NOREF(idxLabelTlbMiss);
1748#endif
1749
1750 /* Jmp back to the start and redo the checks. */
1751 off = iemNativeEmitLoadGpr8Imm(pReNative, off, idxRegTmp2, 1); /* indicate that we've looped once already */
1752 off = iemNativeEmitJmpToFixed(pReNative, off, offLabelRedoChecks);
1753
1754 /*
1755 * End:
1756 *
1757 * The end.
1758 */
1759 iemNativeFixupFixedJump(pReNative, offFixedJumpToEnd, off);
1760
1761 if (!TlbState.fSkip)
1762 iemNativeRegFreeTmp(pReNative, idxRegDummy);
1763 else
1764 {
1765 iemNativeRegFreeTmp(pReNative, idxRegTmp2);
1766 iemNativeRegFreeTmp(pReNative, idxRegTmp);
1767 iemNativeRegFreeTmp(pReNative, idxRegPc);
1768 if (idxRegCsBase != UINT8_MAX)
1769 iemNativeRegFreeTmp(pReNative, idxRegCsBase);
1770 }
1771 return off;
1772}
1773
1774
1775#ifdef BODY_CHECK_CS_LIM
1776/**
1777 * Built-in function that checks that EIP/IP + uParam0 is within CS.LIM,
1778 * raising a \#GP(0) if this isn't the case.
1779 */
1780IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLim)
1781{
1782 uint32_t const cbInstr = (uint8_t)pCallEntry->auParams[0];
1783 BODY_SET_CUR_INSTR();
1784 BODY_FLUSH_PENDING_WRITES();
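 /* Conceptually the emitted check is that the last instruction byte lies
 within the limit, i.e. roughly EIP/IP + cbInstr - 1 <= CS.LIM, raising
 #GP(0) otherwise (illustrative summary; BODY_CHECK_CS_LIM has the details). */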
1785 BODY_CHECK_CS_LIM(cbInstr);
1786 return off;
1787}
1788
1789IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckCsLim)
1790{
1791 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
1792 LIVENESS_CHECK_CS_LIM(pOutgoing);
1793 RT_NOREF(pCallEntry);
1794}
1795#endif
1796
1797
1798#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_CS_LIM)
1799/**
1800 * Built-in function for re-checking opcodes and CS.LIM after an instruction
1801 * that may have modified them.
1802 */
1803IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodes)
1804{
1805 PCIEMTB const pTb = pReNative->pTbOrg;
1806 uint32_t const cbInstr = (uint8_t)pCallEntry->auParams[0];
1807 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1808 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1809 BODY_SET_CUR_INSTR();
1810 BODY_FLUSH_PENDING_WRITES();
1811 BODY_CHECK_CS_LIM(cbInstr);
1812 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1813 return off;
1814}
1815
1816IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckCsLimAndOpcodes)
1817{
1818 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
1819 LIVENESS_CHECK_CS_LIM(pOutgoing);
1820 LIVENESS_CHECK_OPCODES(pOutgoing);
1821 RT_NOREF(pCallEntry);
1822}
1823#endif
1824
1825
1826#if defined(BODY_CHECK_OPCODES)
1827/**
1828 * Built-in function for re-checking opcodes after an instruction that may have
1829 * modified them.
1830 */
1831IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodes)
1832{
1833 PCIEMTB const pTb = pReNative->pTbOrg;
1834 uint32_t const cbInstr = (uint8_t)pCallEntry->auParams[0];
1835 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1836 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1837 BODY_SET_CUR_INSTR();
1838 BODY_FLUSH_PENDING_WRITES();
1839 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1840 return off;
1841}
1842
1843IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckOpcodes)
1844{
1845 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
1846 LIVENESS_CHECK_OPCODES(pOutgoing);
1847 RT_NOREF(pCallEntry);
1848}
1849#endif
1850
1851
1852#if defined(BODY_CHECK_OPCODES) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1853/**
1854 * Built-in function for re-checking opcodes and considering the need for CS.LIM
1855 * checking after an instruction that may have modified them.
1856 */
1857IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesConsiderCsLim)
1858{
1859 PCIEMTB const pTb = pReNative->pTbOrg;
1860 uint32_t const cbInstr = (uint8_t)pCallEntry->auParams[0];
1861 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1862 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1863 BODY_SET_CUR_INSTR();
1864 BODY_FLUSH_PENDING_WRITES();
1865 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1866 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1867 return off;
1868}
1869
1870IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckOpcodesConsiderCsLim)
1871{
1872 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
1873 LIVENESS_CONSIDER_CS_LIM_CHECKING(pOutgoing);
1874 LIVENESS_CHECK_OPCODES(pOutgoing);
1875 RT_NOREF(pCallEntry);
1876}
1877#endif
1878
1879
1880/*
1881 * Post-branching checkers.
1882 */
1883
1884#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH) && defined(BODY_CHECK_CS_LIM)
1885/**
1886 * Built-in function for checking CS.LIM, checking the PC and checking opcodes
1887 * after conditional branching within the same page.
1888 *
1889 * @see iemThreadedFunc_BltIn_CheckPcAndOpcodes
1890 */
1891IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndPcAndOpcodes)
1892{
1893 PCIEMTB const pTb = pReNative->pTbOrg;
1894 uint32_t const cbInstr = (uint8_t)pCallEntry->auParams[0];
1895 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1896 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1897 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1898 BODY_SET_CUR_INSTR();
1899 BODY_FLUSH_PENDING_WRITES();
1900 BODY_CHECK_CS_LIM(cbInstr);
1901 BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, offRange, cbInstr);
1902 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1903 //LogFunc(("okay\n"));
1904 return off;
1905}
1906
1907IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckCsLimAndPcAndOpcodes)
1908{
1909 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
1910 LIVENESS_CHECK_CS_LIM(pOutgoing);
1911 LIVENESS_CHECK_PC_AFTER_BRANCH(pOutgoing, pCallEntry);
1912 LIVENESS_CHECK_OPCODES(pOutgoing);
1913 RT_NOREF(pCallEntry);
1914}
1915#endif
1916
1917
1918#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH)
1919/**
1920 * Built-in function for checking the PC and checking opcodes after conditional
1921 * branching within the same page.
1922 *
1923 * @see iemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
1924 */
1925IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckPcAndOpcodes)
1926{
1927 PCIEMTB const pTb = pReNative->pTbOrg;
1928 uint32_t const cbInstr = (uint8_t)pCallEntry->auParams[0];
1929 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1930 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1931 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1932 BODY_SET_CUR_INSTR();
1933 BODY_FLUSH_PENDING_WRITES();
1934 BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, offRange, cbInstr);
1935 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1936 //LogFunc(("okay\n"));
1937 return off;
1938}
1939
1940IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckPcAndOpcodes)
1941{
1942 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
1943 LIVENESS_CHECK_PC_AFTER_BRANCH(pOutgoing, pCallEntry);
1944 LIVENESS_CHECK_OPCODES(pOutgoing);
1945 RT_NOREF(pCallEntry);
1946}
1947#endif
1948
1949
1950#if defined(BODY_CHECK_OPCODES) && defined(BODY_CHECK_PC_AFTER_BRANCH) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
1951/**
1952 * Built-in function for checking the PC and checking opcodes and considering
1953 * the need for CS.LIM checking after conditional branching within the same
1954 * page.
1955 *
1956 * @see iemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
1957 */
1958IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckPcAndOpcodesConsiderCsLim)
1959{
1960 PCIEMTB const pTb = pReNative->pTbOrg;
1961 uint32_t const cbInstr = (uint8_t)pCallEntry->auParams[0];
1962 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
1963 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
1964 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
1965 BODY_SET_CUR_INSTR();
1966 BODY_FLUSH_PENDING_WRITES();
1967 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
1968 BODY_CHECK_PC_AFTER_BRANCH(pTb, idxRange, offRange, cbInstr);
1969 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
1970 //LogFunc(("okay\n"));
1971 return off;
1972}
1973
1974IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckPcAndOpcodesConsiderCsLim)
1975{
1976 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
1977 LIVENESS_CONSIDER_CS_LIM_CHECKING(pOutgoing);
1978 LIVENESS_CHECK_PC_AFTER_BRANCH(pOutgoing, pCallEntry);
1979 LIVENESS_CHECK_OPCODES(pOutgoing);
1980 RT_NOREF(pCallEntry);
1981}
1982#endif
1983
1984
1985#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH) && defined(BODY_CHECK_CS_LIM)
1986/**
1987 * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
1988 * transitioning to a different code page.
1989 *
1990 * The code page transition can either be natural, flowing onto the next page
1991 * (with the instruction starting at page offset zero), or by means of branching.
1992 *
1993 * @see iemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
1994 */
1995IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb)
1996{
1997 PCIEMTB const pTb = pReNative->pTbOrg;
1998 uint32_t const cbInstr = (uint8_t)pCallEntry->auParams[0];
1999 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
2000 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
2001 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
2002 BODY_SET_CUR_INSTR();
2003 BODY_FLUSH_PENDING_WRITES();
2004 BODY_CHECK_CS_LIM(cbInstr);
2005 Assert(offRange == 0);
2006 BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
2007 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
2008 //LogFunc(("okay\n"));
2009 return off;
2010}
2011
2012IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb)
2013{
2014 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
2015 LIVENESS_CHECK_CS_LIM(pOutgoing);
2016 LIVENESS_LOAD_TLB_AFTER_BRANCH(pOutgoing, pCallEntry);
2017 LIVENESS_CHECK_OPCODES(pOutgoing);
2018 RT_NOREF(pCallEntry);
2019}
2020#endif
2021
2022
2023#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH)
2024/**
2025 * Built-in function for loading TLB and checking opcodes when transitioning to
2026 * a different code page.
2027 *
2028 * The code page transition can either be natural, flowing onto the next page
2029 * (with the instruction starting at page offset zero), or by means of branching.
2030 *
2031 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
2032 */
2033IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlb)
2034{
2035 PCIEMTB const pTb = pReNative->pTbOrg;
2036 uint32_t const cbInstr = (uint8_t)pCallEntry->auParams[0];
2037 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
2038 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
2039 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
2040 BODY_SET_CUR_INSTR();
2041 BODY_FLUSH_PENDING_WRITES();
2042 Assert(offRange == 0);
2043 BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
2044 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
2045 //LogFunc(("okay\n"));
2046 return off;
2047}
2048
2049IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckOpcodesLoadingTlb)
2050{
2051 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
2052 LIVENESS_LOAD_TLB_AFTER_BRANCH(pOutgoing, pCallEntry);
2053 LIVENESS_CHECK_OPCODES(pOutgoing);
2054 RT_NOREF(pCallEntry);
2055}
2056#endif
2057
2058
2059#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_AFTER_BRANCH) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
2060/**
2061 * Built-in function for loading TLB and checking opcodes and considering the
2062 * need for CS.LIM checking when transitioning to a different code page.
2063 *
2064 * The code page transition can either be natural, flowing onto the next page
2065 * (with the instruction starting at page offset zero), or by means of branching.
2066 *
2067 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
2068 */
2069IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim)
2070{
2071 PCIEMTB const pTb = pReNative->pTbOrg;
2072 uint32_t const cbInstr = (uint8_t)pCallEntry->auParams[0];
2073 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
2074 uint32_t const offRange = (uint32_t)pCallEntry->auParams[2];
2075 //LogFunc(("idxRange=%u @ %#x LB %#x: offPhysPage=%#x LB %#x\n", idxRange, offRange, cbInstr, pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].cbOpcodes));
2076 BODY_SET_CUR_INSTR();
2077 BODY_FLUSH_PENDING_WRITES();
2078 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
2079 Assert(offRange == 0);
2080 BODY_LOAD_TLB_AFTER_BRANCH(pTb, idxRange, cbInstr);
2081 BODY_CHECK_OPCODES(pTb, idxRange, offRange, cbInstr);
2082 //LogFunc(("okay\n"));
2083 return off;
2084}
2085
2086IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim)
2087{
2088 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
2089 LIVENESS_CONSIDER_CS_LIM_CHECKING(pOutgoing);
2090 LIVENESS_LOAD_TLB_AFTER_BRANCH(pOutgoing, pCallEntry);
2091 LIVENESS_CHECK_OPCODES(pOutgoing);
2092 RT_NOREF(pCallEntry);
2093}
2094#endif
2095
2096
2097
2098/*
2099 * Natural page crossing checkers.
2100 */
2101
2102#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
2103/**
2104 * Built-in function for checking CS.LIM, loading TLB and checking opcodes on
2105 * both pages when transitioning to a different code page.
2106 *
2107 * This is used when the previous instruction requires revalidation of opcode
2108 * bytes and the current instruction straddles a page boundary with opcode bytes
2109 * in both the old and new page.
2110 *
2111 * @see iemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
2112 */
2113IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb)
2114{
2115 PCIEMTB const pTb = pReNative->pTbOrg;
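 /* Note: auParams[0] packs cbInstr in the low byte and cbStartPage in the
 upper 32 bits, as decoded just below. */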
2116 uint32_t const cbInstr = (uint8_t)pCallEntry->auParams[0];
2117 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
2118 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
2119 uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
2120 uint32_t const idxRange2 = idxRange1 + 1;
2121 BODY_SET_CUR_INSTR();
2122 BODY_FLUSH_PENDING_WRITES();
2123 BODY_CHECK_CS_LIM(cbInstr);
2124 BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
2125 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
2126 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
2127 return off;
2128}
2129
2130IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb)
2131{
2132 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
2133 LIVENESS_CHECK_CS_LIM(pOutgoing);
2134 LIVENESS_CHECK_OPCODES(pOutgoing);
2135 LIVENESS_LOAD_TLB_FOR_NEW_PAGE(pOutgoing, pCallEntry);
2136 RT_NOREF(pCallEntry);
2137}
2138#endif
2139
2140
2141#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
2142/**
2143 * Built-in function for loading TLB and checking opcodes on both pages when
2144 * transitioning to a different code page.
2145 *
2146 * This is used when the previous instruction requires revalidation of opcode
2147 * bytes and the current instruction straddles a page boundary with opcode bytes
2148 * in both the old and new page.
2149 *
2150 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
2151 */
2152IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb)
2153{
2154 PCIEMTB const pTb = pReNative->pTbOrg;
2155 uint32_t const cbInstr = (uint8_t)pCallEntry->auParams[0];
2156 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
2157 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
2158 uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
2159 uint32_t const idxRange2 = idxRange1 + 1;
2160 BODY_SET_CUR_INSTR();
2161 BODY_FLUSH_PENDING_WRITES();
2162 BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
2163 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
2164 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
2165 return off;
2166}
2167
2168IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb)
2169{
2170 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
2171 LIVENESS_CHECK_OPCODES(pOutgoing);
2172 LIVENESS_LOAD_TLB_FOR_NEW_PAGE(pOutgoing, pCallEntry);
2173 RT_NOREF(pCallEntry);
2174}
2175#endif
2176
2177
2178#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
2179/**
2180 * Built-in function for loading TLB and checking opcodes on both pages and
2181 * considering the need for CS.LIM checking when transitioning to a different
2182 * code page.
2183 *
2184 * This is used when the previous instruction requires revalidation of opcode
2185 * bytes and the current instruction straddles a page boundary with opcode bytes
2186 * in both the old and new page.
2187 *
2188 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
2189 */
2190IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim)
2191{
2192 PCIEMTB const pTb = pReNative->pTbOrg;
2193 uint32_t const cbInstr = (uint8_t)pCallEntry->auParams[0];
2194 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
2195 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
2196 uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
2197 uint32_t const idxRange2 = idxRange1 + 1;
2198 BODY_SET_CUR_INSTR();
2199 BODY_FLUSH_PENDING_WRITES();
2200 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
2201 BODY_CHECK_OPCODES(pTb, idxRange1, offRange1, cbInstr);
2202 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
2203 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
2204 return off;
2205}
2206
2207IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim)
2208{
2209 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
2210 LIVENESS_CONSIDER_CS_LIM_CHECKING(pOutgoing);
2211 LIVENESS_CHECK_OPCODES(pOutgoing);
2212 LIVENESS_LOAD_TLB_FOR_NEW_PAGE(pOutgoing, pCallEntry);
2213 RT_NOREF(pCallEntry);
2214}
2215#endif
2216
2217
2218#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
2219/**
2220 * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
2221 * advancing naturally to a different code page.
2222 *
2223 * Only opcodes on the new page are checked.
2224 *
2225 * @see iemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
2226 */
2227IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb)
2228{
2229 PCIEMTB const pTb = pReNative->pTbOrg;
2230 uint32_t const cbInstr = (uint8_t)pCallEntry->auParams[0];
2231 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
2232 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
2233 //uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
2234 uint32_t const idxRange2 = idxRange1 + 1;
2235 BODY_SET_CUR_INSTR();
2236 BODY_FLUSH_PENDING_WRITES();
2237 BODY_CHECK_CS_LIM(cbInstr);
2238 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
2239 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
2240 return off;
2241}
2242
2243IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb)
2244{
2245 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
2246 LIVENESS_CHECK_CS_LIM(pOutgoing);
2247 LIVENESS_LOAD_TLB_FOR_NEW_PAGE(pOutgoing, pCallEntry);
2248 LIVENESS_CHECK_OPCODES(pOutgoing);
2249 RT_NOREF(pCallEntry);
2250}
2251#endif
2252
2253
2254#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
2255/**
2256 * Built-in function for loading TLB and checking opcodes when advancing
2257 * naturally to a different code page.
2258 *
2259 * Only opcodes on the new page are checked.
2260 *
2261 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
2262 */
2263IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb)
2264{
2265 PCIEMTB const pTb = pReNative->pTbOrg;
2266 uint32_t const cbInstr = (uint8_t)pCallEntry->auParams[0];
2267 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
2268 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
2269 //uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
2270 uint32_t const idxRange2 = idxRange1 + 1;
2271 BODY_SET_CUR_INSTR();
2272 BODY_FLUSH_PENDING_WRITES();
2273 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
2274 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
2275 return off;
2276}
2277
2278IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb)
2279{
2280 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
2281 LIVENESS_LOAD_TLB_FOR_NEW_PAGE(pOutgoing, pCallEntry);
2282 LIVENESS_CHECK_OPCODES(pOutgoing);
2283 RT_NOREF(pCallEntry);
2284}
2285#endif
2286
2287
2288#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
2289/**
2290 * Built-in function for loading TLB and checking opcodes and considering the
2291 * need for CS.LIM checking when advancing naturally to a different code page.
2292 *
2293 * Only opcodes on the new page are checked.
2294 *
2295 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
2296 */
2297IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim)
2298{
2299 PCIEMTB const pTb = pReNative->pTbOrg;
2300 uint32_t const cbInstr = (uint8_t)pCallEntry->auParams[0];
2301 uint32_t const cbStartPage = (uint32_t)(pCallEntry->auParams[0] >> 32);
2302 uint32_t const idxRange1 = (uint32_t)pCallEntry->auParams[1];
2303 //uint32_t const offRange1 = (uint32_t)pCallEntry->auParams[2];
2304 uint32_t const idxRange2 = idxRange1 + 1;
2305 BODY_SET_CUR_INSTR();
2306 BODY_FLUSH_PENDING_WRITES();
2307 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
2308 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, cbStartPage, idxRange2, cbInstr);
2309 BODY_CHECK_OPCODES(pTb, idxRange2, 0, cbInstr);
2310 return off;
2311}
2312
2313IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim)
2314{
2315 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
2316 LIVENESS_CONSIDER_CS_LIM_CHECKING(pOutgoing);
2317 LIVENESS_LOAD_TLB_FOR_NEW_PAGE(pOutgoing, pCallEntry);
2318 LIVENESS_CHECK_OPCODES(pOutgoing);
2319 RT_NOREF(pCallEntry);
2320}
2321#endif
2322
2323
2324#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CHECK_CS_LIM)
2325/**
2326 * Built-in function for checking CS.LIM, loading TLB and checking opcodes when
2327 * advancing naturally to a different code page with first instr at byte 0.
2328 *
2329 * @see iemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
2330 */
2331IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb)
2332{
2333 PCIEMTB const pTb = pReNative->pTbOrg;
2334 uint32_t const cbInstr = (uint8_t)pCallEntry->auParams[0];
2335 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
2336 BODY_SET_CUR_INSTR();
2337 BODY_FLUSH_PENDING_WRITES();
2338 BODY_CHECK_CS_LIM(cbInstr);
2339 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
2340 //Assert(pVCpu->iem.s.offCurInstrStart == 0);
2341 BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
2342 return off;
2343}
2344
2345IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb)
2346{
2347 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
2348 LIVENESS_CHECK_CS_LIM(pOutgoing);
2349 LIVENESS_LOAD_TLB_FOR_NEW_PAGE(pOutgoing, pCallEntry);
2350 LIVENESS_CHECK_OPCODES(pOutgoing);
2351 RT_NOREF(pCallEntry);
2352}
2353#endif
2354
2355
2356#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE)
2357/**
2358 * Built-in function for loading TLB and checking opcodes when advancing
2359 * naturally to a different code page with first instr at byte 0.
2360 *
2361 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
2362 */
2363IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb)
2364{
2365 PCIEMTB const pTb = pReNative->pTbOrg;
2366 uint32_t const cbInstr = (uint8_t)pCallEntry->auParams[0];
2367 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
2368 BODY_SET_CUR_INSTR();
2369 BODY_FLUSH_PENDING_WRITES();
2370 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
2371 //Assert(pVCpu->iem.s.offCurInstrStart == 0);
2372 BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
2373 return off;
2374}
2375
2376IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb)
2377{
2378 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
2379 LIVENESS_LOAD_TLB_FOR_NEW_PAGE(pOutgoing, pCallEntry);
2380 LIVENESS_CHECK_OPCODES(pOutgoing);
2381 RT_NOREF(pCallEntry);
2382}
2383#endif
2384
2385
2386#if defined(BODY_CHECK_OPCODES) && defined(BODY_LOAD_TLB_FOR_NEW_PAGE) && defined(BODY_CONSIDER_CS_LIM_CHECKING)
2387/**
2388 * Built-in function for loading TLB and checking opcodes and considering the
2389 * need for CS.LIM checking when advancing naturally to a different code page
2390 * with first instr at byte 0.
2391 *
2392 * @see iemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
2393 */
2394IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim)
2395{
2396 PCIEMTB const pTb = pReNative->pTbOrg;
2397 uint32_t const cbInstr = (uint8_t)pCallEntry->auParams[0];
2398 uint32_t const idxRange = (uint32_t)pCallEntry->auParams[1];
2399 BODY_SET_CUR_INSTR();
2400 BODY_FLUSH_PENDING_WRITES();
2401 BODY_CONSIDER_CS_LIM_CHECKING(pTb, cbInstr);
2402 BODY_LOAD_TLB_FOR_NEW_PAGE(pTb, 0, idxRange, cbInstr);
2403 //Assert(pVCpu->iem.s.offCurInstrStart == 0);
2404 BODY_CHECK_OPCODES(pTb, idxRange, 0, cbInstr);
2405 return off;
2406}
2407
2408IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim)
2409{
2410 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
2411 LIVENESS_CONSIDER_CS_LIM_CHECKING(pOutgoing);
2412 LIVENESS_LOAD_TLB_FOR_NEW_PAGE(pOutgoing, pCallEntry);
2413 LIVENESS_CHECK_OPCODES(pOutgoing);
2414 RT_NOREF(pCallEntry);
2415}
2416#endif
2417
2418
2419/**
2420 * Built-in function for jumping in the call sequence.
2421 */
2422IEM_DECL_IEMNATIVERECOMPFUNC_DEF(iemNativeRecompFunc_BltIn_Jump)
2423{
2424 PCIEMTB const pTb = pReNative->pTbOrg;
2425 Assert(pCallEntry->auParams[1] == 0 && pCallEntry->auParams[2] == 0);
2426 Assert(pCallEntry->auParams[0] < pTb->Thrd.cCalls);
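 /* Rather than exiting, this bumps the StatNativeTbExitLoopFullTb statistics
 counter and jumps back to the loop target label at the start of the TB
 (see the kIemNativeLabelType_LoopJumpTarget lookup below). */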
2427#if 1
2428 RT_NOREF(pCallEntry, pTb);
2429
2430# ifdef VBOX_WITH_STATISTICS
2431 /* Increment StatNativeTbExitLoopFullTb. */
2432 uint32_t const offStat = RT_UOFFSETOF(VMCPU, iem.s.StatNativeTbExitLoopFullTb);
2433# ifdef RT_ARCH_AMD64
2434 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, UINT8_MAX, UINT8_MAX, offStat);
2435# else
2436 uint8_t const idxStatsTmp1 = iemNativeRegAllocTmp(pReNative, &off);
2437 uint8_t const idxStatsTmp2 = iemNativeRegAllocTmp(pReNative, &off);
2438 off = iemNativeEmitIncStamCounterInVCpu(pReNative, off, idxStatsTmp1, idxStatsTmp2, offStat);
2439 iemNativeRegFreeTmp(pReNative, idxStatsTmp1);
2440 iemNativeRegFreeTmp(pReNative, idxStatsTmp2);
2441# endif
2442# endif
2443# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2444 /** @todo
2445 off = iemNativeEmitAddU32CounterInVCpuEx(pReNative, off, pTb->cInstructions, RT_UOFFSETOF(VMCPUCC, iem.s.cInstructions));
2446 */
2447# endif
2448
2449 /* Jump to the start of the TB. */
2450 uint32_t idxLabel = iemNativeLabelFind(pReNative, kIemNativeLabelType_LoopJumpTarget);
2451 AssertStmt(idxLabel < pReNative->cLabels, IEMNATIVE_DO_LONGJMP(pReNative, VERR_IEM_LABEL_IPE_6)); /** @todo better status */
2452 return iemNativeEmitJmpToLabel(pReNative, off, idxLabel);
2453#else
2454 RT_NOREF(pReNative, pCallEntry, pTb);
2455 return off;
2456#endif
2457}
2458
2459IEM_DECL_IEMNATIVELIVENESSFUNC_DEF(iemNativeLivenessFunc_BltIn_Jump)
2460{
2461 IEM_LIVENESS_RAW_INIT_WITH_XCPT_OR_CALL(pOutgoing, pIncoming);
2462 RT_NOREF(pCallEntry);
2463}
2464