VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp@105184

Last change on this file since 105184 was 105179, checked in by vboxsync, 8 months ago

VMM/IEM: Introduce a small cache of recently freed TBs for immediate consumption by the allocator to reduce the amount of time required in ASMBitFirstClear()/ASMBitNextClear() to scan for freed TBs when allocating a new TB, bugref:10653

1/* $Id: IEMAllThrdRecompiler.cpp 105179 2024-07-08 10:38:14Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) : TB calls being emitted.
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) : TB opcode range management.
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/sort.h>
86#include <iprt/x86.h>
87
88#ifndef TST_IEM_CHECK_MC
89# include "IEMInline.h"
90# include "IEMOpHlp.h"
91# include "IEMMc.h"
92#endif
93
94#include "IEMThreadedFunctions.h"
95#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
96# include "IEMN8veRecompiler.h"
97#endif
98
99
100/*
101 * Narrow down the configs here to avoid wasting time on unused ones.
102 */
103
104#ifndef IEM_WITH_CODE_TLB
105# error The code TLB must be enabled for the recompiler.
106#endif
107
108#ifndef IEM_WITH_DATA_TLB
109# error The data TLB must be enabled for the recompiler.
110#endif
111
112#ifndef IEM_WITH_SETJMP
113# error The setjmp approach must be enabled for the recompiler.
114#endif
115
116
117
118/**
119 * Calculates the effective address of a ModR/M memory operand, extended version
120 * for use in the recompilers.
121 *
122 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
123 *
124 * May longjmp on internal error.
125 *
126 * @return The effective address.
127 * @param pVCpu The cross context virtual CPU structure of the calling thread.
128 * @param bRm The ModRM byte.
129 * @param cbImmAndRspOffset - First byte: The size of any immediate
130 * following the effective address opcode bytes
131 * (only for RIP relative addressing).
132 * - Second byte: RSP displacement (for POP [ESP]).
133 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
134 * SIB byte (bits 39:32).
135 *
136 * @note This must be defined in a source file with matching
137 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
138 * or implemented differently...
139 */
140RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
141{
142 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
143# define SET_SS_DEF() \
144 do \
145 { \
146 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
147 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
148 } while (0)
149
150 if (!IEM_IS_64BIT_CODE(pVCpu))
151 {
152/** @todo Check the effective address size crap! */
153 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
154 {
155 uint16_t u16EffAddr;
156
157 /* Handle the disp16 form with no registers first. */
158 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
159 {
160 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
161 *puInfo = u16EffAddr;
162 }
163 else
164 {
165 /* Get the displacement. */
166 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
167 {
168 case 0: u16EffAddr = 0; break;
169 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
170 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
171 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
172 }
173 *puInfo = u16EffAddr;
174
175 /* Add the base and index registers to the disp. */
176 switch (bRm & X86_MODRM_RM_MASK)
177 {
178 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
179 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
180 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
181 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
182 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
183 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
184 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
185 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
186 }
187 }
188
189 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
190 return u16EffAddr;
191 }
192
193 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
194 uint32_t u32EffAddr;
195 uint64_t uInfo;
196
197 /* Handle the disp32 form with no registers first. */
198 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
199 {
200 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
201 uInfo = u32EffAddr;
202 }
203 else
204 {
205 /* Get the register (or SIB) value. */
206 uInfo = 0;
207 switch ((bRm & X86_MODRM_RM_MASK))
208 {
209 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
210 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
211 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
212 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
213 case 4: /* SIB */
214 {
215 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
216 uInfo = (uint64_t)bSib << 32;
217
218 /* Get the index and scale it. */
219 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
220 {
221 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
222 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
223 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
224 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
225 case 4: u32EffAddr = 0; /*none */ break;
226 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
227 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
228 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
229 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
230 }
231 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
232
233 /* add base */
234 switch (bSib & X86_SIB_BASE_MASK)
235 {
236 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
237 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
238 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
239 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
240 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
241 case 5:
242 if ((bRm & X86_MODRM_MOD_MASK) != 0)
243 {
244 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
245 SET_SS_DEF();
246 }
247 else
248 {
249 uint32_t u32Disp;
250 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
251 u32EffAddr += u32Disp;
252 uInfo |= u32Disp;
253 }
254 break;
255 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
256 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
257 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
258 }
259 break;
260 }
261 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
262 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
263 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
264 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
265 }
266
267 /* Get and add the displacement. */
268 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
269 {
270 case 0:
271 break;
272 case 1:
273 {
274 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
275 u32EffAddr += i8Disp;
276 uInfo |= (uint32_t)(int32_t)i8Disp;
277 break;
278 }
279 case 2:
280 {
281 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
282 u32EffAddr += u32Disp;
283 uInfo |= u32Disp;
284 break;
285 }
286 default:
287 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
288 }
289 }
290
291 *puInfo = uInfo;
292 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
293 return u32EffAddr;
294 }
295
296 uint64_t u64EffAddr;
297 uint64_t uInfo;
298
299 /* Handle the rip+disp32 form with no registers first. */
300 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
301 {
302 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
303 uInfo = (uint32_t)u64EffAddr;
304 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
305 }
306 else
307 {
308 /* Get the register (or SIB) value. */
309 uInfo = 0;
310 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
311 {
312 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
313 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
314 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
315 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
316 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
317 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
318 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
319 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
320 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
321 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
322 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
323 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
324 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
325 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
326 /* SIB */
327 case 4:
328 case 12:
329 {
330 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
331 uInfo = (uint64_t)bSib << 32;
332
333 /* Get the index and scale it. */
334 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
335 {
336 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
337 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
338 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
339 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
340 case 4: u64EffAddr = 0; /*none */ break;
341 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
342 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
343 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
344 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
345 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
346 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
347 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
348 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
349 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
350 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
351 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
352 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
353 }
354 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
355
356 /* add base */
357 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
358 {
359 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
360 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
361 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
362 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
363 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
364 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
365 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
366 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
367 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
368 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
369 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
370 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
371 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
372 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
373 /* complicated encodings */
374 case 5:
375 case 13:
376 if ((bRm & X86_MODRM_MOD_MASK) != 0)
377 {
378 if (!pVCpu->iem.s.uRexB)
379 {
380 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
381 SET_SS_DEF();
382 }
383 else
384 u64EffAddr += pVCpu->cpum.GstCtx.r13;
385 }
386 else
387 {
388 uint32_t u32Disp;
389 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
390 u64EffAddr += (int32_t)u32Disp;
391 uInfo |= u32Disp;
392 }
393 break;
394 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
395 }
396 break;
397 }
398 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
399 }
400
401 /* Get and add the displacement. */
402 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
403 {
404 case 0:
405 break;
406 case 1:
407 {
408 int8_t i8Disp;
409 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
410 u64EffAddr += i8Disp;
411 uInfo |= (uint32_t)(int32_t)i8Disp;
412 break;
413 }
414 case 2:
415 {
416 uint32_t u32Disp;
417 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
418 u64EffAddr += (int32_t)u32Disp;
419 uInfo |= u32Disp;
420 break;
421 }
422 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
423 }
424
425 }
426
427 *puInfo = uInfo;
428 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
429 {
430 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
431 return u64EffAddr;
432 }
433 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
434 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
435 return u64EffAddr & UINT32_MAX;
436}
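
/*
 * For illustration: a minimal, standalone sketch of how a caller could compose
 * the packed cbImmAndRspOffset argument and take the *puInfo output apart, as
 * described in the function documentation above (first byte = immediate size,
 * second byte = RSP displacement; uInfo bits 31:0 = displacement, bits 39:32 =
 * SIB byte).  The helper names are hypothetical; only the bit layout is taken
 * from the code above.
 *
 * @code
 *  #include <stdint.h>
 *
 *  static inline uint32_t ExamplePackImmAndRspOffset(uint8_t cbImm, uint8_t offRsp)
 *  {
 *      return (uint32_t)cbImm | ((uint32_t)offRsp << 8);
 *  }
 *
 *  static inline uint32_t ExampleUInfoGetDisp(uint64_t uInfo) { return (uint32_t)uInfo; }
 *  static inline uint8_t  ExampleUInfoGetSib(uint64_t uInfo)  { return (uint8_t)(uInfo >> 32); }
 * @endcode
 */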
437
438
439/*********************************************************************************************************************************
440* Translation Block Cache. *
441*********************************************************************************************************************************/
442
443/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
444static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
445{
446 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
447 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
448 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
449 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
450 if (cMsSinceUse1 != cMsSinceUse2)
451 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
452 if (pTb1->cUsed != pTb2->cUsed)
453 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
454 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
455 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
456 return 0;
457}
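
/*
 * A worked example of the order this comparator produces with pvUser = msNow =
 * 1000 (values purely illustrative):
 *   1) msLastUsed=999, cUsed=3,  native    - most recently used sorts first
 *   2) msLastUsed=999, cUsed=1,  threaded  - same age, fewer uses sorts later
 *   3) msLastUsed=900, cUsed=50, threaded  - older entries sort last
 * iemTbCacheAddWithPruning() below sorts a full collision chain like this and
 * keeps only the first half.
 */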
458
459#ifdef VBOX_STRICT
460/**
461 * Assertion helper that checks a collision list count.
462 */
463static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
464{
465 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
466 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
467 while (pTb)
468 {
469 pTb = pTb->pNext;
470 cLeft--;
471 }
472 AssertMsg(cLeft == 0,
473 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
474 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
475}
476#endif
477
478
479DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
480{
481 STAM_PROFILE_START(&pTbCache->StatPrune, a);
482
483 /*
484 * First convert the collision list to an array.
485 */
486 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
487 uintptr_t cInserted = 0;
488 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
489
490 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
491
492 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
493 {
494 apSortedTbs[cInserted++] = pTbCollision;
495 pTbCollision = pTbCollision->pNext;
496 }
497
498 /* Free any excess (impossible). */
499 if (RT_LIKELY(!pTbCollision))
500 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
501 else
502 do
503 {
504 PIEMTB pTbToFree = pTbCollision;
505 pTbCollision = pTbToFree->pNext;
506 iemTbAllocatorFree(pVCpu, pTbToFree);
507 } while (pTbCollision);
508
509 /*
510 * Sort it by most recently used and usage count.
511 */
512 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
513
514 /* We keep half the list for now. Perhaps a bit aggressive... */
515 uintptr_t const cKeep = cInserted / 2;
516
517 /* First free up the TBs we don't wish to keep (before creating the new
518 list because otherwise the free code will scan the list for each one
519 without ever finding it). */
520 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
521 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
522
523 /* Then chain the new TB together with the existing ones we want to keep
524 and insert this list into the hash table. */
525 pTbCollision = pTb;
526 for (uintptr_t idx = 0; idx < cKeep; idx++)
527 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
528 pTbCollision->pNext = NULL;
529
530 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
531#ifdef VBOX_STRICT
532 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
533#endif
534
535 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
536}
537
538
539static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
540{
541 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
542 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
543 if (!pTbOldHead)
544 {
545 pTb->pNext = NULL;
546 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
547 }
548 else
549 {
550 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
551 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
552 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
553 {
554 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
555 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
556#ifdef VBOX_STRICT
557 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
558#endif
559 }
560 else
561 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
562 }
563}
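
/*
 * Note that the apHash[] entries manipulated above are not plain pointers:
 * IEMTBCACHE_PTR_MAKE() packs a small chain-length count in with the head
 * pointer, and IEMTBCACHE_PTR_GET_TB() / IEMTBCACHE_PTR_GET_COUNT() split it
 * apart again.  Below is a minimal sketch of one way such a counted pointer
 * can be encoded, assuming the count lives in low pointer bits left free by
 * the TB alignment (the AssertCompile on sizeof(IEMTB) in iemTbInit() points
 * in that direction); the names and mask are illustrative, not the real
 * macros.
 *
 * @code
 *  #include <stdint.h>
 *
 *  #define EXAMPLE_COUNT_MASK  UINT64_C(0xf)   // room for a 0..15 chain length
 *
 *  static inline uint64_t ExampleMake(void *pvHead, unsigned cTbs)
 *  {
 *      // Caller keeps the chain short enough (cf. IEMTBCACHE_PTR_MAX_COUNT).
 *      return (uint64_t)(uintptr_t)pvHead | (cTbs & EXAMPLE_COUNT_MASK);
 *  }
 *
 *  static inline void *ExampleGetHead(uint64_t uPacked)
 *  {
 *      return (void *)(uintptr_t)(uPacked & ~EXAMPLE_COUNT_MASK);
 *  }
 *
 *  static inline unsigned ExampleGetCount(uint64_t uPacked)
 *  {
 *      return (unsigned)(uPacked & EXAMPLE_COUNT_MASK);
 *  }
 * @endcode
 */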
564
565
566/**
567 * Unlinks @a pTb from the hash table if found in it.
568 *
569 * @returns true if unlinked, false if not present.
570 * @param pTbCache The hash table.
571 * @param pTb The TB to remove.
572 */
573static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
574{
575 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
576 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
577 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
578
579 /*
580 * At the head of the collision list?
581 */
582 if (pTbHash == pTb)
583 {
584 if (!pTb->pNext)
585 pTbCache->apHash[idxHash] = NULL;
586 else
587 {
588 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
589 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
590#ifdef VBOX_STRICT
591 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
592#endif
593 }
594 return true;
595 }
596
597 /*
598 * Search the collision list.
599 */
600 PIEMTB const pTbHead = pTbHash;
601 while (pTbHash)
602 {
603 PIEMTB const pNextTb = pTbHash->pNext;
604 if (pNextTb == pTb)
605 {
606 pTbHash->pNext = pTb->pNext;
607 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
608#ifdef VBOX_STRICT
609 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
610#endif
611 return true;
612 }
613 pTbHash = pNextTb;
614 }
615 return false;
616}
617
618
619/**
620 * Looks up a TB for the given PC and flags in the cache.
621 *
622 * @returns Pointer to TB on success, NULL if not found.
623 * @param pVCpu The cross context virtual CPU structure of the
624 * calling thread.
625 * @param pTbCache The translation block cache.
626 * @param GCPhysPc The PC to look up a TB for.
627 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
628 * the lookup.
629 * @thread EMT(pVCpu)
630 */
631static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
632 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP /** @todo r=bird: no longjumping here, right? iemNativeRecompile is noexcept. */
633{
634 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
635
636 /*
637 * First consult the lookup table entry.
638 */
639 PIEMTB * const ppTbLookup = pVCpu->iem.s.ppTbLookupEntryR3;
640 PIEMTB pTb = *ppTbLookup;
641 if (pTb)
642 {
643 if (pTb->GCPhysPc == GCPhysPc)
644 {
645 if ( (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_NATIVE)
646 || (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_THREADED) )
647 {
648 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
649 {
650 STAM_COUNTER_INC(&pTbCache->cLookupHitsViaTbLookupTable);
651 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
652 pTb->cUsed++;
653#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
654 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
655 {
656 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
657 return pTb;
658 }
659 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p) - recompiling\n", fFlags, GCPhysPc, pTb, ppTbLookup));
660 return iemNativeRecompile(pVCpu, pTb);
661#else
662 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
663 return pTb;
664#endif
665 }
666 }
667 }
668 }
669
670 /*
671 * Then consult the hash table.
672 */
673 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
674#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
675 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
676#endif
677 pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
678 while (pTb)
679 {
680 if (pTb->GCPhysPc == GCPhysPc)
681 {
682 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
683 {
684 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
685 {
686 STAM_COUNTER_INC(&pTbCache->cLookupHits);
687 AssertMsg(cLeft > 0, ("%d\n", cLeft));
688
689 *ppTbLookup = pTb;
690 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
691 pTb->cUsed++;
692#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
693 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
694 {
695 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
696 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
697 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
698 return pTb;
699 }
700 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d) - recompiling\n",
701 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
702 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
703 return iemNativeRecompile(pVCpu, pTb);
704#else
705 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
706 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
707 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
708 return pTb;
709#endif
710 }
711 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
712 }
713 else
714 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
715 }
716 else
717 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
718
719 pTb = pTb->pNext;
720#ifdef VBOX_STRICT
721 cLeft--;
722#endif
723 }
724 AssertMsg(cLeft == 0, ("%d\n", cLeft));
725 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
726 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: NULL - (%p L %d)\n", fFlags, GCPhysPc, idxHash,
727 IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]), IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
728 return pTb;
729}
730
731
732/*********************************************************************************************************************************
733* Translation Block Allocator.
734*********************************************************************************************************************************/
735/*
736 * Translation block allocation management.
737 */
738
739#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
740# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
741 ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
742# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
743 ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
744# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
745 ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
746#else
747# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
748 ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
749# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
750 ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
751# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
752 ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
753#endif
754/** Makes a TB index from a chunk index and TB index within that chunk. */
755#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
756 (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
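
/*
 * A small worked example of the index math above, assuming (purely for
 * illustration) 32768 TBs per chunk, i.e. cChunkShift = 15 and
 * fChunkMask = 0x7fff in the power-of-two configuration:
 *
 * @code
 *  uint32_t const idxTb      = 70000;
 *  uint32_t const idxChunk   = idxTb >> 15;                    // = 70000 / 32768 = 2
 *  uint32_t const idxInChunk = idxTb & UINT32_C(0x7fff);       // = 70000 - 2*32768 = 4464
 *  uint32_t const idxAgain   = (idxChunk << 15) + idxInChunk;  // back to 70000
 * @endcode
 */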
757
758
759/**
760 * Initializes the TB allocator and cache for an EMT.
761 *
762 * @returns VBox status code.
763 * @param pVM The VM handle.
764 * @param cInitialTbs The initial number of translation blocks to
765 * preallocate.
766 * @param cMaxTbs The max number of translation blocks allowed.
767 * @param cbInitialExec The initial size of the executable memory allocator.
768 * @param cbMaxExec The max size of the executable memory allocator.
769 * @param cbChunkExec The chunk size for executable memory allocator. Zero
770 * or UINT32_MAX for automatically determining this.
771 * @thread EMT
772 */
773DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
774 uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
775{
776 PVMCPUCC pVCpu = VMMGetCpu(pVM);
777 Assert(!pVCpu->iem.s.pTbCacheR3);
778 Assert(!pVCpu->iem.s.pTbAllocatorR3);
779
780 /*
781 * Calculate the chunk size of the TB allocator.
782 * The minimum chunk size is 2MiB.
783 */
784 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
785 uint32_t cbPerChunk = _2M;
786 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
787#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
788 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
789 uint8_t cChunkShift = 21 - cTbShift;
790 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
791#endif
792 for (;;)
793 {
794 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
795 break;
796 cbPerChunk *= 2;
797 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
798#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
799 cChunkShift += 1;
800#endif
801 }
802
803 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
804 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
805 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
806
807 cMaxTbs = cMaxChunks * cTbsPerChunk;
808
809 /*
810 * Allocate and initialize it.
811 */
812 uint32_t const c64BitWords = RT_ALIGN_32(cMaxTbs, 64) / 64;
813 size_t const cbTbAllocator = RT_UOFFSETOF_DYN(IEMTBALLOCATOR, bmAllocated[c64BitWords]);
814 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(cbTbAllocator);
815 if (!pTbAllocator)
816 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
817 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
818 cbTbAllocator, cMaxTbs, pVCpu->idCpu);
819 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
820 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
821 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
822 pTbAllocator->cbPerChunk = cbPerChunk;
823 pTbAllocator->cMaxTbs = cMaxTbs;
824#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
825 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
826 pTbAllocator->cChunkShift = cChunkShift;
827 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
828#endif
829
830 memset(pTbAllocator->bmAllocated, 0xff, c64BitWords * sizeof(uint64_t)); /* Mark all as allocated, clear as chunks are added. */
831 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
832
833 /*
834 * Allocate the initial chunks.
835 */
836 for (uint32_t idxChunk = 0; ; idxChunk++)
837 {
838 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
839 if (!paTbs)
840 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
841 "Failed to allocate %zu bytes for the #%u chunk of TBs for VCpu #%u",
842 cbPerChunk, idxChunk, pVCpu->idCpu);
843
844 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
845 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
846 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
847 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
848 pTbAllocator->cTotalTbs += cTbsPerChunk;
849
850 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
851 break;
852 }
853
854 /*
855 * Calculate the size of the hash table. We double the max TB count and
856 * round it up to the nearest power of two.
857 */
858 uint32_t cCacheEntries = cMaxTbs * 2;
859 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
860 {
861 uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
862 cCacheEntries = RT_BIT_32(iBitTop);
863 Assert(cCacheEntries >= cMaxTbs * 2);
864 }
865
866 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
867 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
868 if (!pTbCache)
869 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
870 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
871 cbTbCache, cCacheEntries, pVCpu->idCpu);
872
873 /*
874 * Initialize it (assumes zeroed by the allocator).
875 */
876 pTbCache->uMagic = IEMTBCACHE_MAGIC;
877 pTbCache->cHash = cCacheEntries;
878 pTbCache->uHashMask = cCacheEntries - 1;
879 Assert(pTbCache->cHash > pTbCache->uHashMask);
880 pVCpu->iem.s.pTbCacheR3 = pTbCache;
881
882 /*
883 * Initialize the native executable memory allocator.
884 */
885#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
886 int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
887 AssertLogRelRCReturn(rc, rc);
888#else
889 RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
890#endif
891
892 return VINF_SUCCESS;
893}
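
/*
 * The chunk size calculation at the top of iemTbInit() boils down to: start
 * with 2 MiB chunks and keep doubling until cMaxTbs fits into the fixed-size
 * aChunks array.  A standalone restatement of that loop (illustrative helper
 * name and plain integer types, not the real structures):
 *
 * @code
 *  #include <stdint.h>
 *
 *  static void ExampleCalcChunking(uint64_t cMaxTbs, uint32_t cbTb, uint32_t cMaxChunks,
 *                                  uint32_t *pcbPerChunk, uint32_t *pcTbsPerChunk)
 *  {
 *      uint32_t cbPerChunk   = 2U * 1024 * 1024;   // minimum chunk size: 2 MiB
 *      uint32_t cTbsPerChunk = cbPerChunk / cbTb;
 *      while (cMaxTbs > (uint64_t)cTbsPerChunk * cMaxChunks)  // double until it fits
 *      {
 *          cbPerChunk   *= 2;
 *          cTbsPerChunk  = cbPerChunk / cbTb;
 *      }
 *      *pcbPerChunk   = cbPerChunk;
 *      *pcTbsPerChunk = cTbsPerChunk;
 *  }
 * @endcode
 *
 * For instance, with a hypothetical sizeof(IEMTB) of 256 bytes and a 64 entry
 * aChunks array, one 2 MiB chunk holds 8192 TBs, so up to 524288 TBs fit
 * before the chunk size has to be doubled.
 */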
894
895
896/**
897 * Inner free worker.
898 */
899static void iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator,
900 PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
901{
902 Assert(idxChunk < pTbAllocator->cAllocatedChunks);
903 Assert(idxInChunk < pTbAllocator->cTbsPerChunk);
904 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
905 Assert(ASMBitTest(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk)));
906#ifdef VBOX_STRICT
907 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
908 Assert(pTbOther != pTb);
909#endif
910
911 /*
912 * Unlink the TB from the hash table.
913 */
914 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
915
916 /*
917 * Free the TB itself.
918 */
919 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
920 {
921 case IEMTB_F_TYPE_THREADED:
922 pTbAllocator->cThreadedTbs -= 1;
923 RTMemFree(pTb->Thrd.paCalls);
924 break;
925#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
926 case IEMTB_F_TYPE_NATIVE:
927 pTbAllocator->cNativeTbs -= 1;
928 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
929 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
930 pTb->Native.paInstructions = NULL; /* required by iemExecMemAllocatorPrune */
931 break;
932#endif
933 default:
934 AssertFailed();
935 }
936
937 RTMemFree(IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0)); /* Frees both the TB lookup table and opcode bytes. */
938
939 pTb->pNext = NULL;
940 pTb->fFlags = 0;
941 pTb->GCPhysPc = UINT64_MAX;
942 pTb->Gen.uPtr = 0;
943 pTb->Gen.uData = 0;
944 pTb->cTbLookupEntries = 0;
945 pTb->cbOpcodes = 0;
946 pTb->pabOpcodes = NULL;
947
948 if (pTbAllocator->idxTbCacheFree < RT_ELEMENTS(pTbAllocator->apTbFreeCache))
949 pTbAllocator->apTbFreeCache[pTbAllocator->idxTbCacheFree++] = pTb;
950 else
951 ASMBitClear(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk));
952 Assert(pTbAllocator->cInUseTbs > 0);
953
954 pTbAllocator->cInUseTbs -= 1;
955 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
956}
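
/*
 * The tail end of the free path above feeds the small cache of recently freed
 * TBs (apTbFreeCache/idxTbCacheFree) that the r105179 change description at
 * the top of the page refers to: if the cache has room, the TB is parked there
 * and its allocation bit stays set, so iemTbAllocatorAllocCore() can hand it
 * out again without a bitmap scan.  A stripped-down, standalone sketch of that
 * pattern with illustrative names and a generic object pool (not the real IEM
 * structures):
 *
 * @code
 *  #include <stdint.h>
 *  #include <stddef.h>
 *
 *  #define EXAMPLE_FREE_CACHE_SIZE 32
 *
 *  typedef struct EXAMPLEPOOL
 *  {
 *      void      *apFreeCache[EXAMPLE_FREE_CACHE_SIZE]; // LIFO of recently freed objects
 *      uint32_t   cFreeCache;                            // number of valid cache entries
 *      uint64_t  *pbmAllocated;                          // fallback allocation bitmap
 *  } EXAMPLEPOOL;
 *
 *  static void ExampleFree(EXAMPLEPOOL *pPool, void *pvObj, uint32_t idxObj)
 *  {
 *      if (pPool->cFreeCache < EXAMPLE_FREE_CACHE_SIZE)
 *          pPool->apFreeCache[pPool->cFreeCache++] = pvObj;  // cheap path, bit stays set
 *      else
 *          pPool->pbmAllocated[idxObj / 64] &= ~(UINT64_C(1) << (idxObj % 64));
 *  }
 *
 *  static void *ExampleAllocFromCache(EXAMPLEPOOL *pPool)
 *  {
 *      return pPool->cFreeCache ? pPool->apFreeCache[--pPool->cFreeCache] : NULL;
 *  }
 * @endcode
 */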
957
958
959/**
960 * Frees the given TB.
961 *
962 * @param pVCpu The cross context virtual CPU structure of the calling
963 * thread.
964 * @param pTb The translation block to free.
965 * @thread EMT(pVCpu)
966 */
967DECLHIDDEN(void) iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
968{
969 /*
970 * Validate state.
971 */
972 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
973 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
974 uint8_t const idxChunk = pTb->idxAllocChunk;
975 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
976 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
977 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
978
979 /*
980 * Invalidate the TB lookup pointer and call the inner worker.
981 */
982 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
983 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
984}
985
986
987/**
988 * Schedules a TB for freeing when it's no longer being executed and/or part of
989 * the caller's call stack.
990 *
991 * The TB will be removed from the translation block cache, though, so it isn't
992 * possible to execute it again and the IEMTB::pNext member can be used to link
993 * it together with other TBs awaiting freeing.
994 *
995 * @param pVCpu The cross context virtual CPU structure of the calling
996 * thread.
997 * @param pTb The translation block to schedule for freeing.
998 */
999static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
1000{
1001 /*
1002 * Validate state.
1003 */
1004 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1005 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1006 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
1007 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
1008 Assert(ASMBitTest(&pTbAllocator->bmAllocated,
1009 IEMTBALLOC_IDX_MAKE(pTbAllocator, pTb->idxAllocChunk,
1010 (uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs))));
1011 Assert( (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE
1012 || (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1013#ifdef VBOX_STRICT
1014 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
1015 Assert(pTbOther != pTb);
1016#endif
1017
1018 /*
1019 * Remove it from the cache and prepend it to the allocator's todo list.
1020 *
1021 * Note! It could still be in various lookup tables, so we trash the GCPhys
1022 * and CS attribs to ensure it won't be reused.
1023 */
1024 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
1025 pTb->GCPhysPc = NIL_RTGCPHYS;
1026 pTb->x86.fAttr = UINT16_MAX;
1027
1028 pTb->pNext = pTbAllocator->pDelayedFreeHead;
1029 pTbAllocator->pDelayedFreeHead = pTb;
1030}
1031
1032
1033/**
1034 * Processes the delayed frees.
1035 *
1036 * This is called by the allocator function as well as the native recompile
1037 * function before making any TB or executable memory allocations respectively.
1038 */
1039void iemTbAllocatorProcessDelayedFrees(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator)
1040{
1041 /** @todo r=bird: these have already been removed from the cache,
1042 * iemTbAllocatorFree/Inner redoes that, which is a waste of time. */
1043 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
1044 pTbAllocator->pDelayedFreeHead = NULL;
1045 while (pTb)
1046 {
1047 PIEMTB const pTbNext = pTb->pNext;
1048 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
1049 iemTbAllocatorFree(pVCpu, pTb);
1050 pTb = pTbNext;
1051 }
1052}
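
/*
 * The drain above detaches the whole delayed-free list before freeing anything
 * and walks it via a saved pNext, since iemTbAllocatorFreeInner() resets
 * IEMTB::pNext while freeing.  The same pattern in a minimal, generic form
 * (illustrative names only):
 *
 * @code
 *  typedef struct EXAMPLENODE { struct EXAMPLENODE *pNext; } EXAMPLENODE;
 *
 *  static void ExampleDrainList(EXAMPLENODE **ppHead, void (*pfnFree)(EXAMPLENODE *))
 *  {
 *      EXAMPLENODE *pCur = *ppHead;
 *      *ppHead = NULL;                               // detach the list first
 *      while (pCur)
 *      {
 *          EXAMPLENODE * const pNext = pCur->pNext;  // save: pfnFree may clobber pNext
 *          pfnFree(pCur);
 *          pCur = pNext;
 *      }
 *  }
 * @endcode
 */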
1053
1054
1055/**
1056 * Grow the translation block allocator with another chunk.
1057 */
1058static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
1059{
1060 /*
1061 * Validate state.
1062 */
1063 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1064 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1065 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1066 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
1067 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1068
1069 /*
1070 * Allocate a new chunk and add it to the allocator.
1071 */
1072 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1073 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1074 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1075
1076 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1077 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1078 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1079 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
1080 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1081 pTbAllocator->cTotalTbs += cTbsPerChunk;
1082 pTbAllocator->iStartHint = idxChunk * cTbsPerChunk;
1083
1084 return VINF_SUCCESS;
1085}
1086
1087
1088/**
1089 * Allocates a TB from an allocator that has a free block available.
1090 *
1091 * This is common code to both the fast and slow allocator code paths.
1092 */
1093DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1094{
1095 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1096
1097 PIEMTB pTb;
1098 if (pTbAllocator->idxTbCacheFree)
1099 pTb = pTbAllocator->apTbFreeCache[--pTbAllocator->idxTbCacheFree];
1100 else
1101 {
1102 int idxTb;
1103 if (pTbAllocator->iStartHint < pTbAllocator->cTotalTbs)
1104 idxTb = ASMBitNextClear(pTbAllocator->bmAllocated,
1105 pTbAllocator->cTotalTbs,
1106 pTbAllocator->iStartHint & ~(uint32_t)63);
1107 else
1108 idxTb = -1;
1109 if (idxTb < 0)
1110 {
1111 idxTb = ASMBitFirstClear(pTbAllocator->bmAllocated, pTbAllocator->cTotalTbs);
1112 AssertLogRelReturn(idxTb >= 0, NULL);
1113 }
1114 Assert((uint32_t)idxTb < pTbAllocator->cTotalTbs);
1115 pTbAllocator->iStartHint = idxTb;
1116 ASMBitSet(pTbAllocator->bmAllocated, idxTb);
1117
1118 /** @todo shift/mask optimization for power of two IEMTB sizes. */
1119 uint32_t const idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTb);
1120 uint32_t const idxTbInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTb, idxChunk);
1121 pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxTbInChunk];
1122 Assert(pTb->idxAllocChunk == idxChunk);
1123 }
1124
1125 pTbAllocator->cInUseTbs += 1;
1126 if (fThreaded)
1127 pTbAllocator->cThreadedTbs += 1;
1128 else
1129 pTbAllocator->cNativeTbs += 1;
1130 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1131 return pTb;
1132}
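
/*
 * The bitmap scan above starts at iStartHint, rounded down to a 64-bit word
 * boundary for ASMBitNextClear(), and only falls back to a full
 * ASMBitFirstClear() scan when nothing is free past the hint.  A portable
 * sketch of the same idea over a plain uint64_t bitmap (illustrative helper,
 * not IPRT):
 *
 * @code
 *  #include <stdint.h>
 *
 *  // Returns the index of a clear bit, preferring bits at/after the hint and
 *  // wrapping to the start of the bitmap; -1 when every bit is set.
 *  static int ExampleFindClearBit(uint64_t const *pbm, uint32_t cBits, uint32_t idxHint)
 *  {
 *      uint32_t const idxStart = idxHint < cBits ? idxHint & ~(uint32_t)63 : 0;
 *      for (uint32_t i = idxStart; i < cBits; i++)
 *          if (!(pbm[i / 64] & (UINT64_C(1) << (i % 64))))
 *              return (int)i;
 *      for (uint32_t i = 0; i < idxStart; i++)
 *          if (!(pbm[i / 64] & (UINT64_C(1) << (i % 64))))
 *              return (int)i;
 *      return -1;
 *  }
 * @endcode
 */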
1133
1134
1135/**
1136 * Slow path for iemTbAllocatorAlloc.
1137 */
1138static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1139{
1140 /*
1141 * With some luck we can add another chunk.
1142 */
1143 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
1144 {
1145 int rc = iemTbAllocatorGrow(pVCpu);
1146 if (RT_SUCCESS(rc))
1147 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1148 }
1149
1150 /*
1151 * We have to prune stuff. Sigh.
1152 *
1153 * This requires scanning for older TBs and kicking them out. Not sure how to
1154 * best do this as we don't want to maintain any list of TBs ordered by last
1155 * usage time. But one reasonably simple approach would be that each time we
1156 * get here we continue a sequential scan of the allocation chunks,
1157 * considering just a smallish number of TBs and freeing a fixed portion of
1158 * them. Say, we consider the next 128 TBs, freeing the least recently used
1159 * out of each group of 4 TBs, resulting in 32 free TBs.
1160 */
1161 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1162 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1163 uint32_t const cTbsToPrune = 128;
1164 uint32_t const cTbsPerGroup = 4;
1165 uint32_t cFreedTbs = 0;
1166#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1167 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1168#else
1169 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1170#endif
1171 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1172 idxTbPruneFrom = 0;
1173 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1174 {
1175 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1176 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1177 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1178 uint32_t cMsAge = msNow - pTb->msLastUsed;
1179 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1180
1181 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1182 {
1183#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1184 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1185 { /* likely */ }
1186 else
1187 {
1188 idxInChunk2 = 0;
1189 idxChunk2 += 1;
1190 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1191 idxChunk2 = 0;
1192 }
1193#endif
1194 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1195 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1196 if ( cMsAge2 > cMsAge
1197 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1198 {
1199 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1200 pTb = pTb2;
1201 idxChunk = idxChunk2;
1202 idxInChunk = idxInChunk2;
1203 cMsAge = cMsAge2;
1204 }
1205 }
1206
1207 /* Free the TB. */
1208 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1209 cFreedTbs++; /* paranoia */
1210 }
1211 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1212 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1213
1214 /* Flush the TB lookup entry pointer. */
1215 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1216
1217 /*
1218 * Allocate a TB from the ones we've pruned.
1219 */
1220 if (cFreedTbs)
1221 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1222 return NULL;
1223}
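
/*
 * A condensed sketch of the eviction policy used in the loop above: within
 * each small group of candidates, pick the entry with the greatest age and,
 * on a tie, the lowest use count.  Illustrative types and names only;
 * iemTbAllocatorFreeupNativeSpace() below uses a variation of this that only
 * considers native TBs.
 *
 * @code
 *  #include <stdint.h>
 *
 *  typedef struct EXAMPLETB { uint32_t msLastUsed; uint32_t cUsed; } EXAMPLETB;
 *
 *  // Returns the index (into paGroup[0..cGroup-1]) of the entry to evict.
 *  static uint32_t ExamplePickVictim(EXAMPLETB const *paGroup, uint32_t cGroup, uint32_t msNow)
 *  {
 *      uint32_t idxVictim = 0;
 *      uint32_t cMsAge    = msNow - paGroup[0].msLastUsed;
 *      for (uint32_t i = 1; i < cGroup; i++)
 *      {
 *          uint32_t const cMsAge2 = msNow - paGroup[i].msLastUsed;
 *          if (   cMsAge2 > cMsAge
 *              || (cMsAge2 == cMsAge && paGroup[i].cUsed < paGroup[idxVictim].cUsed))
 *          {
 *              idxVictim = i;
 *              cMsAge    = cMsAge2;
 *          }
 *      }
 *      return idxVictim;
 *  }
 * @endcode
 */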
1224
1225
1226/**
1227 * Allocate a translation block.
1228 *
1229 * @returns Pointer to block on success, NULL if we're out and unable to
1230 * free up an existing one (very unlikely once implemented).
1231 * @param pVCpu The cross context virtual CPU structure of the calling
1232 * thread.
1233 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1234 * For statistics.
1235 */
1236DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1237{
1238 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1239 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1240
1241 /* Free any pending TBs before we proceed. */
1242 if (!pTbAllocator->pDelayedFreeHead)
1243 { /* probably likely */ }
1244 else
1245 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1246
1247 /* If the allocator is full, take the slow code path. */
1248 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1249 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1250 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1251}
1252
1253
1254/**
1255 * This is called when we're out of space for native TBs.
1256 *
1257 * This uses a variation on the pruning in iemTbAllocatorAllocSlow.
1258 * The difference is that we only prune native TBs and will only free any if
1259 * there are at least two in a group. The conditions under which we're called are
1260 * different - there will probably be free TBs in the table when we're called.
1261 * Therefore we increase the group size and max scan length, though we'll stop
1262 * scanning once we've reached the requested size (@a cNeededInstrs) and freed
1263 * up at least 8 TBs.
1264 */
1265void iemTbAllocatorFreeupNativeSpace(PVMCPUCC pVCpu, uint32_t cNeededInstrs)
1266{
1267 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1268 AssertReturnVoid(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1269
1270 STAM_REL_PROFILE_START(&pTbAllocator->StatPruneNative, a);
1271
1272 /*
1273 * Flush the delayed free list before we start freeing TBs indiscriminately.
1274 */
1275 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1276
1277 /*
1278 * Scan and free TBs.
1279 */
1280 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1281 uint32_t const cTbsToPrune = 128 * 8;
1282 uint32_t const cTbsPerGroup = 4 * 4;
1283 uint32_t cFreedTbs = 0;
1284 uint32_t cMaxInstrs = 0;
1285 uint32_t idxTbPruneFrom = pTbAllocator->iPruneNativeFrom & ~(uint32_t)(cTbsPerGroup - 1);
1286 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1287 {
1288 if (idxTbPruneFrom >= pTbAllocator->cTotalTbs)
1289 idxTbPruneFrom = 0;
1290 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1291 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1292 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1293 uint32_t cMsAge = pTb->fFlags & IEMTB_F_TYPE_NATIVE ? msNow - pTb->msLastUsed : msNow;
1294 uint8_t cNativeTbs = (pTb->fFlags & IEMTB_F_TYPE_NATIVE) != 0;
1295
1296 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1297 {
1298 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1299 { /* likely */ }
1300 else
1301 {
1302 idxInChunk2 = 0;
1303 idxChunk2 += 1;
1304 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1305 idxChunk2 = 0;
1306 }
1307 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1308 if (pTb2->fFlags & IEMTB_F_TYPE_NATIVE)
1309 {
1310 cNativeTbs += 1;
1311 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1312 if ( cMsAge2 > cMsAge
1313 || ( cMsAge2 == cMsAge
1314 && ( pTb2->cUsed < pTb->cUsed
1315 || ( pTb2->cUsed == pTb->cUsed
1316 && pTb2->Native.cInstructions > pTb->Native.cInstructions)))
1317 || !(pTb->fFlags & IEMTB_F_TYPE_NATIVE))
1318 {
1319 pTb = pTb2;
1320 idxChunk = idxChunk2;
1321 idxInChunk = idxInChunk2;
1322 cMsAge = cMsAge2;
1323 }
1324 }
1325 }
1326
1327 /* Free the TB if we found at least two native ones in this group. */
1328 if (cNativeTbs >= 2)
1329 {
1330 cMaxInstrs = RT_MAX(cMaxInstrs, pTb->Native.cInstructions);
1331 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1332 cFreedTbs++;
1333 if (cFreedTbs >= 8 && cMaxInstrs >= cNeededInstrs)
1334 break;
1335 }
1336 }
1337 pTbAllocator->iPruneNativeFrom = idxTbPruneFrom;
1338
1339 STAM_REL_PROFILE_STOP(&pTbAllocator->StatPruneNative, a);
1340}
1341
1342
1343/*********************************************************************************************************************************
1344* Threaded Recompiler Core *
1345*********************************************************************************************************************************/
1346/**
1347 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
1348 * @returns pszBuf.
1349 * @param fFlags The flags.
1350 * @param pszBuf The output buffer.
1351 * @param cbBuf The output buffer size. At least 32 bytes.
1352 */
1353DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
1354{
1355 Assert(cbBuf >= 32);
1356 static RTSTRTUPLE const s_aModes[] =
1357 {
1358 /* [00] = */ { RT_STR_TUPLE("16BIT") },
1359 /* [01] = */ { RT_STR_TUPLE("32BIT") },
1360 /* [02] = */ { RT_STR_TUPLE("!2!") },
1361 /* [03] = */ { RT_STR_TUPLE("!3!") },
1362 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
1363 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
1364 /* [06] = */ { RT_STR_TUPLE("!6!") },
1365 /* [07] = */ { RT_STR_TUPLE("!7!") },
1366 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
1367 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
1368 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
1369 /* [0b] = */ { RT_STR_TUPLE("!b!") },
1370 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
1371 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
1372 /* [0e] = */ { RT_STR_TUPLE("!e!") },
1373 /* [0f] = */ { RT_STR_TUPLE("!f!") },
1374 /* [10] = */ { RT_STR_TUPLE("!10!") },
1375 /* [11] = */ { RT_STR_TUPLE("!11!") },
1376 /* [12] = */ { RT_STR_TUPLE("!12!") },
1377 /* [13] = */ { RT_STR_TUPLE("!13!") },
1378 /* [14] = */ { RT_STR_TUPLE("!14!") },
1379 /* [15] = */ { RT_STR_TUPLE("!15!") },
1380 /* [16] = */ { RT_STR_TUPLE("!16!") },
1381 /* [17] = */ { RT_STR_TUPLE("!17!") },
1382 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
1383 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
1384 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
1385 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
1386 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
1387 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
1388 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
1389 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
1390 };
1391 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
1392 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
1393 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
1394
1395 pszBuf[off++] = ' ';
1396 pszBuf[off++] = 'C';
1397 pszBuf[off++] = 'P';
1398 pszBuf[off++] = 'L';
1399 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
1400 Assert(off < 32);
1401
1402 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
1403
1404 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
1405 {
1406 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
1407 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
1408 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
1409 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
1410 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
1411 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
1412 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
1413 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
1414 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
1415 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
1416 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
1417 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
1418 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
1419 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
1420 };
1421 if (fFlags)
1422 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1423 if (s_aFlags[i].fFlag & fFlags)
1424 {
1425 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
1426 pszBuf[off++] = ' ';
1427 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
1428 off += s_aFlags[i].cchName;
1429 fFlags &= ~s_aFlags[i].fFlag;
1430 if (!fFlags)
1431 break;
1432 }
1433 pszBuf[off] = '\0';
1434
1435 return pszBuf;
1436}
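
/*
 * Typical usage, matching what iemThreadedDisassembleTb() below does with its
 * scratch buffer (which must be at least 32 bytes); LogRel is just one
 * possible sink for the string:
 *
 * @code
 *  char szFlags[128];
 *  LogRel(("TB flags: %s\n", iemTbFlagsToString(pTb->fFlags, szFlags, sizeof(szFlags))));
 * @endcode
 */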
1437
1438
1439/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
1440static DECLCALLBACK(int) iemThreadedDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
1441{
1442 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
1443 pDis->cbCachedInstr += cbMaxRead;
1444 RT_NOREF(cbMinRead);
1445 return VERR_NO_DATA;
1446}
1447
1448
1449/**
1450 * Worker for iemThreadedDisassembleTb.
1451 */
1452static void iemThreadedDumpLookupTable(PCIEMTB pTb, PCDBGFINFOHLP pHlp, unsigned idxFirst, unsigned cEntries,
1453 const char *pszLeadText = " TB Lookup:") RT_NOEXCEPT
1454{
1455 if (idxFirst + cEntries <= pTb->cTbLookupEntries)
1456 {
1457 PIEMTB * const papTbLookup = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxFirst);
1458 pHlp->pfnPrintf(pHlp, "%s", pszLeadText);
1459 for (uint8_t iLookup = 0; iLookup < cEntries; iLookup++)
1460 {
1461 PIEMTB pLookupTb = papTbLookup[iLookup];
1462 if (pLookupTb)
1463 pHlp->pfnPrintf(pHlp, "%c%p (%s)", iLookup ? ',' : ' ', pLookupTb,
1464 (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED ? "threaded"
1465 : (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? "native"
1466 : "invalid");
1467 else
1468 pHlp->pfnPrintf(pHlp, "%cNULL", iLookup ? ',' : ' ');
1469 }
1470 pHlp->pfnPrintf(pHlp, "\n");
1471 }
1472 else
1473 {
1474 pHlp->pfnPrintf(pHlp, " !!Bogus TB lookup info: idxFirst=%#x L %u > cTbLookupEntries=%#x!!\n",
1475 idxFirst, cEntries, pTb->cTbLookupEntries);
1476 AssertMsgFailed(("idxFirst=%#x L %u > cTbLookupEntries=%#x\n", idxFirst, cEntries, pTb->cTbLookupEntries));
1477 }
1478}
1479
1480
1481DECLHIDDEN(void) iemThreadedDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
1482{
1483 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1484
1485 char szDisBuf[512];
1486
1487 /*
1488 * Print TB info.
1489 */
1490 pHlp->pfnPrintf(pHlp,
1491 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u cTbLookupEntries=%u\n"
1492 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
1493 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges, pTb->cTbLookupEntries,
1494 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
1495
1496 /*
1497 * This disassembly is driven by the debug info which follows the native
1498 * code and indicates when it starts with the next guest instructions,
1499 * where labels are and such things.
1500 */
1501 DISSTATE Dis;
1502 PCIEMTHRDEDCALLENTRY const paCalls = pTb->Thrd.paCalls;
1503 uint32_t const cCalls = pTb->Thrd.cCalls;
1504 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
1505 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
1506 : DISCPUMODE_64BIT;
1507 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
1508 uint8_t idxRange = UINT8_MAX;
1509 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
1510 uint32_t offRange = 0;
1511 uint32_t offOpcodes = 0;
1512 uint32_t const cbOpcodes = pTb->cbOpcodes;
1513 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
1514 bool fTbLookupSeen0 = false;
1515
1516 for (uint32_t iCall = 0; iCall < cCalls; iCall++)
1517 {
1518 /*
1519 * New opcode range?
1520 */
1521 if ( idxRange == UINT8_MAX
1522 || idxRange >= cRanges
1523 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
1524 {
1525 idxRange += 1;
1526 if (idxRange < cRanges)
1527 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
1528 else
1529 continue;
1530 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
1531 + (pTb->aRanges[idxRange].idxPhysPage == 0
1532 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1533 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
1534 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
1535 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
1536 pTb->aRanges[idxRange].idxPhysPage);
1537 GCPhysPc += offRange;
1538 }
1539
1540 /*
1541 * Disassemble another guest instruction?
1542 */
1543 if ( paCalls[iCall].offOpcode != offOpcodes
1544 && paCalls[iCall].cbOpcode > 0
1545 && (uint32_t)(cbOpcodes - paCalls[iCall].offOpcode) <= cbOpcodes /* paranoia^2 */ )
1546 {
1547 offOpcodes = paCalls[iCall].offOpcode;
1548 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
1549 uint32_t cbInstr = 1;
1550 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
1551 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
1552 iemThreadedDisasReadBytesDummy, NULL, &Dis, &cbInstr);
1553 if (RT_SUCCESS(rc))
1554 {
1555 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
1556 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
1557 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
1558 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
1559 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
1560 }
1561 else
1562 {
1563 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
1564 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
1565 cbInstr = paCalls[iCall].cbOpcode;
1566 }
1567 GCPhysPc += cbInstr;
1568 offRange += cbInstr;
1569 }
1570
1571 /*
1572 * Dump call details.
1573 */
1574 pHlp->pfnPrintf(pHlp,
1575 " Call #%u to %s (%u args)\n",
1576 iCall, g_apszIemThreadedFunctions[paCalls[iCall].enmFunction],
1577 g_acIemThreadedFunctionUsedArgs[paCalls[iCall].enmFunction]);
1578 if (paCalls[iCall].uTbLookup != 0)
1579 {
1580 uint8_t const idxFirst = IEM_TB_LOOKUP_TAB_GET_IDX(paCalls[iCall].uTbLookup);
1581 fTbLookupSeen0 = idxFirst == 0;
1582 iemThreadedDumpLookupTable(pTb, pHlp, idxFirst, IEM_TB_LOOKUP_TAB_GET_SIZE(paCalls[iCall].uTbLookup));
1583 }
1584
1585 /*
1586 * Snoop fExec.
1587 */
1588 switch (paCalls[iCall].enmFunction)
1589 {
1590 default:
1591 break;
1592 case kIemThreadedFunc_BltIn_CheckMode:
1593 fExec = paCalls[iCall].auParams[0];
1594 break;
1595 }
1596 }
1597
1598 if (!fTbLookupSeen0)
1599 iemThreadedDumpLookupTable(pTb, pHlp, 0, 1, " Fallback TB Lookup:");
1600}
1601
1602
1603
1604/**
1605 * Allocate a translation block for threaded recompilation.
1606 *
1607 * This is allocated with maxed out call table and storage for opcode bytes,
1608 * because it's only supposed to be called once per EMT to allocate the TB
1609 * pointed to by IEMCPU::pThrdCompileTbR3.
1610 *
1611 * @returns Pointer to the translation block on success, NULL on failure.
1612 * @param pVM The cross context virtual machine structure.
1613 * @param pVCpu The cross context virtual CPU structure of the calling
1614 * thread.
1615 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1616 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1617 */
1618static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1619{
1620 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1621 if (pTb)
1622 {
1623 unsigned const cCalls = 256;
1624 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1625 if (pTb->Thrd.paCalls)
1626 {
1627 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1628 if (pTb->pabOpcodes)
1629 {
1630 pVCpu->iem.s.cbOpcodesAllocated = cCalls * 16;
1631 pTb->Thrd.cAllocated = cCalls;
1632 pTb->Thrd.cCalls = 0;
1633 pTb->cbOpcodes = 0;
1634 pTb->pNext = NULL;
1635 pTb->cUsed = 0;
1636 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1637 pTb->idxAllocChunk = UINT8_MAX;
1638 pTb->GCPhysPc = GCPhysPc;
1639 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1640 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1641 pTb->cInstructions = 0;
1642 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1643
1644 /* Init the first opcode range. */
1645 pTb->cRanges = 1;
1646 pTb->aRanges[0].cbOpcodes = 0;
1647 pTb->aRanges[0].offOpcodes = 0;
1648 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1649 pTb->aRanges[0].u2Unused = 0;
1650 pTb->aRanges[0].idxPhysPage = 0;
1651 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1652 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
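/* Note: idxPhysPage 0 refers to the page containing GCPhysPc itself, while
   values 1 and 2 refer to aGCPhysPages[0] and aGCPhysPages[1] respectively
   (see the range setup in iemThreadedCompileBeginEmitCallsComplications). */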
1653
1654 return pTb;
1655 }
1656 RTMemFree(pTb->Thrd.paCalls);
1657 }
1658 RTMemFree(pTb);
1659 }
1660 RT_NOREF(pVM);
1661 return NULL;
1662}
1663
1664
1665/**
1666 * Called on the TB that is dedicated for recompilation before it's reused.
1667 *
1668 * @param pVCpu The cross context virtual CPU structure of the calling
1669 * thread.
1670 * @param pTb The translation block to reuse.
1671 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1672 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1673 */
1674static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1675{
1676 pTb->GCPhysPc = GCPhysPc;
1677 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1678 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1679 pTb->Thrd.cCalls = 0;
1680 pTb->cbOpcodes = 0;
1681 pTb->cInstructions = 0;
1682 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1683
1684 /* Init the first opcode range. */
1685 pTb->cRanges = 1;
1686 pTb->aRanges[0].cbOpcodes = 0;
1687 pTb->aRanges[0].offOpcodes = 0;
1688 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1689 pTb->aRanges[0].u2Unused = 0;
1690 pTb->aRanges[0].idxPhysPage = 0;
1691 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1692 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1693}
1694
1695
1696/**
1697 * Used to duplicate a threaded translation block after recompilation is done.
1698 *
1699 * @returns Pointer to the translation block on success, NULL on failure.
1700 * @param pVM The cross context virtual machine structure.
1701 * @param pVCpu The cross context virtual CPU structure of the calling
1702 * thread.
1703 * @param pTbSrc The TB to duplicate.
1704 */
1705static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1706{
1707 /*
1708 * Just using the heap for now. Will make this more efficient and
1709 * complicated later, don't worry. :-)
1710 */
1711 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1712 if (pTb)
1713 {
1714 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1715 memcpy(pTb, pTbSrc, sizeof(*pTb));
1716 pTb->idxAllocChunk = idxAllocChunk;
1717
1718 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1719 Assert(cCalls > 0);
1720 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1721 if (pTb->Thrd.paCalls)
1722 {
1723 size_t const cbTbLookup = pTbSrc->cTbLookupEntries * sizeof(PIEMTB);
1724 Assert(cbTbLookup > 0);
1725 size_t const cbOpcodes = pTbSrc->cbOpcodes;
1726 Assert(cbOpcodes > 0);
1727 size_t const cbBoth = cbTbLookup + RT_ALIGN_Z(cbOpcodes, sizeof(PIEMTB));
1728 uint8_t * const pbBoth = (uint8_t *)RTMemAlloc(cbBoth);
1729 if (pbBoth)
1730 {
1731 RT_BZERO(pbBoth, cbTbLookup);
1732 pTb->pabOpcodes = (uint8_t *)memcpy(&pbBoth[cbTbLookup], pTbSrc->pabOpcodes, cbOpcodes);
1733 pTb->Thrd.cAllocated = cCalls;
1734 pTb->pNext = NULL;
1735 pTb->cUsed = 0;
1736 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1737 pTb->fFlags = pTbSrc->fFlags;
1738
1739 return pTb;
1740 }
1741 RTMemFree(pTb->Thrd.paCalls);
1742 }
1743 iemTbAllocatorFree(pVCpu, pTb);
1744 }
1745 RT_NOREF(pVM);
1746 return NULL;
1747
1748}
1749
1750
1751/**
1752 * Adds the given TB to the hash table.
1753 *
1754 * @param pVCpu The cross context virtual CPU structure of the calling
1755 * thread.
1756 * @param pTbCache The cache to add it to.
1757 * @param pTb The translation block to add.
1758 */
1759static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1760{
1761 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1762
1763 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbInstr, pTb->cInstructions);
1764 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbLookupEntries, pTb->cTbLookupEntries);
1765 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1766 if (LogIs12Enabled())
1767 {
1768 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1769 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1770 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1771 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1772 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1773 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1774 pTb->aRanges[idxRange].idxPhysPage == 0
1775 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1776 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1777 }
1778}
1779
1780
1781/**
1782 * Called by opcode verifier functions when they detect a problem.
1783 */
1784void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1785{
1786 /* We cannot always free the current TB (this is what fSafeToFree indicates) because:
1787 - A threaded TB will have its current call entry accessed
1788 to update pVCpu->iem.s.cInstructions.
1789 - A native TB will have code left to execute. */
1790 if (fSafeToFree)
1791 iemTbAllocatorFree(pVCpu, pTb);
1792 else
1793 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1794}
1795
1796
1797/*
1798 * Real code.
1799 */
1800
1801#ifdef LOG_ENABLED
1802/**
1803 * Logs the current instruction.
1804 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1805 * @param pszFunction The IEM function doing the execution.
1806 * @param idxInstr The instruction number in the block.
1807 */
1808static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction, uint32_t idxInstr) RT_NOEXCEPT
1809{
1810# ifdef IN_RING3
1811 if (LogIs2Enabled())
1812 {
1813 char szInstr[256];
1814 uint32_t cbInstr = 0;
1815 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
1816 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
1817 szInstr, sizeof(szInstr), &cbInstr);
1818
1819 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
1820 Log2(("**** %s fExec=%x pTb=%p cUsed=%u #%u\n"
1821 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
1822 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
1823 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
1824 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
1825 " %s\n"
1826 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3, pVCpu->iem.s.pCurTbR3 ? pVCpu->iem.s.pCurTbR3->cUsed : 0, idxInstr,
1827 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
1828 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
1829 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
1830 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
1831 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
1832 szInstr));
1833
1834 /*if (LogIs3Enabled()) - this outputs an insane amount of stuff, so disabled.
1835 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL); */
1836 }
1837 else
1838# endif
1839 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
1840 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
1841}
1842#endif /* LOG_ENABLED */
1843
1844
1845#if 0
1846static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1847{
1848 RT_NOREF(pVM, pVCpu);
1849 return rcStrict;
1850}
1851#endif
1852
1853
1854/**
1855 * Initializes the decoder state when compiling TBs.
1856 *
1857 * This presumes that fExec has already been initialized.
1858 *
1859 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so fixes
1860 * made here may need to be applied to them as well.
1861 *
1862 * @param pVCpu The cross context virtual CPU structure of the calling
1863 * thread.
1864 * @param fReInit Clear for the first call for a TB, set for subsequent
1865 * calls from inside the compile loop where we can skip a
1866 * couple of things.
1867 * @param fExtraFlags The extra translation block flags when @a fReInit is
1868 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
1869 * checked.
1870 */
1871DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1872{
1873 /* ASSUMES: That iemInitExec was already called and that anyone changing
1874 CPU state affecting the fExec bits since then will have updated fExec! */
1875 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1876 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1877
1878 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1879
1880 /* Decoder state: */
1881 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1882 pVCpu->iem.s.enmEffAddrMode = enmMode;
1883 if (enmMode != IEMMODE_64BIT)
1884 {
1885 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1886 pVCpu->iem.s.enmEffOpSize = enmMode;
1887 }
1888 else
1889 {
1890 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1891 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1892 }
1893 pVCpu->iem.s.fPrefixes = 0;
1894 pVCpu->iem.s.uRexReg = 0;
1895 pVCpu->iem.s.uRexB = 0;
1896 pVCpu->iem.s.uRexIndex = 0;
1897 pVCpu->iem.s.idxPrefix = 0;
1898 pVCpu->iem.s.uVex3rdReg = 0;
1899 pVCpu->iem.s.uVexLength = 0;
1900 pVCpu->iem.s.fEvexStuff = 0;
1901 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
1902 pVCpu->iem.s.offModRm = 0;
1903 pVCpu->iem.s.iNextMapping = 0;
1904
1905 if (!fReInit)
1906 {
1907 pVCpu->iem.s.cActiveMappings = 0;
1908 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
1909 pVCpu->iem.s.fEndTb = false;
1910 pVCpu->iem.s.fTbCheckOpcodes = true; /* (check opcodes before executing the first instruction) */
1911 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1912 pVCpu->iem.s.fTbCrossedPage = false;
1913 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
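/* Note: 32 is the regular interval between emitted IRQ checks; an interrupt
   shadow forces a check right after the very first instruction instead
   (see iemThreadedCompileCheckIrqAfter). */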
1914 pVCpu->iem.s.fTbCurInstrIsSti = false;
1915 /* Force RF clearing and TF checking on first instruction in the block
1916 as we don't really know what came before and should assume the worst: */
1917 pVCpu->iem.s.fTbPrevInstr = IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_END_TB;
1918 }
1919 else
1920 {
1921 Assert(pVCpu->iem.s.cActiveMappings == 0);
1922 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
1923 Assert(pVCpu->iem.s.fEndTb == false);
1924 Assert(pVCpu->iem.s.fTbCrossedPage == false);
1925 pVCpu->iem.s.fTbPrevInstr = pVCpu->iem.s.fTbCurInstr;
1926 }
1927 pVCpu->iem.s.fTbCurInstr = 0;
1928
1929#ifdef DBGFTRACE_ENABLED
1930 switch (IEM_GET_CPU_MODE(pVCpu))
1931 {
1932 case IEMMODE_64BIT:
1933 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
1934 break;
1935 case IEMMODE_32BIT:
1936 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1937 break;
1938 case IEMMODE_16BIT:
1939 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1940 break;
1941 }
1942#endif
1943}
1944
1945
1946/**
1947 * Initializes the opcode fetcher when starting the compilation.
1948 *
1949 * @param pVCpu The cross context virtual CPU structure of the calling
1950 * thread.
1951 */
1952DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
1953{
1954 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
1955#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1956 pVCpu->iem.s.offOpcode = 0;
1957#else
1958 RT_NOREF(pVCpu);
1959#endif
1960}
1961
1962
1963/**
1964 * Re-initializes the opcode fetcher between instructions while compiling.
1965 *
1966 * @param pVCpu The cross context virtual CPU structure of the calling
1967 * thread.
1968 */
1969DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
1970{
1971 if (pVCpu->iem.s.pbInstrBuf)
1972 {
1973 uint64_t off = pVCpu->cpum.GstCtx.rip;
1974 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
1975 off += pVCpu->cpum.GstCtx.cs.u64Base;
1976 off -= pVCpu->iem.s.uInstrBufPc;
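/* 'off' is now the effective PC (RIP + CS base) relative to the start of the
   current instruction buffer; thanks to unsigned wrap-around, the single
   comparison below also rejects PCs below uInstrBufPc. */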
1977 if (off < pVCpu->iem.s.cbInstrBufTotal)
1978 {
1979 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
1980 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
1981 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
1982 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
1983 else
1984 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
1985 }
1986 else
1987 {
1988 pVCpu->iem.s.pbInstrBuf = NULL;
1989 pVCpu->iem.s.offInstrNextByte = 0;
1990 pVCpu->iem.s.offCurInstrStart = 0;
1991 pVCpu->iem.s.cbInstrBuf = 0;
1992 pVCpu->iem.s.cbInstrBufTotal = 0;
1993 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1994 }
1995 }
1996 else
1997 {
1998 pVCpu->iem.s.offInstrNextByte = 0;
1999 pVCpu->iem.s.offCurInstrStart = 0;
2000 pVCpu->iem.s.cbInstrBuf = 0;
2001 pVCpu->iem.s.cbInstrBufTotal = 0;
2002#ifdef VBOX_STRICT
2003 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
2004#endif
2005 }
2006#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
2007 pVCpu->iem.s.offOpcode = 0;
2008#endif
2009}
2010
2011#ifdef LOG_ENABLED
2012
2013/**
2014 * Inserts a NOP call.
2015 *
2016 * This is for debugging.
2017 *
2018 * @returns true on success, false if we're out of call entries.
2019 * @param pTb The translation block being compiled.
2020 */
2021bool iemThreadedCompileEmitNop(PIEMTB pTb)
2022{
2023 /* Emit the call. */
2024 uint32_t const idxCall = pTb->Thrd.cCalls;
2025 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2026 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2027 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2028 pCall->enmFunction = kIemThreadedFunc_BltIn_Nop;
2029 pCall->idxInstr = pTb->cInstructions - 1;
2030 pCall->cbOpcode = 0;
2031 pCall->offOpcode = 0;
2032 pCall->uTbLookup = 0;
2033 pCall->uUnused0 = 0;
2034 pCall->auParams[0] = 0;
2035 pCall->auParams[1] = 0;
2036 pCall->auParams[2] = 0;
2037 return true;
2038}
2039
2040
2041/**
2042 * Called by iemThreadedCompile if cpu state logging is desired.
2043 *
2044 * @returns true on success, false if we're out of call entries.
2045 * @param pTb The translation block being compiled.
2046 */
2047bool iemThreadedCompileEmitLogCpuState(PIEMTB pTb)
2048{
2049 /* Emit the call. */
2050 uint32_t const idxCall = pTb->Thrd.cCalls;
2051 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2052 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2053 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2054 pCall->enmFunction = kIemThreadedFunc_BltIn_LogCpuState;
2055 pCall->idxInstr = pTb->cInstructions - 1;
2056 pCall->cbOpcode = 0;
2057 pCall->offOpcode = 0;
2058 pCall->uTbLookup = 0;
2059 pCall->uUnused0 = 0;
2060 pCall->auParams[0] = RT_MAKE_U16(pCall->idxInstr, idxCall); /* currently not used, but whatever */
2061 pCall->auParams[1] = 0;
2062 pCall->auParams[2] = 0;
2063 return true;
2064}
2065
2066#endif /* LOG_ENABLED */
2067
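/**
 * Copies the (at most 15) opcode bytes of the current instruction from the
 * decoder's abOpcode buffer to the given destination (the TB opcode storage
 * at the call sites).
 *
 * The fall-through switch is simply an unrolled byte copy; 15 is the
 * architectural maximum x86 instruction length.
 */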
2068DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
2069{
2070 switch (cbInstr)
2071 {
2072 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
2073 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
2074 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
2075 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
2076 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
2077 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
2078 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
2079 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
2080 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
2081 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
2082 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
2083 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
2084 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
2085 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
2086 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
2087 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
2088 }
2089}
2090
2091
2092/**
2093 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
2094 *
2095 * - CS LIM check required.
2096 * - Must recheck opcode bytes.
2097 * - Previous instruction branched.
2098 * - TLB load detected, probably due to page crossing.
2099 *
2100 * @returns true if everything went well, false if we're out of space in the TB
2101 * (e.g. opcode ranges) or needs to start doing CS.LIM checks.
2102 * @param pVCpu The cross context virtual CPU structure of the calling
2103 * thread.
2104 * @param pTb The translation block being compiled.
2105 */
2106bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
2107{
2108 Log6(("%04x:%08RX64: iemThreadedCompileBeginEmitCallsComplications\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2109 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
2110#if 0
2111 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
2112 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
2113#endif
2114
2115 /*
2116 * If we're not in 64-bit mode and not already checking CS.LIM, we need to
2117 * determine whether we have to start checking.
2118 */
2119 bool fConsiderCsLimChecking;
2120 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
2121 if ( fMode == IEM_F_MODE_X86_64BIT
2122 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
2123 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
2124 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
2125 fConsiderCsLimChecking = false; /* already enabled or not needed */
2126 else
2127 {
2128 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2129 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2130 fConsiderCsLimChecking = true; /* likely */
2131 else
2132 {
2133 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
2134 return false;
2135 }
2136 }
2137
2138 /*
2139 * Prepare the call now, even before we know if we can accept the instruction in this TB.
2140 * This allows us to amend parameters w/o making every case suffer.
2141 */
2142 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
2143 uint16_t const offOpcode = pTb->cbOpcodes;
2144 uint8_t idxRange = pTb->cRanges - 1;
2145
2146 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
2147 pCall->idxInstr = pTb->cInstructions;
2148 pCall->cbOpcode = cbInstr;
2149 pCall->offOpcode = offOpcode;
2150 pCall->uTbLookup = 0;
2151 pCall->uUnused0 = 0;
2152 pCall->auParams[0] = (uint32_t)cbInstr
2153 | (uint32_t)(pVCpu->iem.s.fExec << 8) /* liveness: Enough of fExec for IEM_F_MODE_X86_IS_FLAT. */
2154 /* The upper dword is sometimes used for cbStartPage. */;
2155 pCall->auParams[1] = idxRange;
2156 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
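/* Rough layout of auParams[0] as set up above and amended below: bits 0..7
   hold cbInstr, bits 8..31 hold the low bits of fExec (enough for
   IEM_F_MODE_X86_IS_FLAT), and bits 32..39 may later receive cbStartPage in
   the page-crossing cases. */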
2157
2158/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
2159 * gotten onto. If we do, stop */
2160
2161 /*
2162 * Case 1: We've branched (RIP changed).
2163 *
2164 * Loop check: If the new PC (GCPhysPC) is within an opcode range of this
2165 * TB, end the TB here as it is most likely a loop and if it
2166 * made sense to unroll it, the guest code compiler should've
2167 * done it already.
2168 *
2169 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
2170 * Req: 1 extra range, no extra phys.
2171 *
2172 * Sub-case 1b: Different page but no page boundary crossing, so TLB load
2173 * necessary (fTbCrossedPage is true).
2174 * Req: 1 extra range, probably 1 extra phys page entry.
2175 *
2176 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
2177 * but in addition we cross into the following page and require
2178 * another TLB load.
2179 * Req: 2 extra ranges, probably 2 extra phys page entries.
2180 *
2181 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
2182 * the following page (thus fTbCrossedPage is true).
2183 * Req: 2 extra ranges, probably 1 extra phys page entry.
2184 *
2185 * Note! The setting of fTbCrossedPage is done by iemOpcodeFetchBytesJmp, but
2186 * it may trigger "spuriously" from the CPU's point of view because of
2187 * physical page changes that'll invalidate the physical TLB and trigger a
2188 * call to the function. In theory this shouldn't be a big deal, just a
2189 * bit of a performance loss as we'll pick the LoadingTlb variants.
2190 *
2191 * Note! We do not currently optimize branching to the next instruction (sorry
2192 * 32-bit PIC code). We could maybe do that in the branching code that
2193 * sets (or not) fTbBranched.
2194 */
2195 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
2196 * variant in win 3.1 code and the call variant in 32-bit linux PIC
2197 * code. This'll require filtering out far jmps and calls, as they
2198 * load CS which should technically be considered indirect since the
2199 * GDT/LDT entry's base address can be modified independently from
2200 * the code. */
2201 if (pVCpu->iem.s.fTbBranched != IEMBRANCHED_F_NO)
2202 {
2203 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
2204 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
2205 {
2206 /* 1a + 1b - instruction fully within the branched to page. */
2207 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
2208 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
2209
2210 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
2211 {
2212 /* Check that we've got a free range. */
2213 idxRange += 1;
2214 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2215 { /* likely */ }
2216 else
2217 {
2218 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2219 return false;
2220 }
2221 pCall->auParams[1] = idxRange;
2222 pCall->auParams[2] = 0;
2223
2224 /* Check that we've got a free page slot. */
2225 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2226 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2227 uint8_t idxPhysPage;
2228 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2229 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 0;
2230 else if (pTb->aGCPhysPages[0] == NIL_RTGCPHYS)
2231 {
2232 pTb->aGCPhysPages[0] = GCPhysNew;
2233 pTb->aRanges[idxRange].idxPhysPage = 1;
2234 idxPhysPage = UINT8_MAX;
2235 }
2236 else if (pTb->aGCPhysPages[0] == GCPhysNew)
2237 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 1;
2238 else if (pTb->aGCPhysPages[1] == NIL_RTGCPHYS)
2239 {
2240 pTb->aGCPhysPages[1] = GCPhysNew;
2241 pTb->aRanges[idxRange].idxPhysPage = 2;
2242 idxPhysPage = UINT8_MAX;
2243 }
2244 else if (pTb->aGCPhysPages[1] == GCPhysNew)
2245 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 2;
2246 else
2247 {
2248 Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2249 return false;
2250 }
2251
2252 /* Loop check: We weave the loop check in here to optimize the lookup. */
2253 if (idxPhysPage != UINT8_MAX)
2254 {
2255 uint32_t const offPhysPc = pVCpu->iem.s.offCurInstrStart;
2256 for (uint8_t idxLoopRange = 0; idxLoopRange < idxRange; idxLoopRange++)
2257 if ( pTb->aRanges[idxLoopRange].idxPhysPage == idxPhysPage
2258 && offPhysPc - (uint32_t)pTb->aRanges[idxLoopRange].offPhysPage
2259 < (uint32_t)pTb->aRanges[idxLoopRange].cbOpcodes)
2260 {
2261 Log8(("%04x:%08RX64: loop detected after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2262 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbLoopInTbDetected);
2263 return false;
2264 }
2265 }
2266
2267 /* Finish setting up the new range. */
2268 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2269 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2270 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2271 pTb->aRanges[idxRange].u2Unused = 0;
2272 pTb->cRanges++;
2273 Log6(("%04x:%08RX64: new range #%u same page: offPhysPage=%#x offOpcodes=%#x\n",
2274 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].offPhysPage,
2275 pTb->aRanges[idxRange].offOpcodes));
2276 }
2277 else
2278 {
2279 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2280 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2281 }
2282
2283 /* Determine which function we need to load & check.
2284 Note! For jumps to a new page, we'll set both fTbBranched and
2285 fTbCrossedPage to avoid unnecessary TLB work for intra
2286 page branching */
2287 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
2288 || pVCpu->iem.s.fTbCrossedPage)
2289 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2290 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
2291 : !fConsiderCsLimChecking
2292 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
2293 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
2294 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
2295 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2296 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
2297 : !fConsiderCsLimChecking
2298 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
2299 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
2300 else
2301 {
2302 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
2303 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2304 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2305 : !fConsiderCsLimChecking
2306 ? kIemThreadedFunc_BltIn_CheckOpcodes
2307 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
2308 }
2309 }
2310 else
2311 {
2312 /* 1c + 1d - instruction crosses pages. */
2313 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2314 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2315
2316 /* Lazy bird: Check that this isn't case 1c, since we've already
2317 loaded the first physical address. End the TB and
2318 make it a case 2b instead.
2319
2320 Hmm. Too much bother to detect, so just do the same
2321 with case 1d as well. */
2322#if 0 /** @todo get back to this later when we've got the actual branch code in
2323 * place. */
2324 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2325
2326 /* Check that we've got two free ranges. */
2327 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
2328 { /* likely */ }
2329 else
2330 return false;
2331 idxRange += 1;
2332 pCall->auParams[1] = idxRange;
2333 pCall->auParams[2] = 0;
2334
2335 /* ... */
2336
2337#else
2338 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2339 return false;
2340#endif
2341 }
2342 }
2343
2344 /*
2345 * Case 2: Page crossing.
2346 *
2347 * Sub-case 2a: The instruction starts on the first byte in the next page.
2348 *
2349 * Sub-case 2b: The instruction has opcode bytes in both the current and
2350 * following page.
2351 *
2352 * Both cases requires a new range table entry and probably a new physical
2353 * page entry. The difference is in which functions to emit and whether to
2354 * add bytes to the current range.
2355 */
2356 else if (pVCpu->iem.s.fTbCrossedPage)
2357 {
2358 /* Check that we've got a free range. */
2359 idxRange += 1;
2360 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2361 { /* likely */ }
2362 else
2363 {
2364 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2365 return false;
2366 }
2367
2368 /* Check that we've got a free page slot. */
2369 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2370 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2371 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2372 pTb->aRanges[idxRange].idxPhysPage = 0;
2373 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
2374 || pTb->aGCPhysPages[0] == GCPhysNew)
2375 {
2376 pTb->aGCPhysPages[0] = GCPhysNew;
2377 pTb->aRanges[idxRange].idxPhysPage = 1;
2378 }
2379 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
2380 || pTb->aGCPhysPages[1] == GCPhysNew)
2381 {
2382 pTb->aGCPhysPages[1] = GCPhysNew;
2383 pTb->aRanges[idxRange].idxPhysPage = 2;
2384 }
2385 else
2386 {
2387 Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2388 return false;
2389 }
2390
2391 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
2392 {
2393 Assert(pVCpu->iem.s.offCurInstrStart == 0);
2394 pCall->auParams[1] = idxRange;
2395 pCall->auParams[2] = 0;
2396
2397 /* Finish setting up the new range. */
2398 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2399 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2400 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2401 pTb->aRanges[idxRange].u2Unused = 0;
2402 pTb->cRanges++;
2403 Log6(("%04x:%08RX64: new range #%u new page (a) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2404 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2405 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2406
2407 /* Determine which function we need to load & check. */
2408 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2409 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
2410 : !fConsiderCsLimChecking
2411 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
2412 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
2413 }
2414 else
2415 {
2416 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2417 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2418 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2419 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
2420
2421 /* We're good. Split the instruction over the old and new range table entries. */
2422 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
2423
2424 pTb->aRanges[idxRange].offPhysPage = 0;
2425 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
2426 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
2427 pTb->aRanges[idxRange].u2Unused = 0;
2428 pTb->cRanges++;
2429 Log6(("%04x:%08RX64: new range #%u new page (b) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2430 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2431 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2432
2433 /* Determine which function we need to load & check. */
2434 if (pVCpu->iem.s.fTbCheckOpcodes)
2435 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2436 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
2437 : !fConsiderCsLimChecking
2438 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
2439 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
2440 else
2441 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2442 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
2443 : !fConsiderCsLimChecking
2444 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
2445 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
2446 }
2447 }
2448
2449 /*
2450 * Regular case: No new range required.
2451 */
2452 else
2453 {
2454 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
2455 if (pVCpu->iem.s.fTbCheckOpcodes)
2456 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2457 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2458 : kIemThreadedFunc_BltIn_CheckOpcodes;
2459 else
2460 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
2461
2462 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2463 pTb->cbOpcodes = offOpcode + cbInstr;
2464 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2465 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2466 }
2467
2468 /*
2469 * Commit the call.
2470 */
2471 pTb->Thrd.cCalls++;
2472
2473 /*
2474 * Clear state.
2475 */
2476 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2477 pVCpu->iem.s.fTbCrossedPage = false;
2478 pVCpu->iem.s.fTbCheckOpcodes = false;
2479
2480 /*
2481 * Copy opcode bytes.
2482 */
2483 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2484 pTb->cbOpcodes = offOpcode + cbInstr;
2485 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2486
2487 return true;
2488}
2489
2490
2491/**
2492 * Worker for iemThreadedCompileBeginEmitCallsComplications and
2493 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
2494 *
2495 * @returns true if anything is pending, false if not.
2496 * @param pVCpu The cross context virtual CPU structure of the calling
2497 * thread.
2498 */
2499DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
2500{
2501 uint64_t fCpu = pVCpu->fLocalForcedActions;
2502 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
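/* Something is considered pending if an NMI/SMI force flag is set, or if an
   APIC/PIC interrupt is flagged while IF=1 and we're not in an interrupt
   shadow (which is what the condition below expresses). */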
2503#if 1
2504 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
2505 if (RT_LIKELY( !fCpu
2506 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2507 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2508 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
2509 return false;
2510 return true;
2511#else
2512 return false;
2513#endif
2514
2515}
2516
2517
2518/**
2519 * Called by iemThreadedCompile when a block requires a mode check.
2520 *
2521 * @returns true if we should continue, false if we're out of call entries.
2522 * @param pVCpu The cross context virtual CPU structure of the calling
2523 * thread.
2524 * @param pTb The translation block being compiled.
2525 */
2526static bool iemThreadedCompileEmitCheckMode(PVMCPUCC pVCpu, PIEMTB pTb)
2527{
2528 /* Emit the call. */
2529 uint32_t const idxCall = pTb->Thrd.cCalls;
2530 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2531 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2532 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2533 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckMode;
2534 pCall->idxInstr = pTb->cInstructions - 1;
2535 pCall->cbOpcode = 0;
2536 pCall->offOpcode = 0;
2537 pCall->uTbLookup = 0;
2538 pCall->uUnused0 = 0;
2539 pCall->auParams[0] = pVCpu->iem.s.fExec;
2540 pCall->auParams[1] = 0;
2541 pCall->auParams[2] = 0;
2542 LogFunc(("%04x:%08RX64 fExec=%#x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->iem.s.fExec));
2543 return true;
2544}
2545
2546
2547/**
2548 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
2549 * set.
2550 *
2551 * @returns true if we should continue, false if an IRQ is deliverable or a
2552 * relevant force flag is pending.
2553 * @param pVCpu The cross context virtual CPU structure of the calling
2554 * thread.
2555 * @param pTb The translation block being compiled.
2556 * @sa iemThreadedCompileCheckIrq
2557 */
2558bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
2559{
2560 /*
2561 * Skip this if we've already emitted an IRQ check call after the previous instruction
2562 * or if it's the first call, as we're always checking FFs between blocks.
2563 */
2564 uint32_t const idxCall = pTb->Thrd.cCalls;
2565 if ( idxCall > 0
2566 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2567 {
2568 /* Emit the call. */
2569 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2570 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2571 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2572 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2573 pCall->idxInstr = pTb->cInstructions;
2574 pCall->offOpcode = 0;
2575 pCall->cbOpcode = 0;
2576 pCall->uTbLookup = 0;
2577 pCall->uUnused0 = 0;
2578 pCall->auParams[0] = 0;
2579 pCall->auParams[1] = 0;
2580 pCall->auParams[2] = 0;
2581 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2582
2583 /* Reset the IRQ check value. */
2584 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2585
2586 /*
2587 * Check for deliverable IRQs and pending force flags.
2588 */
2589 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2590 }
2591 return true; /* continue */
2592}
2593
2594
2595/**
2596 * Emits an IRQ check call and checks for pending IRQs.
2597 *
2598 * @returns true if we should continue, false if an IRQ is deliverable or a
2599 * relevant force flag is pending.
2600 * @param pVCpu The cross context virtual CPU structure of the calling
2601 * thread.
2602 * @param pTb The translation block.
2603 * @sa iemThreadedCompileBeginEmitCallsComplications
2604 */
2605static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2606{
2607 /* Check again in a little bit, unless it is immediately following an STI
2608 in which case we *must* check immediately after the next instruction
2609 as well in case it's executed with interrupt inhibition. We could
2610 otherwise miss the interrupt window. See the irq2 wait2 variant in
2611 bs3-timers-1 which is doing sti + sti + cli. */
2612 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2613 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2614 else
2615 {
2616 pVCpu->iem.s.fTbCurInstrIsSti = false;
2617 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2618 }
2619 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2620
2621 /*
2622 * Emit the call.
2623 */
2624 AssertReturn(pTb->Thrd.cCalls < pTb->Thrd.cAllocated, false);
2625 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++];
2626 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2627 pCall->idxInstr = pTb->cInstructions;
2628 pCall->offOpcode = 0;
2629 pCall->cbOpcode = 0;
2630 pCall->uTbLookup = 0;
2631 pCall->uUnused0 = 0;
2632 pCall->auParams[0] = 0;
2633 pCall->auParams[1] = 0;
2634 pCall->auParams[2] = 0;
2635
2636 /*
2637 * Check for deliverable IRQs and pending force flags.
2638 */
2639 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2640}
2641
2642
2643/**
2644 * Compiles a new TB and executes it.
2645 *
2646 * We combine compilation and execution here as it makes for simpler code flow
2647 * in the main loop and it allows interpreting while compiling if we want to
2648 * explore that option.
2649 *
2650 * @returns Strict VBox status code.
2651 * @param pVM The cross context virtual machine structure.
2652 * @param pVCpu The cross context virtual CPU structure of the calling
2653 * thread.
2654 * @param GCPhysPc The physical address corresponding to the current
2655 * RIP+CS.BASE.
2656 * @param fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
2657 * IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
2658 */
2659static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2660{
2661 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2662 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2663
2664 /*
2665 * Get the TB we use for the recompiling. This is a maxed-out TB so
2666 * that we'll make a more efficient copy of it when we're done compiling.
2667 */
2668 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2669 if (pTb)
2670 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2671 else
2672 {
2673 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2674 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2675 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2676 }
2677
2678 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2679 functions may get at it. */
2680 pVCpu->iem.s.pCurTbR3 = pTb;
2681
2682#if 0
2683 /* Make sure the CheckIrq condition matches the one in EM. */
2684 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2685 const uint32_t cZeroCalls = 1;
2686#else
2687 const uint32_t cZeroCalls = 0;
2688#endif
2689
2690 /*
2691 * Now for the recompilation. (This mimics IEMExecLots in many ways.)
2692 */
2693 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2694 iemThreadedCompileInitOpcodeFetching(pVCpu);
2695 VBOXSTRICTRC rcStrict;
2696 for (;;)
2697 {
2698 /* Process the next instruction. */
2699#ifdef LOG_ENABLED
2700 iemThreadedLogCurInstr(pVCpu, "CC", pTb->cInstructions);
2701 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2702 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2703 Assert(uCsLog != 0 || uRipLog > 0x400 || !IEM_IS_REAL_OR_V86_MODE(pVCpu)); /* Detect executing RM interrupt table. */
2704#endif
2705 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2706 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2707
2708 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2709#if 0
2710 for (unsigned i = cCallsPrev; i < pTb->Thrd.cCalls; i++)
2711 Log8(("-> %#u/%u - %d %s\n", i, pTb->Thrd.paCalls[i].idxInstr, pTb->Thrd.paCalls[i].enmFunction,
2712 g_apszIemThreadedFunctions[pTb->Thrd.paCalls[i].enmFunction]));
2713#endif
2714 if ( rcStrict == VINF_SUCCESS
2715 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2716 && !pVCpu->iem.s.fEndTb)
2717 {
2718 Assert(pTb->Thrd.cCalls > cCallsPrev);
2719 Assert(pTb->Thrd.cCalls - cCallsPrev < 5);
2720
2721 pVCpu->iem.s.cInstructions++;
2722
2723 /* Check for mode change _after_ certain CIMPL calls, so check that
2724 we continue executing with the same mode value. */
2725 if (!(pVCpu->iem.s.fTbCurInstr & (IEM_CIMPL_F_MODE | IEM_CIMPL_F_XCPT | IEM_CIMPL_F_VMEXIT)))
2726 { /* probable */ }
2727 else if (RT_LIKELY(iemThreadedCompileEmitCheckMode(pVCpu, pTb)))
2728 { /* extremely likely */ }
2729 else
2730 break;
2731
2732#if defined(LOG_ENABLED) && 0 /* for debugging */
2733 //iemThreadedCompileEmitNop(pTb);
2734 iemThreadedCompileEmitLogCpuState(pTb);
2735#endif
2736 }
2737 else
2738 {
2739 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2740 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2741 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2742 rcStrict = VINF_SUCCESS;
2743
2744 if (pTb->Thrd.cCalls > cZeroCalls)
2745 {
2746 if (cCallsPrev != pTb->Thrd.cCalls)
2747 pVCpu->iem.s.cInstructions++;
2748 break;
2749 }
2750
2751 pVCpu->iem.s.pCurTbR3 = NULL;
2752 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2753 }
2754
2755 /* Check for IRQs? */
2756 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2757 pVCpu->iem.s.cInstrTillIrqCheck--;
2758 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2759 break;
2760
2761 /* Still space in the TB? */
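/* (That is: several free call entries, at least 16 more opcode bytes, and
   a TB lookup table that isn't full.) */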
2762 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2763 && pTb->cbOpcodes + 16 <= pVCpu->iem.s.cbOpcodesAllocated
2764 && pTb->cTbLookupEntries < 127)
2765 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2766 else
2767 {
2768 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes, %u TB lookup entries - full\n",
2769 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes, pTb->cTbLookupEntries));
2770 break;
2771 }
2772 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2773 }
2774
2775 /*
2776 * Reserve lookup space for the final call entry if necessary.
2777 */
2778 PIEMTHRDEDCALLENTRY pFinalCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls - 1];
2779 if (pTb->Thrd.cCalls > 1)
2780 {
2781 if (pFinalCall->uTbLookup == 0)
2782 {
2783 pFinalCall->uTbLookup = IEM_TB_LOOKUP_TAB_MAKE(pTb->cTbLookupEntries, 0);
2784 pTb->cTbLookupEntries += 1;
2785 }
2786 }
2787 else if (pFinalCall->uTbLookup != 0)
2788 {
2789 Assert(pTb->cTbLookupEntries > 1);
2790 pFinalCall->uTbLookup -= 1;
2791 pTb->cTbLookupEntries -= 1;
2792 }
2793
2794 /*
2795 * Duplicate the TB into a completed one and link it.
2796 */
2797 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2798 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2799
2800 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
2801
2802#ifdef IEM_COMPILE_ONLY_MODE
2803 /*
2804 * Execute the translation block.
2805 */
2806#endif
2807
2808 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2809}
2810
2811
2812
2813/*********************************************************************************************************************************
2814* Recompiled Execution Core *
2815*********************************************************************************************************************************/
2816
2817/** Helper for iemTbExec. */
2818DECL_FORCE_INLINE(PIEMTB *) iemTbGetTbLookupEntryWithRip(PCIEMTB pTb, uint8_t uTbLookup, uint64_t uRip)
2819{
2820 uint8_t const idx = IEM_TB_LOOKUP_TAB_GET_IDX_WITH_RIP(uTbLookup, uRip);
2821 Assert(idx < pTb->cTbLookupEntries);
2822 return IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idx);
2823}
2824
2825
2826/**
2827 * Executes a translation block.
2828 *
2829 * @returns Strict VBox status code.
2830 * @param pVCpu The cross context virtual CPU structure of the calling
2831 * thread.
2832 * @param pTb The translation block to execute.
2833 */
2834static VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
2835{
2836 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
2837
2838 /*
2839 * Set the current TB so CIMPL functions may get at it.
2840 */
2841 pVCpu->iem.s.pCurTbR3 = pTb;
2842 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0);
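/* Entry zero is the fallback slot for anything without a specific lookup
   entry (see iemThreadedTbAlloc/iemThreadedTbReuse). */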
2843
2844 /*
2845 * Execute the block.
2846 */
2847#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
2848 if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
2849 {
2850 pVCpu->iem.s.cTbExecNative++;
2851# ifdef LOG_ENABLED
2852 iemThreadedLogCurInstr(pVCpu, "EXn", 0);
2853# endif
2854
2855# ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
2856# ifdef RT_ARCH_AMD64
2857 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu);
2858# else
2859 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu, &pVCpu->cpum.GstCtx);
2860# endif
2861# else
2862# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
2863 AssertCompileMemberOffset(VMCPUCC, iem.s.pvTbFramePointerR3, 0x7c8); /* This is assumed in iemNativeTbEntry */
2864# endif
2865# ifdef RT_ARCH_AMD64
2866 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, (uintptr_t)pTb->Native.paInstructions);
2867# else
2868 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, &pVCpu->cpum.GstCtx, (uintptr_t)pTb->Native.paInstructions);
2869# endif
2870# endif
2871
2872# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
2873 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
2874# endif
2875# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
2876 Assert(pVCpu->iem.s.fSkippingEFlags == 0);
2877# endif
2878 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2879 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2880 { /* likely */ }
2881 else
2882 {
2883 /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
2884 pVCpu->iem.s.pCurTbR3 = NULL;
2885
2886 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2887 only to break out of TB execution early. */
2888 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2889 {
2890 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreak);
2891 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2892 }
2893
2894 /* VINF_IEM_REEXEC_BREAK_FF should be treated as VINF_SUCCESS as it's
2895 only to break out of TB execution early due to pending FFs. */
2896 if (rcStrict == VINF_IEM_REEXEC_BREAK_FF)
2897 {
2898 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreakFF);
2899 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2900 }
2901
2902 /* VINF_IEM_REEXEC_WITH_FLAGS needs to receive special treatment
2903 and converted to VINF_SUCCESS or whatever is appropriate. */
2904 if (rcStrict == VINF_IEM_REEXEC_FINISH_WITH_FLAGS)
2905 {
2906 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnWithFlags);
2907 return iemExecStatusCodeFiddling(pVCpu, iemFinishInstructionWithFlagsSet(pVCpu, VINF_SUCCESS));
2908 }
2909
2910 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnOtherStatus);
2911 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2912 }
2913 }
2914 else
2915#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
2916 {
2917 /*
2918 * The threaded execution loop.
2919 */
2920 pVCpu->iem.s.cTbExecThreaded++;
2921#ifdef LOG_ENABLED
2922 uint64_t uRipPrev = UINT64_MAX;
2923#endif
2924 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
2925 uint32_t cCallsLeft = pTb->Thrd.cCalls;
2926 while (cCallsLeft-- > 0)
2927 {
2928#ifdef LOG_ENABLED
2929 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
2930 {
2931 uRipPrev = pVCpu->cpum.GstCtx.rip;
2932 iemThreadedLogCurInstr(pVCpu, "EXt", pTb->Thrd.cCalls - cCallsLeft - 1);
2933 }
2934 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
2935 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
2936 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
2937#endif
2938#ifdef VBOX_WITH_STATISTICS
2939 AssertCompile(RT_ELEMENTS(pVCpu->iem.s.acThreadedFuncStats) >= kIemThreadedFunc_End);
2940 pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction] += 1;
2941#endif
2942 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
2943 pCallEntry->auParams[0],
2944 pCallEntry->auParams[1],
2945 pCallEntry->auParams[2]);
2946 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2947 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2948 pCallEntry++;
2949 else
2950 {
2951 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
2952 pVCpu->iem.s.pCurTbR3 = NULL;
2953 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaks);
2954 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry->uTbLookup, pVCpu->cpum.GstCtx.rip);
2955
2956 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2957 only to break out of TB execution early. */
2958 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2959 {
2960#ifdef VBOX_WITH_STATISTICS
2961 if (pCallEntry->uTbLookup)
2962 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithLookup);
2963 else
2964 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithoutLookup);
2965#endif
2966 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2967 }
2968 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2969 }
2970 }
2971
2972 /* Update the lookup entry. */
2973 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry[-1].uTbLookup, pVCpu->cpum.GstCtx.rip);
2974 }
2975
2976 pVCpu->iem.s.cInstructions += pTb->cInstructions;
2977 pVCpu->iem.s.pCurTbR3 = NULL;
2978 return VINF_SUCCESS;
2979}
2980
2981
2982/**
2983 * This is called when the PC doesn't match the current pbInstrBuf.
2984 *
2985 * Upon return, we're ready for opcode fetching. But please note that
2986 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
2987 * MMIO or unassigned).
2988 */
2989static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
2990{
2991 pVCpu->iem.s.pbInstrBuf = NULL;
2992 pVCpu->iem.s.offCurInstrStart = 0;
2993 pVCpu->iem.s.offInstrNextByte = 0;
2994 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
2995 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
2996}
2997
2998
2999/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
3000DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
3001{
3002 /*
3003 * Set uCurTbStartPc to RIP and calc the effective PC.
3004 */
3005 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
3006 pVCpu->iem.s.uCurTbStartPc = uPc;
3007 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
3008 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
3009
3010 /*
3011 * Advance within the current buffer (PAGE) when possible.
3012 */
3013 if (pVCpu->iem.s.pbInstrBuf)
3014 {
3015 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
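/* Note: unsigned wrap-around makes the single comparison below also reject
   PCs below uInstrBufPc. */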
3016 if (off < pVCpu->iem.s.cbInstrBufTotal)
3017 {
3018 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
3019 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
3020 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
3021 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
3022 else
3023 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
3024
3025 return pVCpu->iem.s.GCPhysInstrBuf + off;
3026 }
3027 }
3028 return iemGetPcWithPhysAndCodeMissed(pVCpu);
3029}
3030
3031
3032/**
3033 * Determines the extra IEMTB_F_XXX flags.
3034 *
3035 * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
3036 * IEMTB_F_CS_LIM_CHECKS (or zero).
3037 * @param pVCpu The cross context virtual CPU structure of the calling
3038 * thread.
3039 */
3040DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
3041{
3042 uint32_t fRet = 0;
3043
3044 /*
3045 * Determine the inhibit bits.
3046 */
3047 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
3048 { /* typical */ }
3049 else
3050 {
3051 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
3052 fRet |= IEMTB_F_INHIBIT_SHADOW;
3053 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
3054 fRet |= IEMTB_F_INHIBIT_NMI;
3055 }
3056
3057 /*
3058 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
3059 * likely to go invalid before the end of the translation block.
3060 */
3061 if (IEM_F_MODE_X86_IS_FLAT(pVCpu->iem.s.fExec))
3062 return fRet;
3063
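    /* Heuristic: if EIP is at least a page plus a maximum instruction length
       below the CS limit (adjusted for the page offset of the CS base), the
       limit presumably cannot be reached within this TB, so the per-TB limit
       checks are skipped. */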
3064 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
3065 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
3066 return fRet;
3067 return fRet | IEMTB_F_CS_LIM_CHECKS;
3068}
3069
3070
3071VMM_INT_DECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu)
3072{
3073 /*
3074 * See if there is an interrupt pending in TRPM, inject it if we can.
3075 */
3076 if (!TRPMHasTrap(pVCpu))
3077 { /* likely */ }
3078 else
3079 {
3080 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
3081 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
3082        { /* likely */ }
3083 else
3084 return rcStrict;
3085 }
3086
3087 /*
3088 * Init the execution environment.
3089 */
3090#if 1 /** @todo this seems like a good idea, however if we ever share memory
3091 * directly with other threads on the host, it isn't necessarily... */
3092 if (pVM->cCpus == 1)
3093 iemInitExec(pVCpu, IEM_F_X86_DISREGARD_LOCK /*fExecOpts*/);
3094 else
3095#endif
3096 iemInitExec(pVCpu, 0 /*fExecOpts*/);
3097 if (RT_LIKELY(pVCpu->iem.s.msRecompilerPollNow != 0))
3098 { }
3099 else
3100 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(TMVirtualGetNoCheck(pVM) / RT_NS_1MS);
3101 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
3102
3103 /*
3104 * Run-loop.
3105 *
3106 * If we're using setjmp/longjmp we combine all the catching here to avoid
3107 * having to call setjmp for each block we're executing.
3108 */
3109 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
3110 for (;;)
3111 {
3112 VBOXSTRICTRC rcStrict;
3113 IEM_TRY_SETJMP(pVCpu, rcStrict)
3114 {
3115            uint32_t const cPollRate = 511; /* EM.cpp passes 4095 to IEMExecLots, so an eighth of that seems reasonable for now. */
3116 for (uint32_t iIterations = 0; ; iIterations++)
3117 {
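            /* Each iteration: translate RIP to a guest physical address, look up
               the TB in the cache and execute it (compiling a new one on a miss),
               then check the force flags and poll the timers at the cPollRate
               interval before going around again. */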
3118 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
3119 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
3120 if (RT_LIKELY(pVCpu->iem.s.pbInstrBuf != NULL))
3121 {
3122 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
3123 PIEMTB const pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
3124 if (pTb)
3125 rcStrict = iemTbExec(pVCpu, pTb);
3126 else
3127 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
3128 }
3129 else
3130 {
3131 /* This can only happen if the current PC cannot be translated into a
3132 host pointer, which means we're in MMIO or unmapped memory... */
3133#if defined(VBOX_STRICT) && defined(IN_RING3)
3134 rcStrict = DBGFSTOP(pVM);
3135 if (rcStrict != VINF_SUCCESS && rcStrict != VERR_DBGF_NOT_ATTACHED)
3136 return rcStrict;
3137#endif
3138 rcStrict = IEMExecLots(pVCpu, 2048, cPollRate, NULL);
3139 }
3140 if (rcStrict == VINF_SUCCESS)
3141 {
3142 Assert(pVCpu->iem.s.cActiveMappings == 0);
3143
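                /* Mask out the force flags that don't require dropping back to EM
                   here (CR3 sync, TLB flush, unhalt).  Keep looping only if nothing
                   else is pending, or if the only pending flags are APIC/PIC
                   interrupts that cannot be delivered right now (IF clear or in an
                   interrupt shadow), and no VM-wide force flags are set. */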
3144 uint64_t fCpu = pVCpu->fLocalForcedActions;
3145 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
3146 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
3147 | VMCPU_FF_TLB_FLUSH
3148 | VMCPU_FF_UNHALT );
3149 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
3150 if (RT_LIKELY( ( !fCpu
3151 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
3152 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
3153 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
3154 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
3155 {
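                    /* Only poll the timers every cPollRate + 1 iterations, using
                       cPollRate (a power of two minus one) as a mask; return to EM
                       when the poll indicates there is work due. */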
3156 if (RT_LIKELY( (iIterations & cPollRate) != 0
3157 || !TMTimerPollBoolWith32BitMilliTS(pVM, pVCpu, &pVCpu->iem.s.msRecompilerPollNow)))
3158 { /* likely */ }
3159 else
3160 return VINF_SUCCESS;
3161 }
3162 else
3163 return VINF_SUCCESS;
3164 }
3165 else
3166 return rcStrict;
3167 }
3168 }
3169 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
3170 {
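        /* Non-local exits (longjmp, typically raised exceptions) land here: roll
           back any active memory mappings, update the statistics, clear the
           current TB pointer and pass the status up to the caller. */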
3171 Assert(rcStrict != VINF_IEM_REEXEC_BREAK);
3172 pVCpu->iem.s.cLongJumps++;
3173#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3174 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
3175#endif
3176 if (pVCpu->iem.s.cActiveMappings > 0)
3177 iemMemRollback(pVCpu);
3178
3179#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
3180 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
3181 if (pTb && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
3182 {
3183 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitLongJump);
3184# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3185 Assert(pVCpu->iem.s.idxTbCurInstr < pTb->cInstructions);
3186 pVCpu->iem.s.cInstructions += pVCpu->iem.s.idxTbCurInstr;
3187# endif
3188 }
3189#endif
3190
3191#if 0 /** @todo do we need to clean up anything? If not, we can drop the pTb = NULL some lines up and change the scope. */
3192 /* If pTb isn't NULL we're in iemTbExec. */
3193 if (!pTb)
3194 {
3195                /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode. */
3196 pTb = pVCpu->iem.s.pCurTbR3;
3197 if (pTb)
3198 {
3199 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
3200 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
3201 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
3202 }
3203 }
3204#endif
3205 pVCpu->iem.s.pCurTbR3 = NULL;
3206 return rcStrict;
3207 }
3208 IEM_CATCH_LONGJMP_END(pVCpu);
3209 }
3210}
3211