source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp@108370

Last change on this file was r108370, checked in by vboxsync:
VMM/IEM: s/IEM_TB_LOOKUP_TAB_GET_IDX_WITH_RIP/IEM_TB_LOOKUP_TAB_GET_IDX_WITH_PC/ jiraref:VBP-1531

1/* $Id: IEMAllThrdRecompiler.cpp 108370 2025-02-25 13:26:43Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) : TB calls being emitted.
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) : TB opcode range management.
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2024 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#ifdef IN_RING0
53# define VBOX_VMM_TARGET_X86
54#endif
55#include <VBox/vmm/iem.h>
56#include <VBox/vmm/cpum.h>
57#include <VBox/vmm/tm.h>
58#include <VBox/vmm/dbgf.h>
59#include <VBox/vmm/dbgftrace.h>
60#ifndef TST_IEM_CHECK_MC
61# include "IEMInternal.h"
62#endif
63#include <VBox/vmm/vmcc.h>
64#include <VBox/log.h>
65#include <VBox/err.h>
66#include <VBox/param.h>
67#include <VBox/dis.h>
68#include <VBox/disopcode-x86-amd64.h>
69#include <iprt/asm-math.h>
70#include <iprt/assert.h>
71#include <iprt/mem.h>
72#include <iprt/string.h>
73#include <iprt/sort.h>
74#include <iprt/x86.h>
75
76#include "IEMInline.h"
77#ifdef VBOX_VMM_TARGET_X86
78# include "target-x86/IEMInline-x86.h"
79# include "target-x86/IEMInlineDecode-x86.h"
80#endif
81#include "IEMOpHlp.h"
82#include "IEMMc.h"
83
84#include "IEMThreadedFunctions.h"
85#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
86# include "IEMN8veRecompiler.h"
87#endif
88
89
90/*
91 * Narrow down configs here to avoid wasting time on unused configs.
92 */
93
94#ifndef IEM_WITH_CODE_TLB
95# error The code TLB must be enabled for the recompiler.
96#endif
97
98#ifndef IEM_WITH_DATA_TLB
99# error The data TLB must be enabled for the recompiler.
100#endif
101
102
103/*********************************************************************************************************************************
104* Internal Functions *
105*********************************************************************************************************************************/
106#if defined(VBOX_WITH_IEM_NATIVE_RECOMPILER) && defined(VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING)
107static void iemThreadedSaveTbForProfiling(PVMCPU pVCpu, PCIEMTB pTb);
108#endif
109
110
111/**
112 * Calculates the effective address of a ModR/M memory operand, extended version
113 * for use in the recompilers.
114 *
115 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
116 *
117 * May longjmp on internal error.
118 *
119 * @return The effective address.
120 * @param pVCpu The cross context virtual CPU structure of the calling thread.
121 * @param bRm The ModRM byte.
122 * @param cbImmAndRspOffset - First byte: The size of any immediate
123 * following the effective address opcode bytes
124 * (only for RIP relative addressing).
125 * - Second byte: RSP displacement (for POP [ESP]).
126 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
127 * SIB byte (bits 39:32).
128 *
129 * @note This must be defined in a source file with matching
130 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
131 * or implemented differently...
132 */
133RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
134{
135 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
136# define SET_SS_DEF() \
137 do \
138 { \
139 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
140 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
141 } while (0)
142
143 if (!IEM_IS_64BIT_CODE(pVCpu))
144 {
145/** @todo Check the effective address size crap! */
146 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
147 {
148 uint16_t u16EffAddr;
149
150 /* Handle the disp16 form with no registers first. */
151 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
152 {
153 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
154 *puInfo = u16EffAddr;
155 }
156 else
157 {
158 /* Get the displacement. */
159 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
160 {
161 case 0: u16EffAddr = 0; break;
162 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
163 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
164 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
165 }
166 *puInfo = u16EffAddr;
167
168 /* Add the base and index registers to the disp. */
169 switch (bRm & X86_MODRM_RM_MASK)
170 {
171 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
172 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
173 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
174 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
175 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
176 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
177 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
178 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
179 }
180 }
181
182 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
183 return u16EffAddr;
184 }
185
186 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
187 uint32_t u32EffAddr;
188 uint64_t uInfo;
189
190 /* Handle the disp32 form with no registers first. */
191 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
192 {
193 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
194 uInfo = u32EffAddr;
195 }
196 else
197 {
198 /* Get the register (or SIB) value. */
199 uInfo = 0;
200 switch ((bRm & X86_MODRM_RM_MASK))
201 {
202 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
203 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
204 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
205 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
206 case 4: /* SIB */
207 {
208 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
209 uInfo = (uint64_t)bSib << 32;
210
211 /* Get the index and scale it. */
212 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
213 {
214 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
215 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
216 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
217 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
218 case 4: u32EffAddr = 0; /*none */ break;
219 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
220 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
221 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
222 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
223 }
224 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
225
226 /* add base */
227 switch (bSib & X86_SIB_BASE_MASK)
228 {
229 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
230 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
231 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
232 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
233 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
234 case 5:
235 if ((bRm & X86_MODRM_MOD_MASK) != 0)
236 {
237 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
238 SET_SS_DEF();
239 }
240 else
241 {
242 uint32_t u32Disp;
243 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
244 u32EffAddr += u32Disp;
245 uInfo |= u32Disp;
246 }
247 break;
248 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
249 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
250 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
251 }
252 break;
253 }
254 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
255 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
256 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
257 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
258 }
259
260 /* Get and add the displacement. */
261 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
262 {
263 case 0:
264 break;
265 case 1:
266 {
267 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
268 u32EffAddr += i8Disp;
269 uInfo |= (uint32_t)(int32_t)i8Disp;
270 break;
271 }
272 case 2:
273 {
274 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
275 u32EffAddr += u32Disp;
276 uInfo |= u32Disp;
277 break;
278 }
279 default:
280 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
281 }
282 }
283
284 *puInfo = uInfo;
285 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
286 return u32EffAddr;
287 }
288
289 uint64_t u64EffAddr;
290 uint64_t uInfo;
291
292 /* Handle the rip+disp32 form with no registers first. */
293 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
294 {
295 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
296 uInfo = (uint32_t)u64EffAddr;
297 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
298 }
299 else
300 {
301 /* Get the register (or SIB) value. */
302 uInfo = 0;
303 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
304 {
305 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
306 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
307 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
308 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
309 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
310 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
311 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
312 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
313 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
314 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
315 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
316 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
317 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
318 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
319 /* SIB */
320 case 4:
321 case 12:
322 {
323 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
324 uInfo = (uint64_t)bSib << 32;
325
326 /* Get the index and scale it. */
327 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
328 {
329 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
330 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
331 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
332 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
333 case 4: u64EffAddr = 0; /*none */ break;
334 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
335 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
336 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
337 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
338 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
339 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
340 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
341 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
342 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
343 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
344 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
345 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
346 }
347 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
348
349 /* add base */
350 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
351 {
352 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
353 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
354 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
355 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
356 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
357 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
358 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
359 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
360 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
361 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
362 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
363 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
364 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
365 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
366 /* complicated encodings */
367 case 5:
368 case 13:
369 if ((bRm & X86_MODRM_MOD_MASK) != 0)
370 {
371 if (!pVCpu->iem.s.uRexB)
372 {
373 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
374 SET_SS_DEF();
375 }
376 else
377 u64EffAddr += pVCpu->cpum.GstCtx.r13;
378 }
379 else
380 {
381 uint32_t u32Disp;
382 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
383 u64EffAddr += (int32_t)u32Disp;
384 uInfo |= u32Disp;
385 }
386 break;
387 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
388 }
389 break;
390 }
391 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
392 }
393
394 /* Get and add the displacement. */
395 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
396 {
397 case 0:
398 break;
399 case 1:
400 {
401 int8_t i8Disp;
402 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
403 u64EffAddr += i8Disp;
404 uInfo |= (uint32_t)(int32_t)i8Disp;
405 break;
406 }
407 case 2:
408 {
409 uint32_t u32Disp;
410 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
411 u64EffAddr += (int32_t)u32Disp;
412 uInfo |= u32Disp;
413 break;
414 }
415 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
416 }
417
418 }
419
420 *puInfo = uInfo;
421 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
422 {
423 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
424 return u64EffAddr;
425 }
426 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
427 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
428 return u64EffAddr & UINT32_MAX;
429}
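
/*
 * Illustrative usage sketch (not part of the original source) for
 * iemOpHlpCalcRmEffAddrJmpEx above; the variable names here are hypothetical.
 * It shows how a caller packs cbImmAndRspOffset and splits *puInfo the same
 * way the function assembles it:
 *
 *     uint64_t       uInfo;
 *     uint32_t const cbImmAndRspOffset = ((uint32_t)cbRspDisplacement << 8) | cbImmediate;
 *     RTGCPTR  const GCPtrEff          = iemOpHlpCalcRmEffAddrJmpEx(pVCpu, bRm, cbImmAndRspOffset, &uInfo);
 *     uint32_t const u32Disp           = (uint32_t)uInfo;          // displacement (bits 31:0)
 *     uint8_t  const bSib              = (uint8_t)(uInfo >> 32);   // SIB byte (bits 39:32)
 */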
430
431
432
433/*********************************************************************************************************************************
434* Translation Block Cache. *
435*********************************************************************************************************************************/
436
437/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
438static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
439{
440 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
441 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
442 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
443 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
444 if (cMsSinceUse1 != cMsSinceUse2)
445 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
446 if (pTb1->cUsed != pTb2->cUsed)
447 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
448 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
449 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
450 return 0;
451}
452
453#ifdef VBOX_STRICT
454/**
455 * Assertion helper that checks a collisions list count.
456 */
457static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
458{
459 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
460 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
461 while (pTb)
462 {
463 pTb = pTb->pNext;
464 cLeft--;
465 }
466 AssertMsg(cLeft == 0,
467 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
468 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
469}
470#endif
471
472
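/**
 * Slow path of iemTbCacheAdd: called when the collision chain at @a idxHash is
 * full. Sorts the existing entries by last-use time and usage count, frees the
 * least valuable half, and inserts @a pTb at the head of the remaining chain.
 *
 * @param   pVCpu       The cross context virtual CPU structure of the calling
 *                      thread.
 * @param   pTbCache    The translation block cache.
 * @param   pTb         The translation block being added.
 * @param   idxHash     The hash table slot (calculated by the caller).
 */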
473DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
474{
475 STAM_PROFILE_START(&pTbCache->StatPrune, a);
476
477 /*
478 * First convert the collision list to an array.
479 */
480 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
481 uintptr_t cInserted = 0;
482 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
483
484 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
485
486 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
487 {
488 apSortedTbs[cInserted++] = pTbCollision;
489 pTbCollision = pTbCollision->pNext;
490 }
491
492 /* Free any excess (impossible). */
493 if (RT_LIKELY(!pTbCollision))
494 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
495 else
496 do
497 {
498 PIEMTB pTbToFree = pTbCollision;
499 pTbCollision = pTbToFree->pNext;
500 iemTbAllocatorFree(pVCpu, pTbToFree);
501 } while (pTbCollision);
502
503 /*
504 * Sort it by most recently used and usage count.
505 */
506 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
507
508 /* We keep half the list for now. Perhaps a bit aggressive... */
509 uintptr_t const cKeep = cInserted / 2;
510
511 /* First free up the TBs we don't wish to keep (before creating the new
512 list because otherwise the free code will scan the list for each one
513 without ever finding it). */
514 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
515 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
516
517 /* Then chain the new TB together with the existing ones we want to keep
518 and insert this list into the hash table. */
519 pTbCollision = pTb;
520 for (uintptr_t idx = 0; idx < cKeep; idx++)
521 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
522 pTbCollision->pNext = NULL;
523
524 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
525#ifdef VBOX_STRICT
526 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
527#endif
528
529 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
530}
531
532
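/**
 * Adds @a pTb to the translation block cache, keyed by its flags and physical
 * PC (IEMTBCACHE_HASH).
 *
 * If the collision chain at the hash slot has already reached
 * IEMTBCACHE_PTR_MAX_COUNT entries, the chain is pruned and the TB inserted
 * via iemTbCacheAddWithPruning.
 *
 * @param   pVCpu       The cross context virtual CPU structure of the calling
 *                      thread.
 * @param   pTbCache    The translation block cache.
 * @param   pTb         The translation block to add.
 */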
533static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
534{
535 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
536 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
537 if (!pTbOldHead)
538 {
539 pTb->pNext = NULL;
540 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
541 }
542 else
543 {
544 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
545 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
546 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
547 {
548 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
549 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
550#ifdef VBOX_STRICT
551 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
552#endif
553 }
554 else
555 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
556 }
557}
558
559
560/**
561 * Unlinks @a pTb from the hash table if found in it.
562 *
563 * @returns true if unlinked, false if not present.
564 * @param pTbCache The hash table.
565 * @param pTb The TB to remove.
566 */
567static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
568{
569 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
570 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
571 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
572
573 /*
574 * At the head of the collision list?
575 */
576 if (pTbHash == pTb)
577 {
578 if (!pTb->pNext)
579 pTbCache->apHash[idxHash] = NULL;
580 else
581 {
582 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
583 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
584#ifdef VBOX_STRICT
585 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
586#endif
587 }
588 return true;
589 }
590
591 /*
592 * Search the collision list.
593 */
594 PIEMTB const pTbHead = pTbHash;
595 while (pTbHash)
596 {
597 PIEMTB const pNextTb = pTbHash->pNext;
598 if (pNextTb == pTb)
599 {
600 pTbHash->pNext = pTb->pNext;
601 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
602#ifdef VBOX_STRICT
603 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
604#endif
605 return true;
606 }
607 pTbHash = pNextTb;
608 }
609 return false;
610}
611
612
613/**
614 * Looks up a TB for the given PC and flags in the cache.
615 *
616 * @returns Pointer to TB on success, NULL if not found.
617 * @param pVCpu The cross context virtual CPU structure of the
618 * calling thread.
619 * @param pTbCache The translation block cache.
620 * @param GCPhysPc The PC to look up a TB for.
621 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
622 * the lookup.
623 * @thread EMT(pVCpu)
624 */
625static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
626 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP /** @todo r=bird: no longjumping here, right? iemNativeRecompile is noexcept. */
627{
628 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
629
630 /*
631 * First consult the lookup table entry.
632 */
633 PIEMTB * const ppTbLookup = pVCpu->iem.s.ppTbLookupEntryR3;
634 PIEMTB pTb = *ppTbLookup;
635 if (pTb)
636 {
637 if (pTb->GCPhysPc == GCPhysPc)
638 {
639 if ( (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_NATIVE)
640 || (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_THREADED) )
641 {
642 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
643 {
644 STAM_COUNTER_INC(&pTbCache->cLookupHitsViaTbLookupTable);
645 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
646 pTb->cUsed++;
647#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
648 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
649 {
650 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
651 return pTb;
652 }
653 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p) - recompiling\n", fFlags, GCPhysPc, pTb, ppTbLookup));
654# ifdef VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING
655 iemThreadedSaveTbForProfiling(pVCpu, pTb);
656# endif
657 return iemNativeRecompile(pVCpu, pTb);
658#else
659 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
660 return pTb;
661#endif
662 }
663 }
664 }
665 }
666
667 /*
668 * Then consult the hash table.
669 */
670 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
671#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
672 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
673#endif
674 pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
675 while (pTb)
676 {
677 if (pTb->GCPhysPc == GCPhysPc)
678 {
679 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
680 {
681 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
682 {
683 STAM_COUNTER_INC(&pTbCache->cLookupHits);
684 AssertMsg(cLeft > 0, ("%d\n", cLeft));
685
686 *ppTbLookup = pTb;
687 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
688 pTb->cUsed++;
689#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
690 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
691 {
692 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
693 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
694 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
695 return pTb;
696 }
697 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d) - recompiling\n",
698 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
699 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
700 return iemNativeRecompile(pVCpu, pTb);
701#else
702 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
703 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
704 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
705 return pTb;
706#endif
707 }
708 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
709 }
710 else
711 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
712 }
713 else
714 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
715
716 pTb = pTb->pNext;
717#ifdef VBOX_STRICT
718 cLeft--;
719#endif
720 }
721 AssertMsg(cLeft == 0, ("%d\n", cLeft));
722 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
723 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: NULL - (%p L %d)\n", fFlags, GCPhysPc, idxHash,
724 IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]), IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
725 return pTb;
726}
727
728
729/*********************************************************************************************************************************
730* Translation Block Allocator. *
731*********************************************************************************************************************************/
732/*
733 * Translation block allocation management.
734 */
735
736#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
737# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
738 ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
739# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
740 ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
741# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
742 ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
743#else
744# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
745 ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
746# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
747 ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
748# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
749 ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
750#endif
751/** Makes a TB index from a chunk index and TB index within that chunk. */
752#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
753 (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
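
/*
 * Illustrative note (not from the original source): the macros above round-trip
 * a flat TB index and a (chunk, index-in-chunk) pair. For a hypothetical
 * allocator configuration with cTbsPerChunk = 2048:
 *
 *     uint32_t const idxTb      = IEMTBALLOC_IDX_MAKE(pTbAllocator, 3, 5);                         // 3*2048 + 5 = 6149
 *     uint32_t const idxChunk   = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTb);                    // 3
 *     uint32_t const idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTb, idxChunk); // 5
 */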
754
755
756/**
757 * Initializes the TB allocator and cache for an EMT.
758 *
759 * @returns VBox status code.
760 * @param pVM The VM handle.
761 * @param cInitialTbs The initial number of translation blocks to
762 * preallocate.
763 * @param cMaxTbs The max number of translation blocks allowed.
764 * @param cbInitialExec The initial size of the executable memory allocator.
765 * @param cbMaxExec The max size of the executable memory allocator.
766 * @param cbChunkExec The chunk size for executable memory allocator. Zero
767 * or UINT32_MAX for automatically determining this.
768 * @thread EMT
769 */
770DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
771 uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
772{
773 PVMCPUCC pVCpu = VMMGetCpu(pVM);
774 Assert(!pVCpu->iem.s.pTbCacheR3);
775 Assert(!pVCpu->iem.s.pTbAllocatorR3);
776
777 /*
778 * Calculate the chunk size of the TB allocator.
779 * The minimum chunk size is 2MiB.
780 */
781 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
782 uint32_t cbPerChunk = _2M;
783 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
784#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
785 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
786 uint8_t cChunkShift = 21 - cTbShift;
787 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
788#endif
789 for (;;)
790 {
791 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
792 break;
793 cbPerChunk *= 2;
794 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
795#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
796 cChunkShift += 1;
797#endif
798 }
799
800 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
801 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
802 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
803
804 cMaxTbs = cMaxChunks * cTbsPerChunk;
805
806 /*
807 * Allocate and initialize it.
808 */
809 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(sizeof(*pTbAllocator));
810 if (!pTbAllocator)
811 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
812 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
813 sizeof(*pTbAllocator), cMaxTbs, pVCpu->idCpu);
814 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
815 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
816 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
817 pTbAllocator->cbPerChunk = cbPerChunk;
818 pTbAllocator->cMaxTbs = cMaxTbs;
819 pTbAllocator->pTbsFreeHead = NULL;
820#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
821 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
822 pTbAllocator->cChunkShift = cChunkShift;
823 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
824#endif
825
826 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
827
828 /*
829 * Allocate the initial chunks.
830 */
831 for (uint32_t idxChunk = 0; ; idxChunk++)
832 {
833 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
834 if (!paTbs)
835 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
836 "Failed to allocate %zu bytes for the #%u chunk of TBs for VCpu #%u",
837 cbPerChunk, idxChunk, pVCpu->idCpu);
838
839 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
840 {
841 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
842 paTbs[iTb].pNext = pTbAllocator->pTbsFreeHead;
843 pTbAllocator->pTbsFreeHead = &paTbs[iTb];
844 }
845 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
846 pTbAllocator->cTotalTbs += cTbsPerChunk;
847
848 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
849 break;
850 }
851
852 /*
853 * Calculate the size of the hash table. We double the max TB count and
854 * round it up to the nearest power of two.
855 */
856 uint32_t cCacheEntries = cMaxTbs * 2;
857 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
858 {
859 uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
860 cCacheEntries = RT_BIT_32(iBitTop);
861 Assert(cCacheEntries >= cMaxTbs * 2);
862 }
863
864 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
865 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
866 if (!pTbCache)
867 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
868 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
869 cbTbCache, cCacheEntries, pVCpu->idCpu);
870
871 /*
872 * Initialize it (assumes zeroed by the allocator).
873 */
874 pTbCache->uMagic = IEMTBCACHE_MAGIC;
875 pTbCache->cHash = cCacheEntries;
876 pTbCache->uHashMask = cCacheEntries - 1;
877 Assert(pTbCache->cHash > pTbCache->uHashMask);
878 pVCpu->iem.s.pTbCacheR3 = pTbCache;
879
880 /*
881 * Initialize the native executable memory allocator.
882 */
883#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
884 int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
885 AssertLogRelRCReturn(rc, rc);
886#else
887 RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
888#endif
889
890 return VINF_SUCCESS;
891}
892
893
894/**
895 * Inner free worker.
896 *
897 * The @a a_fType parameter allows us to eliminate the type check when we know
898 * which type of TB is being freed.
899 */
900template<uint32_t a_fType>
901DECL_FORCE_INLINE(void)
902iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator, PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
903{
904#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
905 AssertCompile(a_fType == 0 || a_fType == IEMTB_F_TYPE_THREADED || a_fType == IEMTB_F_TYPE_NATIVE);
906#else
907 AssertCompile(a_fType == 0 || a_fType == IEMTB_F_TYPE_THREADED);
908#endif
909 Assert(idxChunk < pTbAllocator->cAllocatedChunks); RT_NOREF(idxChunk);
910 Assert(idxInChunk < pTbAllocator->cTbsPerChunk); RT_NOREF(idxInChunk);
911 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
912#ifdef VBOX_STRICT
913 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
914 Assert(pTbOther != pTb);
915#endif
916
917 /*
918 * Unlink the TB from the hash table.
919 */
920 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
921
922 /*
923 * Free the TB itself.
924 */
925 if RT_CONSTEXPR_IF(a_fType == 0)
926 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
927 {
928 case IEMTB_F_TYPE_THREADED:
929 pTbAllocator->cThreadedTbs -= 1;
930 RTMemFree(pTb->Thrd.paCalls);
931 break;
932#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
933 case IEMTB_F_TYPE_NATIVE:
934 pTbAllocator->cNativeTbs -= 1;
935 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
936 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
937 pTb->Native.paInstructions = NULL; /* required by iemExecMemAllocatorPrune */
938 break;
939#endif
940 default:
941 AssertFailed();
942 }
943#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
944 else if RT_CONSTEXPR_IF(a_fType == IEMTB_F_TYPE_NATIVE)
945 {
946 Assert((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
947 pTbAllocator->cNativeTbs -= 1;
948 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
949 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
950 pTb->Native.paInstructions = NULL; /* required by iemExecMemAllocatorPrune */
951 }
952#endif
953 else
954 {
955 Assert((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
956 pTbAllocator->cThreadedTbs -= 1;
957 RTMemFree(pTb->Thrd.paCalls);
958 }
959
960 RTMemFree(IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0)); /* Frees both the TB lookup table and opcode bytes. */
961
962 pTb->pNext = pTbAllocator->pTbsFreeHead;
963 pTbAllocator->pTbsFreeHead = pTb;
964 pTb->fFlags = 0;
965 pTb->GCPhysPc = UINT64_MAX;
966 pTb->Gen.uPtr = 0;
967 pTb->Gen.uData = 0;
968 pTb->cTbLookupEntries = 0;
969 pTb->cbOpcodes = 0;
970 pTb->pabOpcodes = NULL;
971
972 Assert(pTbAllocator->cInUseTbs > 0);
973
974 pTbAllocator->cInUseTbs -= 1;
975 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
976}
977
978
979/**
980 * Frees the given TB.
981 *
982 * @param pVCpu The cross context virtual CPU structure of the calling
983 * thread.
984 * @param pTb The translation block to free.
985 * @thread EMT(pVCpu)
986 */
987DECLHIDDEN(void) iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
988{
989 /*
990 * Validate state.
991 */
992 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
993 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
994 uint8_t const idxChunk = pTb->idxAllocChunk;
995 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
996 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
997 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
998
999 /*
1000 * Invalidate the TB lookup pointer and call the inner worker.
1001 */
1002 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1003 iemTbAllocatorFreeInner<0>(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
1004}
1005
1006#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
1007
1008/**
1009 * Interface used by iemExecMemAllocatorPrune.
1010 */
1011DECLHIDDEN(void) iemTbAllocatorFreeBulk(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator, PIEMTB pTb)
1012{
1013 Assert(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1014
1015 uint8_t const idxChunk = pTb->idxAllocChunk;
1016 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
1017 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
1018 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
1019
1020 iemTbAllocatorFreeInner<IEMTB_F_TYPE_NATIVE>(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
1021}
1022
1023
1024/**
1025 * Interface used by iemExecMemAllocatorPrune.
1026 */
1027DECLHIDDEN(PIEMTBALLOCATOR) iemTbAllocatorFreeBulkStart(PVMCPUCC pVCpu)
1028{
1029 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1030 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1031
1032 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1033
1034 /* It should be sufficient to do this once. */
1035 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1036
1037 return pTbAllocator;
1038}
1039
1040#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
1041
1042/**
1043 * Schedules a TB for freeing when it's no longer being executed and/or part of
1044 * the caller's call stack.
1045 *
1046 * The TB will be removed from the translation block cache, though, so it isn't
1047 * possible to execute it again and the IEMTB::pNext member can be used to link
1048 * it together with other TBs awaiting freeing.
1049 *
1050 * @param pVCpu The cross context virtual CPU structure of the calling
1051 * thread.
1052 * @param pTb The translation block to schedule for freeing.
1053 */
1054static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
1055{
1056 /*
1057 * Validate state.
1058 */
1059 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1060 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1061 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
1062 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
1063 Assert( (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE
1064 || (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1065#ifdef VBOX_STRICT
1066 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
1067 Assert(pTbOther != pTb);
1068#endif
1069
1070 /*
1071 * Remove it from the cache and prepend it to the allocator's todo list.
1072 *
1073 * Note! It could still be in various lookup tables, so we trash the GCPhys
1074 * and CS attribs to ensure it won't be reused.
1075 */
1076 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
1077 pTb->GCPhysPc = NIL_RTGCPHYS;
1078 pTb->x86.fAttr = UINT16_MAX;
1079
1080 pTb->pNext = pTbAllocator->pDelayedFreeHead;
1081 pTbAllocator->pDelayedFreeHead = pTb;
1082}
1083
1084
1085/**
1086 * Processes the delayed frees.
1087 *
1088 * This is called by the allocator function as well as the native recompile
1089 * function before making any TB or executable memory allocations respectively.
1090 */
1091void iemTbAllocatorProcessDelayedFrees(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator)
1092{
1093 /** @todo r-bird: these have already been removed from the cache,
1094 * iemTbAllocatorFree/Inner redoes that, which is a waste of time. */
1095 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
1096 pTbAllocator->pDelayedFreeHead = NULL;
1097 while (pTb)
1098 {
1099 PIEMTB const pTbNext = pTb->pNext;
1100 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
1101 iemTbAllocatorFree(pVCpu, pTb);
1102 pTb = pTbNext;
1103 }
1104}
1105
1106
1107#if 0
1108/**
1109 * Frees all TBs.
1110 */
1111static int iemTbAllocatorFreeAll(PVMCPUCC pVCpu)
1112{
1113 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1114 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1115 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1116
1117 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1118
1119 uint32_t idxChunk = pTbAllocator->cAllocatedChunks;
1120 while (idxChunk-- > 0)
1121 {
1122 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs;
1123 uint32_t idxTb = pTbAllocator->cTbsPerChunk;
1124 while (idxTb-- > 0)
1125 {
1126 PIEMTB const pTb = &paTbs[idxTb];
1127 if (pTb->fFlags)
1128 iemTbAllocatorFreeInner<0>(pVCpu, pTbAllocator, pTb, idxChunk, idxTb);
1129 }
1130 }
1131
1132 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1133
1134# if 1
1135 /* Reset the free list. */
1136 pTbAllocator->pTbsFreeHead = NULL;
1137 idxChunk = pTbAllocator->cAllocatedChunks;
1138 while (idxChunk-- > 0)
1139 {
1140 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1141 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs;
1142 RT_BZERO(paTbs, sizeof(paTbs[0]) * cTbsPerChunk);
1143 for (uint32_t idxTb = 0; idxTb < cTbsPerChunk; idxTb++)
1144 {
1145 paTbs[idxTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1146 paTbs[idxTb].pNext = pTbAllocator->pTbsFreeHead;
1147 pTbAllocator->pTbsFreeHead = &paTbs[idxTb];
1148 }
1149 }
1150# endif
1151
1152# if 1
1153 /* Completely reset the TB cache. */
1154 RT_BZERO(pVCpu->iem.s.pTbCacheR3->apHash, sizeof(pVCpu->iem.s.pTbCacheR3->apHash[0]) * pVCpu->iem.s.pTbCacheR3->cHash);
1155# endif
1156
1157 return VINF_SUCCESS;
1158}
1159#endif
1160
1161
1162/**
1163 * Grow the translation block allocator with another chunk.
1164 */
1165static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
1166{
1167 /*
1168 * Validate state.
1169 */
1170 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1171 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1172 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1173 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
1174 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1175
1176 /*
1177 * Allocate a new chunk and add it to the allocator.
1178 */
1179 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1180 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1181 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1182
1183 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1184 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1185 {
1186 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1187 paTbs[iTb].pNext = pTbAllocator->pTbsFreeHead;
1188 pTbAllocator->pTbsFreeHead = &paTbs[iTb];
1189 }
1190 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1191 pTbAllocator->cTotalTbs += cTbsPerChunk;
1192
1193 return VINF_SUCCESS;
1194}
1195
1196
1197/**
1198 * Allocates a TB from allocator with free block.
1199 *
1200 * This is common code to both the fast and slow allocator code paths.
1201 */
1202DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1203{
1204 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1205 Assert(pTbAllocator->pTbsFreeHead);
1206
1207 PIEMTB const pTb = pTbAllocator->pTbsFreeHead;
1208 pTbAllocator->pTbsFreeHead = pTb->pNext;
1209 pTbAllocator->cInUseTbs += 1;
1210 if (fThreaded)
1211 pTbAllocator->cThreadedTbs += 1;
1212 else
1213 pTbAllocator->cNativeTbs += 1;
1214 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1215 return pTb;
1216}
1217
1218
1219/**
1220 * Slow path for iemTbAllocatorAlloc.
1221 */
1222static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1223{
1224 /*
1225 * With some luck we can add another chunk.
1226 */
1227 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
1228 {
1229 int rc = iemTbAllocatorGrow(pVCpu);
1230 if (RT_SUCCESS(rc))
1231 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1232 }
1233
1234 /*
1235 * We have to prune stuff. Sigh.
1236 *
1237 * This requires scanning for older TBs and kicking them out. Not sure how to
1238 * best do this as we don't want to maintain any list of TBs ordered by last
1239 * usage time. But one reasonably simple approach would be that each time we
1240 * get here we continue a sequential scan of the allocation chunks,
1241 * considering just a smallish number of TBs and freeing a fixed portion of
1242 * them. Say, we consider the next 128 TBs, freeing the least recently used
1243 * one out of each group of 4 TBs, resulting in 32 free TBs.
1244 */
1245 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1246 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1247 uint32_t const cTbsToPrune = 128;
1248 uint32_t const cTbsPerGroup = 4;
1249 uint32_t cFreedTbs = 0;
1250#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1251 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1252#else
1253 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1254#endif
1255 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1256 idxTbPruneFrom = 0;
1257 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1258 {
1259 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1260 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1261 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1262 uint32_t cMsAge = msNow - pTb->msLastUsed;
1263 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1264
1265 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1266 {
1267#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1268 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1269 { /* likely */ }
1270 else
1271 {
1272 idxInChunk2 = 0;
1273 idxChunk2 += 1;
1274 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1275 idxChunk2 = 0;
1276 }
1277#endif
1278 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1279 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1280 if ( cMsAge2 > cMsAge
1281 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1282 {
1283 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1284 pTb = pTb2;
1285 idxChunk = idxChunk2;
1286 idxInChunk = idxInChunk2;
1287 cMsAge = cMsAge2;
1288 }
1289 }
1290
1291 /* Free the TB. */
1292 iemTbAllocatorFreeInner<0>(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1293 cFreedTbs++; /* paranoia */
1294 }
1295 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1296 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1297
1298 /* Flush the TB lookup entry pointer. */
1299 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1300
1301 /*
1302 * Allocate a TB from the ones we've pruned.
1303 */
1304 if (cFreedTbs)
1305 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1306 return NULL;
1307}
1308
1309
1310/**
1311 * Allocate a translation block.
1312 *
1313 * @returns Pointer to block on success, NULL if we're out and unable to
1314 * free up an existing one (very unlikely once implemented).
1315 * @param pVCpu The cross context virtual CPU structure of the calling
1316 * thread.
1317 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1318 * For statistics.
1319 */
1320DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1321{
1322 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1323 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1324
1325 /* Free any pending TBs before we proceed. */
1326 if (!pTbAllocator->pDelayedFreeHead)
1327 { /* probably likely */ }
1328 else
1329 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1330
1331 /* If the allocator is full, take the slow code path. */
1332 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1333 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1334 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1335}
1336
1337
1338#if 0 /*def VBOX_WITH_IEM_NATIVE_RECOMPILER*/
1339/**
1340 * This is called when we're out of space for native TBs.
1341 *
1342 * This uses a variation on the pruning in iemTbAllocatorAllocSlow.
1343 * The difference is that we only prune native TBs and will only free any if
1344 * there are at least two in a group. The conditions under which we're called are
1345 * different - there will probably be free TBs in the table when we're called.
1346 * Therefore we increase the group size and max scan length, though we'll stop
1347 * scanning once we've reached the requested size (@a cNeededInstrs) and freed
1348 * up at least 8 TBs.
1349 */
1350void iemTbAllocatorFreeupNativeSpace(PVMCPUCC pVCpu, uint32_t cNeededInstrs)
1351{
1352 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1353 AssertReturnVoid(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1354
1355 STAM_REL_PROFILE_START(&pTbAllocator->StatPruneNative, a);
1356
1357 /*
1358 * Flush the delayed free list before we start freeing TBs indiscriminately.
1359 */
1360 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1361
1362 /*
1363 * Scan and free TBs.
1364 */
1365 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1366 uint32_t const cTbsToPrune = 128 * 8;
1367 uint32_t const cTbsPerGroup = 4 * 4;
1368 uint32_t cFreedTbs = 0;
1369 uint32_t cMaxInstrs = 0;
1370 uint32_t idxTbPruneFrom = pTbAllocator->iPruneNativeFrom & ~(uint32_t)(cTbsPerGroup - 1);
1371 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1372 {
1373 if (idxTbPruneFrom >= pTbAllocator->cTotalTbs)
1374 idxTbPruneFrom = 0;
1375 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1376 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1377 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1378 uint32_t cMsAge = pTb->fFlags & IEMTB_F_TYPE_NATIVE ? msNow - pTb->msLastUsed : msNow;
1379 uint8_t cNativeTbs = (pTb->fFlags & IEMTB_F_TYPE_NATIVE) != 0;
1380
1381 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1382 {
1383 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1384 { /* likely */ }
1385 else
1386 {
1387 idxInChunk2 = 0;
1388 idxChunk2 += 1;
1389 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1390 idxChunk2 = 0;
1391 }
1392 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1393 if (pTb2->fFlags & IEMTB_F_TYPE_NATIVE)
1394 {
1395 cNativeTbs += 1;
1396 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1397 if ( cMsAge2 > cMsAge
1398 || ( cMsAge2 == cMsAge
1399 && ( pTb2->cUsed < pTb->cUsed
1400 || ( pTb2->cUsed == pTb->cUsed
1401 && pTb2->Native.cInstructions > pTb->Native.cInstructions)))
1402 || !(pTb->fFlags & IEMTB_F_TYPE_NATIVE))
1403 {
1404 pTb = pTb2;
1405 idxChunk = idxChunk2;
1406 idxInChunk = idxInChunk2;
1407 cMsAge = cMsAge2;
1408 }
1409 }
1410 }
1411
1412 /* Free the TB if we found at least two native ones in this group. */
1413 if (cNativeTbs >= 2)
1414 {
1415 cMaxInstrs = RT_MAX(cMaxInstrs, pTb->Native.cInstructions);
1416 iemTbAllocatorFreeInner<IEMTB_F_TYPE_NATIVE>(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1417 cFreedTbs++;
1418 if (cFreedTbs >= 8 && cMaxInstrs >= cNeededInstrs)
1419 break;
1420 }
1421 }
1422 pTbAllocator->iPruneNativeFrom = idxTbPruneFrom;
1423
1424 STAM_REL_PROFILE_STOP(&pTbAllocator->StatPruneNative, a);
1425}
1426#endif /* unused / VBOX_WITH_IEM_NATIVE_RECOMPILER */
1427
1428
1429/*********************************************************************************************************************************
1430* Threaded Recompiler Core *
1431*********************************************************************************************************************************/
1432/**
1433 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
1434 * @returns pszBuf.
1435 * @param fFlags The flags.
1436 * @param pszBuf The output buffer.
1437 * @param cbBuf The output buffer size. At least 32 bytes.
1438 */
1439DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
1440{
1441 Assert(cbBuf >= 32);
1442 static RTSTRTUPLE const s_aModes[] =
1443 {
1444 /* [00] = */ { RT_STR_TUPLE("16BIT") },
1445 /* [01] = */ { RT_STR_TUPLE("32BIT") },
1446 /* [02] = */ { RT_STR_TUPLE("!2!") },
1447 /* [03] = */ { RT_STR_TUPLE("!3!") },
1448 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
1449 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
1450 /* [06] = */ { RT_STR_TUPLE("!6!") },
1451 /* [07] = */ { RT_STR_TUPLE("!7!") },
1452 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
1453 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
1454 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
1455 /* [0b] = */ { RT_STR_TUPLE("!b!") },
1456 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
1457 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
1458 /* [0e] = */ { RT_STR_TUPLE("!e!") },
1459 /* [0f] = */ { RT_STR_TUPLE("!f!") },
1460 /* [10] = */ { RT_STR_TUPLE("!10!") },
1461 /* [11] = */ { RT_STR_TUPLE("!11!") },
1462 /* [12] = */ { RT_STR_TUPLE("!12!") },
1463 /* [13] = */ { RT_STR_TUPLE("!13!") },
1464 /* [14] = */ { RT_STR_TUPLE("!14!") },
1465 /* [15] = */ { RT_STR_TUPLE("!15!") },
1466 /* [16] = */ { RT_STR_TUPLE("!16!") },
1467 /* [17] = */ { RT_STR_TUPLE("!17!") },
1468 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
1469 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
1470 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
1471 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
1472 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
1473 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
1474 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
1475 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
1476 };
1477 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
1478 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
1479 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
1480
1481 pszBuf[off++] = ' ';
1482 pszBuf[off++] = 'C';
1483 pszBuf[off++] = 'P';
1484 pszBuf[off++] = 'L';
1485 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
1486 Assert(off < 32);
1487
1488 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
1489
1490 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
1491 {
1492 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
1493 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
1494 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
1495 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
1496 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
1497 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
1498 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
1499 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
1500 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
1501 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_X86_INHIBIT_SHADOW },
1502 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_X86_INHIBIT_NMI },
1503 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_X86_CS_LIM_CHECKS },
1504 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
1505 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
1506 };
1507 if (fFlags)
1508 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1509 if (s_aFlags[i].fFlag & fFlags)
1510 {
1511 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
1512 pszBuf[off++] = ' ';
1513 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
1514 off += s_aFlags[i].cchName;
1515 fFlags &= ~s_aFlags[i].fFlag;
1516 if (!fFlags)
1517 break;
1518 }
1519 pszBuf[off] = '\0';
1520
1521 return pszBuf;
1522}
1523
1524
1525/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
1526static DECLCALLBACK(int) iemThreadedDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
1527{
1528 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
1529 pDis->cbCachedInstr += cbMaxRead;
1530 RT_NOREF(cbMinRead);
1531 return VERR_NO_DATA;
1532}
1533
1534
1535/**
1536 * Worker for iemThreadedDisassembleTb.
1537 */
1538static void iemThreadedDumpLookupTable(PCIEMTB pTb, PCDBGFINFOHLP pHlp, unsigned idxFirst, unsigned cEntries,
1539 const char *pszLeadText = " TB Lookup:") RT_NOEXCEPT
1540{
1541 if (idxFirst + cEntries <= pTb->cTbLookupEntries)
1542 {
1543 PIEMTB * const papTbLookup = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxFirst);
1544 pHlp->pfnPrintf(pHlp, "%s", pszLeadText);
1545 for (uint8_t iLookup = 0; iLookup < cEntries; iLookup++)
1546 {
1547 PIEMTB pLookupTb = papTbLookup[iLookup];
1548 if (pLookupTb)
1549 pHlp->pfnPrintf(pHlp, "%c%p (%s)", iLookup ? ',' : ' ', pLookupTb,
1550 (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED ? "threaded"
1551 : (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? "native"
1552 : "invalid");
1553 else
1554 pHlp->pfnPrintf(pHlp, "%cNULL", iLookup ? ',' : ' ');
1555 }
1556 pHlp->pfnPrintf(pHlp, "\n");
1557 }
1558 else
1559 {
1560 pHlp->pfnPrintf(pHlp, " !!Bogus TB lookup info: idxFirst=%#x L %u > cTbLookupEntries=%#x!!\n",
1561 idxFirst, cEntries, pTb->cTbLookupEntries);
1562 AssertMsgFailed(("idxFirst=%#x L %u > cTbLookupEntries=%#x\n", idxFirst, cEntries, pTb->cTbLookupEntries));
1563 }
1564}
1565
1566
1567DECLHIDDEN(void) iemThreadedDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
1568{
1569 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1570
1571 char szDisBuf[512];
1572
1573 /*
1574 * Print TB info.
1575 */
1576 pHlp->pfnPrintf(pHlp,
1577 "pTb=%p: GCPhysPc=%RGp (%RGv) cInstructions=%u LB %#x cRanges=%u cTbLookupEntries=%u\n"
1578 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
1579 pTb, pTb->GCPhysPc, pTb->FlatPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges, pTb->cTbLookupEntries,
1580 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
1581
1582 /*
1583      * This disassembly is driven by the threaded call table and the opcode
1584      * ranges, which tell us where each guest instruction starts and which
1585      * threaded calls it was compiled into.
1586 */
1587 DISSTATE Dis;
1588 PCIEMTHRDEDCALLENTRY const paCalls = pTb->Thrd.paCalls;
1589 uint32_t const cCalls = pTb->Thrd.cCalls;
1590 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_X86_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
1591 : (pTb->fFlags & IEM_F_MODE_X86_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
1592 : DISCPUMODE_64BIT;
1593 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
1594 uint8_t idxRange = UINT8_MAX;
1595 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
1596 uint32_t offRange = 0;
1597 uint32_t offOpcodes = 0;
1598 uint32_t const cbOpcodes = pTb->cbOpcodes;
1599 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
1600 bool fTbLookupSeen0 = false;
1601
1602 for (uint32_t iCall = 0; iCall < cCalls; iCall++)
1603 {
1604 /*
1605 * New opcode range?
1606 */
1607 if ( idxRange == UINT8_MAX
1608 || idxRange >= cRanges
1609 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
1610 {
1611 idxRange += 1;
1612 if (idxRange < cRanges)
1613 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
1614 else
1615 continue;
1616 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
1617 + (pTb->aRanges[idxRange].idxPhysPage == 0
1618 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1619 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
1620 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
1621 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
1622 pTb->aRanges[idxRange].idxPhysPage);
1623 GCPhysPc += offRange;
1624 }
1625
1626 /*
1627 * Disassemble another guest instruction?
1628 */
1629 if ( paCalls[iCall].offOpcode != offOpcodes
1630 && paCalls[iCall].cbOpcode > 0
1631 && (uint32_t)(cbOpcodes - paCalls[iCall].offOpcode) <= cbOpcodes /* paranoia^2 */ )
1632 {
1633 offOpcodes = paCalls[iCall].offOpcode;
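            /* Never feed the disassembler more than the remaining opcode bytes, capped at the x86 maximum instruction length of 15. */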
1634 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
1635 uint32_t cbInstr = 1;
1636 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
1637 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
1638 iemThreadedDisasReadBytesDummy, NULL, &Dis, &cbInstr);
1639 if (RT_SUCCESS(rc))
1640 {
1641 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
1642 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
1643 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
1644 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
1645 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
1646 }
1647 else
1648 {
1649 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
1650 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
1651 cbInstr = paCalls[iCall].cbOpcode;
1652 }
1653 GCPhysPc += cbInstr;
1654 offRange += cbInstr;
1655 }
1656
1657 /*
1658 * Dump call details.
1659 */
1660 pHlp->pfnPrintf(pHlp,
1661 " Call #%u to %s (%u args)\n",
1662 iCall, g_apszIemThreadedFunctions[paCalls[iCall].enmFunction],
1663 g_acIemThreadedFunctionUsedArgs[paCalls[iCall].enmFunction]);
1664 if (paCalls[iCall].uTbLookup != 0)
1665 {
1666 uint8_t const idxFirst = IEM_TB_LOOKUP_TAB_GET_IDX(paCalls[iCall].uTbLookup);
1667 fTbLookupSeen0 = idxFirst == 0;
1668 iemThreadedDumpLookupTable(pTb, pHlp, idxFirst, IEM_TB_LOOKUP_TAB_GET_SIZE(paCalls[iCall].uTbLookup));
1669 }
1670
1671 /*
1672 * Snoop fExec.
1673 */
1674 switch (paCalls[iCall].enmFunction)
1675 {
1676 default:
1677 break;
1678 case kIemThreadedFunc_BltIn_CheckMode:
1679 fExec = paCalls[iCall].auParams[0];
1680 break;
1681 }
1682 }
1683
1684 if (!fTbLookupSeen0)
1685 iemThreadedDumpLookupTable(pTb, pHlp, 0, 1, " Fallback TB Lookup:");
1686}
1687
1688
1689
1690/**
1691  * Allocates a translation block for threaded recompilation.
1692  *
1693  * This is allocated with a maxed-out call table and storage for opcode bytes,
1694 * because it's only supposed to be called once per EMT to allocate the TB
1695 * pointed to by IEMCPU::pThrdCompileTbR3.
1696 *
1697 * @returns Pointer to the translation block on success, NULL on failure.
1698 * @param pVM The cross context virtual machine structure.
1699 * @param pVCpu The cross context virtual CPU structure of the calling
1700 * thread.
1701 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1702 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1703 */
1704static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1705{
1706 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1707 if (pTb)
1708 {
1709 unsigned const cCalls = 256;
1710 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1711 if (pTb->Thrd.paCalls)
1712 {
1713 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1714 if (pTb->pabOpcodes)
1715 {
1716 pVCpu->iem.s.cbOpcodesAllocated = cCalls * 16;
1717 pTb->Thrd.cAllocated = cCalls;
1718 pTb->Thrd.cCalls = 0;
1719 pTb->cbOpcodes = 0;
1720 pTb->pNext = NULL;
1721 pTb->cUsed = 0;
1722 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1723 pTb->idxAllocChunk = UINT8_MAX;
1724 pTb->GCPhysPc = GCPhysPc;
1725 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1726 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1727 pTb->cInstructions = 0;
1728 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1729
1730 /* Init the first opcode range. */
1731 pTb->cRanges = 1;
1732 pTb->aRanges[0].cbOpcodes = 0;
1733 pTb->aRanges[0].offOpcodes = 0;
1734 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1735 pTb->aRanges[0].u2Unused = 0;
1736 pTb->aRanges[0].idxPhysPage = 0;
1737 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1738 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1739
1740 return pTb;
1741 }
1742 RTMemFree(pTb->Thrd.paCalls);
1743 }
1744 RTMemFree(pTb);
1745 }
1746 RT_NOREF(pVM);
1747 return NULL;
1748}
1749
1750
1751/**
1752  * Called on the TB that is dedicated for recompilation before it's reused.
1753 *
1754 * @param pVCpu The cross context virtual CPU structure of the calling
1755 * thread.
1756 * @param pTb The translation block to reuse.
1757 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1758 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1759 */
1760static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1761{
1762 pTb->GCPhysPc = GCPhysPc;
1763 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1764 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1765 pTb->Thrd.cCalls = 0;
1766 pTb->cbOpcodes = 0;
1767 pTb->cInstructions = 0;
1768 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1769
1770 /* Init the first opcode range. */
1771 pTb->cRanges = 1;
1772 pTb->aRanges[0].cbOpcodes = 0;
1773 pTb->aRanges[0].offOpcodes = 0;
1774 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1775 pTb->aRanges[0].u2Unused = 0;
1776 pTb->aRanges[0].idxPhysPage = 0;
1777 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1778 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1779}
1780
1781
1782/**
1783  * Used to duplicate a threaded translation block after recompilation is done.
1784 *
1785 * @returns Pointer to the translation block on success, NULL on failure.
1786 * @param pVM The cross context virtual machine structure.
1787 * @param pVCpu The cross context virtual CPU structure of the calling
1788 * thread.
1789 * @param pTbSrc The TB to duplicate.
1790 */
1791static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1792{
1793 /*
1794 * Just using the heap for now. Will make this more efficient and
1795 * complicated later, don't worry. :-)
1796 */
1797 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1798 if (pTb)
1799 {
1800 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1801 memcpy(pTb, pTbSrc, sizeof(*pTb));
1802 pTb->idxAllocChunk = idxAllocChunk;
1803
1804 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1805 Assert(cCalls > 0);
1806 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1807 if (pTb->Thrd.paCalls)
1808 {
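            /* Note: the TB lookup table and the copied opcode bytes share a single
               allocation below - the (zeroed) lookup entries come first and the
               opcode bytes follow, padded up to pointer alignment. */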
1809 size_t const cbTbLookup = pTbSrc->cTbLookupEntries * sizeof(PIEMTB);
1810 Assert(cbTbLookup > 0);
1811 size_t const cbOpcodes = pTbSrc->cbOpcodes;
1812 Assert(cbOpcodes > 0);
1813 size_t const cbBoth = cbTbLookup + RT_ALIGN_Z(cbOpcodes, sizeof(PIEMTB));
1814 uint8_t * const pbBoth = (uint8_t *)RTMemAlloc(cbBoth);
1815 if (pbBoth)
1816 {
1817 RT_BZERO(pbBoth, cbTbLookup);
1818 pTb->pabOpcodes = (uint8_t *)memcpy(&pbBoth[cbTbLookup], pTbSrc->pabOpcodes, cbOpcodes);
1819 pTb->Thrd.cAllocated = cCalls;
1820 pTb->pNext = NULL;
1821 pTb->cUsed = 0;
1822 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1823 pTb->fFlags = pTbSrc->fFlags;
1824
1825 return pTb;
1826 }
1827 RTMemFree(pTb->Thrd.paCalls);
1828 }
1829 iemTbAllocatorFree(pVCpu, pTb);
1830 }
1831 RT_NOREF(pVM);
1832 return NULL;
1833
1834}
1835
1836
1837/**
1838 * Adds the given TB to the hash table.
1839 *
1840 * @param pVCpu The cross context virtual CPU structure of the calling
1841 * thread.
1842 * @param pTbCache The cache to add it to.
1843 * @param pTb The translation block to add.
1844 */
1845static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1846{
1847 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1848
1849 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbInstr, pTb->cInstructions);
1850 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbLookupEntries, pTb->cTbLookupEntries);
1851 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1852 if (LogIs12Enabled())
1853 {
1854 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1855 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1856 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1857 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1858 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1859 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1860 pTb->aRanges[idxRange].idxPhysPage == 0
1861 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1862 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1863 }
1864}
1865
1866
1867/**
1868 * Called by opcode verifier functions when they detect a problem.
1869 */
1870void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1871{
1872     /* We cannot free the current TB (i.e. when fSafeToFree is false) because:
1873 - A threaded TB will have its current call entry accessed
1874 to update pVCpu->iem.s.cInstructions.
1875 - A native TB will have code left to execute. */
1876 if (fSafeToFree)
1877 iemTbAllocatorFree(pVCpu, pTb);
1878 else
1879 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1880}
1881
1882
1883/*
1884 * Real code.
1885 */
1886
1887#ifdef LOG_ENABLED
1888/**
1889 * Logs the current instruction.
1890 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1891 * @param pszFunction The IEM function doing the execution.
1892 * @param idxInstr The instruction number in the block.
1893 */
1894static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction, uint32_t idxInstr) RT_NOEXCEPT
1895{
1896# ifdef IN_RING3
1897 if (LogIs2Enabled())
1898 {
1899 char szInstr[256];
1900 uint32_t cbInstr = 0;
1901 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
1902 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
1903 szInstr, sizeof(szInstr), &cbInstr);
1904
1905 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
1906 Log2(("**** %s fExec=%x pTb=%p cUsed=%u #%u\n"
1907 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
1908 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
1909 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
1910 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
1911 " %s\n"
1912 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3, pVCpu->iem.s.pCurTbR3 ? pVCpu->iem.s.pCurTbR3->cUsed : 0, idxInstr,
1913 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
1914 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
1915 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
1916 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
1917 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
1918 szInstr));
1919
1920 /*if (LogIs3Enabled()) - this outputs an insane amount of stuff, so disabled.
1921 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL); */
1922 }
1923 else
1924# endif
1925 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
1926 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
1927}
1928#endif /* LOG_ENABLED */
1929
1930
1931#if 0
1932static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1933{
1934 RT_NOREF(pVM, pVCpu);
1935 return rcStrict;
1936}
1937#endif
1938
1939
1940/**
1941 * Initializes the decoder state when compiling TBs.
1942 *
1943  * This presumes that fExec has already been initialized.
1944  *
1945  * This is very similar to iemInitDecoder() and iemReInitDecoder(), so fixes
1946  * made here may need to be applied to them as well.
1947 *
1948 * @param pVCpu The cross context virtual CPU structure of the calling
1949 * thread.
1950 * @param fReInit Clear for the first call for a TB, set for subsequent
1951 * calls from inside the compile loop where we can skip a
1952 * couple of things.
1953 * @param fExtraFlags The extra translation block flags when @a fReInit is
1954 * true, otherwise ignored. Only IEMTB_F_X86_INHIBIT_SHADOW is
1955 * checked.
1956 */
1957DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1958{
1959 /* ASSUMES: That iemInitExec was already called and that anyone changing
1960 CPU state affecting the fExec bits since then will have updated fExec! */
1961 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1962 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1963
1964 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1965
1966 /* Decoder state: */
1967 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1968 pVCpu->iem.s.enmEffAddrMode = enmMode;
1969 if (enmMode != IEMMODE_64BIT)
1970 {
1971 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1972 pVCpu->iem.s.enmEffOpSize = enmMode;
1973 }
1974 else
1975 {
1976 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1977 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1978 }
1979 pVCpu->iem.s.fPrefixes = 0;
1980 pVCpu->iem.s.uRexReg = 0;
1981 pVCpu->iem.s.uRexB = 0;
1982 pVCpu->iem.s.uRexIndex = 0;
1983 pVCpu->iem.s.idxPrefix = 0;
1984 pVCpu->iem.s.uVex3rdReg = 0;
1985 pVCpu->iem.s.uVexLength = 0;
1986 pVCpu->iem.s.fEvexStuff = 0;
1987 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
1988 pVCpu->iem.s.offModRm = 0;
1989 pVCpu->iem.s.iNextMapping = 0;
1990
1991 if (!fReInit)
1992 {
1993 pVCpu->iem.s.cActiveMappings = 0;
1994 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
1995 pVCpu->iem.s.fEndTb = false;
1996         pVCpu->iem.s.fTbCheckOpcodes = true; /* (check opcodes before executing the first instruction) */
1997 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1998 pVCpu->iem.s.fTbCrossedPage = false;
1999 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_X86_INHIBIT_SHADOW) ? 32 : 0;
2000 pVCpu->iem.s.idxLastCheckIrqCallNo = UINT16_MAX;
2001 pVCpu->iem.s.fTbCurInstrIsSti = false;
2002 /* Force RF clearing and TF checking on first instruction in the block
2003 as we don't really know what came before and should assume the worst: */
2004 pVCpu->iem.s.fTbPrevInstr = IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_END_TB;
2005 }
2006 else
2007 {
2008 Assert(pVCpu->iem.s.cActiveMappings == 0);
2009 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
2010 Assert(pVCpu->iem.s.fEndTb == false);
2011 Assert(pVCpu->iem.s.fTbCrossedPage == false);
2012 pVCpu->iem.s.fTbPrevInstr = pVCpu->iem.s.fTbCurInstr;
2013 }
2014 pVCpu->iem.s.fTbCurInstr = 0;
2015
2016#ifdef DBGFTRACE_ENABLED
2017 switch (IEM_GET_CPU_MODE(pVCpu))
2018 {
2019 case IEMMODE_64BIT:
2020 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
2021 break;
2022 case IEMMODE_32BIT:
2023 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
2024 break;
2025 case IEMMODE_16BIT:
2026 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
2027 break;
2028 }
2029#endif
2030}
2031
2032
2033/**
2034 * Initializes the opcode fetcher when starting the compilation.
2035 *
2036 * @param pVCpu The cross context virtual CPU structure of the calling
2037 * thread.
2038 */
2039DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
2040{
2041 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
2042#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
2043 pVCpu->iem.s.offOpcode = 0;
2044#else
2045 RT_NOREF(pVCpu);
2046#endif
2047}
2048
2049
2050/**
2051 * Re-initializes the opcode fetcher between instructions while compiling.
2052 *
2053 * @param pVCpu The cross context virtual CPU structure of the calling
2054 * thread.
2055 */
2056DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
2057{
2058 if (pVCpu->iem.s.pbInstrBuf)
2059 {
2060 uint64_t off = pVCpu->cpum.GstCtx.rip;
2061 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2062 off += pVCpu->cpum.GstCtx.cs.u64Base;
2063 off -= pVCpu->iem.s.uInstrBufPc;
2064 if (off < pVCpu->iem.s.cbInstrBufTotal)
2065 {
2066 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
2067 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
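            /* Advertise at most 15 bytes (the max instruction length) beyond the new
               instruction start, clamped to what the buffer actually holds. */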
2068 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
2069 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
2070 else
2071 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
2072 }
2073 else
2074 {
2075 pVCpu->iem.s.pbInstrBuf = NULL;
2076 pVCpu->iem.s.offInstrNextByte = 0;
2077 pVCpu->iem.s.offCurInstrStart = 0;
2078 pVCpu->iem.s.cbInstrBuf = 0;
2079 pVCpu->iem.s.cbInstrBufTotal = 0;
2080 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
2081 }
2082 }
2083 else
2084 {
2085 pVCpu->iem.s.offInstrNextByte = 0;
2086 pVCpu->iem.s.offCurInstrStart = 0;
2087 pVCpu->iem.s.cbInstrBuf = 0;
2088 pVCpu->iem.s.cbInstrBufTotal = 0;
2089#ifdef VBOX_STRICT
2090 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
2091#endif
2092 }
2093#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
2094 pVCpu->iem.s.offOpcode = 0;
2095#endif
2096}
2097
2098#ifdef LOG_ENABLED
2099
2100/**
2101 * Inserts a NOP call.
2102 *
2103 * This is for debugging.
2104 *
2105 * @returns true on success, false if we're out of call entries.
2106 * @param pTb The translation block being compiled.
2107 */
2108bool iemThreadedCompileEmitNop(PIEMTB pTb)
2109{
2110 /* Emit the call. */
2111 uint32_t const idxCall = pTb->Thrd.cCalls;
2112 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2113 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2114 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2115 pCall->enmFunction = kIemThreadedFunc_BltIn_Nop;
2116 pCall->idxInstr = pTb->cInstructions - 1;
2117 pCall->cbOpcode = 0;
2118 pCall->offOpcode = 0;
2119 pCall->uTbLookup = 0;
2120 pCall->fFlags = 0;
2121 pCall->auParams[0] = 0;
2122 pCall->auParams[1] = 0;
2123 pCall->auParams[2] = 0;
2124 return true;
2125}
2126
2127
2128/**
2129 * Called by iemThreadedCompile if cpu state logging is desired.
2130 *
2131 * @returns true on success, false if we're out of call entries.
2132 * @param pTb The translation block being compiled.
2133 */
2134bool iemThreadedCompileEmitLogCpuState(PIEMTB pTb)
2135{
2136 /* Emit the call. */
2137 uint32_t const idxCall = pTb->Thrd.cCalls;
2138 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2139 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2140 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2141 pCall->enmFunction = kIemThreadedFunc_BltIn_LogCpuState;
2142 pCall->idxInstr = pTb->cInstructions - 1;
2143 pCall->cbOpcode = 0;
2144 pCall->offOpcode = 0;
2145 pCall->uTbLookup = 0;
2146 pCall->fFlags = 0;
2147 pCall->auParams[0] = RT_MAKE_U16(pCall->idxInstr, idxCall); /* currently not used, but whatever */
2148 pCall->auParams[1] = 0;
2149 pCall->auParams[2] = 0;
2150 return true;
2151}
2152
2153#endif /* LOG_ENABLED */
2154
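/**
 * Copies the given number of opcode bytes (1..15, the x86 maximum) from the
 * decoder's abOpcode buffer to @a pbDst.  Unrolled rather than calling memcpy,
 * presumably to keep this hot path cheap for the typically short instructions.
 */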
2155DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
2156{
2157 switch (cbInstr)
2158 {
2159 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
2160 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
2161 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
2162 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
2163 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
2164 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
2165 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
2166 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
2167 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
2168 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
2169 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
2170 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
2171 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
2172 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
2173 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
2174 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
2175 }
2176}
2177
2178#ifdef IEM_WITH_INTRA_TB_JUMPS
2179
2180/**
2181 * Emits the necessary tail calls for a full TB loop-jump.
2182 */
2183static bool iemThreadedCompileFullTbJump(PVMCPUCC pVCpu, PIEMTB pTb)
2184{
2185 /*
2186 * We need a timer and maybe IRQ check before jumping, so make sure
2187 * we've got sufficient call entries left before emitting anything.
2188 */
2189 uint32_t idxCall = pTb->Thrd.cCalls;
2190 if (idxCall + 1U <= pTb->Thrd.cAllocated)
2191 {
2192 /*
2193 * We're good, emit the calls.
2194 */
2195 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2196 pTb->Thrd.cCalls = (uint16_t)(idxCall + 2);
2197
2198 /* Always check timers as we risk getting stuck in a loop otherwise. We
2199 combine it with an IRQ check if that's not performed in the TB already. */
2200 pCall->enmFunction = pVCpu->iem.s.idxLastCheckIrqCallNo < idxCall
2201 ? kIemThreadedFunc_BltIn_CheckTimers
2202 : kIemThreadedFunc_BltIn_CheckTimersAndIrq;
2203 pCall->idxInstr = 0;
2204 pCall->offOpcode = 0;
2205 pCall->cbOpcode = 0;
2206 pCall->uTbLookup = 0;
2207 pCall->fFlags = 0;
2208 pCall->auParams[0] = 0;
2209 pCall->auParams[1] = 0;
2210 pCall->auParams[2] = 0;
2211 pCall++;
2212
2213 /* The jump callentry[0]. */
2214 pCall->enmFunction = kIemThreadedFunc_BltIn_Jump;
2215 pCall->idxInstr = 0;
2216 pCall->offOpcode = 0;
2217 pCall->cbOpcode = 0;
2218 pCall->uTbLookup = 0;
2219 pCall->fFlags = 0;
2220 pCall->auParams[0] = 0; /* jump target is call zero */
2221 pCall->auParams[1] = 0;
2222 pCall->auParams[2] = 0;
2223
2224 /* Mark callentry #0 as a jump target. */
2225 pTb->Thrd.paCalls[0].fFlags |= IEMTHREADEDCALLENTRY_F_JUMP_TARGET;
2226 }
2227
2228 return false;
2229}
2230
2231/**
2232 * Called by IEM_MC2_BEGIN_EMIT_CALLS when it detects that we're back at the
2233 * first instruction and we didn't just branch to it (that's handled below).
2234 *
2235 * This will emit a loop iff everything is compatible with that.
2236 */
2237DECLHIDDEN(int) iemThreadedCompileBackAtFirstInstruction(PVMCPU pVCpu, PIEMTB pTb) RT_NOEXCEPT
2238{
2239 /* Check if the mode matches. */
2240 if ( (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
2241 == (pTb->fFlags & IEMTB_F_KEY_MASK & ~IEMTB_F_X86_CS_LIM_CHECKS))
2242 {
2243 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopFullTbDetected2);
2244 iemThreadedCompileFullTbJump(pVCpu, pTb);
2245 }
2246 return VINF_IEM_RECOMPILE_END_TB;
2247}
2248
2249#endif /* IEM_WITH_INTRA_TB_JUMPS */
2250
2251
2252/**
2253 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
2254 *
2255 * - CS LIM check required.
2256 * - Must recheck opcode bytes.
2257 * - Previous instruction branched.
2258 * - TLB load detected, probably due to page crossing.
2259 *
2260 * @returns true if everything went well, false if we're out of space in the TB
2261 * (e.g. opcode ranges) or needs to start doing CS.LIM checks.
2262 * @param pVCpu The cross context virtual CPU structure of the calling
2263 * thread.
2264 * @param pTb The translation block being compiled.
2265 */
2266bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
2267{
2268 Log6(("%04x:%08RX64: iemThreadedCompileBeginEmitCallsComplications\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2269 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
2270#if 0
2271 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
2272 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
2273#endif
2274
2275 /*
2276 * If we're not in 64-bit mode and not already checking CS.LIM we need to
2277      * see whether we need to start checking.
2278 */
2279 bool fConsiderCsLimChecking;
2280 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
2281 if ( fMode == IEM_F_MODE_X86_64BIT
2282 || (pTb->fFlags & IEMTB_F_X86_CS_LIM_CHECKS)
2283 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
2284 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
2285 fConsiderCsLimChecking = false; /* already enabled or not needed */
2286 else
2287 {
2288 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2289 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2290 fConsiderCsLimChecking = true; /* likely */
2291 else
2292 {
2293 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
2294 return false;
2295 }
2296 }
2297
2298 /*
2299      * Prepare the call now, even before we know if we can accept the instruction in this TB.
2300      * This allows us to amend parameters w/o making every case suffer.
2301 */
2302 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
2303 uint16_t const offOpcode = pTb->cbOpcodes;
2304 uint8_t idxRange = pTb->cRanges - 1;
2305
2306 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
2307 pCall->idxInstr = pTb->cInstructions;
2308 pCall->cbOpcode = cbInstr;
2309 pCall->offOpcode = offOpcode;
2310 pCall->uTbLookup = 0;
2311 pCall->fFlags = 0;
2312 pCall->auParams[0] = (uint32_t)cbInstr
2313 | (uint32_t)(pVCpu->iem.s.fExec << 8) /* liveness: Enough of fExec for IEM_F_MODE_X86_IS_FLAT. */
2314 /* The upper dword is sometimes used for cbStartPage. */;
2315 pCall->auParams[1] = idxRange;
2316 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
2317
2318/** @todo check if we require IEMTB_F_X86_CS_LIM_CHECKS for any new page we've
2319 * gotten onto. If we do, stop */
2320
2321 /*
2322 * Case 1: We've branched (RIP changed).
2323 *
2324      * Loop check: If the new PC (GCPhysPC) is within an opcode range of this
2325      * TB, end the TB here as it is most likely a loop; and if it
2326 * made sense to unroll it, the guest code compiler should've
2327 * done it already.
2328 *
2329 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
2330 * Req: 1 extra range, no extra phys.
2331 *
2332      * Sub-case 1b: Different page but no page boundary crossing, so TLB load
2333 * necessary (fTbCrossedPage is true).
2334 * Req: 1 extra range, probably 1 extra phys page entry.
2335 *
2336 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
2337 * but in addition we cross into the following page and require
2338 * another TLB load.
2339 * Req: 2 extra ranges, probably 2 extra phys page entries.
2340 *
2341 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
2342 * the following page (thus fTbCrossedPage is true).
2343 * Req: 2 extra ranges, probably 1 extra phys page entry.
2344 *
2345      * Note! The setting of fTbCrossedPage is done by iemOpcodeFetchBytesJmp, but
2346      * it may trigger "spuriously" from the CPU point of view because of
2347      * physical page changes that'll invalidate the physical TLB and trigger a
2348      * call to the function. In theory this shouldn't be a big deal, just a bit
2349      * of performance loss as we'll pick the LoadingTlb variants.
2350 *
2351 * Note! We do not currently optimize branching to the next instruction (sorry
2352 * 32-bit PIC code). We could maybe do that in the branching code that
2353 * sets (or not) fTbBranched.
2354 */
2355 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
2356 * variant in win 3.1 code and the call variant in 32-bit linux PIC
2357 * code. This'll require filtering out far jmps and calls, as they
2358 * load CS which should technically be considered indirect since the
2359 * GDT/LDT entry's base address can be modified independently from
2360 * the code. */
2361 if (pVCpu->iem.s.fTbBranched != IEMBRANCHED_F_NO)
2362 {
2363 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
2364 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
2365 {
2366 /* 1a + 1b - instruction fully within the branched to page. */
2367 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
2368 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
2369
2370 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
2371 {
2372 /* Check that we've got a free range. */
2373 idxRange += 1;
2374 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2375 { /* likely */ }
2376 else
2377 {
2378 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2379 return false;
2380 }
2381 pCall->auParams[1] = idxRange;
2382 pCall->auParams[2] = 0;
2383
2384 /* Check that we've got a free page slot. */
2385 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2386 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
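                /* Note: idxPhysPage doubles as the key for the loop check below: 0/1/2 when
                   the branch target page is already known to this TB, UINT8_MAX when it was
                   just added (no earlier range can lie on a brand new page, so the loop
                   check can be skipped). */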
2387 uint8_t idxPhysPage;
2388 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2389 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 0;
2390 else if (pTb->aGCPhysPages[0] == NIL_RTGCPHYS)
2391 {
2392 pTb->aGCPhysPages[0] = GCPhysNew;
2393 pTb->aRanges[idxRange].idxPhysPage = 1;
2394 idxPhysPage = UINT8_MAX;
2395 }
2396 else if (pTb->aGCPhysPages[0] == GCPhysNew)
2397 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 1;
2398 else if (pTb->aGCPhysPages[1] == NIL_RTGCPHYS)
2399 {
2400 pTb->aGCPhysPages[1] = GCPhysNew;
2401 pTb->aRanges[idxRange].idxPhysPage = 2;
2402 idxPhysPage = UINT8_MAX;
2403 }
2404 else if (pTb->aGCPhysPages[1] == GCPhysNew)
2405 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 2;
2406 else
2407 {
2408                     Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2409 return false;
2410 }
2411
2412 /* Loop check: We weave the loop check in here to optimize the lookup. */
2413 if (idxPhysPage != UINT8_MAX)
2414 {
2415 uint32_t const offPhysPc = pVCpu->iem.s.offCurInstrStart;
2416 for (uint8_t idxLoopRange = 0; idxLoopRange < idxRange; idxLoopRange++)
2417 if ( pTb->aRanges[idxLoopRange].idxPhysPage == idxPhysPage
2418 && offPhysPc - (uint32_t)pTb->aRanges[idxLoopRange].offPhysPage
2419 < (uint32_t)pTb->aRanges[idxLoopRange].cbOpcodes)
2420 {
2421 Log8(("%04x:%08RX64: loop detected after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2422#ifdef IEM_WITH_INTRA_TB_JUMPS
2423 /* If we're looping back to the start of the TB and the mode is still the same,
2424 we could emit a jump optimization. For now we don't do page transitions
2425 as that implies TLB loading and such. */
2426 if ( idxLoopRange == 0
2427 && offPhysPc == pTb->aRanges[0].offPhysPage
2428 && (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK & IEMTB_F_KEY_MASK)
2429 == (pTb->fFlags & IEMTB_F_KEY_MASK & ~IEMTB_F_X86_CS_LIM_CHECKS)
2430 && (pVCpu->iem.s.fTbBranched & ( IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR
2431 | IEMBRANCHED_F_STACK | IEMBRANCHED_F_RELATIVE))
2432 == IEMBRANCHED_F_RELATIVE)
2433 {
2434 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopFullTbDetected);
2435 return iemThreadedCompileFullTbJump(pVCpu, pTb);
2436 }
2437#endif
2438 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbLoopInTbDetected);
2439 return false;
2440 }
2441 }
2442
2443 /* Finish setting up the new range. */
2444 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2445 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2446 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2447 pTb->aRanges[idxRange].u2Unused = 0;
2448 pTb->cRanges++;
2449 Log6(("%04x:%08RX64: new range #%u same page: offPhysPage=%#x offOpcodes=%#x\n",
2450 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].offPhysPage,
2451 pTb->aRanges[idxRange].offOpcodes));
2452 }
2453 else
2454 {
2455 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2456 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2457 }
2458
2459             /* Determine which function we need to load & check.
2460                Note! For jumps to a new page, we'll set both fTbBranched and
2461                      fTbCrossedPage to avoid unnecessary TLB work for intra-page
2462                      branching. */
2463 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
2464 || pVCpu->iem.s.fTbCrossedPage)
2465 pCall->enmFunction = pTb->fFlags & IEMTB_F_X86_CS_LIM_CHECKS
2466 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
2467 : !fConsiderCsLimChecking
2468 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
2469 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
2470 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
2471 pCall->enmFunction = pTb->fFlags & IEMTB_F_X86_CS_LIM_CHECKS
2472 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
2473 : !fConsiderCsLimChecking
2474 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
2475 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
2476 else
2477 {
2478 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
2479 pCall->enmFunction = pTb->fFlags & IEMTB_F_X86_CS_LIM_CHECKS
2480 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2481 : !fConsiderCsLimChecking
2482 ? kIemThreadedFunc_BltIn_CheckOpcodes
2483 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
2484 }
2485 }
2486 else
2487 {
2488 /* 1c + 1d - instruction crosses pages. */
2489 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2490 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2491
2492             /* Lazy bird: Check that this isn't case 1c, since we've already
2493                          loaded the first physical address. End the TB and
2494 make it a case 2b instead.
2495
2496 Hmm. Too much bother to detect, so just do the same
2497 with case 1d as well. */
2498#if 0 /** @todo get back to this later when we've got the actual branch code in
2499 * place. */
2500 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2501
2502 /* Check that we've got two free ranges. */
2503 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
2504 { /* likely */ }
2505 else
2506 return false;
2507 idxRange += 1;
2508 pCall->auParams[1] = idxRange;
2509 pCall->auParams[2] = 0;
2510
2511 /* ... */
2512
2513#else
2514 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2515 return false;
2516#endif
2517 }
2518 }
2519
2520 /*
2521 * Case 2: Page crossing.
2522 *
2523 * Sub-case 2a: The instruction starts on the first byte in the next page.
2524 *
2525 * Sub-case 2b: The instruction has opcode bytes in both the current and
2526 * following page.
2527 *
2528      * Both cases require a new range table entry and probably a new physical
2529 * page entry. The difference is in which functions to emit and whether to
2530 * add bytes to the current range.
2531 */
2532 else if (pVCpu->iem.s.fTbCrossedPage)
2533 {
2534 /* Check that we've got a free range. */
2535 idxRange += 1;
2536 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2537 { /* likely */ }
2538 else
2539 {
2540 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2541 return false;
2542 }
2543
2544 /* Check that we've got a free page slot. */
2545 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2546 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2547 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2548 pTb->aRanges[idxRange].idxPhysPage = 0;
2549 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
2550 || pTb->aGCPhysPages[0] == GCPhysNew)
2551 {
2552 pTb->aGCPhysPages[0] = GCPhysNew;
2553 pTb->aRanges[idxRange].idxPhysPage = 1;
2554 }
2555 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
2556 || pTb->aGCPhysPages[1] == GCPhysNew)
2557 {
2558 pTb->aGCPhysPages[1] = GCPhysNew;
2559 pTb->aRanges[idxRange].idxPhysPage = 2;
2560 }
2561 else
2562 {
2563             Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2564 return false;
2565 }
2566
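        /* Sub-case 2a if the previous range ended exactly on a page boundary (the new
           instruction starts on the first byte of the new page), otherwise sub-case 2b
           (the instruction straddles the boundary). */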
2567 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
2568 {
2569 Assert(pVCpu->iem.s.offCurInstrStart == 0);
2570 pCall->auParams[1] = idxRange;
2571 pCall->auParams[2] = 0;
2572
2573 /* Finish setting up the new range. */
2574 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2575 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2576 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2577 pTb->aRanges[idxRange].u2Unused = 0;
2578 pTb->cRanges++;
2579 Log6(("%04x:%08RX64: new range #%u new page (a) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2580 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2581 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2582
2583             /* Determine which function we need to load & check. */
2584 pCall->enmFunction = pTb->fFlags & IEMTB_F_X86_CS_LIM_CHECKS
2585 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
2586 : !fConsiderCsLimChecking
2587 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
2588 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
2589 }
2590 else
2591 {
2592 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2593 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
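            /* cbStartPage is the number of opcode bytes that fall on the old page; pass it
               to the check function in the upper dword of auParams[0] (see the note above). */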
2594 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2595 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
2596
2597             /* We're good. Split the instruction over the old and new range table entries. */
2598 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
2599
2600 pTb->aRanges[idxRange].offPhysPage = 0;
2601 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
2602 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
2603 pTb->aRanges[idxRange].u2Unused = 0;
2604 pTb->cRanges++;
2605 Log6(("%04x:%08RX64: new range #%u new page (b) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2606 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2607 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2608
2609             /* Determine which function we need to load & check. */
2610 if (pVCpu->iem.s.fTbCheckOpcodes)
2611 pCall->enmFunction = pTb->fFlags & IEMTB_F_X86_CS_LIM_CHECKS
2612 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
2613 : !fConsiderCsLimChecking
2614 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
2615 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
2616 else
2617 pCall->enmFunction = pTb->fFlags & IEMTB_F_X86_CS_LIM_CHECKS
2618 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
2619 : !fConsiderCsLimChecking
2620 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
2621 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
2622 }
2623 }
2624
2625 /*
2626 * Regular case: No new range required.
2627 */
2628 else
2629 {
2630 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_X86_CS_LIM_CHECKS));
2631 if (pVCpu->iem.s.fTbCheckOpcodes)
2632 pCall->enmFunction = pTb->fFlags & IEMTB_F_X86_CS_LIM_CHECKS
2633 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2634 : kIemThreadedFunc_BltIn_CheckOpcodes;
2635 else
2636 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
2637
2638 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2639 pTb->cbOpcodes = offOpcode + cbInstr;
2640 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2641 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2642 }
2643
2644 /*
2645 * Commit the call.
2646 */
2647 pTb->Thrd.cCalls++;
2648
2649 /*
2650 * Clear state.
2651 */
2652 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2653 pVCpu->iem.s.fTbCrossedPage = false;
2654 pVCpu->iem.s.fTbCheckOpcodes = false;
2655
2656 /*
2657 * Copy opcode bytes.
2658 */
2659 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2660 pTb->cbOpcodes = offOpcode + cbInstr;
2661 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2662
2663 return true;
2664}
2665
2666
2667/**
2668 * Worker for iemThreadedCompileBeginEmitCallsComplications and
2669  * iemThreadedCompileCheckIrq that checks for pending deliverable events.
2670 *
2671 * @returns true if anything is pending, false if not.
2672 * @param pVCpu The cross context virtual CPU structure of the calling
2673 * thread.
2674 */
2675DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
2676{
2677 uint64_t fCpu = pVCpu->fLocalForcedActions;
2678 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
2679#if 1
2680 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
2681 if (RT_LIKELY( !fCpu
2682 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2683 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2684 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
2685 return false;
2686 return true;
2687#else
2688 return false;
2689#endif
2690
2691}
2692
2693
2694/**
2695 * Called by iemThreadedCompile when a block requires a mode check.
2696 *
2697 * @returns true if we should continue, false if we're out of call entries.
2698 * @param pVCpu The cross context virtual CPU structure of the calling
2699 * thread.
2700 * @param pTb The translation block being compiled.
2701 */
2702static bool iemThreadedCompileEmitCheckMode(PVMCPUCC pVCpu, PIEMTB pTb)
2703{
2704 /* Emit the call. */
2705 uint32_t const idxCall = pTb->Thrd.cCalls;
2706 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2707 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2708 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2709 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckMode;
2710 pCall->idxInstr = pTb->cInstructions - 1;
2711 pCall->cbOpcode = 0;
2712 pCall->offOpcode = 0;
2713 pCall->uTbLookup = 0;
2714 pCall->fFlags = 0;
2715 pCall->auParams[0] = pVCpu->iem.s.fExec;
2716 pCall->auParams[1] = 0;
2717 pCall->auParams[2] = 0;
2718 LogFunc(("%04x:%08RX64 fExec=%#x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->iem.s.fExec));
2719 return true;
2720}
2721
2722
2723/**
2724 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
2725 * set.
2726 *
2727 * @returns true if we should continue, false if an IRQ is deliverable or a
2728 * relevant force flag is pending.
2729 * @param pVCpu The cross context virtual CPU structure of the calling
2730 * thread.
2731 * @param pTb The translation block being compiled.
2732 * @sa iemThreadedCompileCheckIrq
2733 */
2734bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
2735{
2736 /*
2737      * Skip this if we've already emitted a call after the previous instruction
2738 * or if it's the first call, as we're always checking FFs between blocks.
2739 */
2740 uint32_t const idxCall = pTb->Thrd.cCalls;
2741 if ( idxCall > 0
2742 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2743 {
2744 /* Emit the call. */
2745 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2746 pVCpu->iem.s.idxLastCheckIrqCallNo = (uint16_t)idxCall;
2747 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2748 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2749 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2750 pCall->idxInstr = pTb->cInstructions;
2751 pCall->offOpcode = 0;
2752 pCall->cbOpcode = 0;
2753 pCall->uTbLookup = 0;
2754 pCall->fFlags = 0;
2755 pCall->auParams[0] = 0;
2756 pCall->auParams[1] = 0;
2757 pCall->auParams[2] = 0;
2758 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2759
2760 /* Reset the IRQ check value. */
2761 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2762
2763 /*
2764 * Check for deliverable IRQs and pending force flags.
2765 */
2766 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2767 }
2768 return true; /* continue */
2769}
2770
2771
2772/**
2773 * Emits an IRQ check call and checks for pending IRQs.
2774 *
2775 * @returns true if we should continue, false if an IRQ is deliverable or a
2776 * relevant force flag is pending.
2777 * @param pVCpu The cross context virtual CPU structure of the calling
2778 * thread.
2779  * @param pTb The translation block.
2780 * @sa iemThreadedCompileBeginEmitCallsComplications
2781 */
2782static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2783{
2784 /* Check again in a little bit, unless it is immediately following an STI
2785 in which case we *must* check immediately after the next instruction
2786 as well in case it's executed with interrupt inhibition. We could
2787        otherwise miss the interrupt window. See the irq2 wait2 variant in
2788 bs3-timers-1 which is doing sti + sti + cli. */
2789 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2790 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2791 else
2792 {
2793 pVCpu->iem.s.fTbCurInstrIsSti = false;
2794 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2795 }
2796 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2797
2798 /*
2799 * Emit the call.
2800 */
2801 uint32_t const idxCall = pTb->Thrd.cCalls;
2802 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2803 pVCpu->iem.s.idxLastCheckIrqCallNo = (uint16_t)idxCall;
2804 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2805 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2806 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2807 pCall->idxInstr = pTb->cInstructions;
2808 pCall->offOpcode = 0;
2809 pCall->cbOpcode = 0;
2810 pCall->uTbLookup = 0;
2811 pCall->fFlags = 0;
2812 pCall->auParams[0] = 0;
2813 pCall->auParams[1] = 0;
2814 pCall->auParams[2] = 0;
2815
2816 /*
2817 * Check for deliverable IRQs and pending force flags.
2818 */
2819 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2820}
2821
2822
2823/**
2824 * Compiles a new TB and executes it.
2825 *
2826  * We combine compilation and execution here as it makes for simpler code flow
2827  * in the main loop and allows interpreting while compiling if we want to
2828 * explore that option.
2829 *
2830 * @returns Strict VBox status code.
2831 * @param pVM The cross context virtual machine structure.
2832 * @param pVCpu The cross context virtual CPU structure of the calling
2833 * thread.
2834 * @param GCPhysPc The physical address corresponding to the current
2835 * RIP+CS.BASE.
2836 * @param fExtraFlags Extra translation block flags: IEMTB_F_X86_INHIBIT_SHADOW,
2837 * IEMTB_F_X86_INHIBIT_NMI, IEMTB_F_X86_CS_LIM_CHECKS.
2838 */
2839static IEM_DECL_MSC_GUARD_IGNORE VBOXSTRICTRC
2840iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2841{
2842 IEMTLBTRACE_TB_COMPILE(pVCpu, GCPhysPc);
2843 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2844 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2845
2846 /*
2847      * Get the TB we use for the recompiling. This is a maxed-out TB that
2848      * we'll make a more efficient copy of when we're done compiling.
2849 */
2850 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2851 if (pTb)
2852 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2853 else
2854 {
2855 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2856 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2857 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2858 }
2859 pTb->FlatPc = pVCpu->iem.s.uInstrBufPc | (GCPhysPc & GUEST_PAGE_OFFSET_MASK);
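    /* Reconstruct the flat PC from the (page-aligned) flat address the opcode buffer maps
       and the page offset of GCPhysPc; used when dumping the TB and when saving it for
       native recompiler profiling. */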
2860
2861 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2862 functions may get at it. */
2863 pVCpu->iem.s.pCurTbR3 = pTb;
2864
2865#if 0
2866 /* Make sure the CheckIrq condition matches the one in EM. */
2867 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2868 const uint32_t cZeroCalls = 1;
2869#else
2870 const uint32_t cZeroCalls = 0;
2871#endif
2872
2873 /*
2874      * Now for the recompilation. (This mimics IEMExecLots in many ways.)
2875 */
2876 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2877 iemThreadedCompileInitOpcodeFetching(pVCpu);
2878 VBOXSTRICTRC rcStrict;
2879 for (;;)
2880 {
2881 /* Process the next instruction. */
2882#ifdef LOG_ENABLED
2883 iemThreadedLogCurInstr(pVCpu, "CC", pTb->cInstructions);
2884 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2885 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2886 Assert(uCsLog != 0 || uRipLog > 0x400 || !IEM_IS_REAL_OR_V86_MODE(pVCpu)); /* Detect executing RM interrupt table. */
2887#endif
2888 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2889 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2890
2891 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2892#if 0
2893 for (unsigned i = cCallsPrev; i < pTb->Thrd.cCalls; i++)
2894 Log8(("-> %#u/%u - %d %s\n", i, pTb->Thrd.paCalls[i].idxInstr, pTb->Thrd.paCalls[i].enmFunction,
2895 g_apszIemThreadedFunctions[pTb->Thrd.paCalls[i].enmFunction]));
2896#endif
2897 if ( rcStrict == VINF_SUCCESS
2898 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2899 && !pVCpu->iem.s.fEndTb)
2900 {
2901 Assert(pTb->Thrd.cCalls > cCallsPrev);
2902 Assert(cCallsPrev - pTb->Thrd.cCalls < 5);
2903
2904 pVCpu->iem.s.cInstructions++;
2905
2906 /* Check for mode change _after_ certain CIMPL calls, so check that
2907 we continue executing with the same mode value. */
2908 if (!(pVCpu->iem.s.fTbCurInstr & (IEM_CIMPL_F_MODE | IEM_CIMPL_F_XCPT | IEM_CIMPL_F_VMEXIT)))
2909 { /* probable */ }
2910 else if (RT_LIKELY(iemThreadedCompileEmitCheckMode(pVCpu, pTb)))
2911 { /* extremely likely */ }
2912 else
2913 break;
2914
2915#if defined(LOG_ENABLED) && 0 /* for debugging */
2916 //iemThreadedCompileEmitNop(pTb);
2917 iemThreadedCompileEmitLogCpuState(pTb);
2918#endif
2919 }
2920 else
2921 {
2922 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2923 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2924 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2925 rcStrict = VINF_SUCCESS;
2926
2927 if (pTb->Thrd.cCalls > cZeroCalls)
2928 {
2929 if (cCallsPrev != pTb->Thrd.cCalls)
2930 pVCpu->iem.s.cInstructions++;
2931 break;
2932 }
2933
2934 pVCpu->iem.s.pCurTbR3 = NULL;
2935 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2936 }
2937
2938 /* Check for IRQs? */
2939 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2940 pVCpu->iem.s.cInstrTillIrqCheck--;
2941 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2942 break;
2943
2944 /* Still space in the TB? */
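        /* (Keep headroom for whatever the next instruction may add: a handful of call
           entries, a full-length instruction's worth of opcode bytes, and at least one
           more TB lookup entry.) */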
2945 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2946 && pTb->cbOpcodes + 16 <= pVCpu->iem.s.cbOpcodesAllocated
2947 && pTb->cTbLookupEntries < 127)
2948 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2949 else
2950 {
2951 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes, %u TB lookup entries - full\n",
2952 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes, pTb->cTbLookupEntries));
2953 break;
2954 }
2955 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2956 }
2957
2958 /*
2959 * Reserve lookup space for the final call entry if necessary.
2960 */
2961 PIEMTHRDEDCALLENTRY pFinalCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls - 1];
2962 if (pTb->Thrd.cCalls > 1)
2963 {
2964 if (pFinalCall->uTbLookup == 0)
2965 {
2966 pFinalCall->uTbLookup = IEM_TB_LOOKUP_TAB_MAKE(pTb->cTbLookupEntries, 0);
2967 pTb->cTbLookupEntries += 1;
2968 }
2969 }
2970 else if (pFinalCall->uTbLookup != 0)
2971 {
2972 Assert(pTb->cTbLookupEntries > 1);
2973 pFinalCall->uTbLookup -= 1;
2974 pTb->cTbLookupEntries -= 1;
2975 }
2976
2977 /*
2978 * Duplicate the TB into a completed one and link it.
2979 */
2980 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2981 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2982
2983 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
2984
2985#ifdef IEM_COMPILE_ONLY_MODE
2986 /*
2987 * Execute the translation block.
2988 */
2989#endif
2990
2991 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2992}
2993
2994
2995
2996/*********************************************************************************************************************************
2997* Threaded Translation Block Saving and Restoring for Profiling the Native Recompiler *
2998*********************************************************************************************************************************/
2999#if defined(VBOX_WITH_IEM_NATIVE_RECOMPILER) && defined(VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING)
3000# include <iprt/message.h>
3001
3002static const SSMFIELD g_aIemThreadedTbFields[] =
3003{
3004 SSMFIELD_ENTRY( IEMTB, cUsed),
3005 SSMFIELD_ENTRY( IEMTB, msLastUsed),
3006 SSMFIELD_ENTRY_GCPHYS(IEMTB, GCPhysPc),
3007 SSMFIELD_ENTRY( IEMTB, fFlags),
3008 SSMFIELD_ENTRY( IEMTB, x86.fAttr),
3009 SSMFIELD_ENTRY( IEMTB, cRanges),
3010 SSMFIELD_ENTRY( IEMTB, cInstructions),
3011 SSMFIELD_ENTRY( IEMTB, Thrd.cCalls),
3012 SSMFIELD_ENTRY( IEMTB, cTbLookupEntries),
3013 SSMFIELD_ENTRY( IEMTB, cbOpcodes),
3014 SSMFIELD_ENTRY( IEMTB, FlatPc),
3015 SSMFIELD_ENTRY_GCPHYS(IEMTB, aGCPhysPages[0]),
3016 SSMFIELD_ENTRY_GCPHYS(IEMTB, aGCPhysPages[1]),
3017 SSMFIELD_ENTRY_TERM()
3018};
3019
3020/**
3021 * Saves a threaded TB to a dedicated saved state file.
3022 */
3023static void iemThreadedSaveTbForProfiling(PVMCPU pVCpu, PCIEMTB pTb)
3024{
3025 /* Only VCPU #0 for now. */
3026 if (pVCpu->idCpu != 0)
3027 return;
3028
3029 /*
3030 * Get the SSM handle, lazily opening the output file.
3031 */
3032 PSSMHANDLE const pNil = (PSSMHANDLE)~(uintptr_t)0; Assert(!RT_VALID_PTR(pNil));
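    /* pNil is a sentinel meaning "we already tried to open/initialize the output file
       and failed", so we don't retry the open for every TB that gets compiled. */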
3033 PSSMHANDLE pSSM = pVCpu->iem.s.pSsmThreadedTbsForProfiling;
3034 if (pSSM && pSSM != pNil)
3035 { /* likely */ }
3036 else if (pSSM)
3037 return;
3038 else
3039 {
3040 pVCpu->iem.s.pSsmThreadedTbsForProfiling = pNil;
3041 int rc = SSMR3Open("ThreadedTBsForRecompilerProfiling.sav", NULL, NULL, SSM_OPEN_F_FOR_WRITING, &pSSM);
3042 AssertLogRelRCReturnVoid(rc);
3043
3044 rc = SSMR3WriteFileHeader(pSSM, 1);
3045 AssertLogRelRCReturnVoid(rc); /* leaks SSM handle, but whatever. */
3046
3047 rc = SSMR3WriteUnitBegin(pSSM, "threaded-tbs", 1, 0);
3048 AssertLogRelRCReturnVoid(rc); /* leaks SSM handle, but whatever. */
3049 pVCpu->iem.s.pSsmThreadedTbsForProfiling = pSSM;
3050 }
3051
3052 /*
3053 * Do the actual saving.
3054 */
3055 SSMR3PutU32(pSSM, 0); /* Indicates that another TB follows. */
3056
3057 /* The basic structure. */
3058 SSMR3PutStructEx(pSSM, pTb, sizeof(*pTb), 0 /*fFlags*/, g_aIemThreadedTbFields, NULL);
3059
3060 /* The ranges. */
3061 for (uint32_t iRange = 0; iRange < pTb->cRanges; iRange++)
3062 {
3063 SSMR3PutU16(pSSM, pTb->aRanges[iRange].offOpcodes);
3064 SSMR3PutU16(pSSM, pTb->aRanges[iRange].cbOpcodes);
3065 SSMR3PutU16(pSSM, pTb->aRanges[iRange].offPhysPage | (pTb->aRanges[iRange].idxPhysPage << 14));
3066 }
3067
3068 /* The opcodes. */
3069 SSMR3PutMem(pSSM, pTb->pabOpcodes, pTb->cbOpcodes);
3070
3071 /* The threaded call table. */
3072 int rc = SSMR3PutMem(pSSM, pTb->Thrd.paCalls, sizeof(*pTb->Thrd.paCalls) * pTb->Thrd.cCalls);
3073 AssertLogRelMsgStmt(RT_SUCCESS(rc), ("rc=%Rrc\n", rc), pVCpu->iem.s.pSsmThreadedTbsForProfiling = pNil);
3074}
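
/*
 * Informal sketch of the stream layout produced above and consumed by
 * IEMR3ThreadedProfileRecompilingSavedTbs() further below; derived by reading
 * the code, not an authoritative format specification:
 *
 *      For each TB:    uint32_t                0          ('another TB follows' marker)
 *                      IEMTB fields            per g_aIemThreadedTbFields
 *                      For each range:         uint16_t   offOpcodes
 *                                              uint16_t   cbOpcodes
 *                                              uint16_t   offPhysPage | (idxPhysPage << 14)
 *                      uint8_t[cbOpcodes]      the raw opcode bytes
 *                      IEMTHRDEDCALLENTRY[Thrd.cCalls]    the threaded call table
 *      Terminator:     uint32_t                UINT32_MAX (written by the cleanup function)
 */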
3075
3076
3077/**
3078 * Called by IEMR3Term to finish any open profile files.
3079 *
3080 * @note This is not called on the EMT for @a pVCpu, but rather on the thread
3081 * driving the VM termination.
3082 */
3083DECLHIDDEN(void) iemThreadedSaveTbForProfilingCleanup(PVMCPU pVCpu)
3084{
3085 PSSMHANDLE const pSSM = pVCpu->iem.s.pSsmThreadedTbsForProfiling;
3086 pVCpu->iem.s.pSsmThreadedTbsForProfiling = NULL;
3087 if (RT_VALID_PTR(pSSM))
3088 {
3089 /* Indicate that this is the end. */
3090 SSMR3PutU32(pSSM, UINT32_MAX);
3091
3092 int rc = SSMR3WriteUnitComplete(pSSM);
3093 AssertLogRelRC(rc);
3094 rc = SSMR3WriteFileFooter(pSSM);
3095 AssertLogRelRC(rc);
3096 rc = SSMR3Close(pSSM);
3097 AssertLogRelRC(rc);
3098 }
3099}
3100
3101#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER && VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING */
3102
3103#ifdef IN_RING3
3104/**
3105 * API used to process what iemThreadedSaveTbForProfiling() saved.
3106 *
3107 * @note Do not mix build types or revisions. Local changes between saving the
3108 * TBs and calling this API may cause unexpected trouble.
3109 */
3110VMMR3DECL(int) IEMR3ThreadedProfileRecompilingSavedTbs(PVM pVM, const char *pszFilename, uint32_t cMinTbs)
3111{
3112# if defined(VBOX_WITH_IEM_NATIVE_RECOMPILER) && defined(VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING)
3113 PVMCPU const pVCpu = pVM->apCpusR3[0];
3114
3115 /* We need to keep an eye on the TB allocator. */
3116 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
3117
3118 /*
3119 * Load the TBs from the file.
3120 */
3121 PSSMHANDLE pSSM = NULL;
3122 int rc = SSMR3Open(pszFilename, NULL, NULL, 0, &pSSM);
3123 if (RT_SUCCESS(rc))
3124 {
3125 uint32_t cTbs = 0;
3126 PIEMTB pTbHead = NULL;
3127 PIEMTB *ppTbTail = &pTbHead;
3128 uint32_t uVersion;
3129 rc = SSMR3Seek(pSSM, "threaded-tbs", 0, &uVersion);
3130 if (RT_SUCCESS(rc))
3131 {
3132 for (;; cTbs++)
3133 {
3134 /* Check for the end tag. */
3135 uint32_t uTag = 0;
3136 rc = SSMR3GetU32(pSSM, &uTag);
3137 AssertRCBreak(rc);
3138 if (uTag == UINT32_MAX)
3139 break;
3140 AssertBreakStmt(uTag == 0, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3141
3142 /* Do we have room for another TB? */
3143 if (pTbAllocator->cInUseTbs + 2 >= pTbAllocator->cMaxTbs)
3144 {
3145 RTMsgInfo("Too many TBs to load, stopping loading early.\n");
3146 break;
3147 }
3148
3149 /* Allocate a new TB. */
3150 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
3151                AssertBreakStmt(pTb, rc = VERR_OUT_OF_RESOURCES);
3152
3153 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
3154 RT_ZERO(*pTb);
3155 pTb->idxAllocChunk = idxAllocChunk;
3156
3157 rc = SSMR3GetStructEx(pSSM, pTb, sizeof(*pTb), 0, g_aIemThreadedTbFields, NULL);
3158 if (RT_SUCCESS(rc))
3159 {
3160 AssertStmt(pTb->Thrd.cCalls > 0 && pTb->Thrd.cCalls <= _8K, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3161 AssertStmt(pTb->cbOpcodes > 0 && pTb->cbOpcodes <= _8K, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3162 AssertStmt(pTb->cRanges > 0 && pTb->cRanges <= RT_ELEMENTS(pTb->aRanges), rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3163 AssertStmt(pTb->cTbLookupEntries > 0 && pTb->cTbLookupEntries <= 136, rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3164
3165 if (RT_SUCCESS(rc))
3166 for (uint32_t iRange = 0; iRange < pTb->cRanges; iRange++)
3167 {
3168 SSMR3GetU16(pSSM, &pTb->aRanges[iRange].offOpcodes);
3169 SSMR3GetU16(pSSM, &pTb->aRanges[iRange].cbOpcodes);
3170 uint16_t uTmp = 0;
3171 rc = SSMR3GetU16(pSSM, &uTmp);
3172 AssertRCBreak(rc);
3173 pTb->aRanges[iRange].offPhysPage = uTmp & GUEST_PAGE_OFFSET_MASK;
3174 pTb->aRanges[iRange].idxPhysPage = uTmp >> 14;
3175
3176 AssertBreakStmt(pTb->aRanges[iRange].idxPhysPage <= RT_ELEMENTS(pTb->aGCPhysPages),
3177 rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3178 AssertBreakStmt(pTb->aRanges[iRange].offOpcodes < pTb->cbOpcodes,
3179 rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3180 AssertBreakStmt(pTb->aRanges[iRange].offOpcodes + pTb->aRanges[iRange].cbOpcodes <= pTb->cbOpcodes,
3181 rc = VERR_SSM_DATA_UNIT_FORMAT_CHANGED);
3182 }
3183
3184 if (RT_SUCCESS(rc))
3185 {
3186 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAllocZ(sizeof(IEMTHRDEDCALLENTRY) * pTb->Thrd.cCalls);
3187 if (pTb->Thrd.paCalls)
3188 {
3189 size_t const cbTbLookup = pTb->cTbLookupEntries * sizeof(PIEMTB);
3190 Assert(cbTbLookup > 0);
3191 size_t const cbOpcodes = pTb->cbOpcodes;
3192 Assert(cbOpcodes > 0);
3193 size_t const cbBoth = cbTbLookup + RT_ALIGN_Z(cbOpcodes, sizeof(PIEMTB));
3194 uint8_t * const pbBoth = (uint8_t *)RTMemAllocZ(cbBoth);
3195 if (pbBoth)
3196 {
3197 pTb->pabOpcodes = &pbBoth[cbTbLookup];
3198 SSMR3GetMem(pSSM, pTb->pabOpcodes, pTb->cbOpcodes);
3199 rc = SSMR3GetMem(pSSM, pTb->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * pTb->Thrd.cCalls);
3200 if (RT_SUCCESS(rc))
3201 {
3202 *ppTbTail = pTb;
3203 ppTbTail = &pTb->pNext;
3204 continue;
3205 }
3206 }
3207 else
3208 rc = VERR_NO_MEMORY;
3209 RTMemFree(pTb->Thrd.paCalls);
3210 }
3211 else
3212 rc = VERR_NO_MEMORY;
3213 }
3214 }
3215 iemTbAllocatorFree(pVCpu, pTb);
3216 break;
3217 }
3218 if (RT_FAILURE(rc))
3219 RTMsgError("Load error: %Rrc (cTbs=%u)", rc, cTbs);
3220 }
3221 else
3222 RTMsgError("SSMR3Seek failed on '%s': %Rrc", pszFilename, rc);
3223 SSMR3Close(pSSM);
3224 if (RT_SUCCESS(rc))
3225 {
3226 /*
3227 * Recompile the TBs.
3228 */
3229 if (pTbHead)
3230 {
3231 RTMsgInfo("Loaded %u TBs\n", cTbs);
3232 if (cTbs < cMinTbs)
3233 {
3234 RTMsgInfo("Duplicating TBs to reach %u TB target\n", cMinTbs);
3235 for (PIEMTB pTb = pTbHead;
3236 cTbs < cMinTbs && pTbAllocator->cInUseTbs + 2 <= pTbAllocator->cMaxTbs;
3237 pTb = pTb->pNext)
3238 {
3239 PIEMTB pTbCopy = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
3240 if (!pTbCopy)
3241 break;
3242 *ppTbTail = pTbCopy;
3243 ppTbTail = &pTbCopy->pNext;
3244 cTbs++;
3245 }
3246 }
3247
3248 PIEMTB pTbWarmup = iemThreadedTbDuplicate(pVM, pVCpu, pTbHead);
3249 if (pTbWarmup)
3250 {
3251 iemNativeRecompile(pVCpu, pTbWarmup);
3252 RTThreadSleep(512); /* to make the start visible in the profiler. */
3253 RTMsgInfo("Ready, set, go!\n");
3254
3255 if ((pTbWarmup->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
3256 {
3257 uint32_t cFailed = 0;
3258 uint64_t const nsStart = RTTimeNanoTS();
3259 for (PIEMTB pTb = pTbHead; pTb; pTb = pTb->pNext)
3260 {
3261 iemNativeRecompile(pVCpu, pTb);
3262 if ((pTb->fFlags & IEMTB_F_TYPE_MASK) != IEMTB_F_TYPE_NATIVE)
3263 cFailed++;
3264 }
3265 uint64_t const cNsElapsed = RTTimeNanoTS() - nsStart;
3266 RTMsgInfo("Recompiled %u TBs in %'RU64 ns - averaging %'RU64 ns/TB\n",
3267 cTbs, cNsElapsed, (cNsElapsed + cTbs - 1) / cTbs);
3268 if (cFailed)
3269 {
3270                        RTMsgError("Unfortunately %u TBs failed!", cFailed);
3271 rc = VERR_GENERAL_FAILURE;
3272 }
3273 RTThreadSleep(128); /* Another gap in the profiler timeline. */
3274 }
3275 else
3276 {
3277 RTMsgError("Failed to recompile the first TB!");
3278 rc = VERR_GENERAL_FAILURE;
3279 }
3280 }
3281 else
3282 rc = VERR_NO_MEMORY;
3283 }
3284 else
3285 {
3286 RTMsgError("'%s' contains no TBs!", pszFilename);
3287 rc = VERR_NO_DATA;
3288 }
3289 }
3290 }
3291 else
3292 RTMsgError("SSMR3Open failed on '%s': %Rrc", pszFilename, rc);
3293 return rc;
3294
3295# else
3296 RT_NOREF(pVM, pszFilename, cMinTbs);
3297 return VERR_NOT_IMPLEMENTED;
3298# endif
3299}
3300#endif /* IN_RING3 */
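
/*
 * Hypothetical usage sketch for the API above (not part of the regular code
 * paths): after a run with VBOX_WITH_SAVE_THREADED_TBS_FOR_PROFILING enabled
 * has produced "ThreadedTBsForRecompilerProfiling.sav", a profiling harness
 * could replay it along these lines, where 4096 is just an example cMinTbs
 * value:
 *
 *      int rc = IEMR3ThreadedProfileRecompilingSavedTbs(pVM,
 *                                                       "ThreadedTBsForRecompilerProfiling.sav",
 *                                                       4096);
 *      if (RT_FAILURE(rc))
 *          LogRel(("IEMR3ThreadedProfileRecompilingSavedTbs failed: %Rrc\n", rc));
 */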
3301
3302
3303/*********************************************************************************************************************************
3304* Recompiled Execution Core *
3305*********************************************************************************************************************************/
3306
3307/** Default TB factor.
3308 * This is basically the number of nanoseconds we guess executing a TB takes
3309 * on average.  We estimate it on the high side if we can.
3310 * @note Best if this is a power of two so it can be translated to a shift. */
3311#define IEM_TIMER_POLL_DEFAULT_FACTOR UINT32_C(64)
3312/** The minimum number of nanoseconds we can allow between timer pollings.
3313 * This must take the cost of TMTimerPollBoolWithNanoTS into mind. We put that
3314 * This must take the cost of TMTimerPollBoolWithNanoTS into account.  We put that
3315#define IEM_TIMER_POLL_MIN_NS UINT32_C(256)
3316/** The IEM_TIMER_POLL_MIN_NS value roughly translated to TBs, with some grains
3317 * of salt thrown in.
3318 * The idea is that we will be able to make progress with guest code execution
3319 * before polling timers and between running timers. */
3320#define IEM_TIMER_POLL_MIN_ITER UINT32_C(12)
3321/** The maximum number of nanoseconds we can allow between timer pollings.
3322 * This probably shouldn't be too high, as we don't have any timer
3323 * reprogramming feedback in the polling code.  So, when a device reschedules a
3324 * timer for an earlier delivery, we won't know about it. */
3325#define IEM_TIMER_POLL_MAX_NS UINT32_C(8388608) /* 0x800000 ns = 8.4 ms */
3326/** The IEM_TIMER_POLL_MAX_NS value roughly translated to TBs, with some grains
3327 * of salt thrown in.
3328 * This helps control fluctuations in the NU benchmark. */
3329#define IEM_TIMER_POLL_MAX_ITER _512K
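
/*
 * Rough worked numbers for the defaults above (informal, derived from the
 * constants rather than from measurements):
 *      - At the 64 ns/TB default factor, the 8388608 ns maximum polling
 *        interval corresponds to 8388608 / 64 = 131072 TBs, comfortably
 *        below the 512K iteration cap.
 *      - The 256 ns minimum interval would correspond to only 256 / 64 = 4
 *        TBs, which is why the separate 12 iteration floor exists.
 */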
3330
3331#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3332/**
3333 * Calculates the number of TBs till the next timer polling using defaults.
3334 *
3335 * This is used when the previous run wasn't long enough to provide sufficient
3336 * data and when coming back from the HALT state and we haven't actually
3337 * executed anything for a while.
3338 */
3339DECL_FORCE_INLINE(uint32_t) iemPollTimersCalcDefaultCountdown(uint64_t cNsDelta) RT_NOEXCEPT
3340{
3341 if (cNsDelta >= IEM_TIMER_POLL_MAX_NS)
3342 return RT_MIN(IEM_TIMER_POLL_MAX_NS / IEM_TIMER_POLL_DEFAULT_FACTOR, IEM_TIMER_POLL_MAX_ITER);
3343
3344 cNsDelta = RT_BIT_64(ASMBitFirstSetU32(cNsDelta) - 1); /* round down to power of 2 */
3345 uint32_t const cRet = cNsDelta / IEM_TIMER_POLL_DEFAULT_FACTOR;
3346 if (cRet >= IEM_TIMER_POLL_MIN_ITER)
3347 {
3348 if (cRet <= IEM_TIMER_POLL_MAX_ITER)
3349 return cRet;
3350 return IEM_TIMER_POLL_MAX_ITER;
3351 }
3352 return IEM_TIMER_POLL_MIN_ITER;
3353}
3354#endif
3355
3356
3357/**
3358 * Helper for polling timers.
3359 */
3360DECLHIDDEN(int) iemPollTimers(PVMCC pVM, PVMCPUCC pVCpu) RT_NOEXCEPT
3361{
3362 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPoll, a);
3363
3364 /*
3365 * Check for VM_FF_TM_VIRTUAL_SYNC and call TMR3VirtualSyncFF if set.
3366 * This is something all EMTs can do.
3367 */
3368 /* If the virtual sync FF is set, respond to it. */
3369 bool fRanTimers = VM_FF_IS_SET(pVM, VM_FF_TM_VIRTUAL_SYNC);
3370 if (!fRanTimers)
3371 { /* likely */ }
3372 else
3373 {
3374 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPollRun, b);
3375 TMR3VirtualSyncFF(pVM, pVCpu);
3376 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPollRun, b);
3377 }
3378
3379 /*
3380 * Poll timers.
3381 *
3382     * On the 10980xe the polling averages 314 ticks, with a min of 201, while
3383     * running a Norton Utilities DOS benchmark program.  TSC runs at 3GHz,
3384 * translating that to 104 ns and 67 ns respectively. (An M2 booting win11
3385 * has an average of 2 ticks / 84 ns.)
3386 *
3387 * With the same setup the TMR3VirtualSyncFF and else branch here profiles
3388 * to 79751 ticks / 26583 ns on average, with a min of 1194 ticks / 398 ns.
3389 * (An M2 booting win11 has an average of 24 ticks / 1008 ns, with a min of
3390 * 8 ticks / 336 ns.)
3391 *
3392 * If we get a zero return value we run timers. Non-timer EMTs shouldn't
3393 * ever see a zero value here, so we just call TMR3TimerQueuesDo. However,
3394     * we do not re-run timers if we already called TMR3VirtualSyncFF above; we
3395     * try to make sure some code is executed first.
3396 */
3397 uint64_t nsNow = 0;
3398 uint64_t cNsDelta = TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow);
3399 if (cNsDelta >= 1) /* It is okay to run virtual sync timers a little early. */
3400 { /* likely */ }
3401 else if (!fRanTimers || VM_FF_IS_SET(pVM, VM_FF_TM_VIRTUAL_SYNC))
3402 {
3403 STAM_PROFILE_START(&pVCpu->iem.s.StatTimerPollRun, b);
3404 TMR3TimerQueuesDo(pVM);
3405 fRanTimers = true;
3406 nsNow = 0;
3407 cNsDelta = TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow);
3408 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPollRun, b);
3409 }
3410 else
3411 cNsDelta = 33;
3412
3413 /*
3414 * Calc interval and update the timestamps.
3415 */
3416 uint64_t const cNsSinceLast = nsNow - pVCpu->iem.s.nsRecompilerPollNow;
3417 pVCpu->iem.s.nsRecompilerPollNow = nsNow;
3418 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(nsNow / RT_NS_1MS);
3419
3420 /*
3421 * Set the next polling count down value.
3422 *
3423 * We take the previous value and adjust it according to the cNsSinceLast
3424 * value, if it's not within reason. This can't be too accurate since the
3425     * CheckIrq and intra-TB-checks aren't evenly spaced; they depend highly
3426 * on the guest code.
3427 */
3428#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3429 uint32_t cItersTillNextPoll = pVCpu->iem.s.cTbsTillNextTimerPollPrev;
3430 if (cNsDelta >= RT_NS_1SEC / 4)
3431 {
3432 /*
3433 * Non-timer EMTs should end up here with a fixed 500ms delta, just return
3434             * the max and keep the polling overhead to the dedicated timer EMT.
3435 */
3436 AssertCompile(IEM_TIMER_POLL_MAX_ITER * IEM_TIMER_POLL_DEFAULT_FACTOR <= RT_NS_100MS);
3437 cItersTillNextPoll = IEM_TIMER_POLL_MAX_ITER;
3438 }
3439 else
3440 {
3441 /*
3442 * This is the timer EMT.
3443 */
3444 if (cNsDelta <= IEM_TIMER_POLL_MIN_NS)
3445 {
3446 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollTiny);
3447 cItersTillNextPoll = IEM_TIMER_POLL_MIN_ITER;
3448 }
3449 else
3450 {
3451 uint32_t const cNsDeltaAdj = cNsDelta >= IEM_TIMER_POLL_MAX_NS ? IEM_TIMER_POLL_MAX_NS : (uint32_t)cNsDelta;
3452 uint32_t const cNsDeltaSlack = cNsDelta >= IEM_TIMER_POLL_MAX_NS ? IEM_TIMER_POLL_MAX_NS / 2 : cNsDeltaAdj / 4;
3453 if ( cNsSinceLast < RT_MAX(IEM_TIMER_POLL_MIN_NS, 64)
3454 || cItersTillNextPoll < IEM_TIMER_POLL_MIN_ITER /* paranoia */)
3455 {
3456 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollDefaultCalc);
3457 cItersTillNextPoll = iemPollTimersCalcDefaultCountdown(cNsDeltaAdj);
3458 }
3459 else if ( cNsSinceLast >= cNsDeltaAdj + cNsDeltaSlack
3460 || cNsSinceLast <= cNsDeltaAdj - cNsDeltaSlack)
3461 {
3462 if (cNsSinceLast >= cItersTillNextPoll)
3463 {
3464 uint32_t uFactor = (uint32_t)(cNsSinceLast + cItersTillNextPoll - 1) / cItersTillNextPoll;
3465 cItersTillNextPoll = cNsDeltaAdj / uFactor;
3466 STAM_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTimerPollFactorDivision, uFactor);
3467 }
3468 else
3469 {
3470 uint32_t uFactor = cItersTillNextPoll / (uint32_t)cNsSinceLast;
3471 cItersTillNextPoll = cNsDeltaAdj * uFactor;
3472 STAM_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTimerPollFactorMultiplication, uFactor);
3473 }
3474
3475 if (cItersTillNextPoll >= IEM_TIMER_POLL_MIN_ITER)
3476 {
3477 if (cItersTillNextPoll <= IEM_TIMER_POLL_MAX_ITER)
3478 { /* likely */ }
3479 else
3480 {
3481 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollMax);
3482 cItersTillNextPoll = IEM_TIMER_POLL_MAX_ITER;
3483 }
3484 }
3485 else
3486 cItersTillNextPoll = IEM_TIMER_POLL_MIN_ITER;
3487 }
3488 else
3489 STAM_COUNTER_INC(&pVCpu->iem.s.StatTimerPollUnchanged);
3490 }
3491 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3492 }
3493#else
3494/** Poll timers every 400 us / 2500 Hz. (source: thin air) */
3495# define IEM_TIMER_POLL_IDEAL_NS (400U * RT_NS_1US)
3496 uint32_t cItersTillNextPoll = pVCpu->iem.s.cTbsTillNextTimerPollPrev;
3497 uint32_t const cNsIdealPollInterval = IEM_TIMER_POLL_IDEAL_NS;
3498 int64_t const nsFromIdeal = cNsSinceLast - cNsIdealPollInterval;
3499 if (nsFromIdeal < 0)
3500 {
3501 if ((uint64_t)-nsFromIdeal > cNsIdealPollInterval / 8 && cItersTillNextPoll < _64K)
3502 {
3503 cItersTillNextPoll += cItersTillNextPoll / 8;
3504 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3505 }
3506 }
3507 else
3508 {
3509 if ((uint64_t)nsFromIdeal > cNsIdealPollInterval / 8 && cItersTillNextPoll > 256)
3510 {
3511 cItersTillNextPoll -= cItersTillNextPoll / 8;
3512 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillNextPoll;
3513 }
3514 }
3515#endif
3516 pVCpu->iem.s.cTbsTillNextTimerPoll = cItersTillNextPoll;
3517
3518 /*
3519 * Repeat the IRQ and FF checks.
3520 */
3521 if (cNsDelta > 0)
3522 {
3523 uint32_t fCpu = pVCpu->fLocalForcedActions;
3524 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
3525 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
3526 | VMCPU_FF_TLB_FLUSH
3527 | VMCPU_FF_UNHALT );
3528 if (RT_LIKELY( ( !fCpu
3529 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
3530 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
3531 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx)) ) )
3532 && !VM_FF_IS_ANY_SET(pVCpu->CTX_SUFF(pVM), VM_FF_ALL_MASK) ))
3533 {
3534 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPoll, a);
3535 return VINF_SUCCESS;
3536 }
3537 }
3538 STAM_PROFILE_STOP(&pVCpu->iem.s.StatTimerPoll, a);
3539 return VINF_IEM_REEXEC_BREAK_FF;
3540}
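
/*
 * Informal worked example for the adaptive path above (illustrative numbers,
 * not measurements): assume IEM_WITH_ADAPTIVE_TIMER_POLLING, a previous
 * countdown of 4096 TBs that took cNsSinceLast = 262144 ns, and a next timer
 * deadline cNsDeltaAdj = 1048576 ns away.  262144 falls outside the
 * [786432, 1310720] slack window (deadline +/- a quarter), so the division
 * path computes uFactor = ceil(262144 / 4096) = 64 ns per TB and a new
 * countdown of 1048576 / 64 = 16384 TBs, which then stays within the
 * [IEM_TIMER_POLL_MIN_ITER, IEM_TIMER_POLL_MAX_ITER] clamp.
 */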
3541
3542
3543/** Helper for iemTbExec. */
3544DECL_FORCE_INLINE(PIEMTB *) iemTbGetTbLookupEntryWithRip(PCIEMTB pTb, uint8_t uTbLookup, uint64_t uRip)
3545{
3546 uint8_t const idx = IEM_TB_LOOKUP_TAB_GET_IDX_WITH_PC(uTbLookup, uRip);
3547 Assert(idx < pTb->cTbLookupEntries);
3548 return IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idx);
3549}
3550
3551
3552/**
3553 * Executes a translation block.
3554 *
3555 * @returns Strict VBox status code.
3556 * @param pVCpu The cross context virtual CPU structure of the calling
3557 * thread.
3558 * @param pTb The translation block to execute.
3559 */
3560static IEM_DECL_MSC_GUARD_IGNORE VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
3561{
3562 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
3563
3564 /*
3565 * Set the current TB so CIMPL functions may get at it.
3566 */
3567 pVCpu->iem.s.pCurTbR3 = pTb;
3568 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0);
3569
3570 /*
3571 * Execute the block.
3572 */
3573#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
3574 if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
3575 {
3576 pVCpu->iem.s.cTbExecNative++;
3577 IEMTLBTRACE_TB_EXEC_N8VE(pVCpu, pTb);
3578# ifdef LOG_ENABLED
3579 iemThreadedLogCurInstr(pVCpu, "EXn", 0);
3580# endif
3581
3582# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3583 AssertCompileMemberOffset(VMCPUCC, iem.s.pvTbFramePointerR3, 0x7c8); /* This is assumed in iemNativeTbEntry */
3584# endif
3585# ifdef RT_ARCH_AMD64
3586 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, (uintptr_t)pTb->Native.paInstructions);
3587# else
3588 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, &pVCpu->cpum.GstCtx, (uintptr_t)pTb->Native.paInstructions);
3589# endif
3590
3591# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3592 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
3593# endif
3594# ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
3595 /* Restore FPCR/MXCSR if the TB modified it. */
3596 if (pVCpu->iem.s.uRegFpCtrl != IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED)
3597 {
3598 iemNativeFpCtrlRegRestore(pVCpu->iem.s.uRegFpCtrl);
3599 /* Reset for the next round saving us an unconditional instruction on next TB entry. */
3600 pVCpu->iem.s.uRegFpCtrl = IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED;
3601 }
3602# endif
3603# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
3604 Assert(pVCpu->iem.s.fSkippingEFlags == 0);
3605# endif
3606 if (RT_LIKELY( rcStrict == VINF_SUCCESS
3607 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
3608 { /* likely */ }
3609 else
3610 {
3611 /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
3612 pVCpu->iem.s.pCurTbR3 = NULL;
3613
3614 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
3615 only to break out of TB execution early. */
3616 if (rcStrict == VINF_IEM_REEXEC_BREAK)
3617 {
3618 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreak);
3619 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3620 }
3621
3622 /* VINF_IEM_REEXEC_BREAK_FF should be treated as VINF_SUCCESS as it's
3623 only to break out of TB execution early due to pending FFs. */
3624 if (rcStrict == VINF_IEM_REEXEC_BREAK_FF)
3625 {
3626 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreakFF);
3627 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3628 }
3629
3630            /* VINF_IEM_REEXEC_FINISH_WITH_FLAGS needs to receive special treatment
3631               and be converted to VINF_SUCCESS or whatever is appropriate. */
3632 if (rcStrict == VINF_IEM_REEXEC_FINISH_WITH_FLAGS)
3633 {
3634 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnWithFlags);
3635 return iemExecStatusCodeFiddling(pVCpu, iemFinishInstructionWithFlagsSet(pVCpu, VINF_SUCCESS));
3636 }
3637
3638 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnOtherStatus);
3639 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
3640 }
3641 }
3642 else
3643#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
3644 {
3645 /*
3646 * The threaded execution loop.
3647 */
3648 pVCpu->iem.s.cTbExecThreaded++;
3649 IEMTLBTRACE_TB_EXEC_THRD(pVCpu, pTb);
3650#ifdef LOG_ENABLED
3651 uint64_t uRipPrev = UINT64_MAX;
3652#endif
3653 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
3654 uint32_t cCallsLeft = pTb->Thrd.cCalls;
3655 while (cCallsLeft-- > 0)
3656 {
3657#ifdef LOG_ENABLED
3658 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
3659 {
3660 uRipPrev = pVCpu->cpum.GstCtx.rip;
3661 iemThreadedLogCurInstr(pVCpu, "EXt", pTb->Thrd.cCalls - cCallsLeft - 1);
3662 }
3663 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
3664 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
3665 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
3666#endif
3667#ifdef VBOX_WITH_STATISTICS
3668 AssertCompile(RT_ELEMENTS(pVCpu->iem.s.acThreadedFuncStats) >= kIemThreadedFunc_End);
3669 pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction] += 1;
3670#endif
3671 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
3672 pCallEntry->auParams[0],
3673 pCallEntry->auParams[1],
3674 pCallEntry->auParams[2]);
3675 if (RT_LIKELY( rcStrict == VINF_SUCCESS
3676 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
3677 pCallEntry++;
3678 else if (rcStrict == VINF_IEM_REEXEC_JUMP)
3679 {
3680 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
3681 Assert(cCallsLeft == 0);
3682 uint32_t const idxTarget = (uint32_t)pCallEntry->auParams[0];
3683 cCallsLeft = pTb->Thrd.cCalls;
3684 AssertBreak(idxTarget < cCallsLeft - 1);
3685 cCallsLeft -= idxTarget;
3686 pCallEntry = &pTb->Thrd.paCalls[idxTarget];
3687 AssertBreak(pCallEntry->fFlags & IEMTHREADEDCALLENTRY_F_JUMP_TARGET);
3688 }
3689 else
3690 {
3691 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
3692 pVCpu->iem.s.pCurTbR3 = NULL;
3693 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaks);
3694 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry->uTbLookup, pVCpu->cpum.GstCtx.rip);
3695
3696 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
3697 only to break out of TB execution early. */
3698 if (rcStrict == VINF_IEM_REEXEC_BREAK)
3699 {
3700#ifdef VBOX_WITH_STATISTICS
3701 if (pCallEntry->uTbLookup)
3702 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithLookup);
3703 else
3704 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithoutLookup);
3705#endif
3706 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
3707 }
3708 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
3709 }
3710 }
3711
3712 /* Update the lookup entry. */
3713 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry[-1].uTbLookup, pVCpu->cpum.GstCtx.rip);
3714 }
3715
3716 pVCpu->iem.s.cInstructions += pTb->cInstructions;
3717 pVCpu->iem.s.pCurTbR3 = NULL;
3718 return VINF_SUCCESS;
3719}
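
/*
 * Note on the threaded loop above (descriptive only): each IEMTHRDEDCALLENTRY
 * is dispatched through g_apfnIemThreadedFunctions with its three auParams
 * values, and a VINF_IEM_REEXEC_JUMP status restarts the loop at an entry
 * flagged IEMTHREADEDCALLENTRY_F_JUMP_TARGET, presumably so that backward
 * jumps within the block can be serviced without leaving the TB.
 */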
3720
3721
3722/**
3723 * This is called when the PC doesn't match the current pbInstrBuf.
3724 *
3725 * Upon return, we're ready for opcode fetching. But please note that
3726 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
3727 * MMIO or unassigned).
3728 */
3729static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
3730{
3731 pVCpu->iem.s.pbInstrBuf = NULL;
3732 pVCpu->iem.s.offCurInstrStart = 0;
3733 pVCpu->iem.s.offInstrNextByte = 0;
3734 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
3735 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
3736}
3737
3738
3739/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
3740DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
3741{
3742 /*
3743 * Set uCurTbStartPc to RIP and calc the effective PC.
3744 */
3745 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
3746#if 0 /* unused */
3747 pVCpu->iem.s.uCurTbStartPc = uPc;
3748#endif
3749 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
3750 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
3751
3752 /*
3753 * Advance within the current buffer (PAGE) when possible.
3754 */
3755 if (pVCpu->iem.s.pbInstrBuf)
3756 {
3757 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
3758 if (off < pVCpu->iem.s.cbInstrBufTotal)
3759 {
3760 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
3761 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
3762 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
3763 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
3764 else
3765 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
3766
3767 return pVCpu->iem.s.GCPhysInstrBuf + off;
3768 }
3769 }
3770 return iemGetPcWithPhysAndCodeMissed(pVCpu);
3771}
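
/*
 * Note on the fast path above: 15 is the maximum length of an x86 instruction,
 * so when the new PC still falls inside the current instruction buffer the
 * opcode fetch window (cbInstrBuf) is capped at one instruction's worth of
 * bytes past the new instruction start whenever the buffer is large enough.
 */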
3772
3773
3774/**
3775 * Determines the extra IEMTB_F_XXX flags.
3776 *
3777 * @returns A mix of IEMTB_F_X86_INHIBIT_SHADOW, IEMTB_F_X86_INHIBIT_NMI and
3778 * IEMTB_F_X86_CS_LIM_CHECKS (or zero).
3779 * @param pVCpu The cross context virtual CPU structure of the calling
3780 * thread.
3781 */
3782DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
3783{
3784 uint32_t fRet = 0;
3785
3786 /*
3787 * Determine the inhibit bits.
3788 */
3789 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
3790 { /* typical */ }
3791 else
3792 {
3793 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
3794 fRet |= IEMTB_F_X86_INHIBIT_SHADOW;
3795 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
3796 fRet |= IEMTB_F_X86_INHIBIT_NMI;
3797 }
3798
3799 /*
3800 * Return IEMTB_F_X86_CS_LIM_CHECKS if the current PC is invalid or if it is
3801 * likely to go invalid before the end of the translation block.
3802 */
3803 if (IEM_F_MODE_X86_IS_FLAT(pVCpu->iem.s.fExec))
3804 return fRet;
3805
3806 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
3807 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
3808 return fRet;
3809 return fRet | IEMTB_F_X86_CS_LIM_CHECKS;
3810}
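
/*
 * Reading of the limit check above (an interpretation, not an authoritative
 * statement): per-instruction CS.LIM checking is only forced on when the
 * distance from EIP to the CS limit is less than a guest page plus 16 bytes,
 * reduced by the page offset of the CS base.  Beyond that distance the limit
 * presumably cannot be reached by any opcode byte the TB being compiled or
 * looked up will fetch, so the checks can be omitted for this block.
 */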
3811
3812
3813VMM_INT_DECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu, bool fWasHalted)
3814{
3815 /*
3816 * See if there is an interrupt pending in TRPM, inject it if we can.
3817 */
3818 if (!TRPMHasTrap(pVCpu))
3819 { /* likely */ }
3820 else
3821 {
3822 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
3823 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
3824 { /*likely */ }
3825 else
3826 return rcStrict;
3827 }
3828
3829 /*
3830 * Init the execution environment.
3831 */
3832#if 1 /** @todo this seems like a good idea, however if we ever share memory
3833 * directly with other threads on the host, it isn't necessarily... */
3834 if (pVM->cCpus == 1)
3835 iemInitExec(pVCpu, IEM_F_X86_DISREGARD_LOCK /*fExecOpts*/);
3836 else
3837#endif
3838 iemInitExec(pVCpu, 0 /*fExecOpts*/);
3839
3840 if (RT_LIKELY(!fWasHalted && pVCpu->iem.s.msRecompilerPollNow != 0))
3841 { }
3842 else
3843 {
3844 /* Do polling after halt and the first time we get here. */
3845#ifdef IEM_WITH_ADAPTIVE_TIMER_POLLING
3846 uint64_t nsNow = 0;
3847 uint32_t const cItersTillPoll = iemPollTimersCalcDefaultCountdown(TMTimerPollBoolWithNanoTS(pVM, pVCpu, &nsNow));
3848 pVCpu->iem.s.cTbsTillNextTimerPollPrev = cItersTillPoll;
3849 pVCpu->iem.s.cTbsTillNextTimerPoll = cItersTillPoll;
3850#else
3851 uint64_t const nsNow = TMVirtualGetNoCheck(pVM);
3852#endif
3853 pVCpu->iem.s.nsRecompilerPollNow = nsNow;
3854 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(nsNow / RT_NS_1MS);
3855 }
3856 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
3857
3858 /*
3859 * Run-loop.
3860 *
3861 * If we're using setjmp/longjmp we combine all the catching here to avoid
3862 * having to call setjmp for each block we're executing.
3863 */
3864 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
3865 for (;;)
3866 {
3867 VBOXSTRICTRC rcStrict;
3868 IEM_TRY_SETJMP(pVCpu, rcStrict)
3869 {
3870 for (;;)
3871 {
3872 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
3873 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
3874 if (RT_LIKELY(pVCpu->iem.s.pbInstrBuf != NULL))
3875 {
3876 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
3877 PIEMTB const pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
3878 if (pTb)
3879 rcStrict = iemTbExec(pVCpu, pTb);
3880 else
3881 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
3882 }
3883 else
3884 {
3885 /* This can only happen if the current PC cannot be translated into a
3886 host pointer, which means we're in MMIO or unmapped memory... */
3887#if defined(VBOX_STRICT) && defined(IN_RING3)
3888 rcStrict = DBGFSTOP(pVM);
3889 if (rcStrict != VINF_SUCCESS && rcStrict != VERR_DBGF_NOT_ATTACHED)
3890 return rcStrict;
3891#endif
3892 rcStrict = IEMExecLots(pVCpu, 2048, 511, NULL);
3893 }
3894 if (rcStrict == VINF_SUCCESS)
3895 {
3896 Assert(pVCpu->iem.s.cActiveMappings == 0);
3897
3898 /* Note! This IRQ/FF check is repeated in iemPollTimers, iemThreadedFunc_BltIn_CheckIrq
3899 and emitted by iemNativeRecompFunc_BltIn_CheckIrq. */
3900 uint64_t fCpu = pVCpu->fLocalForcedActions;
3901 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
3902 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
3903 | VMCPU_FF_TLB_FLUSH
3904 | VMCPU_FF_UNHALT );
3905 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
3906 if (RT_LIKELY( ( !fCpu
3907 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
3908 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
3909 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
3910 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
3911 {
3912 /* Once in a while we need to poll timers here. */
3913 if ((int32_t)--pVCpu->iem.s.cTbsTillNextTimerPoll > 0)
3914 { /* likely */ }
3915 else
3916 {
3917 int rc = iemPollTimers(pVM, pVCpu);
3918 if (rc != VINF_SUCCESS)
3919 return VINF_SUCCESS;
3920 }
3921 }
3922 else
3923 return VINF_SUCCESS;
3924 }
3925 else
3926 return rcStrict;
3927 }
3928 }
3929 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
3930 {
3931 Assert(rcStrict != VINF_IEM_REEXEC_BREAK);
3932 pVCpu->iem.s.cLongJumps++;
3933#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3934 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
3935#endif
3936 if (pVCpu->iem.s.cActiveMappings > 0)
3937 iemMemRollback(pVCpu);
3938
3939#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
3940 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
3941 if (pTb && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
3942 {
3943 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitLongJump);
3944# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3945 Assert(pVCpu->iem.s.idxTbCurInstr < pTb->cInstructions);
3946 pVCpu->iem.s.cInstructions += pVCpu->iem.s.idxTbCurInstr;
3947# endif
3948
3949#ifdef IEMNATIVE_WITH_SIMD_FP_NATIVE_EMITTERS
3950 /* Restore FPCR/MXCSR if the TB modified it. */
3951 if (pVCpu->iem.s.uRegFpCtrl != IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED)
3952 {
3953 iemNativeFpCtrlRegRestore(pVCpu->iem.s.uRegFpCtrl);
3954 /* Reset for the next round saving us an unconditional instruction on next TB entry. */
3955 pVCpu->iem.s.uRegFpCtrl = IEMNATIVE_SIMD_FP_CTRL_REG_NOT_MODIFIED;
3956 }
3957#endif
3958 }
3959#endif
3960
3961#if 0 /** @todo do we need to clean up anything? If not, we can drop the pTb = NULL some lines up and change the scope. */
3962 /* If pTb isn't NULL we're in iemTbExec. */
3963 if (!pTb)
3964 {
3965 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
3966 pTb = pVCpu->iem.s.pCurTbR3;
3967 if (pTb)
3968 {
3969 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
3970 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
3971 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
3972 }
3973 }
3974#endif
3975 pVCpu->iem.s.pCurTbR3 = NULL;
3976 return rcStrict;
3977 }
3978 IEM_CATCH_LONGJMP_END(pVCpu);
3979 }
3980}
3981