VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp@105462

Last change on this file since 105462 was 105191, checked in by vboxsync, 5 months ago

VMM/IEM: Redo TB allocation and get rid of the allocation bitmap entirely. Free translation blocks are now linked LIFO style in a single list, bugref:10653

1/* $Id: IEMAllThrdRecompiler.cpp 105191 2024-07-08 14:50:02Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) : TB calls being emitted.
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) : TB opcode range management.
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/sort.h>
86#include <iprt/x86.h>
87
88#ifndef TST_IEM_CHECK_MC
89# include "IEMInline.h"
90# include "IEMOpHlp.h"
91# include "IEMMc.h"
92#endif
93
94#include "IEMThreadedFunctions.h"
95#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
96# include "IEMN8veRecompiler.h"
97#endif
98
99
100/*
101 * Narrow down configs here to avoid wasting time on unused configs.
102 */
103
104#ifndef IEM_WITH_CODE_TLB
105# error The code TLB must be enabled for the recompiler.
106#endif
107
108#ifndef IEM_WITH_DATA_TLB
109# error The data TLB must be enabled for the recompiler.
110#endif
111
112#ifndef IEM_WITH_SETJMP
113# error The setjmp approach must be enabled for the recompiler.
114#endif
115
116
117
118/**
119 * Calculates the effective address of a ModR/M memory operand, extended version
120 * for use in the recompilers.
121 *
122 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
123 *
124 * May longjmp on internal error.
125 *
126 * @return The effective address.
127 * @param pVCpu The cross context virtual CPU structure of the calling thread.
128 * @param bRm The ModRM byte.
129 * @param cbImmAndRspOffset - First byte: The size of any immediate
130 * following the effective address opcode bytes
131 * (only for RIP relative addressing).
132 * - Second byte: RSP displacement (for POP [ESP]).
133 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
134 * SIB byte (bits 39:32).
135 *
136 * @note This must be defined in a source file with matching
137 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
138 * or implemented differently...
139 */
140RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
141{
142 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
143# define SET_SS_DEF() \
144 do \
145 { \
146 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
147 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
148 } while (0)
149
150 if (!IEM_IS_64BIT_CODE(pVCpu))
151 {
152/** @todo Check the effective address size crap! */
153 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
154 {
155 uint16_t u16EffAddr;
156
157 /* Handle the disp16 form with no registers first. */
158 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
159 {
160 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
161 *puInfo = u16EffAddr;
162 }
163 else
164 {
165                /* Get the displacement. */
166 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
167 {
168 case 0: u16EffAddr = 0; break;
169 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
170 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
171 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
172 }
173 *puInfo = u16EffAddr;
174
175 /* Add the base and index registers to the disp. */
176 switch (bRm & X86_MODRM_RM_MASK)
177 {
178 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
179 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
180 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
181 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
182 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
183 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
184 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
185 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
186 }
187 }
188
189 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
190 return u16EffAddr;
191 }
192
193 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
194 uint32_t u32EffAddr;
195 uint64_t uInfo;
196
197 /* Handle the disp32 form with no registers first. */
198 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
199 {
200 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
201 uInfo = u32EffAddr;
202 }
203 else
204 {
205 /* Get the register (or SIB) value. */
206 uInfo = 0;
207 switch ((bRm & X86_MODRM_RM_MASK))
208 {
209 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
210 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
211 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
212 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
213 case 4: /* SIB */
214 {
215 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
216 uInfo = (uint64_t)bSib << 32;
217
218 /* Get the index and scale it. */
219 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
220 {
221 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
222 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
223 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
224 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
225 case 4: u32EffAddr = 0; /*none */ break;
226 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
227 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
228 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
229 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
230 }
231 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
232
233 /* add base */
234 switch (bSib & X86_SIB_BASE_MASK)
235 {
236 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
237 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
238 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
239 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
240 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
241 case 5:
242 if ((bRm & X86_MODRM_MOD_MASK) != 0)
243 {
244 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
245 SET_SS_DEF();
246 }
247 else
248 {
249 uint32_t u32Disp;
250 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
251 u32EffAddr += u32Disp;
252 uInfo |= u32Disp;
253 }
254 break;
255 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
256 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
257 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
258 }
259 break;
260 }
261 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
262 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
263 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
264 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
265 }
266
267 /* Get and add the displacement. */
268 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
269 {
270 case 0:
271 break;
272 case 1:
273 {
274 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
275 u32EffAddr += i8Disp;
276 uInfo |= (uint32_t)(int32_t)i8Disp;
277 break;
278 }
279 case 2:
280 {
281 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
282 u32EffAddr += u32Disp;
283 uInfo |= u32Disp;
284 break;
285 }
286 default:
287 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
288 }
289 }
290
291 *puInfo = uInfo;
292 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
293 return u32EffAddr;
294 }
295
296 uint64_t u64EffAddr;
297 uint64_t uInfo;
298
299 /* Handle the rip+disp32 form with no registers first. */
300 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
301 {
302 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
303 uInfo = (uint32_t)u64EffAddr;
304 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
305 }
306 else
307 {
308 /* Get the register (or SIB) value. */
309 uInfo = 0;
310 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
311 {
312 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
313 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
314 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
315 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
316 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
317 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
318 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
319 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
320 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
321 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
322 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
323 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
324 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
325 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
326 /* SIB */
327 case 4:
328 case 12:
329 {
330 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
331 uInfo = (uint64_t)bSib << 32;
332
333 /* Get the index and scale it. */
334 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
335 {
336 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
337 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
338 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
339 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
340 case 4: u64EffAddr = 0; /*none */ break;
341 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
342 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
343 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
344 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
345 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
346 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
347 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
348 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
349 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
350 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
351 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
352 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
353 }
354 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
355
356 /* add base */
357 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
358 {
359 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
360 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
361 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
362 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
363 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
364 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
365 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
366 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
367 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
368 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
369 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
370 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
371 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
372 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
373 /* complicated encodings */
374 case 5:
375 case 13:
376 if ((bRm & X86_MODRM_MOD_MASK) != 0)
377 {
378 if (!pVCpu->iem.s.uRexB)
379 {
380 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
381 SET_SS_DEF();
382 }
383 else
384 u64EffAddr += pVCpu->cpum.GstCtx.r13;
385 }
386 else
387 {
388 uint32_t u32Disp;
389 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
390 u64EffAddr += (int32_t)u32Disp;
391 uInfo |= u32Disp;
392 }
393 break;
394 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
395 }
396 break;
397 }
398 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
399 }
400
401 /* Get and add the displacement. */
402 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
403 {
404 case 0:
405 break;
406 case 1:
407 {
408 int8_t i8Disp;
409 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
410 u64EffAddr += i8Disp;
411 uInfo |= (uint32_t)(int32_t)i8Disp;
412 break;
413 }
414 case 2:
415 {
416 uint32_t u32Disp;
417 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
418 u64EffAddr += (int32_t)u32Disp;
419 uInfo |= u32Disp;
420 break;
421 }
422 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
423 }
424
425 }
426
427 *puInfo = uInfo;
428 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
429 {
430 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
431 return u64EffAddr;
432 }
433 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
434 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
435 return u64EffAddr & UINT32_MAX;
436}
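
/*
 * Illustrative sketch (not part of the original source): the doc comment above
 * describes two packed values.  cbImmAndRspOffset carries the size of any trailing
 * immediate in its first byte and an RSP displacement in its second byte, while the
 * uInfo value returned via puInfo carries the 32-bit displacement in bits 31:0 and
 * the SIB byte in bits 39:32.  The helpers below merely restate that packing; their
 * names are invented for the example and exist nowhere else in the code base.
 */
DECL_FORCE_INLINE(uint32_t) exampleMakeImmAndRspOffset(uint8_t cbImm, uint8_t offRsp)
{
    return (uint32_t)cbImm | ((uint32_t)offRsp << 8); /* low byte: immediate size; second byte: RSP displacement */
}

DECL_FORCE_INLINE(uint32_t) exampleEffAddrInfoGetDisp(uint64_t uInfo)
{
    return (uint32_t)uInfo;                           /* bits 31:0 - the displacement */
}

DECL_FORCE_INLINE(uint8_t) exampleEffAddrInfoGetSib(uint64_t uInfo)
{
    return (uint8_t)(uInfo >> 32);                    /* bits 39:32 - the SIB byte */
}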
437
438
439/*********************************************************************************************************************************
440* Translation Block Cache. *
441*********************************************************************************************************************************/
442
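/*
 * Illustrative sketch (not part of the original source): the apHash[] entries used
 * below appear to be tagged pointers - the collision-list head with the list length
 * stored in its low bits via IEMTBCACHE_PTR_MAKE/GET_TB/GET_COUNT from IEMInternal.h
 * (the AssertCompile on sizeof(IEMTB) vs IEMTBCACHE_PTR_COUNT_MASK in iemTbInit keeps
 * those bits clear in real TB addresses).  The macros below only demonstrate the
 * general low-bits-tagging technique with an assumed 8-bit count mask; they are not
 * the real macros.
 */
#define EXAMPLE_TBCACHE_PTR_COUNT_MASK      UINT32_C(0xff) /* assumed width, illustration only */
#define EXAMPLE_TBCACHE_PTR_MAKE(a_pTb, a_cCount) \
    ((PIEMTB)((uintptr_t)(a_pTb) | (uintptr_t)(a_cCount)))
#define EXAMPLE_TBCACHE_PTR_GET_TB(a_pTagged) \
    ((PIEMTB)((uintptr_t)(a_pTagged) & ~(uintptr_t)EXAMPLE_TBCACHE_PTR_COUNT_MASK))
#define EXAMPLE_TBCACHE_PTR_GET_COUNT(a_pTagged) \
    ((uint32_t)((uintptr_t)(a_pTagged) & EXAMPLE_TBCACHE_PTR_COUNT_MASK))
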
443/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
444static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
445{
446 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
447 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
448 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
449 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
450 if (cMsSinceUse1 != cMsSinceUse2)
451 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
452 if (pTb1->cUsed != pTb2->cUsed)
453 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
454 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
455 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
456 return 0;
457}
458
459#ifdef VBOX_STRICT
460/**
461 * Assertion helper that checks a collisions list count.
462 */
463static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
464{
465 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
466 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
467 while (pTb)
468 {
469 pTb = pTb->pNext;
470 cLeft--;
471 }
472 AssertMsg(cLeft == 0,
473 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
474 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
475}
476#endif
477
478
479DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
480{
481 STAM_PROFILE_START(&pTbCache->StatPrune, a);
482
483 /*
484 * First convert the collision list to an array.
485 */
486 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
487 uintptr_t cInserted = 0;
488 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
489
490 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
491
492 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
493 {
494 apSortedTbs[cInserted++] = pTbCollision;
495 pTbCollision = pTbCollision->pNext;
496 }
497
498 /* Free any excess (impossible). */
499 if (RT_LIKELY(!pTbCollision))
500 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
501 else
502 do
503 {
504 PIEMTB pTbToFree = pTbCollision;
505 pTbCollision = pTbToFree->pNext;
506 iemTbAllocatorFree(pVCpu, pTbToFree);
507 } while (pTbCollision);
508
509 /*
510 * Sort it by most recently used and usage count.
511 */
512 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
513
514 /* We keep half the list for now. Perhaps a bit aggressive... */
515 uintptr_t const cKeep = cInserted / 2;
516
517 /* First free up the TBs we don't wish to keep (before creating the new
518 list because otherwise the free code will scan the list for each one
519 without ever finding it). */
520 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
521 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
522
523    /* Then chain the new TB together with the existing ones we'd like to
524       keep and insert this list into the hash table. */
525 pTbCollision = pTb;
526 for (uintptr_t idx = 0; idx < cKeep; idx++)
527 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
528 pTbCollision->pNext = NULL;
529
530 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
531#ifdef VBOX_STRICT
532 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
533#endif
534
535 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
536}
537
538
539static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
540{
541 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
542 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
543 if (!pTbOldHead)
544 {
545 pTb->pNext = NULL;
546 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
547 }
548 else
549 {
550 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
551 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
552 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
553 {
554 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
555 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
556#ifdef VBOX_STRICT
557 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
558#endif
559 }
560 else
561 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
562 }
563}
564
565
566/**
567 * Unlinks @a pTb from the hash table if found in it.
568 *
569 * @returns true if unlinked, false if not present.
570 * @param pTbCache The hash table.
571 * @param pTb The TB to remove.
572 */
573static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
574{
575 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
576 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
577 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
578
579 /*
580 * At the head of the collision list?
581 */
582 if (pTbHash == pTb)
583 {
584 if (!pTb->pNext)
585 pTbCache->apHash[idxHash] = NULL;
586 else
587 {
588 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
589 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
590#ifdef VBOX_STRICT
591 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
592#endif
593 }
594 return true;
595 }
596
597 /*
598 * Search the collision list.
599 */
600 PIEMTB const pTbHead = pTbHash;
601 while (pTbHash)
602 {
603 PIEMTB const pNextTb = pTbHash->pNext;
604 if (pNextTb == pTb)
605 {
606 pTbHash->pNext = pTb->pNext;
607 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
608#ifdef VBOX_STRICT
609 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
610#endif
611 return true;
612 }
613 pTbHash = pNextTb;
614 }
615 return false;
616}
617
618
619/**
620 * Looks up a TB for the given PC and flags in the cache.
621 *
622 * @returns Pointer to TB on success, NULL if not found.
623 * @param pVCpu The cross context virtual CPU structure of the
624 * calling thread.
625 * @param pTbCache The translation block cache.
626 * @param GCPhysPc The PC to look up a TB for.
627 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
628 * the lookup.
629 * @thread EMT(pVCpu)
630 */
631static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
632 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP /** @todo r=bird: no longjumping here, right? iemNativeRecompile is noexcept. */
633{
634 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
635
636 /*
637 * First consult the lookup table entry.
638 */
639 PIEMTB * const ppTbLookup = pVCpu->iem.s.ppTbLookupEntryR3;
640 PIEMTB pTb = *ppTbLookup;
641 if (pTb)
642 {
643 if (pTb->GCPhysPc == GCPhysPc)
644 {
645 if ( (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_NATIVE)
646 || (pTb->fFlags & (IEMTB_F_KEY_MASK | IEMTB_F_TYPE_MASK)) == (fFlags | IEMTB_F_TYPE_THREADED) )
647 {
648 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
649 {
650 STAM_COUNTER_INC(&pTbCache->cLookupHitsViaTbLookupTable);
651 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
652 pTb->cUsed++;
653#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
654 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
655 {
656 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
657 return pTb;
658 }
659 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p) - recompiling\n", fFlags, GCPhysPc, pTb, ppTbLookup));
660 return iemNativeRecompile(pVCpu, pTb);
661#else
662 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp: %p (@ %p)\n", fFlags, GCPhysPc, pTb, ppTbLookup));
663 return pTb;
664#endif
665 }
666 }
667 }
668 }
669
670 /*
671 * Then consult the hash table.
672 */
673 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
674#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
675 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
676#endif
677 pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
678 while (pTb)
679 {
680 if (pTb->GCPhysPc == GCPhysPc)
681 {
682 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
683 {
684 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
685 {
686 STAM_COUNTER_INC(&pTbCache->cLookupHits);
687 AssertMsg(cLeft > 0, ("%d\n", cLeft));
688
689 *ppTbLookup = pTb;
690 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
691 pTb->cUsed++;
692#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
693 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != pVCpu->iem.s.uTbNativeRecompileAtUsedCount)
694 {
695 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
696 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
697 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
698 return pTb;
699 }
700 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d) - recompiling\n",
701 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
702 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
703 return iemNativeRecompile(pVCpu, pTb);
704#else
705 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
706 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
707 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
708 return pTb;
709#endif
710 }
711 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
712 }
713 else
714 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
715 }
716 else
717 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
718
719 pTb = pTb->pNext;
720#ifdef VBOX_STRICT
721 cLeft--;
722#endif
723 }
724 AssertMsg(cLeft == 0, ("%d\n", cLeft));
725 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
726 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: NULL - (%p L %d)\n", fFlags, GCPhysPc, idxHash,
727 IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]), IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
728 return pTb;
729}
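
/*
 * Illustrative sketch (not part of the original source): the lookup code above hands
 * a threaded TB to iemNativeRecompile exactly when its use count reaches
 * IEMCPU::uTbNativeRecompileAtUsedCount, while native TBs are returned as-is.  The
 * predicate below restates that promotion trigger on its own; the helper name is
 * hypothetical.
 */
#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
DECL_FORCE_INLINE(bool) exampleShouldRecompileToNative(PVMCPUCC pVCpu, PCIEMTB pTb)
{
    return (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED
        && pTb->cUsed == pVCpu->iem.s.uTbNativeRecompileAtUsedCount;
}
#endif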
730
731
732/*********************************************************************************************************************************
733* Translation Block Allocator.
734*********************************************************************************************************************************/
735/*
736 * Translation block allocation management.
737 */
738
739#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
740# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
741 ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
742# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
743 ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
744# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
745 ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
746#else
747# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
748 ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
749# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
750 ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
751# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
752 ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
753#endif
754/** Makes a TB index from a chunk index and TB index within that chunk. */
755#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
756 (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
757
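
/*
 * Illustrative sketch (not part of the original source): a round-trip check of the
 * chunk/index math above.  The concrete numbers in the comment assume a hypothetical
 * 2048 TBs per chunk; the real value is derived from sizeof(IEMTB) in iemTbInit below.
 */
static void exampleTbIdxRoundTrip(PIEMTBALLOCATOR pTbAllocator)
{
    /* E.g. with cTbsPerChunk = 2048, TB index 5000 maps to chunk 2, slot 904. */
    uint32_t const idxTb      = 5000;
    uint32_t const idxChunk   = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTb);
    uint32_t const idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTb, idxChunk);
    Assert(IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk) == idxTb);
    RT_NOREF(idxChunk, idxInChunk);
}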
758
759/**
760 * Initializes the TB allocator and cache for an EMT.
761 *
762 * @returns VBox status code.
763 * @param pVM The VM handle.
764 * @param cInitialTbs The initial number of translation blocks to
765 *                         preallocate.
766 * @param cMaxTbs The max number of translation blocks allowed.
767 * @param cbInitialExec The initial size of the executable memory allocator.
768 * @param cbMaxExec The max size of the executable memory allocator.
769 * @param cbChunkExec    The chunk size for the executable memory allocator. Zero
770 * or UINT32_MAX for automatically determining this.
771 * @thread EMT
772 */
773DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
774 uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
775{
776 PVMCPUCC pVCpu = VMMGetCpu(pVM);
777 Assert(!pVCpu->iem.s.pTbCacheR3);
778 Assert(!pVCpu->iem.s.pTbAllocatorR3);
779
780 /*
781 * Calculate the chunk size of the TB allocator.
782 * The minimum chunk size is 2MiB.
783 */
784 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
785 uint32_t cbPerChunk = _2M;
786 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
787#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
788 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
789 uint8_t cChunkShift = 21 - cTbShift;
790 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
791#endif
792 for (;;)
793 {
794 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
795 break;
796 cbPerChunk *= 2;
797 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
798#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
799 cChunkShift += 1;
800#endif
801 }
802
803 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
804 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
805 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
806
807 cMaxTbs = cMaxChunks * cTbsPerChunk;
808
809 /*
810 * Allocate and initialize it.
811 */
812 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(sizeof(*pTbAllocator));
813 if (!pTbAllocator)
814 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
815 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
816 sizeof(*pTbAllocator), cMaxTbs, pVCpu->idCpu);
817 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
818 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
819 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
820 pTbAllocator->cbPerChunk = cbPerChunk;
821 pTbAllocator->cMaxTbs = cMaxTbs;
822 pTbAllocator->pTbsFreeHead = NULL;
823#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
824 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
825 pTbAllocator->cChunkShift = cChunkShift;
826 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
827#endif
828
829 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
830
831 /*
832 * Allocate the initial chunks.
833 */
834 for (uint32_t idxChunk = 0; ; idxChunk++)
835 {
836 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
837 if (!paTbs)
838 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
839                           "Failed to allocate %zu bytes for the #%u chunk of TBs for VCpu #%u",
840 cbPerChunk, idxChunk, pVCpu->idCpu);
841
842 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
843 {
844 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
845 paTbs[iTb].pNext = pTbAllocator->pTbsFreeHead;
846 pTbAllocator->pTbsFreeHead = &paTbs[iTb];
847 }
848 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
849 pTbAllocator->cTotalTbs += cTbsPerChunk;
850
851 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
852 break;
853 }
854
855 /*
856 * Calculate the size of the hash table. We double the max TB count and
857 * round it up to the nearest power of two.
858 */
859 uint32_t cCacheEntries = cMaxTbs * 2;
860 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
861 {
862 uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
863 cCacheEntries = RT_BIT_32(iBitTop);
864 Assert(cCacheEntries >= cMaxTbs * 2);
865 }
866
867 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
868 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
869 if (!pTbCache)
870 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
871 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
872 cbTbCache, cCacheEntries, pVCpu->idCpu);
873
874 /*
875 * Initialize it (assumes zeroed by the allocator).
876 */
877 pTbCache->uMagic = IEMTBCACHE_MAGIC;
878 pTbCache->cHash = cCacheEntries;
879 pTbCache->uHashMask = cCacheEntries - 1;
880 Assert(pTbCache->cHash > pTbCache->uHashMask);
881 pVCpu->iem.s.pTbCacheR3 = pTbCache;
882
883 /*
884 * Initialize the native executable memory allocator.
885 */
886#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
887 int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
888 AssertLogRelRCReturn(rc, rc);
889#else
890 RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
891#endif
892
893 return VINF_SUCCESS;
894}
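
/*
 * Illustrative sketch (not part of the original source): as of the r105191 change
 * noted at the top of this page, free TBs are kept in a single LIFO list threaded
 * through IEMTB::pNext instead of an allocation bitmap.  iemTbInit above seeds that
 * list chunk by chunk; iemTbAllocatorAllocCore pops the head and iemTbAllocatorFreeInner
 * pushes freed TBs back, both in O(1).  The two helpers below just restate the pattern.
 */
DECL_FORCE_INLINE(void) exampleTbFreeListPush(PIEMTBALLOCATOR pTbAllocator, PIEMTB pTb)
{
    pTb->pNext                 = pTbAllocator->pTbsFreeHead;
    pTbAllocator->pTbsFreeHead = pTb;
}

DECL_FORCE_INLINE(PIEMTB) exampleTbFreeListPop(PIEMTBALLOCATOR pTbAllocator)
{
    PIEMTB const pTb = pTbAllocator->pTbsFreeHead;
    if (pTb)
        pTbAllocator->pTbsFreeHead = pTb->pNext;
    return pTb;
}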
895
896
897/**
898 * Inner free worker.
899 */
900static void iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator,
901 PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
902{
903 Assert(idxChunk < pTbAllocator->cAllocatedChunks); RT_NOREF(idxChunk);
904 Assert(idxInChunk < pTbAllocator->cTbsPerChunk); RT_NOREF(idxInChunk);
905 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
906#ifdef VBOX_STRICT
907 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
908 Assert(pTbOther != pTb);
909#endif
910
911 /*
912 * Unlink the TB from the hash table.
913 */
914 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
915
916 /*
917 * Free the TB itself.
918 */
919 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
920 {
921 case IEMTB_F_TYPE_THREADED:
922 pTbAllocator->cThreadedTbs -= 1;
923 RTMemFree(pTb->Thrd.paCalls);
924 break;
925#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
926 case IEMTB_F_TYPE_NATIVE:
927 pTbAllocator->cNativeTbs -= 1;
928 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
929 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
930 pTb->Native.paInstructions = NULL; /* required by iemExecMemAllocatorPrune */
931 break;
932#endif
933 default:
934 AssertFailed();
935 }
936
937 RTMemFree(IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0)); /* Frees both the TB lookup table and opcode bytes. */
938
939 pTb->pNext = pTbAllocator->pTbsFreeHead;
940 pTbAllocator->pTbsFreeHead = pTb;
941 pTb->fFlags = 0;
942 pTb->GCPhysPc = UINT64_MAX;
943 pTb->Gen.uPtr = 0;
944 pTb->Gen.uData = 0;
945 pTb->cTbLookupEntries = 0;
946 pTb->cbOpcodes = 0;
947 pTb->pabOpcodes = NULL;
948
949 Assert(pTbAllocator->cInUseTbs > 0);
950
951 pTbAllocator->cInUseTbs -= 1;
952 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
953}
954
955
956/**
957 * Frees the given TB.
958 *
959 * @param pVCpu The cross context virtual CPU structure of the calling
960 * thread.
961 * @param pTb The translation block to free.
962 * @thread EMT(pVCpu)
963 */
964DECLHIDDEN(void) iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
965{
966 /*
967 * Validate state.
968 */
969 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
970 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
971 uint8_t const idxChunk = pTb->idxAllocChunk;
972 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
973 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
974 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
975
976 /*
977 * Invalidate the TB lookup pointer and call the inner worker.
978 */
979 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
980 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
981}
982
983
984/**
985 * Schedules a TB for freeing when it's no longer being executed and/or part of
986 * the caller's call stack.
987 *
988 * The TB will be removed from the translation block cache, though, so it isn't
989 * possible to execute it again, and the IEMTB::pNext member can be used to link
990 * it together with other TBs awaiting freeing.
991 *
992 * @param pVCpu The cross context virtual CPU structure of the calling
993 * thread.
994 * @param pTb The translation block to schedule for freeing.
995 */
996static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
997{
998 /*
999 * Validate state.
1000 */
1001 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1002 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1003 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
1004 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
1005 Assert( (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE
1006 || (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1007#ifdef VBOX_STRICT
1008 for (PIEMTB pTbOther = pTbAllocator->pDelayedFreeHead; pTbOther; pTbOther = pTbOther->pNext)
1009 Assert(pTbOther != pTb);
1010#endif
1011
1012 /*
1013 * Remove it from the cache and prepend it to the allocator's todo list.
1014 *
1015 * Note! It could still be in various lookup tables, so we trash the GCPhys
1016 * and CS attribs to ensure it won't be reused.
1017 */
1018 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
1019 pTb->GCPhysPc = NIL_RTGCPHYS;
1020 pTb->x86.fAttr = UINT16_MAX;
1021
1022 pTb->pNext = pTbAllocator->pDelayedFreeHead;
1023 pTbAllocator->pDelayedFreeHead = pTb;
1024}
1025
1026
1027/**
1028 * Processes the delayed frees.
1029 *
1030 * This is called by the allocator function as well as the native recompile
1031 * function before making any TB or executable memory allocations respectively.
1032 */
1033void iemTbAllocatorProcessDelayedFrees(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator)
1034{
1035    /** @todo r=bird: these have already been removed from the cache,
1036 * iemTbAllocatorFree/Inner redoes that, which is a waste of time. */
1037 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
1038 pTbAllocator->pDelayedFreeHead = NULL;
1039 while (pTb)
1040 {
1041 PIEMTB const pTbNext = pTb->pNext;
1042 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
1043 iemTbAllocatorFree(pVCpu, pTb);
1044 pTb = pTbNext;
1045 }
1046}
1047
1048
1049/**
1050 * Grow the translation block allocator with another chunk.
1051 */
1052static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
1053{
1054 /*
1055 * Validate state.
1056 */
1057 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1058 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1059 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1060 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
1061 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1062
1063 /*
1064 * Allocate a new chunk and add it to the allocator.
1065 */
1066 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1067 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1068 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1069
1070 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1071 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1072 {
1073 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1074 paTbs[iTb].pNext = pTbAllocator->pTbsFreeHead;
1075 pTbAllocator->pTbsFreeHead = &paTbs[iTb];
1076 }
1077 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1078 pTbAllocator->cTotalTbs += cTbsPerChunk;
1079
1080 return VINF_SUCCESS;
1081}
1082
1083
1084/**
1085 * Allocates a TB from an allocator that has free blocks.
1086 *
1087 * This is common code to both the fast and slow allocator code paths.
1088 */
1089DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1090{
1091 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1092 Assert(pTbAllocator->pTbsFreeHead);
1093
1094 PIEMTB const pTb = pTbAllocator->pTbsFreeHead;
1095 pTbAllocator->pTbsFreeHead = pTb->pNext;
1096 pTbAllocator->cInUseTbs += 1;
1097 if (fThreaded)
1098 pTbAllocator->cThreadedTbs += 1;
1099 else
1100 pTbAllocator->cNativeTbs += 1;
1101 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1102 return pTb;
1103}
1104
1105
1106/**
1107 * Slow path for iemTbAllocatorAlloc.
1108 */
1109static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1110{
1111 /*
1112 * With some luck we can add another chunk.
1113 */
1114 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
1115 {
1116 int rc = iemTbAllocatorGrow(pVCpu);
1117 if (RT_SUCCESS(rc))
1118 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1119 }
1120
1121 /*
1122 * We have to prune stuff. Sigh.
1123 *
1124 * This requires scanning for older TBs and kicking them out. Not sure how best
1125 * to do this as we don't want to maintain any list of TBs ordered by last
1126 * usage time. But one reasonably simple approach would be that each time we
1127 * get here we continue a sequential scan of the allocation chunks,
1128 * considering just a smallish number of TBs and freeing a fixed portion of
1129 * them. Say, we consider the next 128 TBs, freeing the least recently used
1130 * out of each group of 4 TBs, resulting in 32 free TBs.
1131 */
1132 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1133 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1134 uint32_t const cTbsToPrune = 128;
1135 uint32_t const cTbsPerGroup = 4;
1136 uint32_t cFreedTbs = 0;
1137#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1138 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1139#else
1140 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1141#endif
1142 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1143 idxTbPruneFrom = 0;
1144 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1145 {
1146 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1147 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1148 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1149 uint32_t cMsAge = msNow - pTb->msLastUsed;
1150 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1151
1152 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1153 {
1154#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1155 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1156 { /* likely */ }
1157 else
1158 {
1159 idxInChunk2 = 0;
1160 idxChunk2 += 1;
1161 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1162 idxChunk2 = 0;
1163 }
1164#endif
1165 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1166 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1167 if ( cMsAge2 > cMsAge
1168 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1169 {
1170 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1171 pTb = pTb2;
1172 idxChunk = idxChunk2;
1173 idxInChunk = idxInChunk2;
1174 cMsAge = cMsAge2;
1175 }
1176 }
1177
1178 /* Free the TB. */
1179 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1180 cFreedTbs++; /* paranoia */
1181 }
1182 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1183 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1184
1185 /* Flush the TB lookup entry pointer. */
1186 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
1187
1188 /*
1189 * Allocate a TB from the ones we've pruned.
1190 */
1191 if (cFreedTbs)
1192 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1193 return NULL;
1194}
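
/*
 * Illustrative sketch (not part of the original source): the pruning loop above walks
 * a 128-TB window in groups of 4 and frees the least recently used TB of each group
 * (oldest msLastUsed, ties broken by the lower cUsed).  A minimal restatement of the
 * per-group victim selection, assuming the group already sits in one flat array:
 */
static PIEMTB examplePickPruneVictim(PIEMTB paGroup, uint32_t cGroup, uint32_t msNow)
{
    PIEMTB pVictim = &paGroup[0];
    for (uint32_t i = 1; i < cGroup; i++)
    {
        uint32_t const cMsAgeCur    = msNow - paGroup[i].msLastUsed;
        uint32_t const cMsAgeVictim = msNow - pVictim->msLastUsed;
        if (   cMsAgeCur > cMsAgeVictim
            || (cMsAgeCur == cMsAgeVictim && paGroup[i].cUsed < pVictim->cUsed))
            pVictim = &paGroup[i];
    }
    return pVictim;
}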
1195
1196
1197/**
1198 * Allocate a translation block.
1199 *
1200 * @returns Pointer to block on success, NULL if we're out and unable to
1201 * free up an existing one (very unlikely once implemented).
1202 * @param pVCpu The cross context virtual CPU structure of the calling
1203 * thread.
1204 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1205 * For statistics.
1206 */
1207DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1208{
1209 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1210 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1211
1212 /* Free any pending TBs before we proceed. */
1213 if (!pTbAllocator->pDelayedFreeHead)
1214 { /* probably likely */ }
1215 else
1216 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1217
1218    /* If the allocator is full, take the slow code path. */
1219 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1220 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1221 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1222}
1223
1224
1225/**
1226 * This is called when we're out of space for native TBs.
1227 *
1228 * This uses a variation on the pruning in iemTbAllocatorAllocSlow.
1229 * The difference is that we only prune native TBs and will only free any if
1230 * there are at least two in a group. The conditions under which we're called are
1231 * different - there will probably be free TBs in the table when we're called.
1232 * Therefore we increase the group size and max scan length, though we'll stop
1233 * scanning once we've reached the requested size (@a cNeededInstrs) and freed
1234 * up at least 8 TBs.
1235 */
1236void iemTbAllocatorFreeupNativeSpace(PVMCPUCC pVCpu, uint32_t cNeededInstrs)
1237{
1238 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1239 AssertReturnVoid(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1240
1241 STAM_REL_PROFILE_START(&pTbAllocator->StatPruneNative, a);
1242
1243 /*
1244 * Flush the delayed free list before we start freeing TBs indiscriminately.
1245 */
1246 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1247
1248 /*
1249 * Scan and free TBs.
1250 */
1251 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1252 uint32_t const cTbsToPrune = 128 * 8;
1253 uint32_t const cTbsPerGroup = 4 * 4;
1254 uint32_t cFreedTbs = 0;
1255 uint32_t cMaxInstrs = 0;
1256 uint32_t idxTbPruneFrom = pTbAllocator->iPruneNativeFrom & ~(uint32_t)(cTbsPerGroup - 1);
1257 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1258 {
1259 if (idxTbPruneFrom >= pTbAllocator->cTotalTbs)
1260 idxTbPruneFrom = 0;
1261 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1262 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1263 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1264 uint32_t cMsAge = pTb->fFlags & IEMTB_F_TYPE_NATIVE ? msNow - pTb->msLastUsed : msNow;
1265 uint8_t cNativeTbs = (pTb->fFlags & IEMTB_F_TYPE_NATIVE) != 0;
1266
1267 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1268 {
1269 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1270 { /* likely */ }
1271 else
1272 {
1273 idxInChunk2 = 0;
1274 idxChunk2 += 1;
1275 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1276 idxChunk2 = 0;
1277 }
1278 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1279 if (pTb2->fFlags & IEMTB_F_TYPE_NATIVE)
1280 {
1281 cNativeTbs += 1;
1282 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1283 if ( cMsAge2 > cMsAge
1284 || ( cMsAge2 == cMsAge
1285 && ( pTb2->cUsed < pTb->cUsed
1286 || ( pTb2->cUsed == pTb->cUsed
1287 && pTb2->Native.cInstructions > pTb->Native.cInstructions)))
1288 || !(pTb->fFlags & IEMTB_F_TYPE_NATIVE))
1289 {
1290 pTb = pTb2;
1291 idxChunk = idxChunk2;
1292 idxInChunk = idxInChunk2;
1293 cMsAge = cMsAge2;
1294 }
1295 }
1296 }
1297
1298        /* Free the TB if we found at least two native ones in this group. */
1299 if (cNativeTbs >= 2)
1300 {
1301 cMaxInstrs = RT_MAX(cMaxInstrs, pTb->Native.cInstructions);
1302 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1303 cFreedTbs++;
1304 if (cFreedTbs >= 8 && cMaxInstrs >= cNeededInstrs)
1305 break;
1306 }
1307 }
1308 pTbAllocator->iPruneNativeFrom = idxTbPruneFrom;
1309
1310 STAM_REL_PROFILE_STOP(&pTbAllocator->StatPruneNative, a);
1311}
1312
1313
1314/*********************************************************************************************************************************
1315* Threaded Recompiler Core *
1316*********************************************************************************************************************************/
1317/**
1318 * Formats TB flags (IEM_F_XXX and IEMTB_F_XXX) to string.
1319 * @returns pszBuf.
1320 * @param fFlags The flags.
1321 * @param pszBuf The output buffer.
1322 * @param cbBuf The output buffer size. At least 32 bytes.
1323 */
1324DECLHIDDEN(const char *) iemTbFlagsToString(uint32_t fFlags, char *pszBuf, size_t cbBuf) RT_NOEXCEPT
1325{
1326 Assert(cbBuf >= 32);
1327 static RTSTRTUPLE const s_aModes[] =
1328 {
1329 /* [00] = */ { RT_STR_TUPLE("16BIT") },
1330 /* [01] = */ { RT_STR_TUPLE("32BIT") },
1331 /* [02] = */ { RT_STR_TUPLE("!2!") },
1332 /* [03] = */ { RT_STR_TUPLE("!3!") },
1333 /* [04] = */ { RT_STR_TUPLE("16BIT_PRE_386") },
1334 /* [05] = */ { RT_STR_TUPLE("32BIT_FLAT") },
1335 /* [06] = */ { RT_STR_TUPLE("!6!") },
1336 /* [07] = */ { RT_STR_TUPLE("!7!") },
1337 /* [08] = */ { RT_STR_TUPLE("16BIT_PROT") },
1338 /* [09] = */ { RT_STR_TUPLE("32BIT_PROT") },
1339 /* [0a] = */ { RT_STR_TUPLE("64BIT") },
1340 /* [0b] = */ { RT_STR_TUPLE("!b!") },
1341 /* [0c] = */ { RT_STR_TUPLE("16BIT_PROT_PRE_386") },
1342 /* [0d] = */ { RT_STR_TUPLE("32BIT_PROT_FLAT") },
1343 /* [0e] = */ { RT_STR_TUPLE("!e!") },
1344 /* [0f] = */ { RT_STR_TUPLE("!f!") },
1345 /* [10] = */ { RT_STR_TUPLE("!10!") },
1346 /* [11] = */ { RT_STR_TUPLE("!11!") },
1347 /* [12] = */ { RT_STR_TUPLE("!12!") },
1348 /* [13] = */ { RT_STR_TUPLE("!13!") },
1349 /* [14] = */ { RT_STR_TUPLE("!14!") },
1350 /* [15] = */ { RT_STR_TUPLE("!15!") },
1351 /* [16] = */ { RT_STR_TUPLE("!16!") },
1352 /* [17] = */ { RT_STR_TUPLE("!17!") },
1353 /* [18] = */ { RT_STR_TUPLE("16BIT_PROT_V86") },
1354 /* [19] = */ { RT_STR_TUPLE("32BIT_PROT_V86") },
1355 /* [1a] = */ { RT_STR_TUPLE("!1a!") },
1356 /* [1b] = */ { RT_STR_TUPLE("!1b!") },
1357 /* [1c] = */ { RT_STR_TUPLE("!1c!") },
1358 /* [1d] = */ { RT_STR_TUPLE("!1d!") },
1359 /* [1e] = */ { RT_STR_TUPLE("!1e!") },
1360 /* [1f] = */ { RT_STR_TUPLE("!1f!") },
1361 };
1362 AssertCompile(RT_ELEMENTS(s_aModes) == IEM_F_MODE_MASK + 1);
1363 memcpy(pszBuf, s_aModes[fFlags & IEM_F_MODE_MASK].psz, s_aModes[fFlags & IEM_F_MODE_MASK].cch);
1364 size_t off = s_aModes[fFlags & IEM_F_MODE_MASK].cch;
1365
1366 pszBuf[off++] = ' ';
1367 pszBuf[off++] = 'C';
1368 pszBuf[off++] = 'P';
1369 pszBuf[off++] = 'L';
1370 pszBuf[off++] = '0' + ((fFlags >> IEM_F_X86_CPL_SHIFT) & IEM_F_X86_CPL_SMASK);
1371 Assert(off < 32);
1372
1373 fFlags &= ~(IEM_F_MODE_MASK | IEM_F_X86_CPL_SMASK);
1374
1375 static struct { const char *pszName; uint32_t cchName; uint32_t fFlag; } const s_aFlags[] =
1376 {
1377 { RT_STR_TUPLE("BYPASS_HANDLERS"), IEM_F_BYPASS_HANDLERS },
1378 { RT_STR_TUPLE("PENDING_BRK_INSTR"), IEM_F_PENDING_BRK_INSTR },
1379 { RT_STR_TUPLE("PENDING_BRK_DATA"), IEM_F_PENDING_BRK_DATA },
1380 { RT_STR_TUPLE("PENDING_BRK_X86_IO"), IEM_F_PENDING_BRK_X86_IO },
1381 { RT_STR_TUPLE("X86_DISREGARD_LOCK"), IEM_F_X86_DISREGARD_LOCK },
1382 { RT_STR_TUPLE("X86_CTX_VMX"), IEM_F_X86_CTX_VMX },
1383 { RT_STR_TUPLE("X86_CTX_SVM"), IEM_F_X86_CTX_SVM },
1384 { RT_STR_TUPLE("X86_CTX_IN_GUEST"), IEM_F_X86_CTX_IN_GUEST },
1385 { RT_STR_TUPLE("X86_CTX_SMM"), IEM_F_X86_CTX_SMM },
1386 { RT_STR_TUPLE("INHIBIT_SHADOW"), IEMTB_F_INHIBIT_SHADOW },
1387 { RT_STR_TUPLE("INHIBIT_NMI"), IEMTB_F_INHIBIT_NMI },
1388 { RT_STR_TUPLE("CS_LIM_CHECKS"), IEMTB_F_CS_LIM_CHECKS },
1389 { RT_STR_TUPLE("TYPE_THREADED"), IEMTB_F_TYPE_THREADED },
1390 { RT_STR_TUPLE("TYPE_NATIVE"), IEMTB_F_TYPE_NATIVE },
1391 };
1392 if (fFlags)
1393 for (unsigned i = 0; i < RT_ELEMENTS(s_aFlags); i++)
1394 if (s_aFlags[i].fFlag & fFlags)
1395 {
1396 AssertReturnStmt(off + 1 + s_aFlags[i].cchName + 1 <= cbBuf, pszBuf[off] = '\0', pszBuf);
1397 pszBuf[off++] = ' ';
1398 memcpy(&pszBuf[off], s_aFlags[i].pszName, s_aFlags[i].cchName);
1399 off += s_aFlags[i].cchName;
1400 fFlags &= ~s_aFlags[i].fFlag;
1401 if (!fFlags)
1402 break;
1403 }
1404 pszBuf[off] = '\0';
1405
1406 return pszBuf;
1407}
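
/*
 * Illustrative sketch (not part of the original source): typical use of
 * iemTbFlagsToString with a stack buffer, mirroring what iemThreadedDisassembleTb
 * does further down.  The helper name is hypothetical.
 */
static void exampleLogTbFlags(PCIEMTB pTb)
{
    char szBuf[128]; /* must be at least 32 bytes */
    Log2(("TB %p: fFlags=%#010x %s\n", pTb, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szBuf, sizeof(szBuf))));
}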
1408
1409
1410/** @callback_method_impl{FNDISREADBYTES, Dummy.} */
1411static DECLCALLBACK(int) iemThreadedDisasReadBytesDummy(PDISSTATE pDis, uint8_t offInstr, uint8_t cbMinRead, uint8_t cbMaxRead)
1412{
1413 RT_BZERO(&pDis->Instr.ab[offInstr], cbMaxRead);
1414 pDis->cbCachedInstr += cbMaxRead;
1415 RT_NOREF(cbMinRead);
1416 return VERR_NO_DATA;
1417}
1418
1419
1420/**
1421 * Worker for iemThreadedDisassembleTb.
1422 */
1423static void iemThreadedDumpLookupTable(PCIEMTB pTb, PCDBGFINFOHLP pHlp, unsigned idxFirst, unsigned cEntries,
1424 const char *pszLeadText = " TB Lookup:") RT_NOEXCEPT
1425{
1426 if (idxFirst + cEntries <= pTb->cTbLookupEntries)
1427 {
1428 PIEMTB * const papTbLookup = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idxFirst);
1429 pHlp->pfnPrintf(pHlp, "%s", pszLeadText);
1430 for (uint8_t iLookup = 0; iLookup < cEntries; iLookup++)
1431 {
1432 PIEMTB pLookupTb = papTbLookup[iLookup];
1433 if (pLookupTb)
1434 pHlp->pfnPrintf(pHlp, "%c%p (%s)", iLookup ? ',' : ' ', pLookupTb,
1435 (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED ? "threaded"
1436 : (pLookupTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? "native"
1437 : "invalid");
1438 else
1439 pHlp->pfnPrintf(pHlp, "%cNULL", iLookup ? ',' : ' ');
1440 }
1441 pHlp->pfnPrintf(pHlp, "\n");
1442 }
1443 else
1444 {
1445 pHlp->pfnPrintf(pHlp, " !!Bogus TB lookup info: idxFirst=%#x L %u > cTbLookupEntries=%#x!!\n",
1446 idxFirst, cEntries, pTb->cTbLookupEntries);
1447 AssertMsgFailed(("idxFirst=%#x L %u > cTbLookupEntries=%#x\n", idxFirst, cEntries, pTb->cTbLookupEntries));
1448 }
1449}
1450
1451
1452DECLHIDDEN(void) iemThreadedDisassembleTb(PCIEMTB pTb, PCDBGFINFOHLP pHlp) RT_NOEXCEPT
1453{
1454 AssertReturnVoid((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED);
1455
1456 char szDisBuf[512];
1457
1458 /*
1459 * Print TB info.
1460 */
1461 pHlp->pfnPrintf(pHlp,
1462 "pTb=%p: GCPhysPc=%RGp cInstructions=%u LB %#x cRanges=%u cTbLookupEntries=%u\n"
1463 "pTb=%p: cUsed=%u msLastUsed=%u fFlags=%#010x %s\n",
1464 pTb, pTb->GCPhysPc, pTb->cInstructions, pTb->cbOpcodes, pTb->cRanges, pTb->cTbLookupEntries,
1465 pTb, pTb->cUsed, pTb->msLastUsed, pTb->fFlags, iemTbFlagsToString(pTb->fFlags, szDisBuf, sizeof(szDisBuf)));
1466
1467 /*
1468 * This disassembly is driven by the debug info which follows the native
1469 * code and indicates when it starts with the next guest instructions,
1470 * where labels are and such things.
1471 */
1472 DISSTATE Dis;
1473 PCIEMTHRDEDCALLENTRY const paCalls = pTb->Thrd.paCalls;
1474 uint32_t const cCalls = pTb->Thrd.cCalls;
1475 DISCPUMODE enmGstCpuMode = (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_16BIT ? DISCPUMODE_16BIT
1476 : (pTb->fFlags & IEM_F_MODE_CPUMODE_MASK) == IEMMODE_32BIT ? DISCPUMODE_32BIT
1477 : DISCPUMODE_64BIT;
1478 uint32_t fExec = pTb->fFlags & UINT32_C(0x00ffffff);
1479 uint8_t idxRange = UINT8_MAX;
1480 uint8_t const cRanges = RT_MIN(pTb->cRanges, RT_ELEMENTS(pTb->aRanges));
1481 uint32_t offRange = 0;
1482 uint32_t offOpcodes = 0;
1483 uint32_t const cbOpcodes = pTb->cbOpcodes;
1484 RTGCPHYS GCPhysPc = pTb->GCPhysPc;
1485 bool fTbLookupSeen0 = false;
1486
1487 for (uint32_t iCall = 0; iCall < cCalls; iCall++)
1488 {
1489 /*
1490 * New opcode range?
1491 */
1492 if ( idxRange == UINT8_MAX
1493 || idxRange >= cRanges
1494 || offRange >= pTb->aRanges[idxRange].cbOpcodes)
1495 {
1496 idxRange += 1;
1497 if (idxRange < cRanges)
1498 offRange = !idxRange ? 0 : offRange - pTb->aRanges[idxRange - 1].cbOpcodes;
1499 else
1500 continue;
1501 GCPhysPc = pTb->aRanges[idxRange].offPhysPage
1502 + (pTb->aRanges[idxRange].idxPhysPage == 0
1503 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1504 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]);
1505 pHlp->pfnPrintf(pHlp, " Range #%u: GCPhysPc=%RGp LB %#x [idxPg=%d]\n",
1506 idxRange, GCPhysPc, pTb->aRanges[idxRange].cbOpcodes,
1507 pTb->aRanges[idxRange].idxPhysPage);
1508 GCPhysPc += offRange;
1509 }
1510
1511 /*
1512 * Disassemble another guest instruction?
1513 */
1514 if ( paCalls[iCall].offOpcode != offOpcodes
1515 && paCalls[iCall].cbOpcode > 0
1516 && (uint32_t)(cbOpcodes - paCalls[iCall].offOpcode) <= cbOpcodes /* paranoia^2 */ )
1517 {
1518 offOpcodes = paCalls[iCall].offOpcode;
1519 uint8_t const cbInstrMax = RT_MIN(cbOpcodes - offOpcodes, 15);
1520 uint32_t cbInstr = 1;
1521 int rc = DISInstrWithPrefetchedBytes(GCPhysPc, enmGstCpuMode, DISOPTYPE_ALL,
1522 &pTb->pabOpcodes[offOpcodes], cbInstrMax,
1523 iemThreadedDisasReadBytesDummy, NULL, &Dis, &cbInstr);
1524 if (RT_SUCCESS(rc))
1525 {
1526 DISFormatYasmEx(&Dis, szDisBuf, sizeof(szDisBuf),
1527 DIS_FMT_FLAGS_BYTES_WIDTH_MAKE(10) | DIS_FMT_FLAGS_BYTES_LEFT
1528 | DIS_FMT_FLAGS_RELATIVE_BRANCH | DIS_FMT_FLAGS_C_HEX,
1529 NULL /*pfnGetSymbol*/, NULL /*pvUser*/);
1530 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %s\n", GCPhysPc, szDisBuf);
1531 }
1532 else
1533 {
1534 pHlp->pfnPrintf(pHlp, " %%%%%RGp: %.*Rhxs - guest disassembly failure %Rrc\n",
1535 GCPhysPc, cbInstrMax, &pTb->pabOpcodes[offOpcodes], rc);
1536 cbInstr = paCalls[iCall].cbOpcode;
1537 }
1538 GCPhysPc += cbInstr;
1539 offRange += cbInstr;
1540 }
1541
1542 /*
1543 * Dump call details.
1544 */
1545 pHlp->pfnPrintf(pHlp,
1546 " Call #%u to %s (%u args)\n",
1547 iCall, g_apszIemThreadedFunctions[paCalls[iCall].enmFunction],
1548 g_acIemThreadedFunctionUsedArgs[paCalls[iCall].enmFunction]);
1549 if (paCalls[iCall].uTbLookup != 0)
1550 {
1551 uint8_t const idxFirst = IEM_TB_LOOKUP_TAB_GET_IDX(paCalls[iCall].uTbLookup);
1552 fTbLookupSeen0 = idxFirst == 0;
1553 iemThreadedDumpLookupTable(pTb, pHlp, idxFirst, IEM_TB_LOOKUP_TAB_GET_SIZE(paCalls[iCall].uTbLookup));
1554 }
1555
1556 /*
1557 * Snoop fExec.
1558 */
1559 switch (paCalls[iCall].enmFunction)
1560 {
1561 default:
1562 break;
1563 case kIemThreadedFunc_BltIn_CheckMode:
1564 fExec = paCalls[iCall].auParams[0];
1565 break;
1566 }
1567 }
1568
1569 if (!fTbLookupSeen0)
1570 iemThreadedDumpLookupTable(pTb, pHlp, 0, 1, " Fallback TB Lookup:");
1571}
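/* A minimal usage sketch for the disassembler above: dumping the EMT's current
   TB from a DBGF info callback.  The helper name is made up for illustration;
   only iemThreadedDisassembleTb, pCurTbR3 and the flag checks come from this
   file. */
#if 0 /* illustration only, not built */
static void iemR3InfoCurThreadedTbSketch(PVMCPUCC pVCpu, PCDBGFINFOHLP pHlp)
{
    PCIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
    if (pTb && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_THREADED)
        iemThreadedDisassembleTb(pTb, pHlp);  /* prints TB info, ranges, guest disassembly and call details */
    else
        pHlp->pfnPrintf(pHlp, "No threaded TB is current on this EMT.\n");
}
#endif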
1572
1573
1574
1575/**
1576 * Allocate a translation block for threaded recompilation.
1577 *
1578 * This is allocated with maxed out call table and storage for opcode bytes,
1579 * because it's only supposed to be called once per EMT to allocate the TB
1580 * pointed to by IEMCPU::pThrdCompileTbR3.
1581 *
1582 * @returns Pointer to the translation block on success, NULL on failure.
1583 * @param pVM The cross context virtual machine structure.
1584 * @param pVCpu The cross context virtual CPU structure of the calling
1585 * thread.
1586 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1587 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1588 */
1589static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1590{
1591 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1592 if (pTb)
1593 {
1594 unsigned const cCalls = 256;
1595 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1596 if (pTb->Thrd.paCalls)
1597 {
1598 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1599 if (pTb->pabOpcodes)
1600 {
1601 pVCpu->iem.s.cbOpcodesAllocated = cCalls * 16;
1602 pTb->Thrd.cAllocated = cCalls;
1603 pTb->Thrd.cCalls = 0;
1604 pTb->cbOpcodes = 0;
1605 pTb->pNext = NULL;
1606 pTb->cUsed = 0;
1607 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1608 pTb->idxAllocChunk = UINT8_MAX;
1609 pTb->GCPhysPc = GCPhysPc;
1610 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1611 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1612 pTb->cInstructions = 0;
1613 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1614
1615 /* Init the first opcode range. */
1616 pTb->cRanges = 1;
1617 pTb->aRanges[0].cbOpcodes = 0;
1618 pTb->aRanges[0].offOpcodes = 0;
1619 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1620 pTb->aRanges[0].u2Unused = 0;
1621 pTb->aRanges[0].idxPhysPage = 0;
1622 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1623 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1624
1625 return pTb;
1626 }
1627 RTMemFree(pTb->Thrd.paCalls);
1628 }
1629 RTMemFree(pTb);
1630 }
1631 RT_NOREF(pVM);
1632 return NULL;
1633}
1634
1635
1636/**
1637 * Called on the TB that is dedicated for recompilation before it's reused.
1638 *
1639 * @param pVCpu The cross context virtual CPU structure of the calling
1640 * thread.
1641 * @param pTb The translation block to reuse.
1642 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1643 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1644 */
1645static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1646{
1647 pTb->GCPhysPc = GCPhysPc;
1648 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1649 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1650 pTb->Thrd.cCalls = 0;
1651 pTb->cbOpcodes = 0;
1652 pTb->cInstructions = 0;
1653 pTb->cTbLookupEntries = 1; /* Entry zero is for anything w/o a specific entry. */
1654
1655 /* Init the first opcode range. */
1656 pTb->cRanges = 1;
1657 pTb->aRanges[0].cbOpcodes = 0;
1658 pTb->aRanges[0].offOpcodes = 0;
1659 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1660 pTb->aRanges[0].u2Unused = 0;
1661 pTb->aRanges[0].idxPhysPage = 0;
1662 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1663 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1664}
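/* Illustrative sketch (not built): how the per-EMT compile TB is obtained by
   pairing the two helpers above, mirroring the alloc-or-reuse logic in
   iemThreadedCompile() further down.  The wrapper name is made up. */
#if 0
static PIEMTB iemThreadedGetCompileTbSketch(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
{
    PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
    if (pTb)
        iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);       /* wipe and re-seed the existing TB */
    else
    {
        pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags); /* first compile on this EMT */
        if (pTb)
            pVCpu->iem.s.pThrdCompileTbR3 = pTb;
    }
    return pTb;
}
#endif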
1665
1666
1667/**
1668 * Used to duplicate a threaded translation block after recompilation is done.
1669 *
1670 * @returns Pointer to the translation block on success, NULL on failure.
1671 * @param pVM The cross context virtual machine structure.
1672 * @param pVCpu The cross context virtual CPU structure of the calling
1673 * thread.
1674 * @param pTbSrc The TB to duplicate.
1675 */
1676static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1677{
1678 /*
1679 * Just using the heap for now. Will make this more efficient and
1680 * complicated later, don't worry. :-)
1681 */
1682 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1683 if (pTb)
1684 {
1685 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1686 memcpy(pTb, pTbSrc, sizeof(*pTb));
1687 pTb->idxAllocChunk = idxAllocChunk;
1688
1689 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1690 Assert(cCalls > 0);
1691 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1692 if (pTb->Thrd.paCalls)
1693 {
1694 size_t const cbTbLookup = pTbSrc->cTbLookupEntries * sizeof(PIEMTB);
1695 Assert(cbTbLookup > 0);
1696 size_t const cbOpcodes = pTbSrc->cbOpcodes;
1697 Assert(cbOpcodes > 0);
1698 size_t const cbBoth = cbTbLookup + RT_ALIGN_Z(cbOpcodes, sizeof(PIEMTB));
1699 uint8_t * const pbBoth = (uint8_t *)RTMemAlloc(cbBoth);
1700 if (pbBoth)
1701 {
1702 RT_BZERO(pbBoth, cbTbLookup);
1703 pTb->pabOpcodes = (uint8_t *)memcpy(&pbBoth[cbTbLookup], pTbSrc->pabOpcodes, cbOpcodes);
1704 pTb->Thrd.cAllocated = cCalls;
1705 pTb->pNext = NULL;
1706 pTb->cUsed = 0;
1707 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1708 pTb->fFlags = pTbSrc->fFlags;
1709
1710 return pTb;
1711 }
1712 RTMemFree(pTb->Thrd.paCalls);
1713 }
1714 iemTbAllocatorFree(pVCpu, pTb);
1715 }
1716 RT_NOREF(pVM);
1717 return NULL;
1718
1719}
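/* For reference, the single allocation made above packs the TB lookup table
   and the opcode bytes back to back (derived from the code; rounding cbOpcodes
   up to a multiple of sizeof(PIEMTB) only affects the allocation size):

       pbBoth: [ lookup table: cTbLookupEntries * sizeof(PIEMTB), zeroed ]
               [ opcode bytes: cbOpcodes, copied from pTbSrc ]   <- pTb->pabOpcodes points here
 */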
1720
1721
1722/**
1723 * Adds the given TB to the hash table.
1724 *
1725 * @param pVCpu The cross context virtual CPU structure of the calling
1726 * thread.
1727 * @param pTbCache The cache to add it to.
1728 * @param pTb The translation block to add.
1729 */
1730static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1731{
1732 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1733
1734 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbInstr, pTb->cInstructions);
1735 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbLookupEntries, pTb->cTbLookupEntries);
1736 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1737 if (LogIs12Enabled())
1738 {
1739 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1740 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1741 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1742 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1743 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1744 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1745 pTb->aRanges[idxRange].idxPhysPage == 0
1746 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1747 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1748 }
1749}
1750
1751
1752/**
1753 * Called by opcode verifier functions when they detect a problem.
1754 */
1755void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1756{
1757 /* We cannot free the current TB (indicated by fSafeToFree) because:
1758 - A threaded TB will have its current call entry accessed
1759 to update pVCpu->iem.s.cInstructions.
1760 - A native TB will have code left to execute. */
1761 if (fSafeToFree)
1762 iemTbAllocatorFree(pVCpu, pTb);
1763 else
1764 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1765}
1766
1767
1768/*
1769 * Real code.
1770 */
1771
1772#ifdef LOG_ENABLED
1773/**
1774 * Logs the current instruction.
1775 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1776 * @param pszFunction The IEM function doing the execution.
1777 * @param idxInstr The instruction number in the block.
1778 */
1779static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction, uint32_t idxInstr) RT_NOEXCEPT
1780{
1781# ifdef IN_RING3
1782 if (LogIs2Enabled())
1783 {
1784 char szInstr[256];
1785 uint32_t cbInstr = 0;
1786 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
1787 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
1788 szInstr, sizeof(szInstr), &cbInstr);
1789
1790 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
1791 Log2(("**** %s fExec=%x pTb=%p cUsed=%u #%u\n"
1792 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
1793 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
1794 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
1795 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
1796 " %s\n"
1797 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3, pVCpu->iem.s.pCurTbR3 ? pVCpu->iem.s.pCurTbR3->cUsed : 0, idxInstr,
1798 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
1799 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
1800 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
1801 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
1802 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
1803 szInstr));
1804
1805 /*if (LogIs3Enabled()) - this outputs an insane amount of stuff, so disabled.
1806 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL); */
1807 }
1808 else
1809# endif
1810 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
1811 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
1812}
1813#endif /* LOG_ENABLED */
1814
1815
1816#if 0
1817static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1818{
1819 RT_NOREF(pVM, pVCpu);
1820 return rcStrict;
1821}
1822#endif
1823
1824
1825/**
1826 * Initializes the decoder state when compiling TBs.
1827 *
1828 * This presumes that fExec has already been initialized.
1829 *
1830 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so may need
1831 * to apply fixes to them as well.
1832 *
1833 * @param pVCpu The cross context virtual CPU structure of the calling
1834 * thread.
1835 * @param fReInit Clear for the first call for a TB, set for subsequent
1836 * calls from inside the compile loop where we can skip a
1837 * couple of things.
1838 * @param fExtraFlags The extra translation block flags when @a fReInit is
1839 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
1840 * checked.
1841 */
1842DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1843{
1844 /* ASSUMES: That iemInitExec was already called and that anyone changing
1845 CPU state affecting the fExec bits since then will have updated fExec! */
1846 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1847 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1848
1849 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1850
1851 /* Decoder state: */
1852 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1853 pVCpu->iem.s.enmEffAddrMode = enmMode;
1854 if (enmMode != IEMMODE_64BIT)
1855 {
1856 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1857 pVCpu->iem.s.enmEffOpSize = enmMode;
1858 }
1859 else
1860 {
1861 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1862 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1863 }
1864 pVCpu->iem.s.fPrefixes = 0;
1865 pVCpu->iem.s.uRexReg = 0;
1866 pVCpu->iem.s.uRexB = 0;
1867 pVCpu->iem.s.uRexIndex = 0;
1868 pVCpu->iem.s.idxPrefix = 0;
1869 pVCpu->iem.s.uVex3rdReg = 0;
1870 pVCpu->iem.s.uVexLength = 0;
1871 pVCpu->iem.s.fEvexStuff = 0;
1872 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
1873 pVCpu->iem.s.offModRm = 0;
1874 pVCpu->iem.s.iNextMapping = 0;
1875
1876 if (!fReInit)
1877 {
1878 pVCpu->iem.s.cActiveMappings = 0;
1879 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
1880 pVCpu->iem.s.fEndTb = false;
1881 pVCpu->iem.s.fTbCheckOpcodes = true; /* (check opcodes before executing the first instruction) */
1882 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1883 pVCpu->iem.s.fTbCrossedPage = false;
1884 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
1885 pVCpu->iem.s.fTbCurInstrIsSti = false;
1886 /* Force RF clearing and TF checking on first instruction in the block
1887 as we don't really know what came before and should assume the worst: */
1888 pVCpu->iem.s.fTbPrevInstr = IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_END_TB;
1889 }
1890 else
1891 {
1892 Assert(pVCpu->iem.s.cActiveMappings == 0);
1893 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
1894 Assert(pVCpu->iem.s.fEndTb == false);
1895 Assert(pVCpu->iem.s.fTbCrossedPage == false);
1896 pVCpu->iem.s.fTbPrevInstr = pVCpu->iem.s.fTbCurInstr;
1897 }
1898 pVCpu->iem.s.fTbCurInstr = 0;
1899
1900#ifdef DBGFTRACE_ENABLED
1901 switch (IEM_GET_CPU_MODE(pVCpu))
1902 {
1903 case IEMMODE_64BIT:
1904 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
1905 break;
1906 case IEMMODE_32BIT:
1907 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1908 break;
1909 case IEMMODE_16BIT:
1910 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1911 break;
1912 }
1913#endif
1914}
1915
1916
1917/**
1918 * Initializes the opcode fetcher when starting the compilation.
1919 *
1920 * @param pVCpu The cross context virtual CPU structure of the calling
1921 * thread.
1922 */
1923DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
1924{
1925 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
1926#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1927 pVCpu->iem.s.offOpcode = 0;
1928#else
1929 RT_NOREF(pVCpu);
1930#endif
1931}
1932
1933
1934/**
1935 * Re-initializes the opcode fetcher between instructions while compiling.
1936 *
1937 * @param pVCpu The cross context virtual CPU structure of the calling
1938 * thread.
1939 */
1940DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
1941{
1942 if (pVCpu->iem.s.pbInstrBuf)
1943 {
1944 uint64_t off = pVCpu->cpum.GstCtx.rip;
1945 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
1946 off += pVCpu->cpum.GstCtx.cs.u64Base;
1947 off -= pVCpu->iem.s.uInstrBufPc;
1948 if (off < pVCpu->iem.s.cbInstrBufTotal)
1949 {
1950 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
1951 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
1952 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
1953 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
1954 else
1955 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
1956 }
1957 else
1958 {
1959 pVCpu->iem.s.pbInstrBuf = NULL;
1960 pVCpu->iem.s.offInstrNextByte = 0;
1961 pVCpu->iem.s.offCurInstrStart = 0;
1962 pVCpu->iem.s.cbInstrBuf = 0;
1963 pVCpu->iem.s.cbInstrBufTotal = 0;
1964 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1965 }
1966 }
1967 else
1968 {
1969 pVCpu->iem.s.offInstrNextByte = 0;
1970 pVCpu->iem.s.offCurInstrStart = 0;
1971 pVCpu->iem.s.cbInstrBuf = 0;
1972 pVCpu->iem.s.cbInstrBufTotal = 0;
1973#ifdef VBOX_STRICT
1974 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1975#endif
1976 }
1977#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1978 pVCpu->iem.s.offOpcode = 0;
1979#endif
1980}
1981
1982#ifdef LOG_ENABLED
1983
1984/**
1985 * Inserts a NOP call.
1986 *
1987 * This is for debugging.
1988 *
1989 * @returns true on success, false if we're out of call entries.
1990 * @param pTb The translation block being compiled.
1991 */
1992bool iemThreadedCompileEmitNop(PIEMTB pTb)
1993{
1994 /* Emit the call. */
1995 uint32_t const idxCall = pTb->Thrd.cCalls;
1996 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
1997 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
1998 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
1999 pCall->enmFunction = kIemThreadedFunc_BltIn_Nop;
2000 pCall->idxInstr = pTb->cInstructions - 1;
2001 pCall->cbOpcode = 0;
2002 pCall->offOpcode = 0;
2003 pCall->uTbLookup = 0;
2004 pCall->uUnused0 = 0;
2005 pCall->auParams[0] = 0;
2006 pCall->auParams[1] = 0;
2007 pCall->auParams[2] = 0;
2008 return true;
2009}
2010
2011
2012/**
2013 * Called by iemThreadedCompile if cpu state logging is desired.
2014 *
2015 * @returns true on success, false if we're out of call entries.
2016 * @param pTb The translation block being compiled.
2017 */
2018bool iemThreadedCompileEmitLogCpuState(PIEMTB pTb)
2019{
2020 /* Emit the call. */
2021 uint32_t const idxCall = pTb->Thrd.cCalls;
2022 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2023 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2024 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2025 pCall->enmFunction = kIemThreadedFunc_BltIn_LogCpuState;
2026 pCall->idxInstr = pTb->cInstructions - 1;
2027 pCall->cbOpcode = 0;
2028 pCall->offOpcode = 0;
2029 pCall->uTbLookup = 0;
2030 pCall->uUnused0 = 0;
2031 pCall->auParams[0] = RT_MAKE_U16(pCall->idxInstr, idxCall); /* currently not used, but whatever */
2032 pCall->auParams[1] = 0;
2033 pCall->auParams[2] = 0;
2034 return true;
2035}
2036
2037#endif /* LOG_ENABLED */
2038
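/**
 * Copies the first @a cbInstr opcode bytes of the current instruction from
 * pVCpu->iem.s.abOpcode to @a pbDst.
 *
 * Functionally this matches memcpy(pbDst, pVCpu->iem.s.abOpcode, cbInstr) for
 * the 1..15 byte instruction lengths; the fall-through switch is presumably
 * meant to give the compiler straight-line byte moves for these tiny,
 * variable-length copies.
 */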
2039DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
2040{
2041 switch (cbInstr)
2042 {
2043 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
2044 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
2045 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
2046 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
2047 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
2048 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
2049 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
2050 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
2051 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
2052 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
2053 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
2054 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
2055 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
2056 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
2057 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
2058 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
2059 }
2060}
2061
2062
2063/**
2064 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
2065 *
2066 * - CS LIM check required.
2067 * - Must recheck opcode bytes.
2068 * - Previous instruction branched.
2069 * - TLB load detected, probably due to page crossing.
2070 *
2071 * @returns true if everything went well, false if we're out of space in the TB
2072 * (e.g. opcode ranges) or we need to start doing CS.LIM checks.
2073 * @param pVCpu The cross context virtual CPU structure of the calling
2074 * thread.
2075 * @param pTb The translation block being compiled.
2076 */
2077bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
2078{
2079 Log6(("%04x:%08RX64: iemThreadedCompileBeginEmitCallsComplications\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2080 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
2081#if 0
2082 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
2083 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
2084#endif
2085
2086 /*
2087 * If we're not in 64-bit mode and not already checking CS.LIM we need to
2088 * see if it's needed to start checking.
2089 */
2090 bool fConsiderCsLimChecking;
2091 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
2092 if ( fMode == IEM_F_MODE_X86_64BIT
2093 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
2094 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
2095 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
2096 fConsiderCsLimChecking = false; /* already enabled or not needed */
2097 else
2098 {
2099 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2100 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2101 fConsiderCsLimChecking = true; /* likely */
2102 else
2103 {
2104 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
2105 return false;
2106 }
2107 }
2108
2109 /*
2110 * Prepare the call now, even before we know if we can accept the instruction in this TB.
2111 * This allows us to amend parameters w/o making every case suffer.
2112 */
2113 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
2114 uint16_t const offOpcode = pTb->cbOpcodes;
2115 uint8_t idxRange = pTb->cRanges - 1;
2116
2117 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
2118 pCall->idxInstr = pTb->cInstructions;
2119 pCall->cbOpcode = cbInstr;
2120 pCall->offOpcode = offOpcode;
2121 pCall->uTbLookup = 0;
2122 pCall->uUnused0 = 0;
2123 pCall->auParams[0] = (uint32_t)cbInstr
2124 | (uint32_t)(pVCpu->iem.s.fExec << 8) /* liveness: Enough of fExec for IEM_F_MODE_X86_IS_FLAT. */
2125 /* The upper dword is sometimes used for cbStartPage. */;
2126 pCall->auParams[1] = idxRange;
2127 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
2128
2129/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
2130 * gotten onto. If we do, stop */
2131
2132 /*
2133 * Case 1: We've branched (RIP changed).
2134 *
2135 * Loop check: If the new PC (GCPhysPC) is within an opcode range of this
2136 * TB, end the TB here as it is most likely a loop and if it
2137 * made sense to unroll it, the guest code compiler should've
2138 * done it already.
2139 *
2140 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
2141 * Req: 1 extra range, no extra phys.
2142 *
2143 * Sub-case 1b: Different page but no page boundary crossing, so TLB load
2144 * necessary (fTbCrossedPage is true).
2145 * Req: 1 extra range, probably 1 extra phys page entry.
2146 *
2147 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
2148 * but in addition we cross into the following page and require
2149 * another TLB load.
2150 * Req: 2 extra ranges, probably 2 extra phys page entries.
2151 *
2152 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
2153 * the following page (thus fTbCrossedPage is true).
2154 * Req: 2 extra ranges, probably 1 extra phys page entry.
2155 *
2156 * Note! The setting of fTbCrossedPage is done by iemOpcodeFetchBytesJmp, but
2157 *       it may trigger "spuriously" from the CPU point of view because of
2158 *       physical page changes that'll invalidate the physical TLB and trigger a
2159 *       call to the function.  In theory this shouldn't be a big deal, just a bit
2160 *       of a performance loss as we'll pick the LoadingTlb variants.
2161 *
2162 * Note! We do not currently optimize branching to the next instruction (sorry
2163 * 32-bit PIC code). We could maybe do that in the branching code that
2164 * sets (or not) fTbBranched.
2165 */
2166 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
2167 * variant in win 3.1 code and the call variant in 32-bit linux PIC
2168 * code. This'll require filtering out far jmps and calls, as they
2169 * load CS which should technically be considered indirect since the
2170 * GDT/LDT entry's base address can be modified independently from
2171 * the code. */
2172 if (pVCpu->iem.s.fTbBranched != IEMBRANCHED_F_NO)
2173 {
2174 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
2175 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
2176 {
2177 /* 1a + 1b - instruction fully within the branched to page. */
2178 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
2179 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
2180
2181 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
2182 {
2183 /* Check that we've got a free range. */
2184 idxRange += 1;
2185 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2186 { /* likely */ }
2187 else
2188 {
2189 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2190 return false;
2191 }
2192 pCall->auParams[1] = idxRange;
2193 pCall->auParams[2] = 0;
2194
2195 /* Check that we've got a free page slot. */
2196 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2197 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2198 uint8_t idxPhysPage;
2199 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2200 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 0;
2201 else if (pTb->aGCPhysPages[0] == NIL_RTGCPHYS)
2202 {
2203 pTb->aGCPhysPages[0] = GCPhysNew;
2204 pTb->aRanges[idxRange].idxPhysPage = 1;
2205 idxPhysPage = UINT8_MAX;
2206 }
2207 else if (pTb->aGCPhysPages[0] == GCPhysNew)
2208 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 1;
2209 else if (pTb->aGCPhysPages[1] == NIL_RTGCPHYS)
2210 {
2211 pTb->aGCPhysPages[1] = GCPhysNew;
2212 pTb->aRanges[idxRange].idxPhysPage = 2;
2213 idxPhysPage = UINT8_MAX;
2214 }
2215 else if (pTb->aGCPhysPages[1] == GCPhysNew)
2216 pTb->aRanges[idxRange].idxPhysPage = idxPhysPage = 2;
2217 else
2218 {
2219 Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2220 return false;
2221 }
2222
2223 /* Loop check: We weave the loop check in here to optimize the lookup. */
2224 if (idxPhysPage != UINT8_MAX)
2225 {
2226 uint32_t const offPhysPc = pVCpu->iem.s.offCurInstrStart;
2227 for (uint8_t idxLoopRange = 0; idxLoopRange < idxRange; idxLoopRange++)
2228 if ( pTb->aRanges[idxLoopRange].idxPhysPage == idxPhysPage
2229 && offPhysPc - (uint32_t)pTb->aRanges[idxLoopRange].offPhysPage
2230 < (uint32_t)pTb->aRanges[idxLoopRange].cbOpcodes)
2231 {
2232 Log8(("%04x:%08RX64: loop detected after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2233 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbLoopInTbDetected);
2234 return false;
2235 }
2236 }
2237
2238 /* Finish setting up the new range. */
2239 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2240 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2241 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2242 pTb->aRanges[idxRange].u2Unused = 0;
2243 pTb->cRanges++;
2244 Log6(("%04x:%08RX64: new range #%u same page: offPhysPage=%#x offOpcodes=%#x\n",
2245 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].offPhysPage,
2246 pTb->aRanges[idxRange].offOpcodes));
2247 }
2248 else
2249 {
2250 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2251 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2252 }
2253
2254 /* Determine which function we need to load & check.
2255    Note! For jumps to a new page, we'll set both fTbBranched and
2256          fTbCrossedPage to avoid unnecessary TLB work for intra-page
2257          branching. */
2258 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
2259 || pVCpu->iem.s.fTbCrossedPage)
2260 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2261 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
2262 : !fConsiderCsLimChecking
2263 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
2264 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
2265 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
2266 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2267 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
2268 : !fConsiderCsLimChecking
2269 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
2270 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
2271 else
2272 {
2273 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
2274 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2275 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2276 : !fConsiderCsLimChecking
2277 ? kIemThreadedFunc_BltIn_CheckOpcodes
2278 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
2279 }
2280 }
2281 else
2282 {
2283 /* 1c + 1d - instruction crosses pages. */
2284 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2285 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2286
2287 /* Lazy bird: Check that this isn't case 1c, since we've already
2288                loaded the first physical address.  End the TB and
2289 make it a case 2b instead.
2290
2291 Hmm. Too much bother to detect, so just do the same
2292 with case 1d as well. */
2293#if 0 /** @todo get back to this later when we've got the actual branch code in
2294 * place. */
2295 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2296
2297 /* Check that we've got two free ranges. */
2298 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
2299 { /* likely */ }
2300 else
2301 return false;
2302 idxRange += 1;
2303 pCall->auParams[1] = idxRange;
2304 pCall->auParams[2] = 0;
2305
2306 /* ... */
2307
2308#else
2309 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2310 return false;
2311#endif
2312 }
2313 }
2314
2315 /*
2316 * Case 2: Page crossing.
2317 *
2318 * Sub-case 2a: The instruction starts on the first byte in the next page.
2319 *
2320 * Sub-case 2b: The instruction has opcode bytes in both the current and
2321 * following page.
2322 *
2323 * Both cases require a new range table entry and probably a new physical
2324 * page entry. The difference is in which functions to emit and whether to
2325 * add bytes to the current range.
2326 */
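    /* Worked example for sub-case 2b (illustrative numbers): an instruction
       starting at page offset 0xffe with cbInstr=4 gives offCurInstrStart=-2,
       so cbStartPage=2; the previous range grows by those 2 bytes while the
       new range starts at offPhysPage=0 with the remaining 2 opcode bytes. */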
2327 else if (pVCpu->iem.s.fTbCrossedPage)
2328 {
2329 /* Check that we've got a free range. */
2330 idxRange += 1;
2331 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2332 { /* likely */ }
2333 else
2334 {
2335 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2336 return false;
2337 }
2338
2339 /* Check that we've got a free page slot. */
2340 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2341 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2342 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2343 pTb->aRanges[idxRange].idxPhysPage = 0;
2344 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
2345 || pTb->aGCPhysPages[0] == GCPhysNew)
2346 {
2347 pTb->aGCPhysPages[0] = GCPhysNew;
2348 pTb->aRanges[idxRange].idxPhysPage = 1;
2349 }
2350 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
2351 || pTb->aGCPhysPages[1] == GCPhysNew)
2352 {
2353 pTb->aGCPhysPages[1] = GCPhysNew;
2354 pTb->aRanges[idxRange].idxPhysPage = 2;
2355 }
2356 else
2357 {
2358 Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2359 return false;
2360 }
2361
2362 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
2363 {
2364 Assert(pVCpu->iem.s.offCurInstrStart == 0);
2365 pCall->auParams[1] = idxRange;
2366 pCall->auParams[2] = 0;
2367
2368 /* Finish setting up the new range. */
2369 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2370 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2371 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2372 pTb->aRanges[idxRange].u2Unused = 0;
2373 pTb->cRanges++;
2374 Log6(("%04x:%08RX64: new range #%u new page (a) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2375 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2376 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2377
2378 /* Determine which function we need to load & check. */
2379 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2380 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
2381 : !fConsiderCsLimChecking
2382 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
2383 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
2384 }
2385 else
2386 {
2387 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2388 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2389 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2390 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
2391
2392 /* We're good.  Split the instruction over the old and new range table entries. */
2393 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
2394
2395 pTb->aRanges[idxRange].offPhysPage = 0;
2396 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
2397 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
2398 pTb->aRanges[idxRange].u2Unused = 0;
2399 pTb->cRanges++;
2400 Log6(("%04x:%08RX64: new range #%u new page (b) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2401 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2402 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2403
2404 /* Determine which function we need to load & check. */
2405 if (pVCpu->iem.s.fTbCheckOpcodes)
2406 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2407 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
2408 : !fConsiderCsLimChecking
2409 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
2410 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
2411 else
2412 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2413 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
2414 : !fConsiderCsLimChecking
2415 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
2416 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
2417 }
2418 }
2419
2420 /*
2421 * Regular case: No new range required.
2422 */
2423 else
2424 {
2425 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
2426 if (pVCpu->iem.s.fTbCheckOpcodes)
2427 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2428 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2429 : kIemThreadedFunc_BltIn_CheckOpcodes;
2430 else
2431 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
2432
2433 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2434 pTb->cbOpcodes = offOpcode + cbInstr;
2435 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2436 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2437 }
2438
2439 /*
2440 * Commit the call.
2441 */
2442 pTb->Thrd.cCalls++;
2443
2444 /*
2445 * Clear state.
2446 */
2447 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2448 pVCpu->iem.s.fTbCrossedPage = false;
2449 pVCpu->iem.s.fTbCheckOpcodes = false;
2450
2451 /*
2452 * Copy opcode bytes.
2453 */
2454 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2455 pTb->cbOpcodes = offOpcode + cbInstr;
2456 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2457
2458 return true;
2459}
2460
2461
2462/**
2463 * Worker for iemThreadedCompileBeginEmitCallsComplications and
2464 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
2465 *
2466 * @returns true if anything is pending, false if not.
2467 * @param pVCpu The cross context virtual CPU structure of the calling
2468 * thread.
2469 */
2470DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
2471{
2472 uint64_t fCpu = pVCpu->fLocalForcedActions;
2473 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
2474#if 1
2475 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
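    /* In short: something is pending when an NMI or SMI is flagged, or when an
       APIC/PIC interrupt is flagged while IF=1 and we're not in an interrupt
       shadow; everything else is not deliverable right now. */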
2476 if (RT_LIKELY( !fCpu
2477 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2478 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2479 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
2480 return false;
2481 return true;
2482#else
2483 return false;
2484#endif
2485
2486}
2487
2488
2489/**
2490 * Called by iemThreadedCompile when a block requires a mode check.
2491 *
2492 * @returns true if we should continue, false if we're out of call entries.
2493 * @param pVCpu The cross context virtual CPU structure of the calling
2494 * thread.
2495 * @param pTb The translation block being compiled.
2496 */
2497static bool iemThreadedCompileEmitCheckMode(PVMCPUCC pVCpu, PIEMTB pTb)
2498{
2499 /* Emit the call. */
2500 uint32_t const idxCall = pTb->Thrd.cCalls;
2501 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2502 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2503 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2504 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckMode;
2505 pCall->idxInstr = pTb->cInstructions - 1;
2506 pCall->cbOpcode = 0;
2507 pCall->offOpcode = 0;
2508 pCall->uTbLookup = 0;
2509 pCall->uUnused0 = 0;
2510 pCall->auParams[0] = pVCpu->iem.s.fExec;
2511 pCall->auParams[1] = 0;
2512 pCall->auParams[2] = 0;
2513 LogFunc(("%04x:%08RX64 fExec=%#x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->iem.s.fExec));
2514 return true;
2515}
2516
2517
2518/**
2519 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
2520 * set.
2521 *
2522 * @returns true if we should continue, false if an IRQ is deliverable or a
2523 * relevant force flag is pending.
2524 * @param pVCpu The cross context virtual CPU structure of the calling
2525 * thread.
2526 * @param pTb The translation block being compiled.
2527 * @sa iemThreadedCompileCheckIrq
2528 */
2529bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
2530{
2531 /*
2532 * Skip this if we've already emitted a call after the previous instruction
2533 * or if it's the first call, as we're always checking FFs between blocks.
2534 */
2535 uint32_t const idxCall = pTb->Thrd.cCalls;
2536 if ( idxCall > 0
2537 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2538 {
2539 /* Emit the call. */
2540 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2541 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2542 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2543 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2544 pCall->idxInstr = pTb->cInstructions;
2545 pCall->offOpcode = 0;
2546 pCall->cbOpcode = 0;
2547 pCall->uTbLookup = 0;
2548 pCall->uUnused0 = 0;
2549 pCall->auParams[0] = 0;
2550 pCall->auParams[1] = 0;
2551 pCall->auParams[2] = 0;
2552 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2553
2554 /* Reset the IRQ check value. */
2555 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2556
2557 /*
2558 * Check for deliverable IRQs and pending force flags.
2559 */
2560 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2561 }
2562 return true; /* continue */
2563}
2564
2565
2566/**
2567 * Emits an IRQ check call and checks for pending IRQs.
2568 *
2569 * @returns true if we should continue, false if an IRQ is deliverable or a
2570 * relevant force flag is pending.
2571 * @param pVCpu The cross context virtual CPU structure of the calling
2572 * thread.
2573 * @param pTb The translation block.
2574 * @sa iemThreadedCompileBeginEmitCallsComplications
2575 */
2576static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2577{
2578 /* Check again in a little bit, unless it is immediately following an STI
2579 in which case we *must* check immediately after the next instruction
2580 as well in case it's executed with interrupt inhibition. We could
2581 otherwise miss the interrupt window.  See the irq2 wait2 variant in
2582 bs3-timers-1 which is doing sti + sti + cli. */
2583 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2584 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2585 else
2586 {
2587 pVCpu->iem.s.fTbCurInstrIsSti = false;
2588 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2589 }
2590 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2591
2592 /*
2593 * Emit the call.
2594 */
2595 AssertReturn(pTb->Thrd.cCalls < pTb->Thrd.cAllocated, false);
2596 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++];
2597 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2598 pCall->idxInstr = pTb->cInstructions;
2599 pCall->offOpcode = 0;
2600 pCall->cbOpcode = 0;
2601 pCall->uTbLookup = 0;
2602 pCall->uUnused0 = 0;
2603 pCall->auParams[0] = 0;
2604 pCall->auParams[1] = 0;
2605 pCall->auParams[2] = 0;
2606
2607 /*
2608 * Check for deliverable IRQs and pending force flags.
2609 */
2610 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2611}
2612
2613
2614/**
2615 * Compiles a new TB and executes it.
2616 *
2617 * We combine compilation and execution here as it makes it simpler code flow
2618 * in the main loop and it allows interpreting while compiling if we want to
2619 * explore that option.
2620 *
2621 * @returns Strict VBox status code.
2622 * @param pVM The cross context virtual machine structure.
2623 * @param pVCpu The cross context virtual CPU structure of the calling
2624 * thread.
2625 * @param GCPhysPc The physical address corresponding to the current
2626 * RIP+CS.BASE.
2627 * @param fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
2628 * IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
2629 */
2630static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2631{
2632 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2633 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2634
2635 /*
2636 * Get the TB we use for the recompiling.  This is a maxed-out TB that
2637 * we'll make a more efficient copy of when we're done compiling.
2638 */
2639 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2640 if (pTb)
2641 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2642 else
2643 {
2644 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2645 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2646 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2647 }
2648
2649 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2650 functions may get at it. */
2651 pVCpu->iem.s.pCurTbR3 = pTb;
2652
2653#if 0
2654 /* Make sure the CheckIrq condition matches the one in EM. */
2655 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2656 const uint32_t cZeroCalls = 1;
2657#else
2658 const uint32_t cZeroCalls = 0;
2659#endif
2660
2661 /*
2662 * Now for the recompilation. (This mimics IEMExecLots in many ways.)
2663 */
2664 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2665 iemThreadedCompileInitOpcodeFetching(pVCpu);
2666 VBOXSTRICTRC rcStrict;
2667 for (;;)
2668 {
2669 /* Process the next instruction. */
2670#ifdef LOG_ENABLED
2671 iemThreadedLogCurInstr(pVCpu, "CC", pTb->cInstructions);
2672 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2673 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2674 Assert(uCsLog != 0 || uRipLog > 0x400 || !IEM_IS_REAL_OR_V86_MODE(pVCpu)); /* Detect executing RM interrupt table. */
2675#endif
2676 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2677 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2678
2679 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2680#if 0
2681 for (unsigned i = cCallsPrev; i < pTb->Thrd.cCalls; i++)
2682 Log8(("-> %#u/%u - %d %s\n", i, pTb->Thrd.paCalls[i].idxInstr, pTb->Thrd.paCalls[i].enmFunction,
2683 g_apszIemThreadedFunctions[pTb->Thrd.paCalls[i].enmFunction]));
2684#endif
2685 if ( rcStrict == VINF_SUCCESS
2686 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2687 && !pVCpu->iem.s.fEndTb)
2688 {
2689 Assert(pTb->Thrd.cCalls > cCallsPrev);
2690 Assert(pTb->Thrd.cCalls - cCallsPrev < 5);
2691
2692 pVCpu->iem.s.cInstructions++;
2693
2694 /* Check for mode change _after_ certain CIMPL calls, so check that
2695 we continue executing with the same mode value. */
2696 if (!(pVCpu->iem.s.fTbCurInstr & (IEM_CIMPL_F_MODE | IEM_CIMPL_F_XCPT | IEM_CIMPL_F_VMEXIT)))
2697 { /* probable */ }
2698 else if (RT_LIKELY(iemThreadedCompileEmitCheckMode(pVCpu, pTb)))
2699 { /* extremely likely */ }
2700 else
2701 break;
2702
2703#if defined(LOG_ENABLED) && 0 /* for debugging */
2704 //iemThreadedCompileEmitNop(pTb);
2705 iemThreadedCompileEmitLogCpuState(pTb);
2706#endif
2707 }
2708 else
2709 {
2710 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2711 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2712 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2713 rcStrict = VINF_SUCCESS;
2714
2715 if (pTb->Thrd.cCalls > cZeroCalls)
2716 {
2717 if (cCallsPrev != pTb->Thrd.cCalls)
2718 pVCpu->iem.s.cInstructions++;
2719 break;
2720 }
2721
2722 pVCpu->iem.s.pCurTbR3 = NULL;
2723 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2724 }
2725
2726 /* Check for IRQs? */
2727 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2728 pVCpu->iem.s.cInstrTillIrqCheck--;
2729 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2730 break;
2731
2732 /* Still space in the TB? */
2733 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2734 && pTb->cbOpcodes + 16 <= pVCpu->iem.s.cbOpcodesAllocated
2735 && pTb->cTbLookupEntries < 127)
2736 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2737 else
2738 {
2739 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes, %u TB lookup entries - full\n",
2740 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes, pTb->cTbLookupEntries));
2741 break;
2742 }
2743 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2744 }
2745
2746 /*
2747 * Reserve lookup space for the final call entry if necessary.
2748 */
2749 PIEMTHRDEDCALLENTRY pFinalCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls - 1];
2750 if (pTb->Thrd.cCalls > 1)
2751 {
2752 if (pFinalCall->uTbLookup == 0)
2753 {
2754 pFinalCall->uTbLookup = IEM_TB_LOOKUP_TAB_MAKE(pTb->cTbLookupEntries, 0);
2755 pTb->cTbLookupEntries += 1;
2756 }
2757 }
2758 else if (pFinalCall->uTbLookup != 0)
2759 {
2760 Assert(pTb->cTbLookupEntries > 1);
2761 pFinalCall->uTbLookup -= 1;
2762 pTb->cTbLookupEntries -= 1;
2763 }
2764
2765 /*
2766 * Duplicate the TB into a completed one and link it.
2767 */
2768 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2769 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2770
2771 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
2772
2773#ifdef IEM_COMPILE_ONLY_MODE
2774 /*
2775 * Execute the translation block.
2776 */
2777#endif
2778
2779 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2780}
2781
2782
2783
2784/*********************************************************************************************************************************
2785* Recompiled Execution Core *
2786*********************************************************************************************************************************/
2787
2788/** Helper for iemTbExec. */
2789DECL_FORCE_INLINE(PIEMTB *) iemTbGetTbLookupEntryWithRip(PCIEMTB pTb, uint8_t uTbLookup, uint64_t uRip)
2790{
2791 uint8_t const idx = IEM_TB_LOOKUP_TAB_GET_IDX_WITH_RIP(uTbLookup, uRip);
2792 Assert(idx < pTb->cTbLookupEntries);
2793 return IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, idx);
2794}
2795
2796
2797/**
2798 * Executes a translation block.
2799 *
2800 * @returns Strict VBox status code.
2801 * @param pVCpu The cross context virtual CPU structure of the calling
2802 * thread.
2803 * @param pTb The translation block to execute.
2804 */
2805static VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
2806{
2807 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
2808
2809 /*
2810 * Set the current TB so CIMPL functions may get at it.
2811 */
2812 pVCpu->iem.s.pCurTbR3 = pTb;
2813 pVCpu->iem.s.ppTbLookupEntryR3 = IEMTB_GET_TB_LOOKUP_TAB_ENTRY(pTb, 0);
2814
2815 /*
2816 * Execute the block.
2817 */
2818#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
2819 if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
2820 {
2821 pVCpu->iem.s.cTbExecNative++;
2822# ifdef LOG_ENABLED
2823 iemThreadedLogCurInstr(pVCpu, "EXn", 0);
2824# endif
2825
2826# ifndef IEMNATIVE_WITH_RECOMPILER_PROLOGUE_SINGLETON
2827# ifdef RT_ARCH_AMD64
2828 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu);
2829# else
2830 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu, &pVCpu->cpum.GstCtx);
2831# endif
2832# else
2833# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
2834 AssertCompileMemberOffset(VMCPUCC, iem.s.pvTbFramePointerR3, 0x7c8); /* This is assumed in iemNativeTbEntry */
2835# endif
2836# ifdef RT_ARCH_AMD64
2837 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, (uintptr_t)pTb->Native.paInstructions);
2838# else
2839 VBOXSTRICTRC const rcStrict = iemNativeTbEntry(pVCpu, &pVCpu->cpum.GstCtx, (uintptr_t)pTb->Native.paInstructions);
2840# endif
2841# endif
2842
2843# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
2844 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
2845# endif
2846# ifdef IEMNATIVE_STRICT_EFLAGS_SKIPPING
2847 Assert(pVCpu->iem.s.fSkippingEFlags == 0);
2848# endif
2849 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2850 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2851 { /* likely */ }
2852 else
2853 {
2854 /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
2855 pVCpu->iem.s.pCurTbR3 = NULL;
2856
2857 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2858 only to break out of TB execution early. */
2859 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2860 {
2861 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreak);
2862 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2863 }
2864
2865 /* VINF_IEM_REEXEC_BREAK_FF should be treated as VINF_SUCCESS as it's
2866 only to break out of TB execution early due to pending FFs. */
2867 if (rcStrict == VINF_IEM_REEXEC_BREAK_FF)
2868 {
2869 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnBreakFF);
2870 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2871 }
2872
2873 /* VINF_IEM_REEXEC_FINISH_WITH_FLAGS needs to receive special treatment
2874    and be converted to VINF_SUCCESS or whatever is appropriate. */
2875 if (rcStrict == VINF_IEM_REEXEC_FINISH_WITH_FLAGS)
2876 {
2877 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnWithFlags);
2878 return iemExecStatusCodeFiddling(pVCpu, iemFinishInstructionWithFlagsSet(pVCpu, VINF_SUCCESS));
2879 }
2880
2881 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitReturnOtherStatus);
2882 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2883 }
2884 }
2885 else
2886#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
2887 {
2888 /*
2889 * The threaded execution loop.
2890 */
2891 pVCpu->iem.s.cTbExecThreaded++;
2892#ifdef LOG_ENABLED
2893 uint64_t uRipPrev = UINT64_MAX;
2894#endif
2895 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
2896 uint32_t cCallsLeft = pTb->Thrd.cCalls;
2897 while (cCallsLeft-- > 0)
2898 {
2899#ifdef LOG_ENABLED
2900 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
2901 {
2902 uRipPrev = pVCpu->cpum.GstCtx.rip;
2903 iemThreadedLogCurInstr(pVCpu, "EXt", pTb->Thrd.cCalls - cCallsLeft - 1);
2904 }
2905 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
2906 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
2907 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
2908#endif
2909#ifdef VBOX_WITH_STATISTICS
2910 AssertCompile(RT_ELEMENTS(pVCpu->iem.s.acThreadedFuncStats) >= kIemThreadedFunc_End);
2911 pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction] += 1;
2912#endif
2913 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
2914 pCallEntry->auParams[0],
2915 pCallEntry->auParams[1],
2916 pCallEntry->auParams[2]);
2917 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2918 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2919 pCallEntry++;
2920 else
2921 {
2922 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
2923 pVCpu->iem.s.pCurTbR3 = NULL;
2924 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaks);
2925 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry->uTbLookup, pVCpu->cpum.GstCtx.rip);
2926
2927 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2928 only to break out of TB execution early. */
2929 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2930 {
2931#ifdef VBOX_WITH_STATISTICS
2932 if (pCallEntry->uTbLookup)
2933 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithLookup);
2934 else
2935 STAM_COUNTER_INC(&pVCpu->iem.s.StatTbThreadedExecBreaksWithoutLookup);
2936#endif
2937 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2938 }
2939 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2940 }
2941 }
2942
2943 /* Update the lookup entry. */
2944 pVCpu->iem.s.ppTbLookupEntryR3 = iemTbGetTbLookupEntryWithRip(pTb, pCallEntry[-1].uTbLookup, pVCpu->cpum.GstCtx.rip);
2945 }
2946
2947 pVCpu->iem.s.cInstructions += pTb->cInstructions;
2948 pVCpu->iem.s.pCurTbR3 = NULL;
2949 return VINF_SUCCESS;
2950}
2951
2952
2953/**
2954 * This is called when the PC doesn't match the current pbInstrBuf.
2955 *
2956 * Upon return, we're ready for opcode fetching. But please note that
2957 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
2958 * MMIO or unassigned).
2959 */
2960static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
2961{
2962 pVCpu->iem.s.pbInstrBuf = NULL;
2963 pVCpu->iem.s.offCurInstrStart = 0;
2964 pVCpu->iem.s.offInstrNextByte = 0;
2965 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
2966 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
2967}
2968
2969
2970/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
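/**
 * Gets the guest physical address of the current PC (RIP + CS.BASE) and
 * prepares opcode fetching for it.
 *
 * Reuses the current pbInstrBuf mapping when the new PC still falls within it;
 * otherwise defers to iemGetPcWithPhysAndCodeMissed() to reload the buffer.
 */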
2971DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
2972{
2973 /*
2974 * Set uCurTbStartPc to RIP and calc the effective PC.
2975 */
2976 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
2977 pVCpu->iem.s.uCurTbStartPc = uPc;
2978 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2979 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
2980
2981 /*
2982 * Advance within the current buffer (PAGE) when possible.
2983 */
2984 if (pVCpu->iem.s.pbInstrBuf)
2985 {
2986 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
2987 if (off < pVCpu->iem.s.cbInstrBufTotal)
2988 {
2989 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
2990 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
2991 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
2992 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
2993 else
2994 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
2995
2996 return pVCpu->iem.s.GCPhysInstrBuf + off;
2997 }
2998 }
2999 return iemGetPcWithPhysAndCodeMissed(pVCpu);
3000}
3001
3002
3003/**
3004 * Determines the extra IEMTB_F_XXX flags.
3005 *
3006 * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
3007 * IEMTB_F_CS_LIM_CHECKS (or zero).
3008 * @param pVCpu The cross context virtual CPU structure of the calling
3009 * thread.
3010 */
3011DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
3012{
3013 uint32_t fRet = 0;
3014
3015 /*
3016 * Determine the inhibit bits.
3017 */
3018 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (CPUMCTX_INHIBIT_SHADOW | CPUMCTX_INHIBIT_NMI)))
3019 { /* typical */ }
3020 else
3021 {
3022 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
3023 fRet |= IEMTB_F_INHIBIT_SHADOW;
3024 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
3025 fRet |= IEMTB_F_INHIBIT_NMI;
3026 }
3027
3028 /*
3029 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
3030 * likely to go invalid before the end of the translation block.
3031 */
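    /* Roughly speaking: when CS.LIM lies at least a guest page plus a
       maximum-length instruction beyond EIP (adjusted for the page offset of
       CS.BASE), the limit cannot be reached before the compiler gets another
       chance to reconsider, so the per-instruction checks are skipped.  This
       mirrors the fConsiderCsLimChecking logic in
       iemThreadedCompileBeginEmitCallsComplications above. */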
3032 if (IEM_F_MODE_X86_IS_FLAT(pVCpu->iem.s.fExec))
3033 return fRet;
3034
3035 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
3036 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
3037 return fRet;
3038 return fRet | IEMTB_F_CS_LIM_CHECKS;
3039}
3040
3041
3042VMM_INT_DECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu)
3043{
3044 /*
3045 * See if there is an interrupt pending in TRPM, inject it if we can.
3046 */
3047 if (!TRPMHasTrap(pVCpu))
3048 { /* likely */ }
3049 else
3050 {
3051 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
3052 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
3053        { /* likely */ }
3054 else
3055 return rcStrict;
3056 }
3057
3058 /*
3059 * Init the execution environment.
3060 */
3061#if 1 /** @todo this seems like a good idea, however if we ever share memory
3062 * directly with other threads on the host, it isn't necessarily... */
3063 if (pVM->cCpus == 1)
3064 iemInitExec(pVCpu, IEM_F_X86_DISREGARD_LOCK /*fExecOpts*/);
3065 else
3066#endif
3067 iemInitExec(pVCpu, 0 /*fExecOpts*/);
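        /* Seed the cached virtual-time millisecond stamp if not done yet; it is handed to
           TMTimerPollBoolWith32BitMilliTS in the run-loop below. */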
3068 if (RT_LIKELY(pVCpu->iem.s.msRecompilerPollNow != 0))
3069 { }
3070 else
3071 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(TMVirtualGetNoCheck(pVM) / RT_NS_1MS);
3072 pVCpu->iem.s.ppTbLookupEntryR3 = &pVCpu->iem.s.pTbLookupEntryDummyR3;
3073
3074 /*
3075 * Run-loop.
3076 *
3077 * If we're using setjmp/longjmp we combine all the catching here to avoid
3078 * having to call setjmp for each block we're executing.
3079 */
3080 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
3081 for (;;)
3082 {
3083 VBOXSTRICTRC rcStrict;
3084 IEM_TRY_SETJMP(pVCpu, rcStrict)
3085 {
3086            uint32_t const cPollRate = 511; /* EM.cpp passes 4095 to IEMExecLots, so an eighth of that seems reasonable for now. */
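                /* Note: cPollRate doubles as a mask ((iIterations & cPollRate) further down),
                   so it must be a power of two minus one. */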
3087 for (uint32_t iIterations = 0; ; iIterations++)
3088 {
3089 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
3090 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
3091 if (RT_LIKELY(pVCpu->iem.s.pbInstrBuf != NULL))
3092 {
3093 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
3094 PIEMTB const pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
3095 if (pTb)
3096 rcStrict = iemTbExec(pVCpu, pTb);
3097 else
3098 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
3099 }
3100 else
3101 {
3102 /* This can only happen if the current PC cannot be translated into a
3103 host pointer, which means we're in MMIO or unmapped memory... */
3104#if defined(VBOX_STRICT) && defined(IN_RING3)
3105 rcStrict = DBGFSTOP(pVM);
3106 if (rcStrict != VINF_SUCCESS && rcStrict != VERR_DBGF_NOT_ATTACHED)
3107 return rcStrict;
3108#endif
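                        /* Let the regular (non-recompiling) interpreter deal with it, at most
                           2048 instructions per call and with no instruction count requested. */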
3109 rcStrict = IEMExecLots(pVCpu, 2048, cPollRate, NULL);
3110 }
3111 if (rcStrict == VINF_SUCCESS)
3112 {
3113 Assert(pVCpu->iem.s.cActiveMappings == 0);
3114
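                        /* Stay in the loop unless a force flag needs servicing: the flags masked
                           out above do not force an exit here, and pending APIC/PIC interrupts
                           only do so when actually deliverable (IF set, no interrupt shadow). */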
3115 uint64_t fCpu = pVCpu->fLocalForcedActions;
3116 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
3117 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
3118 | VMCPU_FF_TLB_FLUSH
3119 | VMCPU_FF_UNHALT );
3120 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
3121 if (RT_LIKELY( ( !fCpu
3122 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
3123 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
3124 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
3125 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
3126 {
3127 if (RT_LIKELY( (iIterations & cPollRate) != 0
3128 || !TMTimerPollBoolWith32BitMilliTS(pVM, pVCpu, &pVCpu->iem.s.msRecompilerPollNow)))
3129 { /* likely */ }
3130 else
3131 return VINF_SUCCESS;
3132 }
3133 else
3134 return VINF_SUCCESS;
3135 }
3136 else
3137 return rcStrict;
3138 }
3139 }
3140 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
3141 {
3142 Assert(rcStrict != VINF_IEM_REEXEC_BREAK);
3143 pVCpu->iem.s.cLongJumps++;
3144#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
3145 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
3146#endif
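            /* Roll back any guest memory mappings the interrupted instruction left active. */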
3147 if (pVCpu->iem.s.cActiveMappings > 0)
3148 iemMemRollback(pVCpu);
3149
3150#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
3151 PIEMTB const pTb = pVCpu->iem.s.pCurTbR3;
3152 if (pTb && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
3153 {
3154 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatNativeTbExitLongJump);
3155# ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
3156 Assert(pVCpu->iem.s.idxTbCurInstr < pTb->cInstructions);
3157 pVCpu->iem.s.cInstructions += pVCpu->iem.s.idxTbCurInstr;
3158# endif
3159 }
3160#endif
3161
3162#if 0 /** @todo do we need to clean up anything? If not, we can drop the pTb = NULL some lines up and change the scope. */
3163 /* If pTb isn't NULL we're in iemTbExec. */
3164 if (!pTb)
3165 {
3166 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
3167 pTb = pVCpu->iem.s.pCurTbR3;
3168 if (pTb)
3169 {
3170 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
3171 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
3172 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
3173 }
3174 }
3175#endif
3176 pVCpu->iem.s.pCurTbR3 = NULL;
3177 return rcStrict;
3178 }
3179 IEM_CATCH_LONGJMP_END(pVCpu);
3180 }
3181}
3182