VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HMVMXR0.cpp@96989

Last change on this file: r96989, checked in by vboxsync on 2022-10-05

VMM/HMVMXR0: Nested VMX: bugref:10092 Let pointer to ASMBitClear for VMREAD/VMWRITE bitmaps be 32-bit aligned.

1/* $Id: HMVMXR0.cpp 96989 2022-10-05 06:16:49Z vboxsync $ */
2/** @file
3 * HM VMX (Intel VT-x) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2012-2022 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_HM
33#define VMCPU_INCL_CPUM_GST_CTX
34#include <iprt/x86.h>
35#include <iprt/asm-amd64-x86.h>
36#include <iprt/thread.h>
37#include <iprt/mem.h>
38#include <iprt/mp.h>
39
40#include <VBox/vmm/pdmapi.h>
41#include <VBox/vmm/dbgf.h>
42#include <VBox/vmm/iem.h>
43#include <VBox/vmm/iom.h>
44#include <VBox/vmm/tm.h>
45#include <VBox/vmm/em.h>
46#include <VBox/vmm/gcm.h>
47#include <VBox/vmm/gim.h>
48#include <VBox/vmm/apic.h>
49#include "HMInternal.h"
50#include <VBox/vmm/vmcc.h>
51#include <VBox/vmm/hmvmxinline.h>
52#include "HMVMXR0.h"
53#include "VMXInternal.h"
54#include "dtrace/VBoxVMM.h"
55
56
57/*********************************************************************************************************************************
58* Defined Constants And Macros *
59*********************************************************************************************************************************/
60#ifdef DEBUG_ramshankar
61# define HMVMX_ALWAYS_SAVE_GUEST_RFLAGS
62# define HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
63# define HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE
64# define HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
65# define HMVMX_ALWAYS_CLEAN_TRANSIENT
66# define HMVMX_ALWAYS_CHECK_GUEST_STATE
67# define HMVMX_ALWAYS_TRAP_ALL_XCPTS
68# define HMVMX_ALWAYS_TRAP_PF
69# define HMVMX_ALWAYS_FLUSH_TLB
70# define HMVMX_ALWAYS_SWAP_EFER
71#endif
72
73
74/*********************************************************************************************************************************
75* Structures and Typedefs *
76*********************************************************************************************************************************/
77/**
78 * VMX page allocation information.
79 */
80typedef struct
81{
82 uint32_t fValid; /**< Whether to allocate this page (e.g., based on a CPU feature). */
83 uint32_t uPadding0; /**< Padding to ensure arrays of these structs are aligned to a multiple of 8. */
84 PRTHCPHYS pHCPhys; /**< Where to store the host-physical address of the allocation. */
85 PRTR0PTR ppVirt; /**< Where to store the host-virtual address of the allocation. */
86} VMXPAGEALLOCINFO;
87/** Pointer to VMX page-allocation info. */
88typedef VMXPAGEALLOCINFO *PVMXPAGEALLOCINFO;
89/** Pointer to a const VMX page-allocation info. */
90typedef const VMXPAGEALLOCINFO *PCVMXPAGEALLOCINFO;
91AssertCompileSizeAlignment(VMXPAGEALLOCINFO, 8);
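/* For reference: with 8-byte pointers (64-bit host) this struct is 4 + 4 + 8 + 8 = 24 bytes,
   a multiple of 8 as the size-alignment assertion above requires. */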
92
93
94/*********************************************************************************************************************************
95* Internal Functions *
96*********************************************************************************************************************************/
97static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient);
98static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo);
99
100
101/**
102 * Checks if the given MSR is part of the lastbranch-from-IP MSR stack.
103 * @returns @c true if it's part of LBR stack, @c false otherwise.
104 *
105 * @param pVM The cross context VM structure.
106 * @param idMsr The MSR.
107 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
108 * Optional, can be NULL.
109 *
110 * @remarks Must only be called when LBR is enabled.
111 */
112DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchFromMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
113{
114 Assert(pVM->hmr0.s.vmx.fLbr);
115 Assert(pVM->hmr0.s.vmx.idLbrFromIpMsrFirst);
116 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
117 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
118 if (idxMsr < cLbrStack)
119 {
120 if (pidxMsr)
121 *pidxMsr = idxMsr;
122 return true;
123 }
124 return false;
125}
126
127
128/**
129 * Checks if the given MSR is part of the lastbranch-to-IP MSR stack.
130 * @returns @c true if it's part of LBR stack, @c false otherwise.
131 *
132 * @param pVM The cross context VM structure.
133 * @param idMsr The MSR.
134 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
135 * Optional, can be NULL.
136 *
137 * @remarks Must only be called when LBR is enabled and when lastbranch-to-IP MSRs
138 * are supported by the CPU (see hmR0VmxSetupLbrMsrRange).
139 */
140DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchToMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
141{
142 Assert(pVM->hmr0.s.vmx.fLbr);
143 if (pVM->hmr0.s.vmx.idLbrToIpMsrFirst)
144 {
145 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrToIpMsrLast - pVM->hmr0.s.vmx.idLbrToIpMsrFirst + 1;
146 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
147 if (idxMsr < cLbrStack)
148 {
149 if (pidxMsr)
150 *pidxMsr = idxMsr;
151 return true;
152 }
153 }
154 return false;
155}
156
157
158/**
159 * Gets the active (in use) VMCS info. object for the specified VCPU.
160 *
161 * This is either the guest or nested-guest VMCS info. and need not necessarily
162 * pertain to the "current" VMCS (in the VMX definition of the term). For instance,
163 * if the VM-entry failed due to an invalid-guest state, we may have "cleared" the
164 * current VMCS while returning to ring-3. However, the VMCS info. object for that
165 * VMCS would still be active and returned here so that we could dump the VMCS
166 * fields to ring-3 for diagnostics. This function is thus only used to
167 * distinguish between the nested-guest or guest VMCS.
168 *
169 * @returns The active VMCS information.
170 * @param pVCpu The cross context virtual CPU structure.
171 *
172 * @thread EMT.
173 * @remarks This function may be called with preemption or interrupts disabled!
174 */
175DECLINLINE(PVMXVMCSINFO) hmGetVmxActiveVmcsInfo(PVMCPUCC pVCpu)
176{
177 if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
178 return &pVCpu->hmr0.s.vmx.VmcsInfo;
179 return &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
180}
181
182
183/**
184 * Returns whether the VM-exit MSR-store area differs from the VM-exit MSR-load
185 * area.
186 *
187 * @returns @c true if it's different, @c false otherwise.
188 * @param pVmcsInfo The VMCS info. object.
189 */
190DECL_FORCE_INLINE(bool) hmR0VmxIsSeparateExitMsrStoreAreaVmcs(PCVMXVMCSINFO pVmcsInfo)
191{
192 return RT_BOOL( pVmcsInfo->pvGuestMsrStore != pVmcsInfo->pvGuestMsrLoad
193 && pVmcsInfo->pvGuestMsrStore);
194}
195
196
197/**
198 * Sets the given Processor-based VM-execution controls.
199 *
200 * @param pVmxTransient The VMX-transient structure.
201 * @param uProcCtls The Processor-based VM-execution controls to set.
202 */
203static void hmR0VmxSetProcCtlsVmcs(PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
204{
205 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
206 if ((pVmcsInfo->u32ProcCtls & uProcCtls) != uProcCtls)
207 {
208 pVmcsInfo->u32ProcCtls |= uProcCtls;
209 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
210 AssertRC(rc);
211 }
212}
213
214
215/**
216 * Removes the given Processor-based VM-execution controls.
217 *
218 * @param pVCpu The cross context virtual CPU structure.
219 * @param pVmxTransient The VMX-transient structure.
220 * @param uProcCtls The Processor-based VM-execution controls to remove.
221 *
222 * @remarks When executing a nested-guest, this will not remove any of the specified
223 * controls if the nested hypervisor has set any one of them.
224 */
225static void hmR0VmxRemoveProcCtlsVmcs(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
226{
227 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
228 if (pVmcsInfo->u32ProcCtls & uProcCtls)
229 {
230#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
231 if ( !pVmxTransient->fIsNestedGuest
232 || !CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, uProcCtls))
233#else
234 NOREF(pVCpu);
235 if (!pVmxTransient->fIsNestedGuest)
236#endif
237 {
238 pVmcsInfo->u32ProcCtls &= ~uProcCtls;
239 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
240 AssertRC(rc);
241 }
242 }
243}
244
245
246/**
247 * Sets the TSC offset for the current VMCS.
248 *
249 * @param uTscOffset The TSC offset to set.
250 * @param pVmcsInfo The VMCS info. object.
251 */
252static void hmR0VmxSetTscOffsetVmcs(PVMXVMCSINFO pVmcsInfo, uint64_t uTscOffset)
253{
254 if (pVmcsInfo->u64TscOffset != uTscOffset)
255 {
256 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, uTscOffset);
257 AssertRC(rc);
258 pVmcsInfo->u64TscOffset = uTscOffset;
259 }
260}
261
262
263/**
264 * Loads the VMCS specified by the VMCS info. object.
265 *
266 * @returns VBox status code.
267 * @param pVmcsInfo The VMCS info. object.
268 *
269 * @remarks Can be called with interrupts disabled.
270 */
271static int hmR0VmxLoadVmcs(PVMXVMCSINFO pVmcsInfo)
272{
273 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
274 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
275
276 int rc = VMXLoadVmcs(pVmcsInfo->HCPhysVmcs);
277 if (RT_SUCCESS(rc))
278 pVmcsInfo->fVmcsState |= VMX_V_VMCS_LAUNCH_STATE_CURRENT;
279 return rc;
280}
281
282
283/**
284 * Clears the VMCS specified by the VMCS info. object.
285 *
286 * @returns VBox status code.
287 * @param pVmcsInfo The VMCS info. object.
288 *
289 * @remarks Can be called with interrupts disabled.
290 */
291static int hmR0VmxClearVmcs(PVMXVMCSINFO pVmcsInfo)
292{
293 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
294 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
295
296 int rc = VMXClearVmcs(pVmcsInfo->HCPhysVmcs);
297 if (RT_SUCCESS(rc))
298 pVmcsInfo->fVmcsState = VMX_V_VMCS_LAUNCH_STATE_CLEAR;
299 return rc;
300}
301
302
303/**
304 * Checks whether the MSR belongs to the set of guest MSRs that we restore
305 * lazily while leaving VT-x.
306 *
307 * @returns true if it does, false otherwise.
308 * @param pVCpu The cross context virtual CPU structure.
309 * @param idMsr The MSR to check.
310 */
311static bool hmR0VmxIsLazyGuestMsr(PCVMCPUCC pVCpu, uint32_t idMsr)
312{
313 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
314 {
315 switch (idMsr)
316 {
317 case MSR_K8_LSTAR:
318 case MSR_K6_STAR:
319 case MSR_K8_SF_MASK:
320 case MSR_K8_KERNEL_GS_BASE:
321 return true;
322 }
323 }
324 return false;
325}
326
327
328/**
329 * Loads a set of guest MSRs to allow read/write passthru access to the guest.
330 *
331 * The name of this function is slightly confusing. This function does NOT
332 * postpone loading, but loads the MSR right now. "hmR0VmxLazy" is simply a
333 * common prefix for functions dealing with "lazy restoration" of the shared
334 * MSRs.
335 *
336 * @param pVCpu The cross context virtual CPU structure.
337 *
338 * @remarks No-long-jump zone!!!
339 */
340static void hmR0VmxLazyLoadGuestMsrs(PVMCPUCC pVCpu)
341{
342 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
343 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
344
345 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
346 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
347 {
348 /*
349 * If the guest MSRs are not loaded -and- if all the guest MSRs are identical
350 * to the MSRs on the CPU (which are the saved host MSRs, see assertion above) then
351 * we can skip a few MSR writes.
352 *
353 * Otherwise, it implies either 1. they're not loaded, or 2. they're loaded but the
354 * guest MSR values in the guest-CPU context might be different to what's currently
355 * loaded in the CPU. In either case, we need to write the new guest MSR values to the
356 * CPU, see @bugref{8728}.
357 */
358 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
359 if ( !(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
360 && pCtx->msrKERNELGSBASE == pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase
361 && pCtx->msrLSTAR == pVCpu->hmr0.s.vmx.u64HostMsrLStar
362 && pCtx->msrSTAR == pVCpu->hmr0.s.vmx.u64HostMsrStar
363 && pCtx->msrSFMASK == pVCpu->hmr0.s.vmx.u64HostMsrSfMask)
364 {
365#ifdef VBOX_STRICT
366 Assert(ASMRdMsr(MSR_K8_KERNEL_GS_BASE) == pCtx->msrKERNELGSBASE);
367 Assert(ASMRdMsr(MSR_K8_LSTAR) == pCtx->msrLSTAR);
368 Assert(ASMRdMsr(MSR_K6_STAR) == pCtx->msrSTAR);
369 Assert(ASMRdMsr(MSR_K8_SF_MASK) == pCtx->msrSFMASK);
370#endif
371 }
372 else
373 {
374 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pCtx->msrKERNELGSBASE);
375 ASMWrMsr(MSR_K8_LSTAR, pCtx->msrLSTAR);
376 ASMWrMsr(MSR_K6_STAR, pCtx->msrSTAR);
377 /* The system call flag mask register isn't as benign and accepting of all
378 values as the above, so mask it to avoid #GP'ing on corrupted input. */
379 Assert(!(pCtx->msrSFMASK & ~(uint64_t)UINT32_MAX));
380 ASMWrMsr(MSR_K8_SF_MASK, pCtx->msrSFMASK & UINT32_MAX);
381 }
382 }
383 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_LOADED_GUEST;
384}
385
386
387/**
388 * Checks if the specified guest MSR is part of the VM-entry MSR-load area.
389 *
390 * @returns @c true if found, @c false otherwise.
391 * @param pVmcsInfo The VMCS info. object.
392 * @param idMsr The MSR to find.
393 */
394static bool hmR0VmxIsAutoLoadGuestMsr(PCVMXVMCSINFO pVmcsInfo, uint32_t idMsr)
395{
396 PCVMXAUTOMSR pMsrs = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
397 uint32_t const cMsrs = pVmcsInfo->cEntryMsrLoad;
398 Assert(pMsrs);
399 Assert(sizeof(*pMsrs) * cMsrs <= X86_PAGE_4K_SIZE);
400 for (uint32_t i = 0; i < cMsrs; i++)
401 {
402 if (pMsrs[i].u32Msr == idMsr)
403 return true;
404 }
405 return false;
406}
407
408
409/**
410 * Performs lazy restoration of the set of host MSRs if they were previously
411 * loaded with guest MSR values.
412 *
413 * @param pVCpu The cross context virtual CPU structure.
414 *
415 * @remarks No-long-jump zone!!!
416 * @remarks The guest MSRs should have been saved back into the guest-CPU
417 * context by hmR0VmxImportGuestState()!!!
418 */
419static void hmR0VmxLazyRestoreHostMsrs(PVMCPUCC pVCpu)
420{
421 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
422 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
423
424 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
425 {
426 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
427 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
428 {
429 ASMWrMsr(MSR_K8_LSTAR, pVCpu->hmr0.s.vmx.u64HostMsrLStar);
430 ASMWrMsr(MSR_K6_STAR, pVCpu->hmr0.s.vmx.u64HostMsrStar);
431 ASMWrMsr(MSR_K8_SF_MASK, pVCpu->hmr0.s.vmx.u64HostMsrSfMask);
432 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase);
433 }
434 }
435 pVCpu->hmr0.s.vmx.fLazyMsrs &= ~(VMX_LAZY_MSRS_LOADED_GUEST | VMX_LAZY_MSRS_SAVED_HOST);
436}
437
438
439/**
440 * Sets pfnStartVm to the best suited variant.
441 *
442 * This must be called whenever anything changes relative to the hmR0VmxStartVm
443 * variant selection:
444 * - pVCpu->hm.s.fLoadSaveGuestXcr0
445 * - HM_WSF_IBPB_ENTRY in pVCpu->hmr0.s.fWorldSwitcher
446 * - HM_WSF_IBPB_EXIT in pVCpu->hmr0.s.fWorldSwitcher
447 * - Perhaps: CPUMIsGuestFPUStateActive() (windows only)
448 * - Perhaps: CPUMCTX.fXStateMask (windows only)
449 *
450 * We currently ASSUME that neither HM_WSF_IBPB_ENTRY nor HM_WSF_IBPB_EXIT
451 * can be changed at runtime.
452 */
453static void hmR0VmxUpdateStartVmFunction(PVMCPUCC pVCpu)
454{
455 static const struct CLANGWORKAROUND { PFNHMVMXSTARTVM pfn; } s_aHmR0VmxStartVmFunctions[] =
456 {
457 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
458 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
459 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
460 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
461 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
462 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
463 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
464 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
465 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
466 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
467 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
468 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
469 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
470 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
471 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
472 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
473 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
474 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
475 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
476 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
477 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
478 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
479 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
480 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
481 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
482 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
483 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
484 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
485 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
486 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
487 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
488 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
489 };
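 /* Each condition below contributes one bit of the table index: bit 0 = XCR0 load/save,
    bit 1 = IBPB-on-entry, bit 2 = L1D-flush-on-entry, bit 3 = MDS-clear-on-entry,
    bit 4 = IBPB-on-exit; 2^5 = 32 combinations, matching the 32 table entries above. */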
490 uintptr_t const idx = (pVCpu->hmr0.s.fLoadSaveGuestXcr0 ? 1 : 0)
491 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_ENTRY ? 2 : 0)
492 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_ENTRY ? 4 : 0)
493 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_ENTRY ? 8 : 0)
494 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_EXIT ? 16 : 0);
495 PFNHMVMXSTARTVM const pfnStartVm = s_aHmR0VmxStartVmFunctions[idx].pfn;
496 if (pVCpu->hmr0.s.vmx.pfnStartVm != pfnStartVm)
497 pVCpu->hmr0.s.vmx.pfnStartVm = pfnStartVm;
498}
499
500
501/**
502 * Pushes a 2-byte value onto the real-mode (in virtual-8086 mode) guest's
503 * stack.
504 *
505 * @returns Strict VBox status code (i.e. informational status codes too).
506 * @retval VINF_EM_RESET if pushing a value to the stack caused a triple-fault.
507 * @param pVCpu The cross context virtual CPU structure.
508 * @param uValue The value to push to the guest stack.
509 */
510static VBOXSTRICTRC hmR0VmxRealModeGuestStackPush(PVMCPUCC pVCpu, uint16_t uValue)
511{
512 /*
513 * The stack limit is 0xffff in real-on-virtual 8086 mode. Real-mode with weird stack limits cannot be run in
514 * virtual 8086 mode in VT-x. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers".
515 * See Intel Instruction reference for PUSH and Intel spec. 22.33.1 "Segment Wraparound".
516 */
517 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
518 if (pCtx->sp == 1)
519 return VINF_EM_RESET;
520 pCtx->sp -= sizeof(uint16_t); /* May wrap around which is expected behaviour. */
521 int rc = PGMPhysSimpleWriteGCPhys(pVCpu->CTX_SUFF(pVM), pCtx->ss.u64Base + pCtx->sp, &uValue, sizeof(uint16_t));
522 AssertRC(rc);
523 return rc;
524}
525
526
527/**
528 * Wrapper around VMXWriteVmcs16 taking a pVCpu parameter so VCC doesn't complain about
529 * unreferenced local parameters in the template code...
530 */
531DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t u16Val)
532{
533 RT_NOREF(pVCpu);
534 return VMXWriteVmcs16(uFieldEnc, u16Val);
535}
536
537
538/**
539 * Wrapper around VMXWriteVmcs32 taking a pVCpu parameter so VCC doesn't complain about
540 * unreferenced local parameters in the template code...
541 */
542DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t u32Val)
543{
544 RT_NOREF(pVCpu);
545 return VMXWriteVmcs32(uFieldEnc, u32Val);
546}
547
548
549/**
550 * Wrapper around VMXWriteVmcs64 taking a pVCpu parameter so VCC doesn't complain about
551 * unreferenced local parameters in the template code...
552 */
553DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t u64Val)
554{
555 RT_NOREF(pVCpu);
556 return VMXWriteVmcs64(uFieldEnc, u64Val);
557}
558
559
560/**
561 * Wrapper around VMXReadVmcs16 taking a pVCpu parameter so VCC doesn't complain about
562 * unreferenced local parameters in the template code...
563 */
564DECL_FORCE_INLINE(int) hmR0VmxReadVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t *pu16Val)
565{
566 RT_NOREF(pVCpu);
567 return VMXReadVmcs16(uFieldEnc, pu16Val);
568}
569
570
571/**
572 * Wrapper around VMXReadVmcs32 taking a pVCpu parameter so VCC doesn't complain about
573 * unreferenced local parameters in the template code...
574 */
575DECL_FORCE_INLINE(int) hmR0VmxReadVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t *pu32Val)
576{
577 RT_NOREF(pVCpu);
578 return VMXReadVmcs32(uFieldEnc, pu32Val);
579}
580
581
582/**
583 * Wrapper around VMXReadVmcs64 taking a pVCpu parameter so VCC doesn't complain about
584 * unreferenced local parameters in the template code...
585 */
586DECL_FORCE_INLINE(int) hmR0VmxReadVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t *pu64Val)
587{
588 RT_NOREF(pVCpu);
589 return VMXReadVmcs64(uFieldEnc, pu64Val);
590}
591
592
593/*
594 * Instantiate the code we share with the NEM darwin backend.
595 */
596#define VCPU_2_VMXSTATE(a_pVCpu) (a_pVCpu)->hm.s
597#define VCPU_2_VMXSTATS(a_pVCpu) (a_pVCpu)->hm.s
598
599#define VM_IS_VMX_UNRESTRICTED_GUEST(a_pVM) (a_pVM)->hmr0.s.vmx.fUnrestrictedGuest
600#define VM_IS_VMX_NESTED_PAGING(a_pVM) (a_pVM)->hmr0.s.fNestedPaging
601#define VM_IS_VMX_PREEMPT_TIMER_USED(a_pVM) (a_pVM)->hmr0.s.vmx.fUsePreemptTimer
602#define VM_IS_VMX_LBR(a_pVM) (a_pVM)->hmr0.s.vmx.fLbr
603
604#define VMX_VMCS_WRITE_16(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs16((a_pVCpu), (a_FieldEnc), (a_Val))
605#define VMX_VMCS_WRITE_32(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs32((a_pVCpu), (a_FieldEnc), (a_Val))
606#define VMX_VMCS_WRITE_64(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))
607#define VMX_VMCS_WRITE_NW(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))
608
609#define VMX_VMCS_READ_16(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs16((a_pVCpu), (a_FieldEnc), (a_pVal))
610#define VMX_VMCS_READ_32(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs32((a_pVCpu), (a_FieldEnc), (a_pVal))
611#define VMX_VMCS_READ_64(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))
612#define VMX_VMCS_READ_NW(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))
613
614#include "../VMMAll/VMXAllTemplate.cpp.h"
615
616#undef VMX_VMCS_WRITE_16
617#undef VMX_VMCS_WRITE_32
618#undef VMX_VMCS_WRITE_64
619#undef VMX_VMCS_WRITE_NW
620
621#undef VMX_VMCS_READ_16
622#undef VMX_VMCS_READ_32
623#undef VMX_VMCS_READ_64
624#undef VMX_VMCS_READ_NW
625
626#undef VM_IS_VMX_PREEMPT_TIMER_USED
627#undef VM_IS_VMX_NESTED_PAGING
628#undef VM_IS_VMX_UNRESTRICTED_GUEST
629#undef VCPU_2_VMXSTATS
630#undef VCPU_2_VMXSTATE
631
632
633/**
634 * Updates the VM's last error record.
635 *
636 * If there was a VMX instruction error, reads the error data from the VMCS and
637 * updates the VCPU's last error record as well.
638 *
639 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
640 * Can be NULL if @a rc is not VERR_VMX_UNABLE_TO_START_VM or
641 * VERR_VMX_INVALID_VMCS_FIELD.
642 * @param rc The error code.
643 */
644static void hmR0VmxUpdateErrorRecord(PVMCPUCC pVCpu, int rc)
645{
646 if ( rc == VERR_VMX_INVALID_VMCS_FIELD
647 || rc == VERR_VMX_UNABLE_TO_START_VM)
648 {
649 AssertPtrReturnVoid(pVCpu);
650 VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
651 }
652 pVCpu->CTX_SUFF(pVM)->hm.s.ForR3.rcInit = rc;
653}
654
655
656/**
657 * Enters VMX root mode operation on the current CPU.
658 *
659 * @returns VBox status code.
660 * @param pHostCpu The HM physical-CPU structure.
661 * @param pVM The cross context VM structure. Can be
662 * NULL, after a resume.
663 * @param HCPhysCpuPage Physical address of the VMXON region.
664 * @param pvCpuPage Pointer to the VMXON region.
665 */
666static int hmR0VmxEnterRootMode(PHMPHYSCPU pHostCpu, PVMCC pVM, RTHCPHYS HCPhysCpuPage, void *pvCpuPage)
667{
668 Assert(pHostCpu);
669 Assert(HCPhysCpuPage && HCPhysCpuPage != NIL_RTHCPHYS);
670 Assert(RT_ALIGN_T(HCPhysCpuPage, _4K, RTHCPHYS) == HCPhysCpuPage);
671 Assert(pvCpuPage);
672 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
673
674 if (pVM)
675 {
676 /* Write the VMCS revision identifier to the VMXON region. */
677 *(uint32_t *)pvCpuPage = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
678 }
679
680 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
681 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
682
683 /* Enable the VMX bit in CR4 if necessary. */
684 RTCCUINTREG const uOldCr4 = SUPR0ChangeCR4(X86_CR4_VMXE, RTCCUINTREG_MAX);
685
686 /* Record whether VMXE was already enabled prior to us enabling it above. */
687 pHostCpu->fVmxeAlreadyEnabled = RT_BOOL(uOldCr4 & X86_CR4_VMXE);
688
689 /* Enter VMX root mode. */
690 int rc = VMXEnable(HCPhysCpuPage);
691 if (RT_FAILURE(rc))
692 {
693 /* Restore CR4.VMXE if it was not set prior to our attempt to set it above. */
694 if (!pHostCpu->fVmxeAlreadyEnabled)
695 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
696
697 if (pVM)
698 pVM->hm.s.ForR3.vmx.HCPhysVmxEnableError = HCPhysCpuPage;
699 }
700
701 /* Restore interrupts. */
702 ASMSetFlags(fEFlags);
703 return rc;
704}
705
706
707/**
708 * Exits VMX root mode operation on the current CPU.
709 *
710 * @returns VBox status code.
711 * @param pHostCpu The HM physical-CPU structure.
712 */
713static int hmR0VmxLeaveRootMode(PHMPHYSCPU pHostCpu)
714{
715 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
716
717 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
718 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
719
720 /* If we're for some reason not in VMX root mode, then don't leave it. */
721 RTCCUINTREG const uHostCr4 = ASMGetCR4();
722
723 int rc;
724 if (uHostCr4 & X86_CR4_VMXE)
725 {
726 /* Exit VMX root mode and clear the VMX bit in CR4. */
727 VMXDisable();
728
729 /* Clear CR4.VMXE only if it was clear prior to us setting it. */
730 if (!pHostCpu->fVmxeAlreadyEnabled)
731 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
732
733 rc = VINF_SUCCESS;
734 }
735 else
736 rc = VERR_VMX_NOT_IN_VMX_ROOT_MODE;
737
738 /* Restore interrupts. */
739 ASMSetFlags(fEFlags);
740 return rc;
741}
742
743
744/**
745 * Allocates pages as specified by an array of VMX page-allocation info
746 * objects.
747 *
748 * The pages' contents are zeroed after allocation.
749 *
750 * @returns VBox status code.
751 * @param phMemObj Where to return the handle to the allocation.
752 * @param paAllocInfo The pointer to the first element of the VMX
753 * page-allocation info object array.
754 * @param cEntries The number of elements in the @a paAllocInfo array.
755 */
756static int hmR0VmxPagesAllocZ(PRTR0MEMOBJ phMemObj, PVMXPAGEALLOCINFO paAllocInfo, uint32_t cEntries)
757{
758 *phMemObj = NIL_RTR0MEMOBJ;
759
760 /* Figure out how many pages to allocate. */
761 uint32_t cPages = 0;
762 for (uint32_t iPage = 0; iPage < cEntries; iPage++)
763 cPages += !!paAllocInfo[iPage].fValid;
764
765 /* Allocate the pages. */
766 if (cPages)
767 {
768 size_t const cbPages = cPages << HOST_PAGE_SHIFT;
769 int rc = RTR0MemObjAllocPage(phMemObj, cbPages, false /* fExecutable */);
770 if (RT_FAILURE(rc))
771 return rc;
772
773 /* Zero the contents and assign each page to the corresponding VMX page-allocation entry. */
774 void *pvFirstPage = RTR0MemObjAddress(*phMemObj);
775 RT_BZERO(pvFirstPage, cbPages);
776
777 uint32_t iPage = 0;
778 for (uint32_t i = 0; i < cEntries; i++)
779 if (paAllocInfo[i].fValid)
780 {
781 RTHCPHYS const HCPhysPage = RTR0MemObjGetPagePhysAddr(*phMemObj, iPage);
782 void *pvPage = (void *)((uintptr_t)pvFirstPage + (iPage << X86_PAGE_4K_SHIFT));
783 Assert(HCPhysPage && HCPhysPage != NIL_RTHCPHYS);
784 AssertPtr(pvPage);
785
786 Assert(paAllocInfo[i].pHCPhys);
787 Assert(paAllocInfo[i].ppVirt);
788 *paAllocInfo[i].pHCPhys = HCPhysPage;
789 *paAllocInfo[i].ppVirt = pvPage;
790
791 /* Move to next page. */
792 ++iPage;
793 }
794
795 /* Make sure all valid (requested) pages have been assigned. */
796 Assert(iPage == cPages);
797 }
798 return VINF_SUCCESS;
799}
800
801
802/**
803 * Frees pages allocated using hmR0VmxPagesAllocZ.
804 *
805 * @param phMemObj Pointer to the memory object handle. Will be set to
806 * NIL.
807 */
808DECL_FORCE_INLINE(void) hmR0VmxPagesFree(PRTR0MEMOBJ phMemObj)
809{
810 /* We can cleanup wholesale since it's all one allocation. */
811 if (*phMemObj != NIL_RTR0MEMOBJ)
812 {
813 RTR0MemObjFree(*phMemObj, true /* fFreeMappings */);
814 *phMemObj = NIL_RTR0MEMOBJ;
815 }
816}
817
818
819/**
820 * Initializes a VMCS info. object.
821 *
822 * @param pVmcsInfo The VMCS info. object.
823 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
824 */
825static void hmR0VmxVmcsInfoInit(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
826{
827 RT_ZERO(*pVmcsInfo);
828 RT_ZERO(*pVmcsInfoShared);
829
830 pVmcsInfo->pShared = pVmcsInfoShared;
831 Assert(pVmcsInfo->hMemObj == NIL_RTR0MEMOBJ);
832 pVmcsInfo->HCPhysVmcs = NIL_RTHCPHYS;
833 pVmcsInfo->HCPhysShadowVmcs = NIL_RTHCPHYS;
834 pVmcsInfo->HCPhysMsrBitmap = NIL_RTHCPHYS;
835 pVmcsInfo->HCPhysGuestMsrLoad = NIL_RTHCPHYS;
836 pVmcsInfo->HCPhysGuestMsrStore = NIL_RTHCPHYS;
837 pVmcsInfo->HCPhysHostMsrLoad = NIL_RTHCPHYS;
838 pVmcsInfo->HCPhysVirtApic = NIL_RTHCPHYS;
839 pVmcsInfo->HCPhysEPTP = NIL_RTHCPHYS;
840 pVmcsInfo->u64VmcsLinkPtr = NIL_RTHCPHYS;
841 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
842 pVmcsInfo->idHostCpuExec = NIL_RTCPUID;
843}
844
845
846/**
847 * Frees the VT-x structures for a VMCS info. object.
848 *
849 * @param pVmcsInfo The VMCS info. object.
850 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
851 */
852static void hmR0VmxVmcsInfoFree(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
853{
854 hmR0VmxPagesFree(&pVmcsInfo->hMemObj);
855 hmR0VmxVmcsInfoInit(pVmcsInfo, pVmcsInfoShared);
856}
857
858
859/**
860 * Allocates the VT-x structures for a VMCS info. object.
861 *
862 * @returns VBox status code.
863 * @param pVCpu The cross context virtual CPU structure.
864 * @param pVmcsInfo The VMCS info. object.
865 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
866 *
867 * @remarks The caller is expected to take care of any and all allocation failures.
868 * This function will not perform any cleanup for failures half-way
869 * through.
870 */
871static int hmR0VmxAllocVmcsInfo(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
872{
873 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
874
875 bool const fMsrBitmaps = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS);
876 bool const fShadowVmcs = !fIsNstGstVmcs ? pVM->hmr0.s.vmx.fUseVmcsShadowing : pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing;
877 Assert(!pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing); /* VMCS shadowing is not yet exposed to the guest. */
878 VMXPAGEALLOCINFO aAllocInfo[] =
879 {
880 { true, 0 /* Unused */, &pVmcsInfo->HCPhysVmcs, &pVmcsInfo->pvVmcs },
881 { true, 0 /* Unused */, &pVmcsInfo->HCPhysGuestMsrLoad, &pVmcsInfo->pvGuestMsrLoad },
882 { true, 0 /* Unused */, &pVmcsInfo->HCPhysHostMsrLoad, &pVmcsInfo->pvHostMsrLoad },
883 { fMsrBitmaps, 0 /* Unused */, &pVmcsInfo->HCPhysMsrBitmap, &pVmcsInfo->pvMsrBitmap },
884 { fShadowVmcs, 0 /* Unused */, &pVmcsInfo->HCPhysShadowVmcs, &pVmcsInfo->pvShadowVmcs },
885 };
886
887 int rc = hmR0VmxPagesAllocZ(&pVmcsInfo->hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
888 if (RT_FAILURE(rc))
889 return rc;
890
891 /*
892 * We use the same page for the VM-entry MSR-load and VM-exit MSR-store areas
893 * because they contain a symmetric list of guest MSRs to load on VM-entry and store on VM-exit.
894 */
895 AssertCompile(RT_ELEMENTS(aAllocInfo) > 0);
896 Assert(pVmcsInfo->HCPhysGuestMsrLoad != NIL_RTHCPHYS);
897 pVmcsInfo->pvGuestMsrStore = pVmcsInfo->pvGuestMsrLoad;
898 pVmcsInfo->HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrLoad;
899
900 /*
901 * Get the virtual-APIC page rather than allocating it again.
902 */
903 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW)
904 {
905 if (!fIsNstGstVmcs)
906 {
907 if (PDMHasApic(pVM))
908 {
909 rc = APICGetApicPageForCpu(pVCpu, &pVmcsInfo->HCPhysVirtApic, (PRTR0PTR)&pVmcsInfo->pbVirtApic, NULL /*pR3Ptr*/);
910 if (RT_FAILURE(rc))
911 return rc;
912 Assert(pVmcsInfo->pbVirtApic);
913 Assert(pVmcsInfo->HCPhysVirtApic && pVmcsInfo->HCPhysVirtApic != NIL_RTHCPHYS);
914 }
915 }
916 else
917 {
918 pVmcsInfo->pbVirtApic = &pVCpu->cpum.GstCtx.hwvirt.vmx.abVirtApicPage[0];
919 pVmcsInfo->HCPhysVirtApic = GVMMR0ConvertGVMPtr2HCPhys(pVM, pVmcsInfo->pbVirtApic);
920 Assert(pVmcsInfo->HCPhysVirtApic && pVmcsInfo->HCPhysVirtApic != NIL_RTHCPHYS);
921 }
922 }
923
924 return VINF_SUCCESS;
925}
926
927
928/**
929 * Frees all VT-x structures for the VM.
930 *
932 * @param pVM The cross context VM structure.
933 */
934static void hmR0VmxStructsFree(PVMCC pVM)
935{
936 hmR0VmxPagesFree(&pVM->hmr0.s.vmx.hMemObj);
937#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
938 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
939 {
940 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsFields);
941 pVM->hmr0.s.vmx.paShadowVmcsFields = NULL;
942 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsRoFields);
943 pVM->hmr0.s.vmx.paShadowVmcsRoFields = NULL;
944 }
945#endif
946
947 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
948 {
949 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
950 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
951#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
952 if (pVM->cpum.ro.GuestFeatures.fVmx)
953 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
954#endif
955 }
956}
957
958
959/**
960 * Allocates all VT-x structures for the VM.
961 *
962 * @returns IPRT status code.
963 * @param pVM The cross context VM structure.
964 *
965 * @remarks This function will clean up on memory allocation failures.
966 */
967static int hmR0VmxStructsAlloc(PVMCC pVM)
968{
969 /*
970 * Sanity check the VMCS size reported by the CPU as we assume 4KB allocations.
971 * The VMCS size cannot be more than 4096 bytes.
972 *
973 * See Intel spec. Appendix A.1 "Basic VMX Information".
974 */
975 uint32_t const cbVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_SIZE);
976 if (cbVmcs <= X86_PAGE_4K_SIZE)
977 { /* likely */ }
978 else
979 {
980 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_INVALID_VMCS_SIZE;
981 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
982 }
983
984 /*
985 * Allocate per-VM VT-x structures.
986 */
987 bool const fVirtApicAccess = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
988 bool const fUseVmcsShadowing = pVM->hmr0.s.vmx.fUseVmcsShadowing;
989 VMXPAGEALLOCINFO aAllocInfo[] =
990 {
991 { fVirtApicAccess, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysApicAccess, (PRTR0PTR)&pVM->hmr0.s.vmx.pbApicAccess },
992 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmreadBitmap, &pVM->hmr0.s.vmx.pvVmreadBitmap },
993 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmwriteBitmap, &pVM->hmr0.s.vmx.pvVmwriteBitmap },
994#ifdef VBOX_WITH_CRASHDUMP_MAGIC
995 { true, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysScratch, (PRTR0PTR)&pVM->hmr0.s.vmx.pbScratch },
996#endif
997 };
998
999 int rc = hmR0VmxPagesAllocZ(&pVM->hmr0.s.vmx.hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
1000 if (RT_SUCCESS(rc))
1001 {
1002#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1003 /* Allocate the shadow VMCS-fields array. */
1004 if (fUseVmcsShadowing)
1005 {
1006 Assert(!pVM->hmr0.s.vmx.cShadowVmcsFields);
1007 Assert(!pVM->hmr0.s.vmx.cShadowVmcsRoFields);
1008 pVM->hmr0.s.vmx.paShadowVmcsFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
1009 pVM->hmr0.s.vmx.paShadowVmcsRoFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
1010 if (!pVM->hmr0.s.vmx.paShadowVmcsFields || !pVM->hmr0.s.vmx.paShadowVmcsRoFields)
1011 rc = VERR_NO_MEMORY;
1012 }
1013#endif
1014
1015 /*
1016 * Allocate per-VCPU VT-x structures.
1017 */
1018 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus && RT_SUCCESS(rc); idCpu++)
1019 {
1020 /* Allocate the guest VMCS structures. */
1021 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
1022 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
1023
1024#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1025 /* Allocate the nested-guest VMCS structures, when the VMX feature is exposed to the guest. */
1026 if (pVM->cpum.ro.GuestFeatures.fVmx && RT_SUCCESS(rc))
1027 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
1028#endif
1029 }
1030 if (RT_SUCCESS(rc))
1031 return VINF_SUCCESS;
1032 }
1033 hmR0VmxStructsFree(pVM);
1034 return rc;
1035}
1036
1037
1038/**
1039 * Pre-initializes non-zero fields in VMX structures that will be allocated.
1040 *
1041 * @param pVM The cross context VM structure.
1042 */
1043static void hmR0VmxStructsInit(PVMCC pVM)
1044{
1045 /* Paranoia. */
1046 Assert(pVM->hmr0.s.vmx.pbApicAccess == NULL);
1047#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1048 Assert(pVM->hmr0.s.vmx.pbScratch == NULL);
1049#endif
1050
1051 /*
1052 * Initialize members up-front so we can cleanup en masse on allocation failures.
1053 */
1054#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1055 pVM->hmr0.s.vmx.HCPhysScratch = NIL_RTHCPHYS;
1056#endif
1057 pVM->hmr0.s.vmx.HCPhysApicAccess = NIL_RTHCPHYS;
1058 pVM->hmr0.s.vmx.HCPhysVmreadBitmap = NIL_RTHCPHYS;
1059 pVM->hmr0.s.vmx.HCPhysVmwriteBitmap = NIL_RTHCPHYS;
1060 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1061 {
1062 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
1063 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
1064 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
1065 }
1066}
1067
1068#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1069/**
1070 * Returns whether an MSR at the given MSR-bitmap offset is intercepted or not.
1071 *
1072 * @returns @c true if the MSR is intercepted, @c false otherwise.
1073 * @param pbMsrBitmap The MSR bitmap.
1074 * @param offMsr The MSR byte offset.
1075 * @param iBit The bit offset from the byte offset.
1076 */
1077DECLINLINE(bool) hmR0VmxIsMsrBitSet(uint8_t const *pbMsrBitmap, uint16_t offMsr, int32_t iBit)
1078{
1079 Assert(offMsr + (iBit >> 3) <= X86_PAGE_4K_SIZE);
1080 return ASMBitTest(pbMsrBitmap + offMsr, iBit);
1081}
1082#endif
1083
1084/**
1085 * Sets the permission bits for the specified MSR in the given MSR bitmap.
1086 *
1087 * If the passed VMCS is a nested-guest VMCS, this function ensures that the
1088 * read/write intercept is cleared from the MSR bitmap used for hardware-assisted
1089 * VMX execution of the nested-guest, but only if the nested-guest is also not intercepting
1090 * the read/write access of this MSR.
1091 *
1092 * @param pVCpu The cross context virtual CPU structure.
1093 * @param pVmcsInfo The VMCS info. object.
1094 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1095 * @param idMsr The MSR value.
1096 * @param fMsrpm The MSR permissions (see VMXMSRPM_XXX). This must
1097 * include both a read -and- a write permission!
1098 *
1099 * @sa CPUMGetVmxMsrPermission.
1100 * @remarks Can be called with interrupts disabled.
1101 */
1102static void hmR0VmxSetMsrPermission(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs, uint32_t idMsr, uint32_t fMsrpm)
1103{
1104 uint8_t *pbMsrBitmap = (uint8_t *)pVmcsInfo->pvMsrBitmap;
1105 Assert(pbMsrBitmap);
1106 Assert(VMXMSRPM_IS_FLAG_VALID(fMsrpm));
1107
1108 /*
1109 * MSR-bitmap Layout:
1110 * Byte index MSR range Interpreted as
1111 * 0x000 - 0x3ff 0x00000000 - 0x00001fff Low MSR read bits.
1112 * 0x400 - 0x7ff 0xc0000000 - 0xc0001fff High MSR read bits.
1113 * 0x800 - 0xbff 0x00000000 - 0x00001fff Low MSR write bits.
1114 * 0xc00 - 0xfff 0xc0000000 - 0xc0001fff High MSR write bits.
1115 *
1116 * A bit corresponding to an MSR within the above range causes a VM-exit
1117 * if the bit is 1 on executions of RDMSR/WRMSR. If an MSR falls outside
1118 * these ranges, it always causes a VM-exit.
1119 *
1120 * See Intel spec. 24.6.9 "MSR-Bitmap Address".
1121 */
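 /* Worked example (illustrative): for idMsr = 0xc0000082 (MSR_K8_LSTAR) the code below
    computes offMsr = 0x400 and iBit = 0x82, so the read-intercept bit is bit 2 of byte
    0x400 + (0x82 >> 3) = 0x410 and the write-intercept bit is bit 2 of byte 0xc10. */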
1122 uint16_t const offBitmapRead = 0;
1123 uint16_t const offBitmapWrite = 0x800;
1124 uint16_t offMsr;
1125 int32_t iBit;
1126 if (idMsr <= UINT32_C(0x00001fff))
1127 {
1128 offMsr = 0;
1129 iBit = idMsr;
1130 }
1131 else if (idMsr - UINT32_C(0xc0000000) <= UINT32_C(0x00001fff))
1132 {
1133 offMsr = 0x400;
1134 iBit = idMsr - UINT32_C(0xc0000000);
1135 }
1136 else
1137 AssertMsgFailedReturnVoid(("Invalid MSR %#RX32\n", idMsr));
1138
1139 /*
1140 * Set the MSR read permission.
1141 */
1142 uint16_t const offMsrRead = offBitmapRead + offMsr;
1143 Assert(offMsrRead + (iBit >> 3) < offBitmapWrite);
1144 if (fMsrpm & VMXMSRPM_ALLOW_RD)
1145 {
1146#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1147 bool const fClear = !fIsNstGstVmcs ? true
1148 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrRead, iBit);
1149#else
1150 RT_NOREF2(pVCpu, fIsNstGstVmcs);
1151 bool const fClear = true;
1152#endif
1153 if (fClear)
1154 ASMBitClear(pbMsrBitmap + offMsrRead, iBit);
1155 }
1156 else
1157 ASMBitSet(pbMsrBitmap + offMsrRead, iBit);
1158
1159 /*
1160 * Set the MSR write permission.
1161 */
1162 uint16_t const offMsrWrite = offBitmapWrite + offMsr;
1163 Assert(offMsrWrite + (iBit >> 3) < X86_PAGE_4K_SIZE);
1164 if (fMsrpm & VMXMSRPM_ALLOW_WR)
1165 {
1166#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1167 bool const fClear = !fIsNstGstVmcs ? true
1168 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrWrite, iBit);
1169#else
1170 RT_NOREF2(pVCpu, fIsNstGstVmcs);
1171 bool const fClear = true;
1172#endif
1173 if (fClear)
1174 ASMBitClear(pbMsrBitmap + offMsrWrite, iBit);
1175 }
1176 else
1177 ASMBitSet(pbMsrBitmap + offMsrWrite, iBit);
1178}
1179
1180
1181/**
1182 * Updates the VMCS with the number of effective MSRs in the auto-load/store MSR
1183 * area.
1184 *
1185 * @returns VBox status code.
1186 * @param pVCpu The cross context virtual CPU structure.
1187 * @param pVmcsInfo The VMCS info. object.
1188 * @param cMsrs The number of MSRs.
1189 */
1190static int hmR0VmxSetAutoLoadStoreMsrCount(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint32_t cMsrs)
1191{
1192 /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. */
1193 uint32_t const cMaxSupportedMsrs = VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc);
1194 if (RT_LIKELY(cMsrs < cMaxSupportedMsrs))
1195 {
1196 /* Commit the MSR counts to the VMCS and update the cache. */
1197 if (pVmcsInfo->cEntryMsrLoad != cMsrs)
1198 {
1199 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
1200 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, cMsrs); AssertRC(rc);
1201 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
1202 pVmcsInfo->cEntryMsrLoad = cMsrs;
1203 pVmcsInfo->cExitMsrStore = cMsrs;
1204 pVmcsInfo->cExitMsrLoad = cMsrs;
1205 }
1206 return VINF_SUCCESS;
1207 }
1208
1209 LogRel(("Auto-load/store MSR count exceeded! cMsrs=%u MaxSupported=%u\n", cMsrs, cMaxSupportedMsrs));
1210 pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_GUEST_MSR_STORAGE;
1211 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1212}
1213
1214
1215/**
1216 * Adds a new (or updates the value of an existing) guest/host MSR
1217 * pair to be swapped during the world-switch as part of the
1218 * auto-load/store MSR area in the VMCS.
1219 *
1220 * @returns VBox status code.
1221 * @param pVCpu The cross context virtual CPU structure.
1222 * @param pVmxTransient The VMX-transient structure.
1223 * @param idMsr The MSR.
1224 * @param uGuestMsrValue Value of the guest MSR.
1225 * @param fSetReadWrite Whether to set the guest read/write access of this
1226 * MSR (thus not causing a VM-exit).
1227 * @param fUpdateHostMsr Whether to update the value of the host MSR if
1228 * necessary.
1229 */
1230static int hmR0VmxAddAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr, uint64_t uGuestMsrValue,
1231 bool fSetReadWrite, bool fUpdateHostMsr)
1232{
1233 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1234 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
1235 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1236 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
1237 uint32_t i;
1238
1239 /* Paranoia. */
1240 Assert(pGuestMsrLoad);
1241
1242#ifndef DEBUG_bird
1243 LogFlowFunc(("pVCpu=%p idMsr=%#RX32 uGuestMsrValue=%#RX64\n", pVCpu, idMsr, uGuestMsrValue));
1244#endif
1245
1246 /* Check if the MSR already exists in the VM-entry MSR-load area. */
1247 for (i = 0; i < cMsrs; i++)
1248 {
1249 if (pGuestMsrLoad[i].u32Msr == idMsr)
1250 break;
1251 }
1252
1253 bool fAdded = false;
1254 if (i == cMsrs)
1255 {
1256 /* The MSR does not exist, bump the MSR count to make room for the new MSR. */
1257 ++cMsrs;
1258 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
1259 AssertMsgRCReturn(rc, ("Insufficient space to add MSR to VM-entry MSR-load/store area %u\n", idMsr), rc);
1260
1261 /* Set the guest to read/write this MSR without causing VM-exits. */
1262 if ( fSetReadWrite
1263 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS))
1264 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_ALLOW_RD_WR);
1265
1266 Log4Func(("Added MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
1267 fAdded = true;
1268 }
1269
1270 /* Update the MSR value for the newly added or already existing MSR. */
1271 pGuestMsrLoad[i].u32Msr = idMsr;
1272 pGuestMsrLoad[i].u64Value = uGuestMsrValue;
1273
1274 /* Create the corresponding slot in the VM-exit MSR-store area if we use a different page. */
1275 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
1276 {
1277 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1278 pGuestMsrStore[i].u32Msr = idMsr;
1279 pGuestMsrStore[i].u64Value = uGuestMsrValue;
1280 }
1281
1282 /* Update the corresponding slot in the host MSR area. */
1283 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1284 Assert(pHostMsr != pVmcsInfo->pvGuestMsrLoad);
1285 Assert(pHostMsr != pVmcsInfo->pvGuestMsrStore);
1286 pHostMsr[i].u32Msr = idMsr;
1287
1288 /*
1289 * Only if the caller requests to update the host MSR value AND we've newly added the
1290 * MSR to the host MSR area do we actually update the value. Otherwise, it will be
1291 * updated by hmR0VmxUpdateAutoLoadHostMsrs().
1292 *
1293 * We do this for performance reasons since reading MSRs may be quite expensive.
1294 */
1295 if (fAdded)
1296 {
1297 if (fUpdateHostMsr)
1298 {
1299 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
1300 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1301 pHostMsr[i].u64Value = ASMRdMsr(idMsr);
1302 }
1303 else
1304 {
1305 /* Someone else can do the work. */
1306 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
1307 }
1308 }
1309 return VINF_SUCCESS;
1310}
1311
1312
1313/**
1314 * Removes a guest/host MSR pair to be swapped during the world-switch from the
1315 * auto-load/store MSR area in the VMCS.
1316 *
1317 * @returns VBox status code.
1318 * @param pVCpu The cross context virtual CPU structure.
1319 * @param pVmxTransient The VMX-transient structure.
1320 * @param idMsr The MSR.
1321 */
1322static int hmR0VmxRemoveAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr)
1323{
1324 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1325 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
1326 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1327 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
1328
1329#ifndef DEBUG_bird
1330 LogFlowFunc(("pVCpu=%p idMsr=%#RX32\n", pVCpu, idMsr));
1331#endif
1332
1333 for (uint32_t i = 0; i < cMsrs; i++)
1334 {
1335 /* Find the MSR. */
1336 if (pGuestMsrLoad[i].u32Msr == idMsr)
1337 {
1338 /*
1339 * If it's the last MSR, we only need to reduce the MSR count.
1340 * If it's -not- the last MSR, copy the last MSR in place of it and reduce the MSR count.
1341 */
1342 if (i < cMsrs - 1)
1343 {
1344 /* Remove it from the VM-entry MSR-load area. */
1345 pGuestMsrLoad[i].u32Msr = pGuestMsrLoad[cMsrs - 1].u32Msr;
1346 pGuestMsrLoad[i].u64Value = pGuestMsrLoad[cMsrs - 1].u64Value;
1347
1348 /* Remove it from the VM-exit MSR-store area if it's in a different page. */
1349 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
1350 {
1351 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1352 Assert(pGuestMsrStore[i].u32Msr == idMsr);
1353 pGuestMsrStore[i].u32Msr = pGuestMsrStore[cMsrs - 1].u32Msr;
1354 pGuestMsrStore[i].u64Value = pGuestMsrStore[cMsrs - 1].u64Value;
1355 }
1356
1357 /* Remove it from the VM-exit MSR-load area. */
1358 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1359 Assert(pHostMsr[i].u32Msr == idMsr);
1360 pHostMsr[i].u32Msr = pHostMsr[cMsrs - 1].u32Msr;
1361 pHostMsr[i].u64Value = pHostMsr[cMsrs - 1].u64Value;
1362 }
1363
1364 /* Reduce the count to reflect the removed MSR and bail. */
1365 --cMsrs;
1366 break;
1367 }
1368 }
1369
1370 /* Update the VMCS if the count changed (meaning the MSR was found and removed). */
1371 if (cMsrs != pVmcsInfo->cEntryMsrLoad)
1372 {
1373 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
1374 AssertRCReturn(rc, rc);
1375
1376 /* We're no longer swapping MSRs during the world-switch, intercept guest read/writes to them. */
1377 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
1378 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_EXIT_RD | VMXMSRPM_EXIT_WR);
1379
1380 Log4Func(("Removed MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
1381 return VINF_SUCCESS;
1382 }
1383
1384 return VERR_NOT_FOUND;
1385}
1386
1387
1388/**
1389 * Updates the value of all host MSRs in the VM-exit MSR-load area.
1390 *
1391 * @param pVCpu The cross context virtual CPU structure.
1392 * @param pVmcsInfo The VMCS info. object.
1393 *
1394 * @remarks No-long-jump zone!!!
1395 */
1396static void hmR0VmxUpdateAutoLoadHostMsrs(PCVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1397{
1398 RT_NOREF(pVCpu);
1399 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1400
1401 PVMXAUTOMSR pHostMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1402 uint32_t const cMsrs = pVmcsInfo->cExitMsrLoad;
1403 Assert(pHostMsrLoad);
1404 Assert(sizeof(*pHostMsrLoad) * cMsrs <= X86_PAGE_4K_SIZE);
1405 LogFlowFunc(("pVCpu=%p cMsrs=%u\n", pVCpu, cMsrs));
1406 for (uint32_t i = 0; i < cMsrs; i++)
1407 {
1408 /*
1409 * Performance hack for the host EFER MSR. We use the cached value rather than re-read it.
1410 * Strict builds will catch mismatches in hmR0VmxCheckAutoLoadStoreMsrs(). See @bugref{7368}.
1411 */
1412 if (pHostMsrLoad[i].u32Msr == MSR_K6_EFER)
1413 pHostMsrLoad[i].u64Value = g_uHmVmxHostMsrEfer;
1414 else
1415 pHostMsrLoad[i].u64Value = ASMRdMsr(pHostMsrLoad[i].u32Msr);
1416 }
1417}
1418
1419
1420/**
1421 * Saves a set of host MSRs to allow read/write passthru access to the guest and
1422 * perform lazy restoration of the host MSRs while leaving VT-x.
1423 *
1424 * @param pVCpu The cross context virtual CPU structure.
1425 *
1426 * @remarks No-long-jump zone!!!
1427 */
1428static void hmR0VmxLazySaveHostMsrs(PVMCPUCC pVCpu)
1429{
1430 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1431
1432 /*
1433 * Note: If you're adding MSRs here, make sure to update the MSR-bitmap accesses in hmR0VmxSetupVmcsProcCtls().
1434 */
1435 if (!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST))
1436 {
1437 Assert(!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)); /* Guest MSRs better not be loaded now. */
1438 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
1439 {
1440 pVCpu->hmr0.s.vmx.u64HostMsrLStar = ASMRdMsr(MSR_K8_LSTAR);
1441 pVCpu->hmr0.s.vmx.u64HostMsrStar = ASMRdMsr(MSR_K6_STAR);
1442 pVCpu->hmr0.s.vmx.u64HostMsrSfMask = ASMRdMsr(MSR_K8_SF_MASK);
1443 pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
1444 }
1445 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_SAVED_HOST;
1446 }
1447}
1448
1449
1450#ifdef VBOX_STRICT
1451
1452/**
1453 * Verifies that our cached host EFER MSR value has not changed since we cached it.
1454 *
1455 * @param pVmcsInfo The VMCS info. object.
1456 */
1457static void hmR0VmxCheckHostEferMsr(PCVMXVMCSINFO pVmcsInfo)
1458{
1459 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1460
1461 if (pVmcsInfo->u32ExitCtls & VMX_EXIT_CTLS_LOAD_EFER_MSR)
1462 {
1463 uint64_t const uHostEferMsr = ASMRdMsr(MSR_K6_EFER);
1464 uint64_t const uHostEferMsrCache = g_uHmVmxHostMsrEfer;
1465 uint64_t uVmcsEferMsrVmcs;
1466 int rc = VMXReadVmcs64(VMX_VMCS64_HOST_EFER_FULL, &uVmcsEferMsrVmcs);
1467 AssertRC(rc);
1468
1469 AssertMsgReturnVoid(uHostEferMsr == uVmcsEferMsrVmcs,
1470 ("EFER Host/VMCS mismatch! host=%#RX64 vmcs=%#RX64\n", uHostEferMsr, uVmcsEferMsrVmcs));
1471 AssertMsgReturnVoid(uHostEferMsr == uHostEferMsrCache,
1472 ("EFER Host/Cache mismatch! host=%#RX64 cache=%#RX64\n", uHostEferMsr, uHostEferMsrCache));
1473 }
1474}
1475
1476
1477/**
1478 * Verifies whether the guest/host MSR pairs in the auto-load/store area in the
1479 * VMCS are correct.
1480 *
1481 * @param pVCpu The cross context virtual CPU structure.
1482 * @param pVmcsInfo The VMCS info. object.
1483 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1484 */
1485static void hmR0VmxCheckAutoLoadStoreMsrs(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
1486{
1487 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1488
1489 /* Read the various MSR-area counts from the VMCS. */
1490 uint32_t cEntryLoadMsrs;
1491 uint32_t cExitStoreMsrs;
1492 uint32_t cExitLoadMsrs;
1493 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &cEntryLoadMsrs); AssertRC(rc);
1494 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &cExitStoreMsrs); AssertRC(rc);
1495 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &cExitLoadMsrs); AssertRC(rc);
1496
1497 /* Verify all the MSR counts are the same. */
1498 Assert(cEntryLoadMsrs == cExitStoreMsrs);
1499 Assert(cExitStoreMsrs == cExitLoadMsrs);
1500 uint32_t const cMsrs = cExitLoadMsrs;
1501
1502 /* Verify the MSR counts do not exceed the maximum count supported by the hardware. */
1503 Assert(cMsrs < VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc));
1504
1505 /* Verify the MSR counts are within the allocated page size. */
1506 Assert(sizeof(VMXAUTOMSR) * cMsrs <= X86_PAGE_4K_SIZE);
1507
1508 /* Verify the relevant contents of the MSR areas match. */
1509 PCVMXAUTOMSR pGuestMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1510 PCVMXAUTOMSR pGuestMsrStore = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1511 PCVMXAUTOMSR pHostMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1512 bool const fSeparateExitMsrStorePage = hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo);
1513 for (uint32_t i = 0; i < cMsrs; i++)
1514 {
1515 /* Verify that the MSRs are paired properly and that the host MSR has the correct value. */
1516 if (fSeparateExitMsrStorePage)
1517 {
1518 AssertMsgReturnVoid(pGuestMsrLoad->u32Msr == pGuestMsrStore->u32Msr,
1519 ("GuestMsrLoad=%#RX32 GuestMsrStore=%#RX32 cMsrs=%u\n",
1520 pGuestMsrLoad->u32Msr, pGuestMsrStore->u32Msr, cMsrs));
1521 }
1522
1523 AssertMsgReturnVoid(pHostMsrLoad->u32Msr == pGuestMsrLoad->u32Msr,
1524 ("HostMsrLoad=%#RX32 GuestMsrLoad=%#RX32 cMsrs=%u\n",
1525 pHostMsrLoad->u32Msr, pGuestMsrLoad->u32Msr, cMsrs));
1526
1527 uint64_t const u64HostMsr = ASMRdMsr(pHostMsrLoad->u32Msr);
1528 AssertMsgReturnVoid(pHostMsrLoad->u64Value == u64HostMsr,
1529 ("u32Msr=%#RX32 VMCS Value=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n",
1530 pHostMsrLoad->u32Msr, pHostMsrLoad->u64Value, u64HostMsr, cMsrs));
1531
1532 /* Verify that cached host EFER MSR matches what's loaded on the CPU. */
1533 bool const fIsEferMsr = RT_BOOL(pHostMsrLoad->u32Msr == MSR_K6_EFER);
1534 AssertMsgReturnVoid(!fIsEferMsr || u64HostMsr == g_uHmVmxHostMsrEfer,
1535 ("Cached=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n", g_uHmVmxHostMsrEfer, u64HostMsr, cMsrs));
1536
1537 /* Verify that the accesses are as expected in the MSR bitmap for auto-load/store MSRs. */
1538 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
1539 {
1540 uint32_t const fMsrpm = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, pGuestMsrLoad->u32Msr);
1541 if (fIsEferMsr)
1542 {
1543 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_RD), ("Passthru read for EFER MSR!?\n"));
1544 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_WR), ("Passthru write for EFER MSR!?\n"));
1545 }
1546 else
1547 {
1548 /* Verify LBR MSRs (used only for debugging) are intercepted. We don't passthru these MSRs to the guest yet. */
1549 PCVMCC pVM = pVCpu->CTX_SUFF(pVM);
1550 if ( pVM->hmr0.s.vmx.fLbr
1551 && ( hmR0VmxIsLbrBranchFromMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
1552 || hmR0VmxIsLbrBranchToMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
1553 || pGuestMsrLoad->u32Msr == pVM->hmr0.s.vmx.idLbrTosMsr))
1554 {
1555 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_EXIT_RD_WR,
1556 ("u32Msr=%#RX32 cMsrs=%u Passthru read/write for LBR MSRs!\n",
1557 pGuestMsrLoad->u32Msr, cMsrs));
1558 }
1559 else if (!fIsNstGstVmcs)
1560 {
1561 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_ALLOW_RD_WR,
1562 ("u32Msr=%#RX32 cMsrs=%u No passthru read/write!\n", pGuestMsrLoad->u32Msr, cMsrs));
1563 }
1564 else
1565 {
1566 /*
1567 * A nested-guest VMCS must -also- allow read/write passthrough for the MSR for us to
1568 * execute a nested-guest with MSR passthrough.
1569 *
1570 * Check if the nested-guest MSR bitmap allows passthrough, and if so, assert that we
1571 * allow passthrough too.
1572 */
1573 void const *pvMsrBitmapNstGst = pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap;
1574 Assert(pvMsrBitmapNstGst);
1575 uint32_t const fMsrpmNstGst = CPUMGetVmxMsrPermission(pvMsrBitmapNstGst, pGuestMsrLoad->u32Msr);
1576 AssertMsgReturnVoid(fMsrpm == fMsrpmNstGst,
1577 ("u32Msr=%#RX32 cMsrs=%u Permission mismatch fMsrpm=%#x fMsrpmNstGst=%#x!\n",
1578 pGuestMsrLoad->u32Msr, cMsrs, fMsrpm, fMsrpmNstGst));
1579 }
1580 }
1581 }
1582
1583 /* Move to the next MSR. */
1584 pHostMsrLoad++;
1585 pGuestMsrLoad++;
1586 pGuestMsrStore++;
1587 }
1588}
1589
1590#endif /* VBOX_STRICT */
1591
1592/**
1593 * Flushes the TLB using EPT.
1594 *
1595 * @returns VBox status code.
1596 * @param pVCpu The cross context virtual CPU structure of the calling
1597 * EMT. Can be NULL depending on @a enmTlbFlush.
1598 * @param pVmcsInfo The VMCS info. object. Can be NULL depending on @a
1599 * enmTlbFlush.
1600 * @param enmTlbFlush Type of flush.
1601 *
1602 * @remarks Caller is responsible for making sure this function is called only
1603 * when NestedPaging is supported and providing @a enmTlbFlush that is
1604 * supported by the CPU.
1605 * @remarks Can be called with interrupts disabled.
1606 */
1607static void hmR0VmxFlushEpt(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, VMXTLBFLUSHEPT enmTlbFlush)
1608{
1609 uint64_t au64Descriptor[2];
1610 if (enmTlbFlush == VMXTLBFLUSHEPT_ALL_CONTEXTS)
1611 au64Descriptor[0] = 0;
1612 else
1613 {
1614 Assert(pVCpu);
1615 Assert(pVmcsInfo);
1616 au64Descriptor[0] = pVmcsInfo->HCPhysEPTP;
1617 }
1618 au64Descriptor[1] = 0; /* MBZ. Intel spec. 33.3 "VMX Instructions" */
1619
1620 int rc = VMXR0InvEPT(enmTlbFlush, &au64Descriptor[0]);
1621 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %#x %#RHp failed. rc=%Rrc\n", enmTlbFlush, au64Descriptor[0], rc));
1622
1623 if ( RT_SUCCESS(rc)
1624 && pVCpu)
1625 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging);
1626}
1627
1628
1629/**
1630 * Flushes the TLB using VPID.
1631 *
1632 * @returns VBox status code.
1633 * @param pVCpu The cross context virtual CPU structure of the calling
1634 * EMT. Can be NULL depending on @a enmTlbFlush.
1635 * @param enmTlbFlush Type of flush.
1636 * @param GCPtr Virtual address of the page to flush (can be 0 depending
1637 * on @a enmTlbFlush).
1638 *
1639 * @remarks Can be called with interrupts disabled.
1640 */
1641static void hmR0VmxFlushVpid(PVMCPUCC pVCpu, VMXTLBFLUSHVPID enmTlbFlush, RTGCPTR GCPtr)
1642{
1643 Assert(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid);
1644
1645 uint64_t au64Descriptor[2];
1646 if (enmTlbFlush == VMXTLBFLUSHVPID_ALL_CONTEXTS)
1647 {
1648 au64Descriptor[0] = 0;
1649 au64Descriptor[1] = 0;
1650 }
1651 else
1652 {
1653 AssertPtr(pVCpu);
1654 AssertMsg(pVCpu->hmr0.s.uCurrentAsid != 0, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
1655 AssertMsg(pVCpu->hmr0.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
1656 au64Descriptor[0] = pVCpu->hmr0.s.uCurrentAsid;
1657 au64Descriptor[1] = GCPtr;
1658 }
1659
1660 int rc = VMXR0InvVPID(enmTlbFlush, &au64Descriptor[0]);
1661 AssertMsg(rc == VINF_SUCCESS,
1662 ("VMXR0InvVPID %#x %u %RGv failed with %Rrc\n", enmTlbFlush, pVCpu ? pVCpu->hmr0.s.uCurrentAsid : 0, GCPtr, rc));
1663
1664 if ( RT_SUCCESS(rc)
1665 && pVCpu)
1666 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
1667 NOREF(rc);
1668}
1669
1670
1671/**
1672 * Invalidates a guest page by guest virtual address. Only relevant for EPT/VPID,
1673 * otherwise there is nothing really to invalidate.
1674 *
1675 * @returns VBox status code.
1676 * @param pVCpu The cross context virtual CPU structure.
1677 * @param GCVirt Guest virtual address of the page to invalidate.
1678 */
1679VMMR0DECL(int) VMXR0InvalidatePage(PVMCPUCC pVCpu, RTGCPTR GCVirt)
1680{
1681 AssertPtr(pVCpu);
1682 LogFlowFunc(("pVCpu=%p GCVirt=%RGv\n", pVCpu, GCVirt));
1683
1684 if (!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH))
1685 {
1686 /*
1687 * We must invalidate the guest TLB entry in either case; we cannot ignore it even for
1688 * the EPT case. See @bugref{6043} and @bugref{6177}.
1689 *
1690 * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VM-entry in hmR0VmxFlushTLB*()
1691 * as this function may be called in a loop with individual addresses.
1692 */
1693 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1694 if (pVM->hmr0.s.vmx.fVpid)
1695 {
1696 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
1697 {
1698 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_INDIV_ADDR, GCVirt);
1699 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt);
1700 }
1701 else
1702 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1703 }
1704 else if (pVM->hmr0.s.fNestedPaging)
1705 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1706 }
1707
1708 return VINF_SUCCESS;
1709}
1710
1711
1712/**
1713 * Dummy placeholder for tagged-TLB flush handling before VM-entry. Used in the
1714 * case where neither EPT nor VPID is supported by the CPU.
1715 *
1716 * @param pHostCpu The HM physical-CPU structure.
1717 * @param pVCpu The cross context virtual CPU structure.
1718 *
1719 * @remarks Called with interrupts disabled.
1720 */
1721static void hmR0VmxFlushTaggedTlbNone(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
1722{
1723 AssertPtr(pVCpu);
1724 AssertPtr(pHostCpu);
1725
1726 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1727
1728 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1729 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1730 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1731 pVCpu->hmr0.s.fForceTLBFlush = false;
1732 return;
1733}
1734
1735
1736/**
1737 * Flushes the tagged-TLB entries for EPT+VPID CPUs as necessary.
1738 *
1739 * @param pHostCpu The HM physical-CPU structure.
1740 * @param pVCpu The cross context virtual CPU structure.
1741 * @param pVmcsInfo The VMCS info. object.
1742 *
1743 * @remarks All references to "ASID" in this function pertain to "VPID" in Intel's
1744 * nomenclature. The reason is to avoid confusion in compare statements,
1745 * since the host-CPU copies are named "ASID".
1746 *
1747 * @remarks Called with interrupts disabled.
1748 */
1749static void hmR0VmxFlushTaggedTlbBoth(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1750{
1751#ifdef VBOX_WITH_STATISTICS
1752 bool fTlbFlushed = false;
1753# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { fTlbFlushed = true; } while (0)
1754# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { \
1755 if (!fTlbFlushed) \
1756 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); \
1757 } while (0)
1758#else
1759# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { } while (0)
1760# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { } while (0)
1761#endif
1762
1763 AssertPtr(pVCpu);
1764 AssertPtr(pHostCpu);
1765 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1766
1767 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1768 AssertMsg(pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid,
1769 ("hmR0VmxFlushTaggedTlbBoth cannot be invoked unless NestedPaging & VPID are enabled."
1770 "fNestedPaging=%RTbool fVpid=%RTbool", pVM->hmr0.s.fNestedPaging, pVM->hmr0.s.vmx.fVpid));
1771
1772 /*
1773 * Force a TLB flush for the first world-switch if the current CPU differs from the one we
1774 * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
1775 * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
1776 * cannot reuse the current ASID anymore.
1777 */
1778 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1779 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1780 {
1781 ++pHostCpu->uCurrentAsid;
1782 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
1783 {
1784 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0. */
1785 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1786 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1787 }
1788
1789 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
1790 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1791 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1792
1793 /*
1794 * Flush by EPT when we get rescheduled to a new host CPU to ensure EPT-only tagged mappings are also
1795 * invalidated. We don't need to flush-by-VPID here as flushing by EPT covers it. See @bugref{6568}.
1796 */
1797 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1798 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1799 HMVMX_SET_TAGGED_TLB_FLUSHED();
1800 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1801 }
1802 else if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) /* Check for explicit TLB flushes. */
1803 {
1804 /*
1805 * Changes to the EPT paging structure by VMM requires flushing-by-EPT as the CPU
1806 * creates guest-physical (i.e. only EPT-tagged) mappings while traversing the EPT
1807 * tables when EPT is in use. Flushing-by-VPID will only flush linear (only
1808 * VPID-tagged) and combined (EPT+VPID tagged) mappings but not guest-physical
1809 * mappings, see @bugref{6568}.
1810 *
1811 * See Intel spec. 28.3.2 "Creating and Using Cached Translation Information".
1812 */
1813 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1814 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1815 HMVMX_SET_TAGGED_TLB_FLUSHED();
1816 }
1817 else if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1818 {
1819 /*
1820 * The nested-guest specifies its own guest-physical address to use as the APIC-access
1821 * address which requires flushing the TLB of EPT cached structures.
1822 *
1823 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
1824 */
1825 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1826 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1827 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1828 HMVMX_SET_TAGGED_TLB_FLUSHED();
1829 }
1830
1831
1832 pVCpu->hmr0.s.fForceTLBFlush = false;
1833 HMVMX_UPDATE_FLUSH_SKIPPED_STAT();
1834
1835 Assert(pVCpu->hmr0.s.idLastCpu == pHostCpu->idCpu);
1836 Assert(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes);
1837 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
1838 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
1839 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
1840 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
1841 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
1842 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
1843 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
1844
1845 /* Update VMCS with the VPID. */
1846 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
1847 AssertRC(rc);
1848
1849#undef HMVMX_SET_TAGGED_TLB_FLUSHED
1850}
1851
1852
1853/**
1854 * Flushes the tagged-TLB entries for EPT CPUs as necessary.
1855 *
1856 * @param pHostCpu The HM physical-CPU structure.
1857 * @param pVCpu The cross context virtual CPU structure.
1858 * @param pVmcsInfo The VMCS info. object.
1859 *
1860 * @remarks Called with interrupts disabled.
1861 */
1862static void hmR0VmxFlushTaggedTlbEpt(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1863{
1864 AssertPtr(pVCpu);
1865 AssertPtr(pHostCpu);
1866 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1867 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked without NestedPaging."));
1868 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with VPID."));
1869
1870 /*
1871 * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last.
1872 * A change in the TLB flush count implies the host CPU is online after a suspend/resume.
1873 */
1874 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1875 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1876 {
1877 pVCpu->hmr0.s.fForceTLBFlush = true;
1878 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1879 }
1880
1881 /* Check for explicit TLB flushes. */
1882 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1883 {
1884 pVCpu->hmr0.s.fForceTLBFlush = true;
1885 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1886 }
1887
1888 /* Check for TLB flushes while switching to/from a nested-guest. */
1889 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1890 {
1891 pVCpu->hmr0.s.fForceTLBFlush = true;
1892 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1893 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1894 }
1895
1896 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1897 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1898
1899 if (pVCpu->hmr0.s.fForceTLBFlush)
1900 {
1901 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.enmTlbFlushEpt);
1902 pVCpu->hmr0.s.fForceTLBFlush = false;
1903 }
1904}
1905
1906
1907/**
1908 * Flushes the tagged-TLB entries for VPID CPUs as necessary.
1909 *
1910 * @param pHostCpu The HM physical-CPU structure.
1911 * @param pVCpu The cross context virtual CPU structure.
1912 *
1913 * @remarks Called with interrupts disabled.
1914 */
1915static void hmR0VmxFlushTaggedTlbVpid(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
1916{
1917 AssertPtr(pVCpu);
1918 AssertPtr(pHostCpu);
1919 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1920 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTlbVpid cannot be invoked without VPID."));
1921 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTlbVpid cannot be invoked with NestedPaging"));
1922
1923 /*
1924 * Force a TLB flush for the first world switch if the current CPU differs from the one we
1925 * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
1926 * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
1927 * cannot reuse the current ASID anymore.
1928 */
1929 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1930 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1931 {
1932 pVCpu->hmr0.s.fForceTLBFlush = true;
1933 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1934 }
1935
1936 /* Check for explicit TLB flushes. */
1937 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1938 {
1939 /*
1940 * If we ever support VPID flush combinations other than ALL or SINGLE-context (see
1941 * hmR0VmxSetupTaggedTlb()) we would need to explicitly flush in this case (add an
1942 * fExplicitFlush = true here and change the pHostCpu->fFlushAsidBeforeUse check below to
1943 * include fExplicitFlush's too) - an obscure corner case.
1944 */
1945 pVCpu->hmr0.s.fForceTLBFlush = true;
1946 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1947 }
1948
1949 /* Check for TLB flushes while switching to/from a nested-guest. */
1950 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1951 {
1952 pVCpu->hmr0.s.fForceTLBFlush = true;
1953 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1954 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1955 }
1956
1957 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1958 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1959 if (pVCpu->hmr0.s.fForceTLBFlush)
1960 {
1961 ++pHostCpu->uCurrentAsid;
1962 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
1963 {
1964 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0 */
1965 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1966 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1967 }
1968
1969 pVCpu->hmr0.s.fForceTLBFlush = false;
1970 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1971 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
1972 if (pHostCpu->fFlushAsidBeforeUse)
1973 {
1974 if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_SINGLE_CONTEXT)
1975 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_SINGLE_CONTEXT, 0 /* GCPtr */);
1976 else if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_ALL_CONTEXTS)
1977 {
1978 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_ALL_CONTEXTS, 0 /* GCPtr */);
1979 pHostCpu->fFlushAsidBeforeUse = false;
1980 }
1981 else
1982 {
1983 /* hmR0VmxSetupTaggedTlb() ensures we never get here. Paranoia. */
1984 AssertMsgFailed(("Unsupported VPID-flush context type.\n"));
1985 }
1986 }
1987 }
1988
1989 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
1990 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
1991 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
1992 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
1993 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
1994 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
1995 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
1996
1997 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
1998 AssertRC(rc);
1999}
2000
2001
2002/**
2003 * Flushes the guest TLB entry based on CPU capabilities.
2004 *
2005 * @param pHostCpu The HM physical-CPU structure.
2006 * @param pVCpu The cross context virtual CPU structure.
2007 * @param pVmcsInfo The VMCS info. object.
2008 *
2009 * @remarks Called with interrupts disabled.
2010 */
2011static void hmR0VmxFlushTaggedTlb(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2012{
2013#ifdef HMVMX_ALWAYS_FLUSH_TLB
2014 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
2015#endif
2016 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2017 switch (pVM->hmr0.s.vmx.enmTlbFlushType)
2018 {
2019 case VMXTLBFLUSHTYPE_EPT_VPID: hmR0VmxFlushTaggedTlbBoth(pHostCpu, pVCpu, pVmcsInfo); break;
2020 case VMXTLBFLUSHTYPE_EPT: hmR0VmxFlushTaggedTlbEpt(pHostCpu, pVCpu, pVmcsInfo); break;
2021 case VMXTLBFLUSHTYPE_VPID: hmR0VmxFlushTaggedTlbVpid(pHostCpu, pVCpu); break;
2022 case VMXTLBFLUSHTYPE_NONE: hmR0VmxFlushTaggedTlbNone(pHostCpu, pVCpu); break;
2023 default:
2024 AssertMsgFailed(("Invalid flush-tag function identifier\n"));
2025 break;
2026 }
2027 /* Don't assert that VMCPU_FF_TLB_FLUSH should no longer be pending. It can be set by other EMTs. */
2028}
2029
2030
2031/**
2032 * Sets up the appropriate tagged TLB-flush level and handler for flushing guest
2033 * TLB entries from the host TLB before VM-entry.
2034 *
2035 * @returns VBox status code.
2036 * @param pVM The cross context VM structure.
2037 */
2038static int hmR0VmxSetupTaggedTlb(PVMCC pVM)
2039{
2040 /*
2041 * Determine optimal flush type for nested paging.
2042 * We cannot ignore EPT if no suitable flush-type is supported by the CPU, as we've already set up
2043 * unrestricted guest execution (see hmR3InitFinalizeR0()).
2044 */
2045 if (pVM->hmr0.s.fNestedPaging)
2046 {
2047 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT)
2048 {
2049 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT)
2050 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_SINGLE_CONTEXT;
2051 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
2052 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_ALL_CONTEXTS;
2053 else
2054 {
2055 /* Shouldn't happen. EPT is supported but no suitable flush-types supported. */
2056 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2057 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_FLUSH_TYPE_UNSUPPORTED;
2058 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2059 }
2060
2061 /* Make sure the write-back cacheable memory type for EPT is supported. */
2062 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_MEMTYPE_WB)))
2063 {
2064 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2065 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_MEM_TYPE_NOT_WB;
2066 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2067 }
2068
2069 /* EPT requires a page-walk length of 4. */
2070 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_PAGE_WALK_LENGTH_4)))
2071 {
2072 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2073 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_PAGE_WALK_LENGTH_UNSUPPORTED;
2074 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2075 }
2076 }
2077 else
2078 {
2079 /* Shouldn't happen. EPT is supported but INVEPT instruction is not supported. */
2080 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2081 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_INVEPT_UNAVAILABLE;
2082 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2083 }
2084 }
2085
2086 /*
2087 * Determine optimal flush type for VPID.
2088 */
2089 if (pVM->hmr0.s.vmx.fVpid)
2090 {
2091 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID)
2092 {
2093 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT)
2094 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_SINGLE_CONTEXT;
2095 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS)
2096 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_ALL_CONTEXTS;
2097 else
2098 {
2099 /* Neither SINGLE nor ALL-context flush types for VPID is supported by the CPU. Ignore VPID capability. */
2100 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
2101 LogRelFunc(("Only INDIV_ADDR supported. Ignoring VPID.\n"));
2102 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS)
2103 LogRelFunc(("Only SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
2104 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
2105 pVM->hmr0.s.vmx.fVpid = false;
2106 }
2107 }
2108 else
2109 {
2110 /* Shouldn't happen. VPID is supported but INVVPID is not supported by the CPU. Ignore VPID capability. */
2111 Log4Func(("VPID supported without INVVPID support. Ignoring VPID.\n"));
2112 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
2113 pVM->hmr0.s.vmx.fVpid = false;
2114 }
2115 }
2116
2117 /*
2118 * Set up the handler for flushing tagged-TLBs.
2119 */
2120 if (pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid)
2121 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT_VPID;
2122 else if (pVM->hmr0.s.fNestedPaging)
2123 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT;
2124 else if (pVM->hmr0.s.vmx.fVpid)
2125 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_VPID;
2126 else
2127 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_NONE;
2128
2129
2130 /*
2131 * Copy out the result to ring-3.
2132 */
2133 pVM->hm.s.ForR3.vmx.fVpid = pVM->hmr0.s.vmx.fVpid;
2134 pVM->hm.s.ForR3.vmx.enmTlbFlushType = pVM->hmr0.s.vmx.enmTlbFlushType;
2135 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt;
2136 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid;
2137 return VINF_SUCCESS;
2138}
2139
2140
2141/**
2142 * Sets up the LBR MSR ranges based on the host CPU.
2143 *
2144 * @returns VBox status code.
2145 * @param pVM The cross context VM structure.
2146 *
2147 * @sa nemR3DarwinSetupLbrMsrRange
2148 */
2149static int hmR0VmxSetupLbrMsrRange(PVMCC pVM)
2150{
2151 Assert(pVM->hmr0.s.vmx.fLbr);
2152 uint32_t idLbrFromIpMsrFirst;
2153 uint32_t idLbrFromIpMsrLast;
2154 uint32_t idLbrToIpMsrFirst;
2155 uint32_t idLbrToIpMsrLast;
2156 uint32_t idLbrTosMsr;
2157
2158 /*
2159 * Determine the LBR MSRs supported for this host CPU family and model.
2160 *
2161 * See Intel spec. 17.4.8 "LBR Stack".
2162 * See Intel "Model-Specific Registers" spec.
2163 */
2164 uint32_t const uFamilyModel = (g_CpumHostFeatures.s.uFamily << 8)
2165 | g_CpumHostFeatures.s.uModel;
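    /* For example, a host CPU reporting family 0x06 and model 0x9E yields
       uFamilyModel = (0x06 << 8) | 0x9E = 0x069E, matching one of the cases below. */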
2166 switch (uFamilyModel)
2167 {
2168 case 0x0f01: case 0x0f02:
2169 idLbrFromIpMsrFirst = MSR_P4_LASTBRANCH_0;
2170 idLbrFromIpMsrLast = MSR_P4_LASTBRANCH_3;
2171 idLbrToIpMsrFirst = 0x0;
2172 idLbrToIpMsrLast = 0x0;
2173 idLbrTosMsr = MSR_P4_LASTBRANCH_TOS;
2174 break;
2175
2176 case 0x065c: case 0x065f: case 0x064e: case 0x065e: case 0x068e:
2177 case 0x069e: case 0x0655: case 0x0666: case 0x067a: case 0x0667:
2178 case 0x066a: case 0x066c: case 0x067d: case 0x067e:
2179 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
2180 idLbrFromIpMsrLast = MSR_LASTBRANCH_31_FROM_IP;
2181 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
2182 idLbrToIpMsrLast = MSR_LASTBRANCH_31_TO_IP;
2183 idLbrTosMsr = MSR_LASTBRANCH_TOS;
2184 break;
2185
2186 case 0x063d: case 0x0647: case 0x064f: case 0x0656: case 0x063c:
2187 case 0x0645: case 0x0646: case 0x063f: case 0x062a: case 0x062d:
2188 case 0x063a: case 0x063e: case 0x061a: case 0x061e: case 0x061f:
2189 case 0x062e: case 0x0625: case 0x062c: case 0x062f:
2190 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
2191 idLbrFromIpMsrLast = MSR_LASTBRANCH_15_FROM_IP;
2192 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
2193 idLbrToIpMsrLast = MSR_LASTBRANCH_15_TO_IP;
2194 idLbrTosMsr = MSR_LASTBRANCH_TOS;
2195 break;
2196
2197 case 0x0617: case 0x061d: case 0x060f:
2198 idLbrFromIpMsrFirst = MSR_CORE2_LASTBRANCH_0_FROM_IP;
2199 idLbrFromIpMsrLast = MSR_CORE2_LASTBRANCH_3_FROM_IP;
2200 idLbrToIpMsrFirst = MSR_CORE2_LASTBRANCH_0_TO_IP;
2201 idLbrToIpMsrLast = MSR_CORE2_LASTBRANCH_3_TO_IP;
2202 idLbrTosMsr = MSR_CORE2_LASTBRANCH_TOS;
2203 break;
2204
2205 /* Atom and related microarchitectures we don't care about:
2206 case 0x0637: case 0x064a: case 0x064c: case 0x064d: case 0x065a:
2207 case 0x065d: case 0x061c: case 0x0626: case 0x0627: case 0x0635:
2208 case 0x0636: */
2209 /* All other CPUs: */
2210 default:
2211 {
2212 LogRelFunc(("Could not determine LBR stack size for the CPU model %#x\n", uFamilyModel));
2213 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_UNKNOWN;
2214 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2215 }
2216 }
2217
2218 /*
2219 * Validate.
2220 */
2221 uint32_t const cLbrStack = idLbrFromIpMsrLast - idLbrFromIpMsrFirst + 1;
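    /* For example, the Skylake/Kaby Lake cases above span MSR_LASTBRANCH_0_FROM_IP through
       MSR_LASTBRANCH_31_FROM_IP, giving cLbrStack = 32; the Nehalem/Haswell cases give 16
       and the Core 2 / P4 cases give 4. */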
2222 PCVMCPU pVCpu0 = VMCC_GET_CPU_0(pVM);
2223 AssertCompile( RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr)
2224 == RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrToIpMsr));
2225 if (cLbrStack > RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr))
2226 {
2227 LogRelFunc(("LBR stack size of the CPU (%u) exceeds our buffer size\n", cLbrStack));
2228 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_OVERFLOW;
2229 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2230 }
2231 NOREF(pVCpu0);
2232
2233 /*
2234 * Update the LBR info. to the VM struct. for use later.
2235 */
2236 pVM->hmr0.s.vmx.idLbrTosMsr = idLbrTosMsr;
2237
2238 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrFirst = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst = idLbrFromIpMsrFirst;
2239 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrLast = pVM->hmr0.s.vmx.idLbrFromIpMsrLast = idLbrFromIpMsrLast;
2240
2241 pVM->hm.s.ForR3.vmx.idLbrToIpMsrFirst = pVM->hmr0.s.vmx.idLbrToIpMsrFirst = idLbrToIpMsrFirst;
2242 pVM->hm.s.ForR3.vmx.idLbrToIpMsrLast = pVM->hmr0.s.vmx.idLbrToIpMsrLast = idLbrToIpMsrLast;
2243 return VINF_SUCCESS;
2244}
2245
2246
2247#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2248/**
2249 * Sets up the shadow VMCS fields arrays.
2250 *
2251 * This function builds arrays of VMCS fields to sync the shadow VMCS later while
2252 * executing the guest.
2253 *
2254 * @returns VBox status code.
2255 * @param pVM The cross context VM structure.
2256 */
2257static int hmR0VmxSetupShadowVmcsFieldsArrays(PVMCC pVM)
2258{
2259 /*
2260 * Paranoia. Ensure we haven't exposed the VMWRITE-All VMX feature to the guest
2261 * when the host does not support it.
2262 */
2263 bool const fGstVmwriteAll = pVM->cpum.ro.GuestFeatures.fVmxVmwriteAll;
2264 if ( !fGstVmwriteAll
2265 || (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL))
2266 { /* likely. */ }
2267 else
2268 {
2269 LogRelFunc(("VMX VMWRITE-All feature exposed to the guest but host CPU does not support it!\n"));
2270 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_GST_HOST_VMWRITE_ALL;
2271 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2272 }
2273
2274 uint32_t const cVmcsFields = RT_ELEMENTS(g_aVmcsFields);
2275 uint32_t cRwFields = 0;
2276 uint32_t cRoFields = 0;
2277 for (uint32_t i = 0; i < cVmcsFields; i++)
2278 {
2279 VMXVMCSFIELD VmcsField;
2280 VmcsField.u = g_aVmcsFields[i];
2281
2282 /*
2283 * We will be writing "FULL" (64-bit) fields while syncing the shadow VMCS.
2284 * Therefore, "HIGH" (32-bit portion of 64-bit) fields must not be included
2285 * in the shadow VMCS fields array as they would be redundant.
2286 *
2287 * If the VMCS field depends on a CPU feature that is not exposed to the guest,
2288 * we must not include it in the shadow VMCS fields array. Guests attempting to
2289 * VMREAD/VMWRITE such VMCS fields would cause a VM-exit and we shall emulate
2290 * the required behavior.
2291 */
2292 if ( VmcsField.n.fAccessType == VMX_VMCSFIELD_ACCESS_FULL
2293 && CPUMIsGuestVmxVmcsFieldValid(pVM, VmcsField.u))
2294 {
2295 /*
2296 * Read-only fields are placed in a separate array so that while syncing shadow
2297 * VMCS fields later (which is more performance critical) we can avoid branches.
2298 *
2299 * However, if the guest can write to all fields (including read-only fields),
2300 * we treat it as a read/write field. Otherwise, writing to these fields would
2301 * cause a VMWRITE instruction error while syncing the shadow VMCS.
2302 */
2303 if ( fGstVmwriteAll
2304 || !VMXIsVmcsFieldReadOnly(VmcsField.u))
2305 pVM->hmr0.s.vmx.paShadowVmcsFields[cRwFields++] = VmcsField.u;
2306 else
2307 pVM->hmr0.s.vmx.paShadowVmcsRoFields[cRoFields++] = VmcsField.u;
2308 }
2309 }
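    /* For instance, read-only VM-exit information fields (such as the VM-exit reason,
       encoding 0x4402) normally land in paShadowVmcsRoFields and are only treated as
       read/write when the guest has been granted the VMWRITE-all capability. */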
2310
2311 /* Update the counts. */
2312 pVM->hmr0.s.vmx.cShadowVmcsFields = cRwFields;
2313 pVM->hmr0.s.vmx.cShadowVmcsRoFields = cRoFields;
2314 return VINF_SUCCESS;
2315}
2316
2317
2318/**
2319 * Sets up the VMREAD and VMWRITE bitmaps.
2320 *
2321 * @param pVM The cross context VM structure.
2322 */
2323static void hmR0VmxSetupVmreadVmwriteBitmaps(PVMCC pVM)
2324{
2325 /*
2326 * By default, ensure guest attempts to access any VMCS fields cause VM-exits.
2327 */
2328 uint32_t const cbBitmap = X86_PAGE_4K_SIZE;
2329 uint8_t *pbVmreadBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmreadBitmap;
2330 uint8_t *pbVmwriteBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmwriteBitmap;
2331 ASMMemFill32(pbVmreadBitmap, cbBitmap, UINT32_C(0xffffffff));
2332 ASMMemFill32(pbVmwriteBitmap, cbBitmap, UINT32_C(0xffffffff));
2333
2334 /*
2335 * Skip intercepting VMREAD/VMWRITE to guest read/write fields in the
2336 * VMREAD and VMWRITE bitmaps.
2337 */
2338 {
2339 uint32_t const *paShadowVmcsFields = pVM->hmr0.s.vmx.paShadowVmcsFields;
2340 uint32_t const cShadowVmcsFields = pVM->hmr0.s.vmx.cShadowVmcsFields;
2341 for (uint32_t i = 0; i < cShadowVmcsFields; i++)
2342 {
2343 uint32_t const uVmcsField = paShadowVmcsFields[i];
2344 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
2345 Assert(uVmcsField >> 3 < cbBitmap);
2346
2347 ASMBitClear(pbVmreadBitmap, uVmcsField & 0x7fff);
2348 ASMBitClear(pbVmwriteBitmap, uVmcsField & 0x7fff);
2349 }
2350 }
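    /* Worked example: the low 15 bits of a field encoding select the bit to clear. Assuming
       the guest RIP field (encoding 0x681E) is present in the array, bit 0x681E (bit 6 of
       byte 0xD03) is cleared, comfortably within the 4K bitmap as asserted above. */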
2351
2352 /*
2353 * Skip intercepting VMREAD for guest read-only fields in the VMREAD bitmap
2354 * if the host supports VMWRITE to all supported VMCS fields.
2355 */
2356 if (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL)
2357 {
2358 uint32_t const *paShadowVmcsRoFields = pVM->hmr0.s.vmx.paShadowVmcsRoFields;
2359 uint32_t const cShadowVmcsRoFields = pVM->hmr0.s.vmx.cShadowVmcsRoFields;
2360 for (uint32_t i = 0; i < cShadowVmcsRoFields; i++)
2361 {
2362 uint32_t const uVmcsField = paShadowVmcsRoFields[i];
2363 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
2364 Assert(uVmcsField >> 3 < cbBitmap);
2365 ASMBitClear(pbVmreadBitmap, uVmcsField & 0x7fff);
2366 }
2367 }
2368}
2369#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
2370
2371
2372/**
2373 * Sets up the virtual-APIC page address for the VMCS.
2374 *
2375 * @param pVmcsInfo The VMCS info. object.
2376 */
2377DECLINLINE(void) hmR0VmxSetupVmcsVirtApicAddr(PCVMXVMCSINFO pVmcsInfo)
2378{
2379 RTHCPHYS const HCPhysVirtApic = pVmcsInfo->HCPhysVirtApic;
2380 Assert(HCPhysVirtApic != NIL_RTHCPHYS);
2381 Assert(!(HCPhysVirtApic & 0xfff)); /* Bits 11:0 MBZ. */
2382 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
2383 AssertRC(rc);
2384}
2385
2386
2387/**
2388 * Sets up the MSR-bitmap address for the VMCS.
2389 *
2390 * @param pVmcsInfo The VMCS info. object.
2391 */
2392DECLINLINE(void) hmR0VmxSetupVmcsMsrBitmapAddr(PCVMXVMCSINFO pVmcsInfo)
2393{
2394 RTHCPHYS const HCPhysMsrBitmap = pVmcsInfo->HCPhysMsrBitmap;
2395 Assert(HCPhysMsrBitmap != NIL_RTHCPHYS);
2396 Assert(!(HCPhysMsrBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2397 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, HCPhysMsrBitmap);
2398 AssertRC(rc);
2399}
2400
2401
2402/**
2403 * Sets up the APIC-access page address for the VMCS.
2404 *
2405 * @param pVCpu The cross context virtual CPU structure.
2406 */
2407DECLINLINE(void) hmR0VmxSetupVmcsApicAccessAddr(PVMCPUCC pVCpu)
2408{
2409 RTHCPHYS const HCPhysApicAccess = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysApicAccess;
2410 Assert(HCPhysApicAccess != NIL_RTHCPHYS);
2411 Assert(!(HCPhysApicAccess & 0xfff)); /* Bits 11:0 MBZ. */
2412 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
2413 AssertRC(rc);
2414}
2415
2416#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2417
2418/**
2419 * Sets up the VMREAD bitmap address for the VMCS.
2420 *
2421 * @param pVCpu The cross context virtual CPU structure.
2422 */
2423DECLINLINE(void) hmR0VmxSetupVmcsVmreadBitmapAddr(PVMCPUCC pVCpu)
2424{
2425 RTHCPHYS const HCPhysVmreadBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmreadBitmap;
2426 Assert(HCPhysVmreadBitmap != NIL_RTHCPHYS);
2427 Assert(!(HCPhysVmreadBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2428 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMREAD_BITMAP_FULL, HCPhysVmreadBitmap);
2429 AssertRC(rc);
2430}
2431
2432
2433/**
2434 * Sets up the VMWRITE bitmap address for the VMCS.
2435 *
2436 * @param pVCpu The cross context virtual CPU structure.
2437 */
2438DECLINLINE(void) hmR0VmxSetupVmcsVmwriteBitmapAddr(PVMCPUCC pVCpu)
2439{
2440 RTHCPHYS const HCPhysVmwriteBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmwriteBitmap;
2441 Assert(HCPhysVmwriteBitmap != NIL_RTHCPHYS);
2442 Assert(!(HCPhysVmwriteBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2443 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMWRITE_BITMAP_FULL, HCPhysVmwriteBitmap);
2444 AssertRC(rc);
2445}
2446
2447#endif
2448
2449/**
2450 * Sets up the VM-entry MSR load, VM-exit MSR-store and VM-exit MSR-load addresses
2451 * in the VMCS.
2452 *
2453 * @returns VBox status code.
2454 * @param pVmcsInfo The VMCS info. object.
2455 */
2456DECLINLINE(int) hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(PVMXVMCSINFO pVmcsInfo)
2457{
2458 RTHCPHYS const HCPhysGuestMsrLoad = pVmcsInfo->HCPhysGuestMsrLoad;
2459 Assert(HCPhysGuestMsrLoad != NIL_RTHCPHYS);
2460 Assert(!(HCPhysGuestMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
2461
2462 RTHCPHYS const HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrStore;
2463 Assert(HCPhysGuestMsrStore != NIL_RTHCPHYS);
2464 Assert(!(HCPhysGuestMsrStore & 0xf)); /* Bits 3:0 MBZ. */
2465
2466 RTHCPHYS const HCPhysHostMsrLoad = pVmcsInfo->HCPhysHostMsrLoad;
2467 Assert(HCPhysHostMsrLoad != NIL_RTHCPHYS);
2468 Assert(!(HCPhysHostMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
2469
2470 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, HCPhysGuestMsrLoad); AssertRC(rc);
2471 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, HCPhysGuestMsrStore); AssertRC(rc);
2472 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, HCPhysHostMsrLoad); AssertRC(rc);
2473 return VINF_SUCCESS;
2474}
2475
2476
2477/**
2478 * Sets up MSR permissions in the MSR bitmap of a VMCS info. object.
2479 *
2480 * @param pVCpu The cross context virtual CPU structure.
2481 * @param pVmcsInfo The VMCS info. object.
2482 */
2483static void hmR0VmxSetupVmcsMsrPermissions(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2484{
2485 Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS);
2486
2487 /*
2488 * By default, ensure guest attempts to access any MSR cause VM-exits.
2489 * This shall later be relaxed for specific MSRs as necessary.
2490 *
2491 * Note: For nested-guests, the entire bitmap will be merged prior to
2492 * executing the nested-guest using hardware-assisted VMX and hence there
2493 * is no need to perform this operation. See hmR0VmxMergeMsrBitmapNested.
2494 */
2495 Assert(pVmcsInfo->pvMsrBitmap);
2496 ASMMemFill32(pVmcsInfo->pvMsrBitmap, X86_PAGE_4K_SIZE, UINT32_C(0xffffffff));
2497
2498 /*
2499 * The guest can access the following MSRs (read, write) without causing
2500 * VM-exits; they are loaded/stored automatically using fields in the VMCS.
2501 */
2502 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2503 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_CS, VMXMSRPM_ALLOW_RD_WR);
2504 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_ESP, VMXMSRPM_ALLOW_RD_WR);
2505 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_EIP, VMXMSRPM_ALLOW_RD_WR);
2506 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
2507 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_FS_BASE, VMXMSRPM_ALLOW_RD_WR);
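    /* Illustrative note: the MSR bitmap consists of 1KB read and write halves for the low
       (0x00000000..0x00001FFF) and high (0xC0000000..0xC0001FFF) MSR ranges. Allowing
       MSR_K8_GS_BASE (0xC0000101) above therefore clears bit 0x101 in both the high-range
       read and high-range write portions of the bitmap. */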
2508
2509 /*
2510 * The IA32_PRED_CMD and IA32_FLUSH_CMD MSRs are write-only and have no state
2511 * associated with them. We never need to intercept access (writes need to be
2512 * executed without causing a VM-exit, reads will #GP fault anyway).
2513 *
2514 * The IA32_SPEC_CTRL MSR is read/write and has state. We allow the guest to
2515 * read/write them. We swap the guest/host MSR value using the
2516 * auto-load/store MSR area.
2517 */
2518 if (pVM->cpum.ro.GuestFeatures.fIbpb)
2519 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_PRED_CMD, VMXMSRPM_ALLOW_RD_WR);
2520 if (pVM->cpum.ro.GuestFeatures.fFlushCmd)
2521 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_FLUSH_CMD, VMXMSRPM_ALLOW_RD_WR);
2522 if (pVM->cpum.ro.GuestFeatures.fIbrs)
2523 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SPEC_CTRL, VMXMSRPM_ALLOW_RD_WR);
2524
2525 /*
2526 * Allow full read/write access for the following MSRs (mandatory for VT-x)
2527 * required for 64-bit guests.
2528 */
2529 if (pVM->hmr0.s.fAllow64BitGuests)
2530 {
2531 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_LSTAR, VMXMSRPM_ALLOW_RD_WR);
2532 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K6_STAR, VMXMSRPM_ALLOW_RD_WR);
2533 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_SF_MASK, VMXMSRPM_ALLOW_RD_WR);
2534 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_KERNEL_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
2535 }
2536
2537 /*
2538 * IA32_EFER MSR is always intercepted, see @bugref{9180#c37}.
2539 */
2540#ifdef VBOX_STRICT
2541 Assert(pVmcsInfo->pvMsrBitmap);
2542 uint32_t const fMsrpmEfer = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, MSR_K6_EFER);
2543 Assert(fMsrpmEfer == VMXMSRPM_EXIT_RD_WR);
2544#endif
2545}
2546
2547
2548/**
2549 * Sets up pin-based VM-execution controls in the VMCS.
2550 *
2551 * @returns VBox status code.
2552 * @param pVCpu The cross context virtual CPU structure.
2553 * @param pVmcsInfo The VMCS info. object.
2554 */
2555static int hmR0VmxSetupVmcsPinCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2556{
2557 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2558 uint32_t fVal = g_HmMsrs.u.vmx.PinCtls.n.allowed0; /* Bits set here must always be set. */
2559 uint32_t const fZap = g_HmMsrs.u.vmx.PinCtls.n.allowed1; /* Bits cleared here must always be cleared. */
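    /* Illustrative note: bits set in allowed0 must be 1 and bits clear in allowed1 must be 0.
       The "(fVal & fZap) != fVal" check further below therefore catches any control we want
       to set (e.g. VMX_PIN_CTLS_NMI_EXIT) that the CPU does not permit to be 1. */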
2560
2561 fVal |= VMX_PIN_CTLS_EXT_INT_EXIT /* External interrupts cause a VM-exit. */
2562 | VMX_PIN_CTLS_NMI_EXIT; /* Non-maskable interrupts (NMIs) cause a VM-exit. */
2563
2564 if (g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_VIRT_NMI)
2565 fVal |= VMX_PIN_CTLS_VIRT_NMI; /* Use virtual NMIs and virtual-NMI blocking features. */
2566
2567 /* Enable the VMX-preemption timer. */
2568 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
2569 {
2570 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_PREEMPT_TIMER);
2571 fVal |= VMX_PIN_CTLS_PREEMPT_TIMER;
2572 }
2573
2574#if 0
2575 /* Enable posted-interrupt processing. */
2576 if (pVM->hm.s.fPostedIntrs)
2577 {
2578 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_POSTED_INT);
2579 Assert(g_HmMsrs.u.vmx.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_ACK_EXT_INT);
2580 fVal |= VMX_PIN_CTLS_POSTED_INT;
2581 }
2582#endif
2583
2584 if ((fVal & fZap) != fVal)
2585 {
2586 LogRelFunc(("Invalid pin-based VM-execution controls combo! Cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2587 g_HmMsrs.u.vmx.PinCtls.n.allowed0, fVal, fZap));
2588 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PIN_EXEC;
2589 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2590 }
2591
2592 /* Commit it to the VMCS and update our cache. */
2593 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, fVal);
2594 AssertRC(rc);
2595 pVmcsInfo->u32PinCtls = fVal;
2596
2597 return VINF_SUCCESS;
2598}
2599
2600
2601/**
2602 * Sets up secondary processor-based VM-execution controls in the VMCS.
2603 *
2604 * @returns VBox status code.
2605 * @param pVCpu The cross context virtual CPU structure.
2606 * @param pVmcsInfo The VMCS info. object.
2607 */
2608static int hmR0VmxSetupVmcsProcCtls2(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2609{
2610 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2611 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls2.n.allowed0; /* Bits set here must be set in the VMCS. */
2612 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls2.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2613
2614 /* WBINVD causes a VM-exit. */
2615 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_WBINVD_EXIT)
2616 fVal |= VMX_PROC_CTLS2_WBINVD_EXIT;
2617
2618 /* Enable EPT (aka nested-paging). */
2619 if (pVM->hmr0.s.fNestedPaging)
2620 fVal |= VMX_PROC_CTLS2_EPT;
2621
2622 /* Enable the INVPCID instruction if we expose it to the guest and it is supported
2623 by the hardware. Without this, a guest executing INVPCID would cause a #UD. */
2624 if ( pVM->cpum.ro.GuestFeatures.fInvpcid
2625 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_INVPCID))
2626 fVal |= VMX_PROC_CTLS2_INVPCID;
2627
2628 /* Enable VPID. */
2629 if (pVM->hmr0.s.vmx.fVpid)
2630 fVal |= VMX_PROC_CTLS2_VPID;
2631
2632 /* Enable unrestricted guest execution. */
2633 if (pVM->hmr0.s.vmx.fUnrestrictedGuest)
2634 fVal |= VMX_PROC_CTLS2_UNRESTRICTED_GUEST;
2635
2636#if 0
2637 if (pVM->hm.s.fVirtApicRegs)
2638 {
2639 /* Enable APIC-register virtualization. */
2640 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_APIC_REG_VIRT);
2641 fVal |= VMX_PROC_CTLS2_APIC_REG_VIRT;
2642
2643 /* Enable virtual-interrupt delivery. */
2644 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_INTR_DELIVERY);
2645 fVal |= VMX_PROC_CTLS2_VIRT_INTR_DELIVERY;
2646 }
2647#endif
2648
2649 /* Virtualize-APIC accesses if supported by the CPU. The virtual-APIC page is
2650 where the TPR shadow resides. */
2651 /** @todo VIRT_X2APIC support, it's mutually exclusive with this. So must be
2652 * done dynamically. */
2653 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
2654 {
2655 fVal |= VMX_PROC_CTLS2_VIRT_APIC_ACCESS;
2656 hmR0VmxSetupVmcsApicAccessAddr(pVCpu);
2657 }
2658
2659 /* Enable the RDTSCP instruction if we expose it to the guest and it is supported
2660 by the hardware. Without this, a guest executing RDTSCP would cause a #UD. */
2661 if ( pVM->cpum.ro.GuestFeatures.fRdTscP
2662 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_RDTSCP))
2663 fVal |= VMX_PROC_CTLS2_RDTSCP;
2664
2665 /* Enable Pause-Loop exiting. */
2666 if ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT)
2667 && pVM->hm.s.vmx.cPleGapTicks
2668 && pVM->hm.s.vmx.cPleWindowTicks)
2669 {
2670 fVal |= VMX_PROC_CTLS2_PAUSE_LOOP_EXIT;
2671
2672 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, pVM->hm.s.vmx.cPleGapTicks); AssertRC(rc);
2673 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, pVM->hm.s.vmx.cPleWindowTicks); AssertRC(rc);
2674 }
2675
2676 if ((fVal & fZap) != fVal)
2677 {
2678 LogRelFunc(("Invalid secondary processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2679 g_HmMsrs.u.vmx.ProcCtls2.n.allowed0, fVal, fZap));
2680 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC2;
2681 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2682 }
2683
2684 /* Commit it to the VMCS and update our cache. */
2685 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, fVal);
2686 AssertRC(rc);
2687 pVmcsInfo->u32ProcCtls2 = fVal;
2688
2689 return VINF_SUCCESS;
2690}
2691
2692
2693/**
2694 * Sets up processor-based VM-execution controls in the VMCS.
2695 *
2696 * @returns VBox status code.
2697 * @param pVCpu The cross context virtual CPU structure.
2698 * @param pVmcsInfo The VMCS info. object.
2699 */
2700static int hmR0VmxSetupVmcsProcCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2701{
2702 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2703 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls.n.allowed0; /* Bits set here must be set in the VMCS. */
2704 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2705
2706 fVal |= VMX_PROC_CTLS_HLT_EXIT /* HLT causes a VM-exit. */
2707 | VMX_PROC_CTLS_USE_TSC_OFFSETTING /* Use TSC-offsetting. */
2708 | VMX_PROC_CTLS_MOV_DR_EXIT /* MOV DRx causes a VM-exit. */
2709 | VMX_PROC_CTLS_UNCOND_IO_EXIT /* All IO instructions cause a VM-exit. */
2710 | VMX_PROC_CTLS_RDPMC_EXIT /* RDPMC causes a VM-exit. */
2711 | VMX_PROC_CTLS_MONITOR_EXIT /* MONITOR causes a VM-exit. */
2712 | VMX_PROC_CTLS_MWAIT_EXIT; /* MWAIT causes a VM-exit. */
2713
2714 /* We toggle VMX_PROC_CTLS_MOV_DR_EXIT later, so check that it is not required to be -always- set or cleared. */
2715 if ( !(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MOV_DR_EXIT)
2716 || (g_HmMsrs.u.vmx.ProcCtls.n.allowed0 & VMX_PROC_CTLS_MOV_DR_EXIT))
2717 {
2718 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_MOV_DRX_EXIT;
2719 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2720 }
2721
2722 /* Without nested paging, INVLPG (also affects INVPCID) and MOV CR3 instructions should cause VM-exits. */
2723 if (!pVM->hmr0.s.fNestedPaging)
2724 {
2725 Assert(!pVM->hmr0.s.vmx.fUnrestrictedGuest);
2726 fVal |= VMX_PROC_CTLS_INVLPG_EXIT
2727 | VMX_PROC_CTLS_CR3_LOAD_EXIT
2728 | VMX_PROC_CTLS_CR3_STORE_EXIT;
2729 }
2730
2731 /* Use TPR shadowing if supported by the CPU. */
2732 if ( PDMHasApic(pVM)
2733 && (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW))
2734 {
2735 fVal |= VMX_PROC_CTLS_USE_TPR_SHADOW; /* CR8 reads from the Virtual-APIC page. */
2736 /* CR8 writes cause a VM-exit based on TPR threshold. */
2737 Assert(!(fVal & VMX_PROC_CTLS_CR8_STORE_EXIT));
2738 Assert(!(fVal & VMX_PROC_CTLS_CR8_LOAD_EXIT));
2739 hmR0VmxSetupVmcsVirtApicAddr(pVmcsInfo);
2740 }
2741 else
2742 {
2743 /* Some 32-bit CPUs do not support CR8 load/store exiting as MOV CR8 is
2744 invalid on 32-bit Intel CPUs. Set this control only for 64-bit guests. */
2745 if (pVM->hmr0.s.fAllow64BitGuests)
2746 fVal |= VMX_PROC_CTLS_CR8_STORE_EXIT /* CR8 reads cause a VM-exit. */
2747 | VMX_PROC_CTLS_CR8_LOAD_EXIT; /* CR8 writes cause a VM-exit. */
2748 }
2749
2750 /* Use MSR-bitmaps if supported by the CPU. */
2751 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2752 {
2753 fVal |= VMX_PROC_CTLS_USE_MSR_BITMAPS;
2754 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
2755 }
2756
2757 /* Use the secondary processor-based VM-execution controls if supported by the CPU. */
2758 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2759 fVal |= VMX_PROC_CTLS_USE_SECONDARY_CTLS;
2760
2761 if ((fVal & fZap) != fVal)
2762 {
2763 LogRelFunc(("Invalid processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2764 g_HmMsrs.u.vmx.ProcCtls.n.allowed0, fVal, fZap));
2765 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC;
2766 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2767 }
2768
2769 /* Commit it to the VMCS and update our cache. */
2770 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, fVal);
2771 AssertRC(rc);
2772 pVmcsInfo->u32ProcCtls = fVal;
2773
2774 /* Set up MSR permissions that don't change through the lifetime of the VM. */
2775 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2776 hmR0VmxSetupVmcsMsrPermissions(pVCpu, pVmcsInfo);
2777
2778 /* Set up secondary processor-based VM-execution controls if the CPU supports it. */
2779 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2780 return hmR0VmxSetupVmcsProcCtls2(pVCpu, pVmcsInfo);
2781
2782 /* Sanity check, should not really happen. */
2783 if (RT_LIKELY(!pVM->hmr0.s.vmx.fUnrestrictedGuest))
2784 { /* likely */ }
2785 else
2786 {
2787 pVCpu->hm.s.u32HMError = VMX_UFC_INVALID_UX_COMBO;
2788 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2789 }
2790
2791 /* Old CPUs without secondary processor-based VM-execution controls would end up here. */
2792 return VINF_SUCCESS;
2793}
2794
2795
2796/**
2797 * Sets up miscellaneous (everything other than Pin, Processor and secondary
2798 * Processor-based VM-execution) control fields in the VMCS.
2799 *
2800 * @returns VBox status code.
2801 * @param pVCpu The cross context virtual CPU structure.
2802 * @param pVmcsInfo The VMCS info. object.
2803 */
2804static int hmR0VmxSetupVmcsMiscCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2805{
2806#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2807 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
2808 {
2809 hmR0VmxSetupVmcsVmreadBitmapAddr(pVCpu);
2810 hmR0VmxSetupVmcsVmwriteBitmapAddr(pVCpu);
2811 }
2812#endif
2813
2814 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
2815 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
2816 AssertRC(rc);
2817
2818 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
2819 if (RT_SUCCESS(rc))
2820 {
2821 uint64_t const u64Cr0Mask = vmxHCGetFixedCr0Mask(pVCpu);
2822 uint64_t const u64Cr4Mask = vmxHCGetFixedCr4Mask(pVCpu);
2823
2824 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask); AssertRC(rc);
2825 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask); AssertRC(rc);
2826
2827 pVmcsInfo->u64Cr0Mask = u64Cr0Mask;
2828 pVmcsInfo->u64Cr4Mask = u64Cr4Mask;
2829
2830 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fLbr)
2831 {
2832 rc = VMXWriteVmcsNw(VMX_VMCS64_GUEST_DEBUGCTL_FULL, MSR_IA32_DEBUGCTL_LBR);
2833 AssertRC(rc);
2834 }
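        /* Illustrative note: MSR_IA32_DEBUGCTL_LBR is bit 0 of IA32_DEBUGCTL; seeding the
           guest DEBUGCTL field with it arms last-branch recording for the guest when LBR
           virtualization is configured. */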
2835 return VINF_SUCCESS;
2836 }
2837 else
2838 LogRelFunc(("Failed to initialize VMCS auto-load/store MSR addresses. rc=%Rrc\n", rc));
2839 return rc;
2840}
2841
2842
2843/**
2844 * Sets up the initial exception bitmap in the VMCS based on static conditions.
2845 *
2846 * We shall set up those exception intercepts that don't change during the
2847 * lifetime of the VM here. The rest are done dynamically while loading the
2848 * guest state.
2849 *
2850 * @param pVCpu The cross context virtual CPU structure.
2851 * @param pVmcsInfo The VMCS info. object.
2852 */
2853static void hmR0VmxSetupVmcsXcptBitmap(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2854{
2855 /*
2856 * The following exceptions are always intercepted:
2857 *
2858 * #AC - To prevent the guest from hanging the CPU and for dealing with
2859 * split-lock detecting host configs.
2860 * #DB - To maintain the DR6 state even when intercepting DRx reads/writes and
2861 * recursive #DBs can cause a CPU hang.
2862 * #PF - To sync our shadow page tables when nested-paging is not used.
2863 */
2864 bool const fNestedPaging = pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging;
2865 uint32_t const uXcptBitmap = RT_BIT(X86_XCPT_AC)
2866 | RT_BIT(X86_XCPT_DB)
2867 | (fNestedPaging ? 0 : RT_BIT(X86_XCPT_PF));
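    /* Worked example: with nested paging the bitmap is RT_BIT(17) | RT_BIT(1) = 0x20002
       (#AC and #DB); without it, RT_BIT(14) for #PF is added as well, giving 0x24002. */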
2868
2869 /* Commit it to the VMCS. */
2870 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap);
2871 AssertRC(rc);
2872
2873 /* Update our cache of the exception bitmap. */
2874 pVmcsInfo->u32XcptBitmap = uXcptBitmap;
2875}
2876
2877
2878#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2879/**
2880 * Sets up the VMCS for executing a nested-guest using hardware-assisted VMX.
2881 *
2882 * @returns VBox status code.
2883 * @param pVmcsInfo The VMCS info. object.
2884 */
2885static int hmR0VmxSetupVmcsCtlsNested(PVMXVMCSINFO pVmcsInfo)
2886{
2887 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
2888 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
2889 AssertRC(rc);
2890
2891 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
2892 if (RT_SUCCESS(rc))
2893 {
2894 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2895 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
2896
2897 /* Paranoia - We've not yet initialized these, they shall be done while merging the VMCS. */
2898 Assert(!pVmcsInfo->u64Cr0Mask);
2899 Assert(!pVmcsInfo->u64Cr4Mask);
2900 return VINF_SUCCESS;
2901 }
2902 LogRelFunc(("Failed to set up the VMCS link pointer in the nested-guest VMCS. rc=%Rrc\n", rc));
2903 return rc;
2904}
2905#endif
2906
2907
2908/**
2909 * Selector implementation for the VMX start-VM function (pfnStartVm).
2910 */
2911static DECLCALLBACK(int) hmR0VmxStartVmSelector(PVMXVMCSINFO pVmcsInfo, PVMCPUCC pVCpu, bool fResume)
2912{
2913 hmR0VmxUpdateStartVmFunction(pVCpu);
2914 return pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResume);
2915}
2916
2917
2918/**
2919 * Sets up the VMCS for executing a guest (or nested-guest) using hardware-assisted
2920 * VMX.
2921 *
2922 * @returns VBox status code.
2923 * @param pVCpu The cross context virtual CPU structure.
2924 * @param pVmcsInfo The VMCS info. object.
2925 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
2926 */
2927static int hmR0VmxSetupVmcs(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
2928{
2929 Assert(pVmcsInfo->pvVmcs);
2930 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2931
2932 /* Set the CPU specified revision identifier at the beginning of the VMCS structure. */
2933 *(uint32_t *)pVmcsInfo->pvVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
2934 const char * const pszVmcs = fIsNstGstVmcs ? "nested-guest VMCS" : "guest VMCS";
2935
2936 LogFlowFunc(("\n"));
2937
2938 /*
2939 * Initialize the VMCS using VMCLEAR before loading the VMCS.
2940 * See Intel spec. 31.6 "Preparation And Launching A Virtual Machine".
2941 */
2942 int rc = hmR0VmxClearVmcs(pVmcsInfo);
2943 if (RT_SUCCESS(rc))
2944 {
2945 rc = hmR0VmxLoadVmcs(pVmcsInfo);
2946 if (RT_SUCCESS(rc))
2947 {
2948 /*
2949 * Initialize the hardware-assisted VMX execution handler for guest and nested-guest VMCS.
2950 * The host is always 64-bit since we no longer support 32-bit hosts.
2951 * Currently we have just a single handler for all guest modes as well, see @bugref{6208#c73}.
2952 */
2953 if (!fIsNstGstVmcs)
2954 {
2955 rc = hmR0VmxSetupVmcsPinCtls(pVCpu, pVmcsInfo);
2956 if (RT_SUCCESS(rc))
2957 {
2958 rc = hmR0VmxSetupVmcsProcCtls(pVCpu, pVmcsInfo);
2959 if (RT_SUCCESS(rc))
2960 {
2961 rc = hmR0VmxSetupVmcsMiscCtls(pVCpu, pVmcsInfo);
2962 if (RT_SUCCESS(rc))
2963 {
2964 hmR0VmxSetupVmcsXcptBitmap(pVCpu, pVmcsInfo);
2965#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2966 /*
2967 * If a shadow VMCS is allocated for the VMCS info. object, initialize the
2968 * VMCS revision ID and shadow VMCS indicator bit. Also, clear the VMCS
2969 * making it fit for use when VMCS shadowing is later enabled.
2970 */
2971 if (pVmcsInfo->pvShadowVmcs)
2972 {
2973 VMXVMCSREVID VmcsRevId;
2974 VmcsRevId.u = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
2975 VmcsRevId.n.fIsShadowVmcs = 1;
2976 *(uint32_t *)pVmcsInfo->pvShadowVmcs = VmcsRevId.u;
2977 rc = vmxHCClearShadowVmcs(pVmcsInfo);
2978 if (RT_SUCCESS(rc))
2979 { /* likely */ }
2980 else
2981 LogRelFunc(("Failed to initialize shadow VMCS. rc=%Rrc\n", rc));
2982 }
2983#endif
2984 }
2985 else
2986 LogRelFunc(("Failed to setup miscellaneous controls. rc=%Rrc\n", rc));
2987 }
2988 else
2989 LogRelFunc(("Failed to setup processor-based VM-execution controls. rc=%Rrc\n", rc));
2990 }
2991 else
2992 LogRelFunc(("Failed to setup pin-based controls. rc=%Rrc\n", rc));
2993 }
2994 else
2995 {
2996#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2997 rc = hmR0VmxSetupVmcsCtlsNested(pVmcsInfo);
2998 if (RT_SUCCESS(rc))
2999 { /* likely */ }
3000 else
3001 LogRelFunc(("Failed to initialize nested-guest VMCS. rc=%Rrc\n", rc));
3002#else
3003 AssertFailed();
3004#endif
3005 }
3006 }
3007 else
3008 LogRelFunc(("Failed to load the %s. rc=%Rrc\n", pszVmcs, rc));
3009 }
3010 else
3011 LogRelFunc(("Failed to clear the %s. rc=%Rrc\n", pszVmcs, rc));
3012
3013 /* Sync any CPU internal VMCS data back into our VMCS in memory. */
3014 if (RT_SUCCESS(rc))
3015 {
3016 rc = hmR0VmxClearVmcs(pVmcsInfo);
3017 if (RT_SUCCESS(rc))
3018 { /* likely */ }
3019 else
3020 LogRelFunc(("Failed to clear the %s post setup. rc=%Rrc\n", rc, pszVmcs));
3021 }
3022
3023 /*
3024 * Update the last-error record both for failures and success, so we
3025 * can propagate the status code back to ring-3 for diagnostics.
3026 */
3027 hmR0VmxUpdateErrorRecord(pVCpu, rc);
3028 NOREF(pszVmcs);
3029 return rc;
3030}
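
/*
 * Summary of the sequence in hmR0VmxSetupVmcs() above: write the VMCS revision ID into
 * the VMCS region, VMCLEAR it, VMPTRLD it, program the static controls (pin/proc/misc
 * controls and the exception bitmap for a guest VMCS, or the reduced nested-guest set),
 * and finally VMCLEAR again so any CPU-internal VMCS data is flushed back to memory
 * before the last-error record is updated and the function returns.
 */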
3031
3032
3033/**
3034 * Does global VT-x initialization (called during module initialization).
3035 *
3036 * @returns VBox status code.
3037 */
3038VMMR0DECL(int) VMXR0GlobalInit(void)
3039{
3040#ifdef HMVMX_USE_FUNCTION_TABLE
3041 AssertCompile(VMX_EXIT_MAX + 1 == RT_ELEMENTS(g_aVMExitHandlers));
3042# ifdef VBOX_STRICT
3043 for (unsigned i = 0; i < RT_ELEMENTS(g_aVMExitHandlers); i++)
3044 Assert(g_aVMExitHandlers[i].pfn);
3045# endif
3046#endif
3047 return VINF_SUCCESS;
3048}
3049
3050
3051/**
3052 * Does global VT-x termination (called during module termination).
3053 */
3054VMMR0DECL(void) VMXR0GlobalTerm()
3055{
3056 /* Nothing to do currently. */
3057}
3058
3059
3060/**
3061 * Sets up and activates VT-x on the current CPU.
3062 *
3063 * @returns VBox status code.
3064 * @param pHostCpu The HM physical-CPU structure.
3065 * @param pVM The cross context VM structure. Can be
3066 * NULL after a host resume operation.
3067 * @param pvCpuPage Pointer to the VMXON region (can be NULL if @a
3068 * fEnabledByHost is @c true).
3069 * @param HCPhysCpuPage Physical address of the VMXON region (can be 0 if
3070 * @a fEnabledByHost is @c true).
3071 * @param fEnabledByHost Set if SUPR0EnableVTx() or similar was used to
3072 * enable VT-x on the host.
3073 * @param pHwvirtMsrs Pointer to the hardware-virtualization MSRs.
3074 */
3075VMMR0DECL(int) VMXR0EnableCpu(PHMPHYSCPU pHostCpu, PVMCC pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost,
3076 PCSUPHWVIRTMSRS pHwvirtMsrs)
3077{
3078 AssertPtr(pHostCpu);
3079 AssertPtr(pHwvirtMsrs);
3080 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3081
3082 /* Enable VT-x if it's not already enabled by the host. */
3083 if (!fEnabledByHost)
3084 {
3085 int rc = hmR0VmxEnterRootMode(pHostCpu, pVM, HCPhysCpuPage, pvCpuPage);
3086 if (RT_FAILURE(rc))
3087 return rc;
3088 }
3089
3090 /*
3091 * Flush all EPT tagged-TLB entries (in case VirtualBox or any other hypervisor has been
3092 * using EPTPs) so we don't retain any stale guest-physical mappings which won't get
3093 * invalidated when flushing by VPID.
3094 */
3095 if (pHwvirtMsrs->u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
3096 {
3097 hmR0VmxFlushEpt(NULL /* pVCpu */, NULL /* pVmcsInfo */, VMXTLBFLUSHEPT_ALL_CONTEXTS);
3098 pHostCpu->fFlushAsidBeforeUse = false;
3099 }
3100 else
3101 pHostCpu->fFlushAsidBeforeUse = true;
3102
3103 /* Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. */
3104 ++pHostCpu->cTlbFlushes;
3105
3106 return VINF_SUCCESS;
3107}
3108
3109
3110/**
3111 * Deactivates VT-x on the current CPU.
3112 *
3113 * @returns VBox status code.
3114 * @param pHostCpu The HM physical-CPU structure.
3115 * @param pvCpuPage Pointer to the VMXON region.
3116 * @param HCPhysCpuPage Physical address of the VMXON region.
3117 *
3118 * @remarks This function should never be called when SUPR0EnableVTx() or
3119 * similar was used to enable VT-x on the host.
3120 */
3121VMMR0DECL(int) VMXR0DisableCpu(PHMPHYSCPU pHostCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
3122{
3123 RT_NOREF2(pvCpuPage, HCPhysCpuPage);
3124
3125 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3126 return hmR0VmxLeaveRootMode(pHostCpu);
3127}
3128
3129
3130/**
3131 * Does per-VM VT-x initialization.
3132 *
3133 * @returns VBox status code.
3134 * @param pVM The cross context VM structure.
3135 */
3136VMMR0DECL(int) VMXR0InitVM(PVMCC pVM)
3137{
3138 AssertPtr(pVM);
3139 LogFlowFunc(("pVM=%p\n", pVM));
3140
3141 hmR0VmxStructsInit(pVM);
3142 int rc = hmR0VmxStructsAlloc(pVM);
3143 if (RT_FAILURE(rc))
3144 {
3145 LogRelFunc(("Failed to allocate VMX structures. rc=%Rrc\n", rc));
3146 return rc;
3147 }
3148
3149 /* Setup the crash dump page. */
3150#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3151 strcpy((char *)pVM->hmr0.s.vmx.pbScratch, "SCRATCH Magic");
3152 *(uint64_t *)(pVM->hmr0.s.vmx.pbScratch + 16) = UINT64_C(0xdeadbeefdeadbeef);
3153#endif
3154 return VINF_SUCCESS;
3155}
3156
3157
3158/**
3159 * Does per-VM VT-x termination.
3160 *
3161 * @returns VBox status code.
3162 * @param pVM The cross context VM structure.
3163 */
3164VMMR0DECL(int) VMXR0TermVM(PVMCC pVM)
3165{
3166 AssertPtr(pVM);
3167 LogFlowFunc(("pVM=%p\n", pVM));
3168
3169#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3170 if (pVM->hmr0.s.vmx.pbScratch)
3171 RT_BZERO(pVM->hmr0.s.vmx.pbScratch, X86_PAGE_4K_SIZE);
3172#endif
3173 hmR0VmxStructsFree(pVM);
3174 return VINF_SUCCESS;
3175}
3176
3177
3178/**
3179 * Sets up the VM for execution using hardware-assisted VMX.
3180 * This function is only called once per-VM during initialization.
3181 *
3182 * @returns VBox status code.
3183 * @param pVM The cross context VM structure.
3184 */
3185VMMR0DECL(int) VMXR0SetupVM(PVMCC pVM)
3186{
3187 AssertPtr(pVM);
3188 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3189
3190 LogFlowFunc(("pVM=%p\n", pVM));
3191
3192 /*
3193 * At least verify if VMX is enabled, since we can't check if we're in VMX root mode or not
3194 * without causing a #GP.
3195 */
3196 RTCCUINTREG const uHostCr4 = ASMGetCR4();
3197 if (RT_LIKELY(uHostCr4 & X86_CR4_VMXE))
3198 { /* likely */ }
3199 else
3200 return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
3201
3202 /*
3203 * Check that nested paging is supported if enabled and copy over the flag to the
3204 * ring-0 only structure.
3205 */
3206 bool const fNestedPaging = pVM->hm.s.fNestedPagingCfg;
3207 AssertReturn( !fNestedPaging
3208 || (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_EPT), /** @todo use a ring-0 copy of ProcCtls2.n.allowed1 */
3209 VERR_INCOMPATIBLE_CONFIG);
3210 pVM->hmr0.s.fNestedPaging = fNestedPaging;
3211 pVM->hmr0.s.fAllow64BitGuests = pVM->hm.s.fAllow64BitGuestsCfg;
3212
3213 /*
3214 * Without unrestricted guest execution, pRealModeTSS and pNonPagingModeEPTPageTable *must*
3215 * always be allocated. We no longer support the highly unlikely case of unrestricted guest
3216 * without pRealModeTSS, see hmR3InitFinalizeR0Intel().
3217 */
3218 bool const fUnrestrictedGuest = pVM->hm.s.vmx.fUnrestrictedGuestCfg;
3219 AssertReturn( !fUnrestrictedGuest
3220 || ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_UNRESTRICTED_GUEST)
3221 && fNestedPaging),
3222 VERR_INCOMPATIBLE_CONFIG);
3223 if ( !fUnrestrictedGuest
3224 && ( !pVM->hm.s.vmx.pNonPagingModeEPTPageTable
3225 || !pVM->hm.s.vmx.pRealModeTSS))
3226 {
3227 LogRelFunc(("Invalid real-on-v86 state.\n"));
3228 return VERR_INTERNAL_ERROR;
3229 }
3230 pVM->hmr0.s.vmx.fUnrestrictedGuest = fUnrestrictedGuest;
3231
3232 /* Initialize these always, see hmR3InitFinalizeR0().*/
3233 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NONE;
3234 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NONE;
3235
3236 /* Setup the tagged-TLB flush handlers. */
3237 int rc = hmR0VmxSetupTaggedTlb(pVM);
3238 if (RT_FAILURE(rc))
3239 {
3240 LogRelFunc(("Failed to setup tagged TLB. rc=%Rrc\n", rc));
3241 return rc;
3242 }
3243
3244 /* Determine LBR capabilities. */
3245 pVM->hmr0.s.vmx.fLbr = pVM->hm.s.vmx.fLbrCfg;
3246 if (pVM->hmr0.s.vmx.fLbr)
3247 {
3248 rc = hmR0VmxSetupLbrMsrRange(pVM);
3249 if (RT_FAILURE(rc))
3250 {
3251 LogRelFunc(("Failed to setup LBR MSR range. rc=%Rrc\n", rc));
3252 return rc;
3253 }
3254 }
3255
3256#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3257 /* Setup the shadow VMCS fields array and VMREAD/VMWRITE bitmaps. */
3258 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
3259 {
3260 rc = hmR0VmxSetupShadowVmcsFieldsArrays(pVM);
3261 if (RT_SUCCESS(rc))
3262 hmR0VmxSetupVmreadVmwriteBitmaps(pVM);
3263 else
3264 {
3265 LogRelFunc(("Failed to setup shadow VMCS fields arrays. rc=%Rrc\n", rc));
3266 return rc;
3267 }
3268 }
3269#endif
3270
3271 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
3272 {
3273 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
3274 Log4Func(("pVCpu=%p idCpu=%RU32\n", pVCpu, pVCpu->idCpu));
3275
3276 pVCpu->hmr0.s.vmx.pfnStartVm = hmR0VmxStartVmSelector;
3277
3278 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
3279 if (RT_SUCCESS(rc))
3280 {
3281#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3282 if (pVM->cpum.ro.GuestFeatures.fVmx)
3283 {
3284 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
3285 if (RT_SUCCESS(rc))
3286 { /* likely */ }
3287 else
3288 {
3289 LogRelFunc(("Nested-guest VMCS setup failed. rc=%Rrc\n", rc));
3290 return rc;
3291 }
3292 }
3293#endif
3294 }
3295 else
3296 {
3297 LogRelFunc(("VMCS setup failed. rc=%Rrc\n", rc));
3298 return rc;
3299 }
3300 }
3301
3302 return VINF_SUCCESS;
3303}
3304
3305
3306/**
3307 * Saves the host control registers (CR0, CR3, CR4) into the host-state area in
3308 * the VMCS.
3309 * @returns CR4 for passing along to hmR0VmxExportHostSegmentRegs.
3310 */
3311static uint64_t hmR0VmxExportHostControlRegs(void)
3312{
3313 int rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR0, ASMGetCR0()); AssertRC(rc);
3314 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR3, ASMGetCR3()); AssertRC(rc);
3315 uint64_t uHostCr4 = ASMGetCR4();
3316 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR4, uHostCr4); AssertRC(rc);
3317 return uHostCr4;
3318}
3319
3320
3321/**
3322 * Saves the host segment registers and GDTR, IDTR, (TR, GS and FS bases) into
3323 * the host-state area in the VMCS.
3324 *
3325 * @returns VBox status code.
3326 * @param pVCpu The cross context virtual CPU structure.
3327 * @param uHostCr4 The host CR4 value.
3328 */
3329static int hmR0VmxExportHostSegmentRegs(PVMCPUCC pVCpu, uint64_t uHostCr4)
3330{
3331 /*
3332 * If we've executed guest code using hardware-assisted VMX, the host-state bits
3333 * will be messed up. We should -not- save the messed up state without restoring
3334 * the original host-state, see @bugref{7240}.
3335 *
3336 * This can apparently happen (most likely due to FPU changes); deal with it rather than
3337 * asserting. It was observed when booting a Solaris 10u10 32-bit guest.
3338 */
3339 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
3340 {
3341 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags,
3342 pVCpu->idCpu));
3343 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
3344 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
3345 }
3346
3347 /*
3348 * Get all the host info.
3349 * ASSUME it is safe to use rdfsbase and friends if the CR4.FSGSBASE bit is set
3350 * without also checking the cpuid bit.
3351 */
3352 uint32_t fRestoreHostFlags;
3353#if RT_INLINE_ASM_EXTERNAL
3354 if (uHostCr4 & X86_CR4_FSGSBASE)
3355 {
3356 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, true /*fHaveFsGsBase*/);
3357 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
3358 }
3359 else
3360 {
3361 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, false /*fHaveFsGsBase*/);
3362 fRestoreHostFlags = 0;
3363 }
3364 RTSEL uSelES = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES;
3365 RTSEL uSelDS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS;
3366 RTSEL uSelFS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS;
3367 RTSEL uSelGS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS;
3368#else
3369 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR = ASMGetTR();
3370 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS = ASMGetSS();
3371 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS = ASMGetCS();
3372 ASMGetGDTR((PRTGDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr);
3373 ASMGetIDTR((PRTIDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr);
3374 if (uHostCr4 & X86_CR4_FSGSBASE)
3375 {
3376 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMGetFSBase();
3377 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMGetGSBase();
3378 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
3379 }
3380 else
3381 {
3382 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMRdMsr(MSR_K8_FS_BASE);
3383 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMRdMsr(MSR_K8_GS_BASE);
3384 fRestoreHostFlags = 0;
3385 }
3386 RTSEL uSelES, uSelDS, uSelFS, uSelGS;
3387 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS = uSelDS = ASMGetDS();
3388 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES = uSelES = ASMGetES();
3389 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS = uSelFS = ASMGetFS();
3390 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS = uSelGS = ASMGetGS();
3391#endif
3392
3393 /*
3394 * Determine if the host segment registers are suitable for VT-x. Otherwise use zero to
3395 * gain VM-entry and restore them before we get preempted.
3396 *
3397 * See Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers".
3398 */
3399 RTSEL const uSelAll = uSelFS | uSelGS | uSelES | uSelDS;
3400 if (uSelAll & (X86_SEL_RPL | X86_SEL_LDT))
3401 {
3402 if (!(uSelAll & X86_SEL_LDT))
3403 {
3404#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
3405 do { \
3406 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
3407 if ((a_uVmcsVar) & X86_SEL_RPL) \
3408 { \
3409 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3410 (a_uVmcsVar) = 0; \
3411 } \
3412 } while (0)
3413 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
3414 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
3415 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
3416 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
3417#undef VMXLOCAL_ADJUST_HOST_SEG
3418 }
3419 else
3420 {
3421#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
3422 do { \
3423 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
3424 if ((a_uVmcsVar) & (X86_SEL_RPL | X86_SEL_LDT)) \
3425 { \
3426 if (!((a_uVmcsVar) & X86_SEL_LDT)) \
3427 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3428 else \
3429 { \
3430 uint32_t const fAttr = ASMGetSegAttr(a_uVmcsVar); \
3431 if ((fAttr & X86_DESC_P) && fAttr != UINT32_MAX) \
3432 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3433 } \
3434 (a_uVmcsVar) = 0; \
3435 } \
3436 } while (0)
3437 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
3438 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
3439 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
3440 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
3441#undef VMXLOCAL_ADJUST_HOST_SEG
3442 }
3443 }
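
    /*
     * Worked example for the adjustment above (values are illustrative): a host DS
     * selector of 0x2b has TI=0 and RPL=3, so the X86_SEL_RPL test fires; DS is then
     * loaded as 0 for VM-entry and VMX_RESTORE_HOST_SEL_DS is set so the original
     * selector is put back before we can be preempted.
     */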
3444
3445 /* Verification based on Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers" */
3446 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR);
3447 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS);
3448 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_LDT));
3449 Assert(!(uSelDS & X86_SEL_RPL)); Assert(!(uSelDS & X86_SEL_LDT));
3450 Assert(!(uSelES & X86_SEL_RPL)); Assert(!(uSelES & X86_SEL_LDT));
3451 Assert(!(uSelFS & X86_SEL_RPL)); Assert(!(uSelFS & X86_SEL_LDT));
3452 Assert(!(uSelGS & X86_SEL_RPL)); Assert(!(uSelGS & X86_SEL_LDT));
3453
3454 /*
3455 * Determine if we need to manually restore the GDTR and IDTR limits as VT-x zaps
3456 * them to the maximum limit (0xffff) on every VM-exit.
3457 */
3458 if (pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb != 0xffff)
3459 fRestoreHostFlags |= VMX_RESTORE_HOST_GDTR;
3460
3461 /*
3462 * IDT limit is effectively capped at 0xfff. (See Intel spec. 6.14.1 "64-Bit Mode IDT" and
3463 * Intel spec. 6.2 "Exception and Interrupt Vectors".) Therefore if the host has the limit
3464 * as 0xfff, VT-x bloating the limit to 0xffff shouldn't cause any different CPU behavior.
3465 * However, several hosts either insist on 0xfff being the limit (Windows Patch Guard) or
3466 * use the limit for other purposes (darwin puts the CPU ID in there but botches sidt
3467 * alignment in at least one consumer). So, we're only allowing the IDTR.LIMIT to be left
3468 * at 0xffff on hosts where we are sure it won't cause trouble.
3469 */
3470#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS)
3471 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb < 0x0fff)
3472#else
3473 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb != 0xffff)
3474#endif
3475 fRestoreHostFlags |= VMX_RESTORE_HOST_IDTR;
3476
3477 /*
3478 * Host TR base. Verify that TR selector doesn't point past the GDT. Masking off the TI
3479 * and RPL bits is effectively what the CPU does for "scaling by 8". TI is always 0 and
3480 * RPL should be too in most cases.
3481 */
3482 RTSEL const uSelTR = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR;
3483 AssertMsgReturn((uSelTR | X86_SEL_RPL_LDT) <= pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb,
3484 ("TR selector exceeds limit. TR=%RTsel cbGdt=%#x\n", uSelTR, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb),
3485 VERR_VMX_INVALID_HOST_STATE);
3486
3487 PCX86DESCHC pDesc = (PCX86DESCHC)(pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr + (uSelTR & X86_SEL_MASK));
3488 uintptr_t const uTRBase = X86DESC64_BASE(pDesc);
3489
3490 /*
3491 * VT-x unconditionally restores the TR limit to 0x67 and type to 11 (32-bit busy TSS) on
3492 * all VM-exits. The type is the same for 64-bit busy TSS[1]. The limit needs manual
3493 * restoration if the host has something else. Task switching is not supported in 64-bit
3494 * mode[2], but the limit still matters as IOPM is supported in 64-bit mode. Restoring the
3495 * limit lazily while returning to ring-3 is safe because IOPM is not applicable in ring-0.
3496 *
3497 * [1] See Intel spec. 3.5 "System Descriptor Types".
3498 * [2] See Intel spec. 7.2.3 "TSS Descriptor in 64-bit mode".
3499 */
3500 Assert(pDesc->System.u4Type == 11);
3501 if ( pDesc->System.u16LimitLow != 0x67
3502 || pDesc->System.u4LimitHigh)
3503 {
3504 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_TR;
3505
3506 /* If the host has made GDT read-only, we would need to temporarily toggle CR0.WP before writing the GDT. */
3507 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_READ_ONLY)
3508 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_READ_ONLY;
3509 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_NEED_WRITABLE)
3510 {
3511 /* The GDT is read-only but the writable GDT is available. */
3512 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_NEED_WRITABLE;
3513 pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.cb = pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb;
3514 int rc = SUPR0GetCurrentGdtRw(&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.uAddr);
3515 AssertRCReturn(rc, rc);
3516 }
3517 }
3518
3519 pVCpu->hmr0.s.vmx.fRestoreHostFlags = fRestoreHostFlags;
3520
3521 /*
3522 * Do all the VMCS updates in one block to assist nested virtualization.
3523 */
3524 int rc;
3525 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_CS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS); AssertRC(rc);
3526 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_SS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS); AssertRC(rc);
3527 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_DS_SEL, uSelDS); AssertRC(rc);
3528 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_ES_SEL, uSelES); AssertRC(rc);
3529 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_FS_SEL, uSelFS); AssertRC(rc);
3530 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_GS_SEL, uSelGS); AssertRC(rc);
3531 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_TR_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR); AssertRC(rc);
3532 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr); AssertRC(rc);
3533 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_IDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.uAddr); AssertRC(rc);
3534 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_TR_BASE, uTRBase); AssertRC(rc);
3535 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_FS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase); AssertRC(rc);
3536 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase); AssertRC(rc);
3537
3538 return VINF_SUCCESS;
3539}
3540
3541
3542/**
3543 * Exports certain host MSRs in the VM-exit MSR-load area and some in the
3544 * host-state area of the VMCS.
3545 *
3546 * These MSRs will be automatically restored on the host after every successful
3547 * VM-exit.
3548 *
3549 * @param pVCpu The cross context virtual CPU structure.
3550 *
3551 * @remarks No-long-jump zone!!!
3552 */
3553static void hmR0VmxExportHostMsrs(PVMCPUCC pVCpu)
3554{
3555 AssertPtr(pVCpu);
3556
3557 /*
3558 * Save MSRs that we restore lazily (due to preemption or transition to ring-3)
3559 * rather than swapping them on every VM-entry.
3560 */
3561 hmR0VmxLazySaveHostMsrs(pVCpu);
3562
3563 /*
3564 * Host Sysenter MSRs.
3565 */
3566 int rc = VMXWriteVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)); AssertRC(rc);
3567 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP)); AssertRC(rc);
3568 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP)); AssertRC(rc);
3569
3570 /*
3571 * Host EFER MSR.
3572 *
3573 * If the CPU supports the newer VMCS controls for managing EFER, use it. Otherwise it's
3574 * done as part of auto-load/store MSR area in the VMCS, see hmR0VmxExportGuestMsrs().
3575 */
3576 if (g_fHmVmxSupportsVmcsEfer)
3577 {
3578 rc = VMXWriteVmcs64(VMX_VMCS64_HOST_EFER_FULL, g_uHmVmxHostMsrEfer);
3579 AssertRC(rc);
3580 }
3581
3582 /** @todo IA32_PERF_GLOBALCTRL, IA32_PAT also see
3583 * hmR0VmxExportGuestEntryExitCtls(). */
3584}
3585
3586
3587/**
3588 * Figures out if we need to swap the EFER MSR which is particularly expensive.
3589 *
3590 * We check all relevant bits. For now, that's everything besides LMA/LME, as
3591 * these two bits are handled by VM-entry, see hmR0VmxExportGuestEntryExitCtls().
3592 *
3593 * @returns true if we need to load guest EFER, false otherwise.
3594 * @param pVCpu The cross context virtual CPU structure.
3595 * @param pVmxTransient The VMX-transient structure.
3596 *
3597 * @remarks Requires EFER, CR4.
3598 * @remarks No-long-jump zone!!!
3599 */
3600static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3601{
3602#ifdef HMVMX_ALWAYS_SWAP_EFER
3603 RT_NOREF2(pVCpu, pVmxTransient);
3604 return true;
3605#else
3606 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
3607 uint64_t const u64HostEfer = g_uHmVmxHostMsrEfer;
3608 uint64_t const u64GuestEfer = pCtx->msrEFER;
3609
3610# ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3611 /*
3612 * For nested-guests, we shall honor swapping the EFER MSR when requested by
3613 * the nested-guest.
3614 */
3615 if ( pVmxTransient->fIsNestedGuest
3616 && ( CPUMIsGuestVmxEntryCtlsSet(pCtx, VMX_ENTRY_CTLS_LOAD_EFER_MSR)
3617 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_SAVE_EFER_MSR)
3618 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_LOAD_EFER_MSR)))
3619 return true;
3620# else
3621 RT_NOREF(pVmxTransient);
3622 # endif
3623
3624 /*
3625 * For 64-bit guests, if EFER.SCE bit differs, we need to swap the EFER MSR
3626 * to ensure that the guest's SYSCALL behaviour isn't broken, see @bugref{7386}.
3627 */
3628 if ( CPUMIsGuestInLongModeEx(pCtx)
3629 && (u64GuestEfer & MSR_K6_EFER_SCE) != (u64HostEfer & MSR_K6_EFER_SCE))
3630 return true;
3631
3632 /*
3633 * If the guest uses PAE and EFER.NXE bit differs, we need to swap the EFER MSR
3634 * as it affects guest paging. 64-bit paging implies CR4.PAE as well.
3635 *
3636 * See Intel spec. 4.5 "IA-32e Paging".
3637 * See Intel spec. 4.1.1 "Three Paging Modes".
3638 *
3639 * Verify that we always intercept CR4.PAE and CR0.PG bits, so we don't need to
3640 * import CR4 and CR0 from the VMCS here as those bits are always up to date.
3641 */
3642 Assert(vmxHCGetFixedCr4Mask(pVCpu) & X86_CR4_PAE);
3643 Assert(vmxHCGetFixedCr0Mask(pVCpu) & X86_CR0_PG);
3644 if ( (pCtx->cr4 & X86_CR4_PAE)
3645 && (pCtx->cr0 & X86_CR0_PG))
3646 {
3647 /*
3648 * If nested paging is not used, verify that the guest paging mode matches the
3649 * shadow paging mode which is/will be placed in the VMCS (which is what will
3650 * actually be used while executing the guest and not the CR4 shadow value).
3651 */
3652 AssertMsg( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
3653 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE
3654 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE_NX
3655 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64
3656 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64_NX,
3657 ("enmShadowMode=%u\n", pVCpu->hm.s.enmShadowMode));
3658 if ((u64GuestEfer & MSR_K6_EFER_NXE) != (u64HostEfer & MSR_K6_EFER_NXE))
3659 {
3660 /* Verify that the host is NX capable. */
3661 Assert(g_CpumHostFeatures.s.fNoExecute);
3662 return true;
3663 }
3664 }
3665
3666 return false;
3667#endif
3668}
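
/*
 * Worked example for hmR0VmxShouldSwapEferMsr() (values are illustrative): with a
 * typical 64-bit host EFER of 0xd01 (SCE|LME|LMA|NXE) and a 64-bit guest that has
 * cleared EFER.SCE (e.g. 0xd00), the MSR_K6_EFER_SCE bits differ, so the function
 * returns true and the guest EFER gets swapped in for the VM-entry.
 */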
3669
3670
3671/**
3672 * Exports the guest's RSP into the guest-state area in the VMCS.
3673 *
3674 * @param pVCpu The cross context virtual CPU structure.
3675 *
3676 * @remarks No-long-jump zone!!!
3677 */
3678static void hmR0VmxExportGuestRsp(PVMCPUCC pVCpu)
3679{
3680 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RSP)
3681 {
3682 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RSP);
3683
3684 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_RSP, pVCpu->cpum.GstCtx.rsp);
3685 AssertRC(rc);
3686
3687 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RSP);
3688 Log4Func(("rsp=%#RX64\n", pVCpu->cpum.GstCtx.rsp));
3689 }
3690}
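
/*
 * The export above follows the usual HM dirty-flag protocol: code that changes the
 * guest RSP outside of this path is expected to set HM_CHANGED_GUEST_RSP in
 * pVCpu->hm.s.fCtxChanged (e.g. via ASMAtomicUoOrU64()) so that the next export pass
 * writes the new value into the VMCS; this is only a sketch of the convention, not a
 * statement about every caller.
 */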
3691
3692
3693/**
3694 * Exports the guest hardware-virtualization state.
3695 *
3696 * @returns VBox status code.
3697 * @param pVCpu The cross context virtual CPU structure.
3698 * @param pVmxTransient The VMX-transient structure.
3699 *
3700 * @remarks No-long-jump zone!!!
3701 */
3702static int hmR0VmxExportGuestHwvirtState(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3703{
3704 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_HWVIRT)
3705 {
3706#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3707 /*
3708 * Check if the VMX feature is exposed to the guest and if the host CPU supports
3709 * VMCS shadowing.
3710 */
3711 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
3712 {
3713 /*
3714 * If the nested hypervisor has loaded a current VMCS and is in VMX root mode,
3715 * copy the nested hypervisor's current VMCS into the shadow VMCS and enable
3716 * VMCS shadowing to skip intercepting some or all VMREAD/VMWRITE VM-exits.
3717 *
3718 * We check for VMX root mode here in case the guest executes VMXOFF without
3719 * clearing the current VMCS pointer and our VMXOFF instruction emulation does
3720 * not clear the current VMCS pointer.
3721 */
3722 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
3723 if ( CPUMIsGuestInVmxRootMode(&pVCpu->cpum.GstCtx)
3724 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)
3725 && CPUMIsGuestVmxCurrentVmcsValid(&pVCpu->cpum.GstCtx))
3726 {
3727 /* Paranoia. */
3728 Assert(!pVmxTransient->fIsNestedGuest);
3729
3730 /*
3731 * For performance reasons, also check if the nested hypervisor's current VMCS
3732 * was newly loaded or modified before copying it to the shadow VMCS.
3733 */
3734 if (!pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs)
3735 {
3736 int rc = vmxHCCopyNstGstToShadowVmcs(pVCpu, pVmcsInfo);
3737 AssertRCReturn(rc, rc);
3738 pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs = true;
3739 }
3740 vmxHCEnableVmcsShadowing(pVCpu, pVmcsInfo);
3741 }
3742 else
3743 vmxHCDisableVmcsShadowing(pVCpu, pVmcsInfo);
3744 }
3745#else
3746 NOREF(pVmxTransient);
3747#endif
3748 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_HWVIRT);
3749 }
3750 return VINF_SUCCESS;
3751}
3752
3753
3754/**
3755 * Exports the guest debug registers into the guest-state area in the VMCS.
3756 * The guest debug bits are partially shared with the host (e.g. DR6, DR0-3).
3757 *
3758 * This also sets up whether \#DB and MOV DRx accesses cause VM-exits.
3759 *
3760 * @returns VBox status code.
3761 * @param pVCpu The cross context virtual CPU structure.
3762 * @param pVmxTransient The VMX-transient structure.
3763 *
3764 * @remarks No-long-jump zone!!!
3765 */
3766static int hmR0VmxExportSharedDebugState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
3767{
3768 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3769
3770 /** @todo NSTVMX: Figure out what we want to do with nested-guest instruction
3771 * stepping. */
3772 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
3773 if (pVmxTransient->fIsNestedGuest)
3774 {
3775 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, CPUMGetGuestDR7(pVCpu));
3776 AssertRC(rc);
3777
3778 /*
3779 * We don't want to always intercept MOV DRx for nested-guests as it causes
3780 * problems when the nested hypervisor isn't intercepting them, see @bugref{10080}.
3781 * Instead, they are strictly only requested when the nested hypervisor intercepts
3782 * them -- handled while merging VMCS controls.
3783 *
3784 * If neither the outer nor the nested-hypervisor is intercepting MOV DRx,
3785 * then the nested-guest debug state should be actively loaded on the host so that
3786 * nested-guest reads its own debug registers without causing VM-exits.
3787 */
3788 if ( !(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
3789 && !CPUMIsGuestDebugStateActive(pVCpu))
3790 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3791 return VINF_SUCCESS;
3792 }
3793
3794#ifdef VBOX_STRICT
3795 /* Validate. Intel spec. 26.3.1.1 "Checks on Guest Controls Registers, Debug Registers, MSRs" */
3796 if (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG)
3797 {
3798 /* Validate. Intel spec. 17.2 "Debug Registers", recompiler paranoia checks. */
3799 Assert((pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_MBZ_MASK | X86_DR7_RAZ_MASK)) == 0);
3800 Assert((pVCpu->cpum.GstCtx.dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK);
3801 }
3802#endif
3803
3804 bool fSteppingDB = false;
3805 bool fInterceptMovDRx = false;
3806 uint32_t uProcCtls = pVmcsInfo->u32ProcCtls;
3807 if (pVCpu->hm.s.fSingleInstruction)
3808 {
3809 /* If the CPU supports the monitor trap flag, use it for single stepping in DBGF and avoid intercepting #DB. */
3810 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MONITOR_TRAP_FLAG)
3811 {
3812 uProcCtls |= VMX_PROC_CTLS_MONITOR_TRAP_FLAG;
3813 Assert(fSteppingDB == false);
3814 }
3815 else
3816 {
3817 pVCpu->cpum.GstCtx.eflags.u32 |= X86_EFL_TF;
3818 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_RFLAGS;
3819 pVCpu->hmr0.s.fClearTrapFlag = true;
3820 fSteppingDB = true;
3821 }
3822 }
3823
3824 uint64_t u64GuestDr7;
3825 if ( fSteppingDB
3826 || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK))
3827 {
3828 /*
3829 * Use the combined guest and host DRx values found in the hypervisor register set
3830 * because the hypervisor debugger has breakpoints active or someone is single stepping
3831 * on the host side without a monitor trap flag.
3832 *
3833 * Note! DBGF expects a clean DR6 state before executing guest code.
3834 */
3835 if (!CPUMIsHyperDebugStateActive(pVCpu))
3836 {
3837 CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */);
3838 Assert(CPUMIsHyperDebugStateActive(pVCpu));
3839 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
3840 }
3841
3842 /* Update DR7 with the hypervisor value (other DRx registers are handled by CPUM one way or another). */
3843 u64GuestDr7 = CPUMGetHyperDR7(pVCpu);
3844 pVCpu->hmr0.s.fUsingHyperDR7 = true;
3845 fInterceptMovDRx = true;
3846 }
3847 else
3848 {
3849 /*
3850 * If the guest has enabled debug registers, we need to load them prior to
3851 * executing guest code so they'll trigger at the right time.
3852 */
3853 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_DR7);
3854 if (pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD))
3855 {
3856 if (!CPUMIsGuestDebugStateActive(pVCpu))
3857 {
3858 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3859 Assert(CPUMIsGuestDebugStateActive(pVCpu));
3860 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
3861 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
3862 }
3863 Assert(!fInterceptMovDRx);
3864 }
3865 else if (!CPUMIsGuestDebugStateActive(pVCpu))
3866 {
3867 /*
3868 * If no debugging is enabled, we'll lazily load DR0-3. Unlike on AMD-V, we
3869 * must intercept #DB in order to maintain a correct DR6 guest value, and
3870 * because we need to intercept it to prevent nested #DBs from hanging the
3871 * CPU, we end up always having to intercept it. See hmR0VmxSetupVmcsXcptBitmap().
3872 */
3873 fInterceptMovDRx = true;
3874 }
3875
3876 /* Update DR7 with the actual guest value. */
3877 u64GuestDr7 = pVCpu->cpum.GstCtx.dr[7];
3878 pVCpu->hmr0.s.fUsingHyperDR7 = false;
3879 }
3880
3881 if (fInterceptMovDRx)
3882 uProcCtls |= VMX_PROC_CTLS_MOV_DR_EXIT;
3883 else
3884 uProcCtls &= ~VMX_PROC_CTLS_MOV_DR_EXIT;
3885
3886 /*
3887 * Update the processor-based VM-execution controls with the MOV-DRx intercepts and the
3888 * monitor-trap flag and update our cache.
3889 */
3890 if (uProcCtls != pVmcsInfo->u32ProcCtls)
3891 {
3892 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls);
3893 AssertRC(rc);
3894 pVmcsInfo->u32ProcCtls = uProcCtls;
3895 }
3896
3897 /*
3898 * Update guest DR7.
3899 */
3900 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, u64GuestDr7);
3901 AssertRC(rc);
3902
3903 /*
3904 * If we have forced EFLAGS.TF to be set because we're single-stepping in the hypervisor debugger,
3905 * we need to clear interrupt inhibition if any as otherwise it causes a VM-entry failure.
3906 *
3907 * See Intel spec. 26.3.1.5 "Checks on Guest Non-Register State".
3908 */
3909 if (fSteppingDB)
3910 {
3911 Assert(pVCpu->hm.s.fSingleInstruction);
3912 Assert(pVCpu->cpum.GstCtx.eflags.Bits.u1TF);
3913
3914 uint32_t fIntrState = 0;
3915 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState);
3916 AssertRC(rc);
3917
3918 if (fIntrState & (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS))
3919 {
3920 fIntrState &= ~(VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS);
3921 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState);
3922 AssertRC(rc);
3923 }
3924 }
3925
3926 return VINF_SUCCESS;
3927}
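
/*
 * Summary of the DRx decisions in hmR0VmxExportSharedDebugState() above:
 *   - single-stepping without MTF, or hyper DR7 breakpoints armed: load the hyper
 *     debug state and intercept MOV DRx;
 *   - guest DR7 has breakpoints enabled (or DR7.GD set): load the guest debug state
 *     and do not intercept MOV DRx;
 *   - otherwise, if the guest debug state is not already active: keep DR0-3 lazy and
 *     intercept MOV DRx (required anyway to keep the guest DR6 value correct, as
 *     noted above).
 */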
3928
3929
3930/**
3931 * Exports certain guest MSRs into the VM-entry MSR-load and VM-exit MSR-store
3932 * areas.
3933 *
3934 * These MSRs will automatically be loaded to the host CPU on every successful
3935 * VM-entry and stored from the host CPU on every successful VM-exit.
3936 *
3937 * We create/update MSR slots for the host MSRs in the VM-exit MSR-load area. The
3938 * actual host MSR values are not updated here for performance reasons. See
3939 * hmR0VmxExportHostMsrs().
3940 *
3941 * We also export the guest sysenter MSRs into the guest-state area in the VMCS.
3942 *
3943 * @returns VBox status code.
3944 * @param pVCpu The cross context virtual CPU structure.
3945 * @param pVmxTransient The VMX-transient structure.
3946 *
3947 * @remarks No-long-jump zone!!!
3948 */
3949static int hmR0VmxExportGuestMsrs(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3950{
3951 AssertPtr(pVCpu);
3952 AssertPtr(pVmxTransient);
3953
3954 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3955 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
3956
3957 /*
3958 * MSRs for which we use the auto-load/store MSR area in the VMCS.
3959 * For 64-bit hosts, we load/restore them lazily, see hmR0VmxLazyLoadGuestMsrs(),
3960 * nothing to do here. The host MSR values are updated when it's safe in
3961 * hmR0VmxLazySaveHostMsrs().
3962 *
3963 * For nested-guests, the guest's MSRs from the VM-entry MSR-load area are already
3964 * loaded (into the guest-CPU context) by the VMLAUNCH/VMRESUME instruction
3965 * emulation. The merged MSR permission bitmap will ensure that we get VM-exits
3966 * for any MSR that are not part of the lazy MSRs so we do not need to place
3967 * those MSRs into the auto-load/store MSR area. Nothing to do here.
3968 */
3969 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_GUEST_AUTO_MSRS)
3970 {
3971 /* No auto-load/store MSRs currently. */
3972 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_GUEST_AUTO_MSRS);
3973 }
3974
3975 /*
3976 * Guest Sysenter MSRs.
3977 */
3978 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_MSR_MASK)
3979 {
3980 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SYSENTER_MSRS);
3981
3982 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_CS_MSR)
3983 {
3984 int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
3985 AssertRC(rc);
3986 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_CS_MSR);
3987 }
3988
3989 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_EIP_MSR)
3990 {
3991 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
3992 AssertRC(rc);
3993 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_EIP_MSR);
3994 }
3995
3996 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_ESP_MSR)
3997 {
3998 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
3999 AssertRC(rc);
4000 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_ESP_MSR);
4001 }
4002 }
4003
4004 /*
4005 * Guest/host EFER MSR.
4006 */
4007 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_EFER_MSR)
4008 {
4009 /* Whether we are using the VMCS to swap the EFER MSR must have been
4010 determined earlier while exporting VM-entry/VM-exit controls. */
4011 Assert(!(ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_ENTRY_EXIT_CTLS));
4012 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_EFER);
4013
4014 if (hmR0VmxShouldSwapEferMsr(pVCpu, pVmxTransient))
4015 {
4016 /*
4017 * EFER.LME is written by software, while EFER.LMA is set by the CPU to (CR0.PG & EFER.LME).
4018 * This means a guest can set EFER.LME=1 while CR0.PG=0 and EFER.LMA can remain 0.
4019 * VT-x requires that "IA-32e mode guest" VM-entry control must be identical to EFER.LMA
4020 * and to CR0.PG. Without unrestricted execution, CR0.PG (used for VT-x, not the shadow)
4021 * must always be 1. This forces us to effectively clear both EFER.LMA and EFER.LME until
4022 * the guest has also set CR0.PG=1. Otherwise, we would run into an invalid-guest state
4023 * during VM-entry.
4024 */
4025 uint64_t uGuestEferMsr = pCtx->msrEFER;
4026 if (!pVM->hmr0.s.vmx.fUnrestrictedGuest)
4027 {
4028 if (!(pCtx->msrEFER & MSR_K6_EFER_LMA))
4029 uGuestEferMsr &= ~MSR_K6_EFER_LME;
4030 else
4031 Assert((pCtx->msrEFER & (MSR_K6_EFER_LMA | MSR_K6_EFER_LME)) == (MSR_K6_EFER_LMA | MSR_K6_EFER_LME));
4032 }
4033
4034 /*
4035 * If the CPU supports VMCS controls for swapping EFER, use it. Otherwise, we have no option
4036 * but to use the auto-load store MSR area in the VMCS for swapping EFER. See @bugref{7368}.
4037 */
4038 if (g_fHmVmxSupportsVmcsEfer)
4039 {
4040 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_EFER_FULL, uGuestEferMsr);
4041 AssertRC(rc);
4042 }
4043 else
4044 {
4045 /*
4046 * We shall use the auto-load/store MSR area only for loading the EFER MSR but we must
4047 * continue to intercept guest read and write accesses to it, see @bugref{7386#c16}.
4048 */
4049 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER, uGuestEferMsr,
4050 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4051 AssertRCReturn(rc, rc);
4052 }
4053
4054 Log4Func(("efer=%#RX64 shadow=%#RX64\n", uGuestEferMsr, pCtx->msrEFER));
4055 }
4056 else if (!g_fHmVmxSupportsVmcsEfer)
4057 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER);
4058
4059 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_EFER_MSR);
4060 }
4061
4062 /*
4063 * Other MSRs.
4064 */
4065 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_OTHER_MSRS)
4066 {
4067 /* Speculation Control (R/W). */
4068 HMVMX_CPUMCTX_ASSERT(pVCpu, HM_CHANGED_GUEST_OTHER_MSRS);
4069 if (pVM->cpum.ro.GuestFeatures.fIbrs)
4070 {
4071 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_IA32_SPEC_CTRL, CPUMGetGuestSpecCtrl(pVCpu),
4072 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4073 AssertRCReturn(rc, rc);
4074 }
4075
4076 /* Last Branch Record. */
4077 if (pVM->hmr0.s.vmx.fLbr)
4078 {
4079 PVMXVMCSINFOSHARED const pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
4080 uint32_t const idFromIpMsrStart = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
4081 uint32_t const idToIpMsrStart = pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
4082 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
4083 Assert(cLbrStack <= 32);
4084 for (uint32_t i = 0; i < cLbrStack; i++)
4085 {
4086 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idFromIpMsrStart + i,
4087 pVmcsInfoShared->au64LbrFromIpMsr[i],
4088 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4089 AssertRCReturn(rc, rc);
4090
4091 /* Some CPUs don't have a Branch-To-IP MSR (P4 and related Xeons). */
4092 if (idToIpMsrStart != 0)
4093 {
4094 rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idToIpMsrStart + i,
4095 pVmcsInfoShared->au64LbrToIpMsr[i],
4096 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4097 AssertRCReturn(rc, rc);
4098 }
4099 }
4100
4101 /* Add LBR top-of-stack MSR (which contains the index to the most recent record). */
4102 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, pVM->hmr0.s.vmx.idLbrTosMsr,
4103 pVmcsInfoShared->u64LbrTosMsr, false /* fSetReadWrite */,
4104 false /* fUpdateHostMsr */);
4105 AssertRCReturn(rc, rc);
4106 }
4107
4108 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_OTHER_MSRS);
4109 }
4110
4111 return VINF_SUCCESS;
4112}
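
/*
 * Note on the LBR stack arithmetic above: cLbrStack is simply
 * idLbrFromIpMsrLast - idLbrFromIpMsrFirst + 1. As a purely hypothetical example, a
 * CPU whose last-branch-from MSRs span 0x680..0x69f would yield 0x69f - 0x680 + 1 = 32
 * entries, the maximum that the Assert(cLbrStack <= 32) above allows.
 */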
4113
4114
4115/**
4116 * Wrapper for running the guest code in VT-x.
4117 *
4118 * @returns VBox status code, no informational status codes.
4119 * @param pVCpu The cross context virtual CPU structure.
4120 * @param pVmxTransient The VMX-transient structure.
4121 *
4122 * @remarks No-long-jump zone!!!
4123 */
4124DECLINLINE(int) hmR0VmxRunGuest(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
4125{
4126 /* Mark that HM is the keeper of all guest-CPU registers now that we're going to execute guest code. */
4127 pVCpu->cpum.GstCtx.fExtrn |= HMVMX_CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_KEEPER_HM;
4128
4129 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
4130 bool const fResumeVM = RT_BOOL(pVmcsInfo->fVmcsState & VMX_V_VMCS_LAUNCH_STATE_LAUNCHED);
4131#ifdef VBOX_WITH_STATISTICS
4132 if (fResumeVM)
4133 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmResume);
4134 else
4135 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmLaunch);
4136#endif
4137 int rc = pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResumeVM);
4138 AssertMsg(rc <= VINF_SUCCESS, ("%Rrc\n", rc));
4139 return rc;
4140}
4141
4142
4143/**
4144 * Reports world-switch error and dumps some useful debug info.
4145 *
4146 * @param pVCpu The cross context virtual CPU structure.
4147 * @param rcVMRun The return code from VMLAUNCH/VMRESUME.
4148 * @param pVmxTransient The VMX-transient structure (only
4149 * exitReason updated).
4150 */
4151static void hmR0VmxReportWorldSwitchError(PVMCPUCC pVCpu, int rcVMRun, PVMXTRANSIENT pVmxTransient)
4152{
4153 Assert(pVCpu);
4154 Assert(pVmxTransient);
4155 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
4156
4157 Log4Func(("VM-entry failure: %Rrc\n", rcVMRun));
4158 switch (rcVMRun)
4159 {
4160 case VERR_VMX_INVALID_VMXON_PTR:
4161 AssertFailed();
4162 break;
4163 case VINF_SUCCESS: /* VMLAUNCH/VMRESUME succeeded but VM-entry failed... yeah, true story. */
4164 case VERR_VMX_UNABLE_TO_START_VM: /* VMLAUNCH/VMRESUME itself failed. */
4165 {
4166 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &pVCpu->hm.s.vmx.LastError.u32ExitReason);
4167 rc |= VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
4168 AssertRC(rc);
4169 vmxHCReadExitQualVmcs(pVCpu, pVmxTransient);
4170
4171 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
4172 /* LastError.idCurrentCpu was already updated in hmR0VmxPreRunGuestCommitted().
4173 Cannot do it here as we may have been long preempted. */
4174
4175#ifdef VBOX_STRICT
4176 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4177 Log4(("uExitReason %#RX32 (VmxTransient %#RX16)\n", pVCpu->hm.s.vmx.LastError.u32ExitReason,
4178 pVmxTransient->uExitReason));
4179 Log4(("Exit Qualification %#RX64\n", pVmxTransient->uExitQual));
4180 Log4(("InstrError %#RX32\n", pVCpu->hm.s.vmx.LastError.u32InstrError));
4181 if (pVCpu->hm.s.vmx.LastError.u32InstrError <= HMVMX_INSTR_ERROR_MAX)
4182 Log4(("InstrError Desc. \"%s\"\n", g_apszVmxInstrErrors[pVCpu->hm.s.vmx.LastError.u32InstrError]));
4183 else
4184 Log4(("InstrError Desc. Range exceeded %u\n", HMVMX_INSTR_ERROR_MAX));
4185 Log4(("Entered host CPU %u\n", pVCpu->hm.s.vmx.LastError.idEnteredCpu));
4186 Log4(("Current host CPU %u\n", pVCpu->hm.s.vmx.LastError.idCurrentCpu));
4187
4188 static struct
4189 {
4190 /** Name of the field to log. */
4191 const char *pszName;
4192 /** The VMCS field. */
4193 uint32_t uVmcsField;
4194 /** Whether host support of this field needs to be checked. */
4195 bool fCheckSupport;
4196 } const s_aVmcsFields[] =
4197 {
4198 { "VMX_VMCS32_CTRL_PIN_EXEC", VMX_VMCS32_CTRL_PIN_EXEC, false },
4199 { "VMX_VMCS32_CTRL_PROC_EXEC", VMX_VMCS32_CTRL_PROC_EXEC, false },
4200 { "VMX_VMCS32_CTRL_PROC_EXEC2", VMX_VMCS32_CTRL_PROC_EXEC2, true },
4201 { "VMX_VMCS32_CTRL_ENTRY", VMX_VMCS32_CTRL_ENTRY, false },
4202 { "VMX_VMCS32_CTRL_EXIT", VMX_VMCS32_CTRL_EXIT, false },
4203 { "VMX_VMCS32_CTRL_CR3_TARGET_COUNT", VMX_VMCS32_CTRL_CR3_TARGET_COUNT, false },
4204 { "VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO", VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, false },
4205 { "VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE", VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, false },
4206 { "VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH", VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, false },
4207 { "VMX_VMCS32_CTRL_TPR_THRESHOLD", VMX_VMCS32_CTRL_TPR_THRESHOLD, false },
4208 { "VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, false },
4209 { "VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, false },
4210 { "VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, false },
4211 { "VMX_VMCS32_CTRL_EXCEPTION_BITMAP", VMX_VMCS32_CTRL_EXCEPTION_BITMAP, false },
4212 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, false },
4213 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, false },
4214 { "VMX_VMCS_CTRL_CR0_MASK", VMX_VMCS_CTRL_CR0_MASK, false },
4215 { "VMX_VMCS_CTRL_CR0_READ_SHADOW", VMX_VMCS_CTRL_CR0_READ_SHADOW, false },
4216 { "VMX_VMCS_CTRL_CR4_MASK", VMX_VMCS_CTRL_CR4_MASK, false },
4217 { "VMX_VMCS_CTRL_CR4_READ_SHADOW", VMX_VMCS_CTRL_CR4_READ_SHADOW, false },
4218 { "VMX_VMCS64_CTRL_EPTP_FULL", VMX_VMCS64_CTRL_EPTP_FULL, true },
4219 { "VMX_VMCS_GUEST_RIP", VMX_VMCS_GUEST_RIP, false },
4220 { "VMX_VMCS_GUEST_RSP", VMX_VMCS_GUEST_RSP, false },
4221 { "VMX_VMCS_GUEST_RFLAGS", VMX_VMCS_GUEST_RFLAGS, false },
4222 { "VMX_VMCS16_VPID", VMX_VMCS16_VPID, true, },
4223 { "VMX_VMCS_HOST_CR0", VMX_VMCS_HOST_CR0, false },
4224 { "VMX_VMCS_HOST_CR3", VMX_VMCS_HOST_CR3, false },
4225 { "VMX_VMCS_HOST_CR4", VMX_VMCS_HOST_CR4, false },
4226 /* The order of selector fields below are fixed! */
4227 { "VMX_VMCS16_HOST_ES_SEL", VMX_VMCS16_HOST_ES_SEL, false },
4228 { "VMX_VMCS16_HOST_CS_SEL", VMX_VMCS16_HOST_CS_SEL, false },
4229 { "VMX_VMCS16_HOST_SS_SEL", VMX_VMCS16_HOST_SS_SEL, false },
4230 { "VMX_VMCS16_HOST_DS_SEL", VMX_VMCS16_HOST_DS_SEL, false },
4231 { "VMX_VMCS16_HOST_FS_SEL", VMX_VMCS16_HOST_FS_SEL, false },
4232 { "VMX_VMCS16_HOST_GS_SEL", VMX_VMCS16_HOST_GS_SEL, false },
4233 { "VMX_VMCS16_HOST_TR_SEL", VMX_VMCS16_HOST_TR_SEL, false },
4234 /* End of ordered selector fields. */
4235 { "VMX_VMCS_HOST_TR_BASE", VMX_VMCS_HOST_TR_BASE, false },
4236 { "VMX_VMCS_HOST_GDTR_BASE", VMX_VMCS_HOST_GDTR_BASE, false },
4237 { "VMX_VMCS_HOST_IDTR_BASE", VMX_VMCS_HOST_IDTR_BASE, false },
4238 { "VMX_VMCS32_HOST_SYSENTER_CS", VMX_VMCS32_HOST_SYSENTER_CS, false },
4239 { "VMX_VMCS_HOST_SYSENTER_EIP", VMX_VMCS_HOST_SYSENTER_EIP, false },
4240 { "VMX_VMCS_HOST_SYSENTER_ESP", VMX_VMCS_HOST_SYSENTER_ESP, false },
4241 { "VMX_VMCS_HOST_RSP", VMX_VMCS_HOST_RSP, false },
4242 { "VMX_VMCS_HOST_RIP", VMX_VMCS_HOST_RIP, false }
4243 };
4244
4245 RTGDTR HostGdtr;
4246 ASMGetGDTR(&HostGdtr);
4247
4248 uint32_t const cVmcsFields = RT_ELEMENTS(s_aVmcsFields);
4249 for (uint32_t i = 0; i < cVmcsFields; i++)
4250 {
4251 uint32_t const uVmcsField = s_aVmcsFields[i].uVmcsField;
4252
4253 bool fSupported;
4254 if (!s_aVmcsFields[i].fCheckSupport)
4255 fSupported = true;
4256 else
4257 {
4258 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4259 switch (uVmcsField)
4260 {
4261 case VMX_VMCS64_CTRL_EPTP_FULL: fSupported = pVM->hmr0.s.fNestedPaging; break;
4262 case VMX_VMCS16_VPID: fSupported = pVM->hmr0.s.vmx.fVpid; break;
4263 case VMX_VMCS32_CTRL_PROC_EXEC2:
4264 fSupported = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
4265 break;
4266 default:
4267 AssertMsgFailedReturnVoid(("Failed to provide VMCS field support for %#RX32\n", uVmcsField));
4268 }
4269 }
4270
4271 if (fSupported)
4272 {
4273 uint8_t const uWidth = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_WIDTH);
4274 switch (uWidth)
4275 {
4276 case VMX_VMCSFIELD_WIDTH_16BIT:
4277 {
4278 uint16_t u16Val;
4279 rc = VMXReadVmcs16(uVmcsField, &u16Val);
4280 AssertRC(rc);
4281 Log4(("%-40s = %#RX16\n", s_aVmcsFields[i].pszName, u16Val));
4282
4283 if ( uVmcsField >= VMX_VMCS16_HOST_ES_SEL
4284 && uVmcsField <= VMX_VMCS16_HOST_TR_SEL)
4285 {
4286 if (u16Val < HostGdtr.cbGdt)
4287 {
4288 /* Order of selectors in s_apszSel is fixed and matches the order in s_aVmcsFields. */
4289 static const char * const s_apszSel[] = { "Host ES", "Host CS", "Host SS", "Host DS",
4290 "Host FS", "Host GS", "Host TR" };
4291 uint8_t const idxSel = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_INDEX);
4292 Assert(idxSel < RT_ELEMENTS(s_apszSel));
4293 PCX86DESCHC pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u16Val & X86_SEL_MASK));
4294 hmR0DumpDescriptor(pDesc, u16Val, s_apszSel[idxSel]);
4295 }
4296 else
4297 Log4((" Selector value exceeds GDT limit!\n"));
4298 }
4299 break;
4300 }
4301
4302 case VMX_VMCSFIELD_WIDTH_32BIT:
4303 {
4304 uint32_t u32Val;
4305 rc = VMXReadVmcs32(uVmcsField, &u32Val);
4306 AssertRC(rc);
4307 Log4(("%-40s = %#RX32\n", s_aVmcsFields[i].pszName, u32Val));
4308 break;
4309 }
4310
4311 case VMX_VMCSFIELD_WIDTH_64BIT:
4312 case VMX_VMCSFIELD_WIDTH_NATURAL:
4313 {
4314 uint64_t u64Val;
4315 rc = VMXReadVmcs64(uVmcsField, &u64Val);
4316 AssertRC(rc);
4317 Log4(("%-40s = %#RX64\n", s_aVmcsFields[i].pszName, u64Val));
4318 break;
4319 }
4320 }
4321 }
4322 }
4323
4324 Log4(("MSR_K6_EFER = %#RX64\n", ASMRdMsr(MSR_K6_EFER)));
4325 Log4(("MSR_K8_CSTAR = %#RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
4326 Log4(("MSR_K8_LSTAR = %#RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
4327 Log4(("MSR_K6_STAR = %#RX64\n", ASMRdMsr(MSR_K6_STAR)));
4328 Log4(("MSR_K8_SF_MASK = %#RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
4329 Log4(("MSR_K8_KERNEL_GS_BASE = %#RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
4330#endif /* VBOX_STRICT */
4331 break;
4332 }
4333
4334 default:
4335 /* Impossible */
4336 AssertMsgFailed(("hmR0VmxReportWorldSwitchError %Rrc (%#x)\n", rcVMRun, rcVMRun));
4337 break;
4338 }
4339}
4340
4341
4342/**
4343 * Sets up the usage of TSC-offsetting and updates the VMCS.
4344 *
4345 * If offsetting is not possible, cause VM-exits on RDTSC(P)s. Also sets up the
4346 * VMX-preemption timer.
4347 *
4348 * @returns VBox status code.
4349 * @param pVCpu The cross context virtual CPU structure.
4350 * @param pVmxTransient The VMX-transient structure.
4351 * @param idCurrentCpu The current CPU number.
4352 *
4353 * @remarks No-long-jump zone!!!
4354 */
4355static void hmR0VmxUpdateTscOffsettingAndPreemptTimer(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, RTCPUID idCurrentCpu)
4356{
4357 bool fOffsettedTsc;
4358 bool fParavirtTsc;
4359 uint64_t uTscOffset;
4360 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4361 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4362
4363 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
4364 {
4365 /* The TMCpuTickGetDeadlineAndTscOffset function is expensive (calling it on
4366 every entry slowed down the bs2-test1 CPUID testcase by ~33% on a 10980xe). */
4367 uint64_t cTicksToDeadline;
4368 if ( idCurrentCpu == pVCpu->hmr0.s.idLastCpu
4369 && TMVirtualSyncIsCurrentDeadlineVersion(pVM, pVCpu->hmr0.s.vmx.uTscDeadlineVersion))
4370 {
4371 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadline);
4372 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
4373 cTicksToDeadline = pVCpu->hmr0.s.vmx.uTscDeadline - SUPReadTsc();
4374 if ((int64_t)cTicksToDeadline > 0)
4375 { /* hopefully */ }
4376 else
4377 {
4378 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadlineExpired);
4379 cTicksToDeadline = 0;
4380 }
4381 }
4382 else
4383 {
4384 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadline);
4385 cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVM, pVCpu, &uTscOffset, &fOffsettedTsc, &fParavirtTsc,
4386 &pVCpu->hmr0.s.vmx.uTscDeadline,
4387 &pVCpu->hmr0.s.vmx.uTscDeadlineVersion);
4388 pVCpu->hmr0.s.vmx.uTscDeadline += cTicksToDeadline;
4389 if (cTicksToDeadline >= 128)
4390 { /* hopefully */ }
4391 else
4392 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadlineExpired);
4393 }
4394
4395 /* Make sure the returned values have sane upper and lower boundaries. */
4396 uint64_t const u64CpuHz = SUPGetCpuHzFromGipBySetIndex(g_pSUPGlobalInfoPage, pVCpu->iHostCpuSet);
4397 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64th of a second, 15.625ms. */ /** @todo r=bird: Once real+virtual timers move to separate thread, we can raise the upper limit (16ms isn't much). ASSUMES working poke cpu function. */
4398 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 32768); /* 1/32768th of a second, ~30us. */
4399 cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
4400
4401 /** @todo r=ramshankar: We need to find a way to integrate nested-guest
4402 * preemption timers here. We probably need to clamp the preemption timer,
4403 * after converting the timer value to the host. */
4404 uint32_t const cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
4405 int rc = VMXWriteVmcs32(VMX_VMCS32_PREEMPT_TIMER_VALUE, cPreemptionTickCount);
4406 AssertRC(rc);
4407 }
4408 else
4409 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
4410
4411 if (fParavirtTsc)
4412 {
4413 /* Currently neither Hyper-V nor KVM needs to update their paravirt. TSC
4414 information before every VM-entry, hence this is disabled for performance's sake. */
4415#if 0
4416 int rc = GIMR0UpdateParavirtTsc(pVM, 0 /* u64Offset */);
4417 AssertRC(rc);
4418#endif
4419 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscParavirt);
4420 }
4421
4422 if ( fOffsettedTsc
4423 && RT_LIKELY(!pVCpu->hmr0.s.fDebugWantRdTscExit))
4424 {
4425 if (pVmxTransient->fIsNestedGuest)
4426 uTscOffset = CPUMApplyNestedGuestTscOffset(pVCpu, uTscOffset);
4427 hmR0VmxSetTscOffsetVmcs(pVmcsInfo, uTscOffset);
4428 hmR0VmxRemoveProcCtlsVmcs(pVCpu, pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
4429 }
4430 else
4431 {
4432 /* We can't use TSC-offsetting (non-fixed TSC, warp drive active etc.), VM-exit on RDTSC(P). */
4433 hmR0VmxSetProcCtlsVmcs(pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
4434 }
4435}
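/*
 * Illustrative sketch (not part of this file): the clamping performed above,
 * reduced to a standalone helper. The helper name and parameters are made up
 * for the example; the real code derives them from TM/SUP and the VMX
 * preemption-timer shift reported by the CPU.
 */
static uint32_t exampleCalcPreemptTimerTicks(uint64_t cTicksToDeadline, uint64_t u64CpuHz, uint8_t cShift)
{
    /* Clamp the deadline to between roughly 1/32768th and 1/64th of a second worth of TSC ticks. */
    uint64_t const cMaxTicks = u64CpuHz / 64;
    uint64_t const cMinTicks = u64CpuHz / 32768;
    if (cTicksToDeadline > cMaxTicks)
        cTicksToDeadline = cMaxTicks;
    else if (cTicksToDeadline < cMinTicks)
        cTicksToDeadline = cMinTicks;

    /* The VMX-preemption timer counts at the TSC rate right-shifted by the reported shift. */
    cTicksToDeadline >>= cShift;

    /* The VMCS field is 32 bits wide; keep a little headroom like the code above does. */
    return (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
}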
4436
4437
4438/**
4439 * Worker for VMXR0ImportStateOnDemand.
4440 *
4441 * @returns VBox status code.
4442 * @param pVCpu The cross context virtual CPU structure.
4443 * @param pVmcsInfo The VMCS info. object.
4444 * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
4445 */
4446static int hmR0VmxImportGuestState(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint64_t fWhat)
4447{
4448 int rc = VINF_SUCCESS;
4449 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4450 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
4451 uint32_t u32Val;
4452
4453 /*
4454 * Note! This is a hack to work around a mysterious BSOD observed with release builds
4455 * on Windows 10 64-bit hosts. Profile and debug builds are not affected and
4456 * neither are other host platforms.
4457 *
4458 * Committing this temporarily as it prevents BSOD.
4459 *
4460 * Update: This is very likely a compiler optimization bug, see @bugref{9180}.
4461 */
4462#ifdef RT_OS_WINDOWS
4463 if (pVM == 0 || pVM == (void *)(uintptr_t)-1)
4464 return VERR_HM_IPE_1;
4465#endif
4466
4467 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatImportGuestState, x);
4468
4469 /*
4470 * We disable interrupts to make the updating of the state, and in particular
4471 * the fExtrn modification, atomic with respect to preemption hooks.
4472 */
4473 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
4474
4475 fWhat &= pCtx->fExtrn;
4476 if (fWhat)
4477 {
4478 do
4479 {
4480 if (fWhat & CPUMCTX_EXTRN_RIP)
4481 vmxHCImportGuestRip(pVCpu);
4482
4483 if (fWhat & CPUMCTX_EXTRN_RFLAGS)
4484 vmxHCImportGuestRFlags(pVCpu, pVmcsInfo);
4485
4486 if (fWhat & (CPUMCTX_EXTRN_INHIBIT_INT | CPUMCTX_EXTRN_INHIBIT_NMI))
4487 vmxHCImportGuestIntrState(pVCpu, pVmcsInfo);
4488
4489 if (fWhat & CPUMCTX_EXTRN_RSP)
4490 {
4491 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_RSP, &pCtx->rsp);
4492 AssertRC(rc);
4493 }
4494
4495 if (fWhat & CPUMCTX_EXTRN_SREG_MASK)
4496 {
4497 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmcsInfo->pShared;
4498 bool const fRealOnV86Active = pVmcsInfoShared->RealMode.fRealOnV86Active;
4499 if (fWhat & CPUMCTX_EXTRN_CS)
4500 {
4501 vmxHCImportGuestSegReg(pVCpu, X86_SREG_CS);
4502 vmxHCImportGuestRip(pVCpu);
4503 if (fRealOnV86Active)
4504 pCtx->cs.Attr.u = pVmcsInfoShared->RealMode.AttrCS.u;
4505 EMHistoryUpdatePC(pVCpu, pCtx->cs.u64Base + pCtx->rip, true /* fFlattened */);
4506 }
4507 if (fWhat & CPUMCTX_EXTRN_SS)
4508 {
4509 vmxHCImportGuestSegReg(pVCpu, X86_SREG_SS);
4510 if (fRealOnV86Active)
4511 pCtx->ss.Attr.u = pVmcsInfoShared->RealMode.AttrSS.u;
4512 }
4513 if (fWhat & CPUMCTX_EXTRN_DS)
4514 {
4515 vmxHCImportGuestSegReg(pVCpu, X86_SREG_DS);
4516 if (fRealOnV86Active)
4517 pCtx->ds.Attr.u = pVmcsInfoShared->RealMode.AttrDS.u;
4518 }
4519 if (fWhat & CPUMCTX_EXTRN_ES)
4520 {
4521 vmxHCImportGuestSegReg(pVCpu, X86_SREG_ES);
4522 if (fRealOnV86Active)
4523 pCtx->es.Attr.u = pVmcsInfoShared->RealMode.AttrES.u;
4524 }
4525 if (fWhat & CPUMCTX_EXTRN_FS)
4526 {
4527 vmxHCImportGuestSegReg(pVCpu, X86_SREG_FS);
4528 if (fRealOnV86Active)
4529 pCtx->fs.Attr.u = pVmcsInfoShared->RealMode.AttrFS.u;
4530 }
4531 if (fWhat & CPUMCTX_EXTRN_GS)
4532 {
4533 vmxHCImportGuestSegReg(pVCpu, X86_SREG_GS);
4534 if (fRealOnV86Active)
4535 pCtx->gs.Attr.u = pVmcsInfoShared->RealMode.AttrGS.u;
4536 }
4537 }
4538
4539 if (fWhat & CPUMCTX_EXTRN_TABLE_MASK)
4540 {
4541 if (fWhat & CPUMCTX_EXTRN_LDTR)
4542 vmxHCImportGuestLdtr(pVCpu);
4543
4544 if (fWhat & CPUMCTX_EXTRN_GDTR)
4545 {
4546 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_GDTR_BASE, &pCtx->gdtr.pGdt); AssertRC(rc);
4547 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, &u32Val); AssertRC(rc);
4548 pCtx->gdtr.cbGdt = u32Val;
4549 }
4550
4551 /* Guest IDTR. */
4552 if (fWhat & CPUMCTX_EXTRN_IDTR)
4553 {
4554 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_IDTR_BASE, &pCtx->idtr.pIdt); AssertRC(rc);
4555 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, &u32Val); AssertRC(rc);
4556 pCtx->idtr.cbIdt = u32Val;
4557 }
4558
4559 /* Guest TR. */
4560 if (fWhat & CPUMCTX_EXTRN_TR)
4561 {
4562 /* Real-mode emulation using virtual-8086 mode has the fake TSS (pRealModeTSS) in TR,
4563 so we don't need to import that one. */
4564 if (!pVmcsInfo->pShared->RealMode.fRealOnV86Active)
4565 vmxHCImportGuestTr(pVCpu);
4566 }
4567 }
4568
4569 if (fWhat & CPUMCTX_EXTRN_DR7)
4570 {
4571 if (!pVCpu->hmr0.s.fUsingHyperDR7)
4572 {
4573 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_DR7, &pCtx->dr[7]);
4574 AssertRC(rc);
4575 }
4576 }
4577
4578 if (fWhat & CPUMCTX_EXTRN_SYSENTER_MSRS)
4579 {
4580 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, &pCtx->SysEnter.eip); AssertRC(rc);
4581 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, &pCtx->SysEnter.esp); AssertRC(rc);
4582 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, &u32Val); AssertRC(rc);
4583 pCtx->SysEnter.cs = u32Val;
4584 }
4585
4586 if (fWhat & CPUMCTX_EXTRN_KERNEL_GS_BASE)
4587 {
4588 if ( pVM->hmr0.s.fAllow64BitGuests
4589 && (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST))
4590 pCtx->msrKERNELGSBASE = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
4591 }
4592
4593 if (fWhat & CPUMCTX_EXTRN_SYSCALL_MSRS)
4594 {
4595 if ( pVM->hmr0.s.fAllow64BitGuests
4596 && (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST))
4597 {
4598 pCtx->msrLSTAR = ASMRdMsr(MSR_K8_LSTAR);
4599 pCtx->msrSTAR = ASMRdMsr(MSR_K6_STAR);
4600 pCtx->msrSFMASK = ASMRdMsr(MSR_K8_SF_MASK);
4601 }
4602 }
4603
4604 if (fWhat & (CPUMCTX_EXTRN_TSC_AUX | CPUMCTX_EXTRN_OTHER_MSRS))
4605 {
4606 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmcsInfo->pShared;
4607 PCVMXAUTOMSR pMsrs = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
4608 uint32_t const cMsrs = pVmcsInfo->cExitMsrStore;
4609 Assert(pMsrs);
4610 Assert(cMsrs <= VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc));
4611 Assert(sizeof(*pMsrs) * cMsrs <= X86_PAGE_4K_SIZE);
4612 for (uint32_t i = 0; i < cMsrs; i++)
4613 {
4614 uint32_t const idMsr = pMsrs[i].u32Msr;
4615 switch (idMsr)
4616 {
4617 case MSR_K8_TSC_AUX: CPUMSetGuestTscAux(pVCpu, pMsrs[i].u64Value); break;
4618 case MSR_IA32_SPEC_CTRL: CPUMSetGuestSpecCtrl(pVCpu, pMsrs[i].u64Value); break;
4619 case MSR_K6_EFER: /* Can't be changed without causing a VM-exit */ break;
4620 default:
4621 {
4622 uint32_t idxLbrMsr;
4623 if (pVM->hmr0.s.vmx.fLbr)
4624 {
4625 if (hmR0VmxIsLbrBranchFromMsr(pVM, idMsr, &idxLbrMsr))
4626 {
4627 Assert(idxLbrMsr < RT_ELEMENTS(pVmcsInfoShared->au64LbrFromIpMsr));
4628 pVmcsInfoShared->au64LbrFromIpMsr[idxLbrMsr] = pMsrs[i].u64Value;
4629 break;
4630 }
4631 if (hmR0VmxIsLbrBranchToMsr(pVM, idMsr, &idxLbrMsr))
4632 {
4633 Assert(idxLbrMsr < RT_ELEMENTS(pVmcsInfoShared->au64LbrToIpMsr));
4634 pVmcsInfoShared->au64LbrToIpMsr[idxLbrMsr] = pMsrs[i].u64Value;
4635 break;
4636 }
4637 if (idMsr == pVM->hmr0.s.vmx.idLbrTosMsr)
4638 {
4639 pVmcsInfoShared->u64LbrTosMsr = pMsrs[i].u64Value;
4640 break;
4641 }
4642 /* Fallthru (no break) */
4643 }
4644 pCtx->fExtrn = 0;
4645 pVCpu->hm.s.u32HMError = pMsrs->u32Msr;
4646 ASMSetFlags(fEFlags);
4647 AssertMsgFailed(("Unexpected MSR in auto-load/store area. idMsr=%#RX32 cMsrs=%u\n", idMsr, cMsrs));
4648 return VERR_HM_UNEXPECTED_LD_ST_MSR;
4649 }
4650 }
4651 }
4652 }
4653
4654 if (fWhat & CPUMCTX_EXTRN_CR_MASK)
4655 {
4656 if (fWhat & CPUMCTX_EXTRN_CR0)
4657 {
4658 uint64_t u64Cr0;
4659 uint64_t u64Shadow;
4660 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR0, &u64Cr0); AssertRC(rc);
4661 rc = VMXReadVmcsNw(VMX_VMCS_CTRL_CR0_READ_SHADOW, &u64Shadow); AssertRC(rc);
4662#ifndef VBOX_WITH_NESTED_HWVIRT_VMX
4663 u64Cr0 = (u64Cr0 & ~pVmcsInfo->u64Cr0Mask)
4664 | (u64Shadow & pVmcsInfo->u64Cr0Mask);
4665#else
4666 if (!CPUMIsGuestInVmxNonRootMode(pCtx))
4667 {
4668 u64Cr0 = (u64Cr0 & ~pVmcsInfo->u64Cr0Mask)
4669 | (u64Shadow & pVmcsInfo->u64Cr0Mask);
4670 }
4671 else
4672 {
4673 /*
4674 * We've merged the guest and nested-guest's CR0 guest/host mask while executing
4675 * the nested-guest using hardware-assisted VMX. Accordingly we need to
4676 * re-construct CR0. See @bugref{9180#c95} for details.
4677 */
4678 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
4679 PVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
4680 u64Cr0 = (u64Cr0 & ~pVmcsInfo->u64Cr0Mask)
4681 | (pVmcsNstGst->u64GuestCr0.u & pVmcsNstGst->u64Cr0Mask.u)
4682 | (u64Shadow & (pVmcsInfoGst->u64Cr0Mask & ~pVmcsNstGst->u64Cr0Mask.u));
4683 }
4684#endif
4685 VMMRZCallRing3Disable(pVCpu); /* May call into PGM which has Log statements. */
4686 CPUMSetGuestCR0(pVCpu, u64Cr0);
4687 VMMRZCallRing3Enable(pVCpu);
4688 }
4689
4690 if (fWhat & CPUMCTX_EXTRN_CR4)
4691 {
4692 uint64_t u64Cr4;
4693 uint64_t u64Shadow;
4694 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR4, &u64Cr4); AssertRC(rc);
4695 rc |= VMXReadVmcsNw(VMX_VMCS_CTRL_CR4_READ_SHADOW, &u64Shadow); AssertRC(rc);
4696#ifndef VBOX_WITH_NESTED_HWVIRT_VMX
4697 u64Cr4 = (u64Cr4 & ~pVmcsInfo->u64Cr4Mask)
4698 | (u64Shadow & pVmcsInfo->u64Cr4Mask);
4699#else
4700 if (!CPUMIsGuestInVmxNonRootMode(pCtx))
4701 {
4702 u64Cr4 = (u64Cr4 & ~pVmcsInfo->u64Cr4Mask)
4703 | (u64Shadow & pVmcsInfo->u64Cr4Mask);
4704 }
4705 else
4706 {
4707 /*
4708 * We've merged the guest and nested-guest's CR4 guest/host mask while executing
4709 * the nested-guest using hardware-assisted VMX. Accordingly we need to
4710 * re-construct CR4. See @bugref{9180#c95} for details.
4711 */
4712 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
4713 PVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
4714 u64Cr4 = (u64Cr4 & ~pVmcsInfo->u64Cr4Mask)
4715 | (pVmcsNstGst->u64GuestCr4.u & pVmcsNstGst->u64Cr4Mask.u)
4716 | (u64Shadow & (pVmcsInfoGst->u64Cr4Mask & ~pVmcsNstGst->u64Cr4Mask.u));
4717 }
4718#endif
4719 pCtx->cr4 = u64Cr4;
4720 }
4721
4722 if (fWhat & CPUMCTX_EXTRN_CR3)
4723 {
4724 /* CR0.PG bit changes are always intercepted, so it's up to date. */
4725 if ( pVM->hmr0.s.vmx.fUnrestrictedGuest
4726 || ( pVM->hmr0.s.fNestedPaging
4727 && CPUMIsGuestPagingEnabledEx(pCtx)))
4728 {
4729 uint64_t u64Cr3;
4730 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR3, &u64Cr3); AssertRC(rc);
4731 if (pCtx->cr3 != u64Cr3)
4732 {
4733 pCtx->cr3 = u64Cr3;
4734 VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3);
4735 }
4736
4737 /*
4738 * If the guest is in PAE mode, sync back the PDPE's into the guest state.
4739 * CR4.PAE, CR0.PG, EFER MSR changes are always intercepted, so they're up to date.
4740 */
4741 if (CPUMIsGuestInPAEModeEx(pCtx))
4742 {
4743 X86PDPE aPaePdpes[4];
4744 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, &aPaePdpes[0].u); AssertRC(rc);
4745 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, &aPaePdpes[1].u); AssertRC(rc);
4746 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, &aPaePdpes[2].u); AssertRC(rc);
4747 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, &aPaePdpes[3].u); AssertRC(rc);
4748 if (memcmp(&aPaePdpes[0], &pCtx->aPaePdpes[0], sizeof(aPaePdpes)))
4749 {
4750 memcpy(&pCtx->aPaePdpes[0], &aPaePdpes[0], sizeof(aPaePdpes));
4751 /* PGM now updates PAE PDPTEs while updating CR3. */
4752 VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3);
4753 }
4754 }
4755 }
4756 }
4757 }
4758
4759#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4760 if (fWhat & CPUMCTX_EXTRN_HWVIRT)
4761 {
4762 if ( (pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING)
4763 && !CPUMIsGuestInVmxNonRootMode(pCtx))
4764 {
4765 Assert(CPUMIsGuestInVmxRootMode(pCtx));
4766 rc = vmxHCCopyShadowToNstGstVmcs(pVCpu, pVmcsInfo);
4767 if (RT_SUCCESS(rc))
4768 { /* likely */ }
4769 else
4770 break;
4771 }
4772 }
4773#endif
4774 } while (0);
4775
4776 if (RT_SUCCESS(rc))
4777 {
4778 /* Update fExtrn. */
4779 pCtx->fExtrn &= ~fWhat;
4780
4781 /* If everything has been imported, clear the HM keeper bit. */
4782 if (!(pCtx->fExtrn & HMVMX_CPUMCTX_EXTRN_ALL))
4783 {
4784 pCtx->fExtrn &= ~CPUMCTX_EXTRN_KEEPER_HM;
4785 Assert(!pCtx->fExtrn);
4786 }
4787 }
4788 }
4789 else
4790 AssertMsg(!pCtx->fExtrn || (pCtx->fExtrn & HMVMX_CPUMCTX_EXTRN_ALL), ("%#RX64\n", pCtx->fExtrn));
4791
4792 /*
4793 * Restore interrupts.
4794 */
4795 ASMSetFlags(fEFlags);
4796
4797 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatImportGuestState, x);
4798
4799 if (RT_SUCCESS(rc))
4800 { /* likely */ }
4801 else
4802 return rc;
4803
4804 /*
4805 * Honor any pending CR3 updates.
4806 *
4807 * Consider this scenario: VM-exit -> VMMRZCallRing3Enable() -> do stuff that causes a longjmp -> VMXR0CallRing3Callback()
4808 * -> VMMRZCallRing3Disable() -> hmR0VmxImportGuestState() -> Sets VMCPU_FF_HM_UPDATE_CR3 pending -> return from the longjmp
4809 * -> continue with VM-exit handling -> hmR0VmxImportGuestState() and here we are.
4810 *
4811 * The reason for such complicated handling is that VM-exits that call into PGM expect CR3 to be up-to-date and thus
4812 * if any CR3-saves -before- the VM-exit (longjmp) postponed the CR3 update via the force-flag, any VM-exit handler that
4813 * calls into PGM when it re-saves CR3 will end up here and we call PGMUpdateCR3(). This is why the code below should
4814 * -NOT- check if CPUMCTX_EXTRN_CR3 is set!
4815 *
4816 * The longjmp exit path can't check these CR3 force-flags and call code that takes a lock again. We cover for it here.
4817 *
4818 * The force-flag is checked first as it's cheaper for potential superfluous calls to this function.
4819 */
4820 if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3)
4821 && VMMRZCallRing3IsEnabled(pVCpu))
4822 {
4823 Assert(!(ASMAtomicUoReadU64(&pCtx->fExtrn) & CPUMCTX_EXTRN_CR3));
4824 PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
4825 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
4826 }
4827
4828 return VINF_SUCCESS;
4829}
4830
4831
4832/**
4833 * Saves the guest state from the VMCS into the guest-CPU context.
4834 *
4835 * @returns VBox status code.
4836 * @param pVCpu The cross context virtual CPU structure.
4837 * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
4838 */
4839VMMR0DECL(int) VMXR0ImportStateOnDemand(PVMCPUCC pVCpu, uint64_t fWhat)
4840{
4841 AssertPtr(pVCpu);
4842 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4843 return hmR0VmxImportGuestState(pVCpu, pVmcsInfo, fWhat);
4844}
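/*
 * Hedged usage sketch (hypothetical caller, not part of this file): importing
 * just RIP and RFLAGS on demand. Only fields still marked external in
 * pCtx->fExtrn are actually read from the VMCS; everything else is a no-op.
 */
static int exampleImportRipAndRflags(PVMCPUCC pVCpu)
{
    int rc = VMXR0ImportStateOnDemand(pVCpu, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS);
    AssertRCReturn(rc, rc);
    return VINF_SUCCESS;
}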
4845
4846
4847/**
4848 * Gets VMX VM-exit auxiliary information.
4849 *
4850 * @returns VBox status code.
4851 * @param pVCpu The cross context virtual CPU structure.
4852 * @param pVmxExitAux Where to store the VM-exit auxiliary info.
4853 * @param fWhat What to fetch, HMVMX_READ_XXX.
4854 */
4855VMMR0DECL(int) VMXR0GetExitAuxInfo(PVMCPUCC pVCpu, PVMXEXITAUX pVmxExitAux, uint32_t fWhat)
4856{
4857 PVMXTRANSIENT pVmxTransient = pVCpu->hmr0.s.vmx.pVmxTransient;
4858 if (RT_LIKELY(pVmxTransient))
4859 {
4860 AssertCompile(sizeof(fWhat) == sizeof(pVmxTransient->fVmcsFieldsRead));
4861
4862 /* The exit reason is always available. */
4863 pVmxExitAux->uReason = pVmxTransient->uExitReason;
4864
4865 if (fWhat & HMVMX_READ_EXIT_QUALIFICATION)
4866 {
4867 vmxHCReadExitQualVmcs(pVCpu, pVmxTransient);
4868 pVmxExitAux->u64Qual = pVmxTransient->uExitQual;
4869#ifdef VBOX_STRICT
4870 fWhat &= ~HMVMX_READ_EXIT_QUALIFICATION;
4871#endif
4872 }
4873
4874 if (fWhat & HMVMX_READ_IDT_VECTORING_INFO)
4875 {
4876 vmxHCReadIdtVectoringInfoVmcs(pVCpu, pVmxTransient);
4877 pVmxExitAux->uIdtVectoringInfo = pVmxTransient->uIdtVectoringInfo;
4878#ifdef VBOX_STRICT
4879 fWhat &= ~HMVMX_READ_IDT_VECTORING_INFO;
4880#endif
4881 }
4882
4883 if (fWhat & HMVMX_READ_IDT_VECTORING_ERROR_CODE)
4884 {
4885 vmxHCReadIdtVectoringErrorCodeVmcs(pVCpu, pVmxTransient);
4886 pVmxExitAux->uIdtVectoringErrCode = pVmxTransient->uIdtVectoringErrorCode;
4887#ifdef VBOX_STRICT
4888 fWhat &= ~HMVMX_READ_IDT_VECTORING_ERROR_CODE;
4889#endif
4890 }
4891
4892 if (fWhat & HMVMX_READ_EXIT_INSTR_LEN)
4893 {
4894 vmxHCReadExitInstrLenVmcs(pVCpu, pVmxTransient);
4895 pVmxExitAux->cbInstr = pVmxTransient->cbExitInstr;
4896#ifdef VBOX_STRICT
4897 fWhat &= ~HMVMX_READ_EXIT_INSTR_LEN;
4898#endif
4899 }
4900
4901 if (fWhat & HMVMX_READ_EXIT_INTERRUPTION_INFO)
4902 {
4903 vmxHCReadExitIntInfoVmcs(pVCpu, pVmxTransient);
4904 pVmxExitAux->uExitIntInfo = pVmxTransient->uExitIntInfo;
4905#ifdef VBOX_STRICT
4906 fWhat &= ~HMVMX_READ_EXIT_INTERRUPTION_INFO;
4907#endif
4908 }
4909
4910 if (fWhat & HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE)
4911 {
4912 vmxHCReadExitIntErrorCodeVmcs(pVCpu, pVmxTransient);
4913 pVmxExitAux->uExitIntErrCode = pVmxTransient->uExitIntErrorCode;
4914#ifdef VBOX_STRICT
4915 fWhat &= ~HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE;
4916#endif
4917 }
4918
4919 if (fWhat & HMVMX_READ_EXIT_INSTR_INFO)
4920 {
4921 vmxHCReadExitInstrInfoVmcs(pVCpu, pVmxTransient);
4922 pVmxExitAux->InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
4923#ifdef VBOX_STRICT
4924 fWhat &= ~HMVMX_READ_EXIT_INSTR_INFO;
4925#endif
4926 }
4927
4928 if (fWhat & HMVMX_READ_GUEST_LINEAR_ADDR)
4929 {
4930 vmxHCReadGuestLinearAddrVmcs(pVCpu, pVmxTransient);
4931 pVmxExitAux->u64GuestLinearAddr = pVmxTransient->uGuestLinearAddr;
4932#ifdef VBOX_STRICT
4933 fWhat &= ~HMVMX_READ_GUEST_LINEAR_ADDR;
4934#endif
4935 }
4936
4937 if (fWhat & HMVMX_READ_GUEST_PHYSICAL_ADDR)
4938 {
4939 vmxHCReadGuestPhysicalAddrVmcs(pVCpu, pVmxTransient);
4940 pVmxExitAux->u64GuestPhysAddr = pVmxTransient->uGuestPhysicalAddr;
4941#ifdef VBOX_STRICT
4942 fWhat &= ~HMVMX_READ_GUEST_PHYSICAL_ADDR;
4943#endif
4944 }
4945
4946 if (fWhat & HMVMX_READ_GUEST_PENDING_DBG_XCPTS)
4947 {
4948#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4949 vmxHCReadGuestPendingDbgXctps(pVCpu, pVmxTransient);
4950 pVmxExitAux->u64GuestPendingDbgXcpts = pVmxTransient->uGuestPendingDbgXcpts;
4951#else
4952 pVmxExitAux->u64GuestPendingDbgXcpts = 0;
4953#endif
4954#ifdef VBOX_STRICT
4955 fWhat &= ~HMVMX_READ_GUEST_PENDING_DBG_XCPTS;
4956#endif
4957 }
4958
4959 AssertMsg(!fWhat, ("fWhat=%#RX32 fVmcsFieldsRead=%#RX32\n", fWhat, pVmxTransient->fVmcsFieldsRead));
4960 return VINF_SUCCESS;
4961 }
4962 return VERR_NOT_AVAILABLE;
4963}
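/*
 * Hedged usage sketch (hypothetical caller, not part of this file): fetching
 * the exit qualification and instruction length of the most recent VM-exit.
 * The HMVMX_READ_XXX flags and VMXEXITAUX fields are the ones used above.
 */
static int exampleGetExitQualAndInstrLen(PVMCPUCC pVCpu, uint64_t *puQual, uint32_t *pcbInstr)
{
    VMXEXITAUX ExitAux;
    int rc = VMXR0GetExitAuxInfo(pVCpu, &ExitAux, HMVMX_READ_EXIT_QUALIFICATION | HMVMX_READ_EXIT_INSTR_LEN);
    if (RT_SUCCESS(rc))
    {
        *puQual   = ExitAux.u64Qual;
        *pcbInstr = ExitAux.cbInstr;
    }
    return rc;
}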
4964
4965
4966/**
4967 * Does the necessary state syncing before returning to ring-3 for any reason
4968 * (longjmp, preemption, voluntary exits to ring-3) from VT-x.
4969 *
4970 * @returns VBox status code.
4971 * @param pVCpu The cross context virtual CPU structure.
4972 * @param fImportState Whether to import the guest state from the VMCS back
4973 * to the guest-CPU context.
4974 *
4975 * @remarks No-long-jmp zone!!!
4976 */
4977static int hmR0VmxLeave(PVMCPUCC pVCpu, bool fImportState)
4978{
4979 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4980 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
4981
4982 RTCPUID const idCpu = RTMpCpuId();
4983 Log4Func(("HostCpuId=%u\n", idCpu));
4984
4985 /*
4986 * !!! IMPORTANT !!!
4987 * If you modify code here, check whether VMXR0CallRing3Callback() needs to be updated too.
4988 */
4989
4990 /* Save the guest state if necessary. */
4991 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4992 if (fImportState)
4993 {
4994 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
4995 AssertRCReturn(rc, rc);
4996 }
4997
4998 /* Restore host FPU state if necessary. We will resync on next R0 reentry. */
4999 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
5000 Assert(!CPUMIsGuestFPUStateActive(pVCpu));
5001
5002 /* Restore host debug registers if necessary. We will resync on next R0 reentry. */
5003#ifdef VBOX_STRICT
5004 if (CPUMIsHyperDebugStateActive(pVCpu))
5005 Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT);
5006#endif
5007 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
5008 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
5009 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
5010
5011 /* Restore host-state bits that VT-x only restores partially. */
5012 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
5013 {
5014 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags, idCpu));
5015 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
5016 }
5017 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
5018
5019 /* Restore the lazy host MSRs as we're leaving VT-x context. */
5020 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
5021 {
5022 /* We shouldn't restore the host MSRs without saving the guest MSRs first. */
5023 if (!fImportState)
5024 {
5025 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_KERNEL_GS_BASE | CPUMCTX_EXTRN_SYSCALL_MSRS);
5026 AssertRCReturn(rc, rc);
5027 }
5028 hmR0VmxLazyRestoreHostMsrs(pVCpu);
5029 Assert(!pVCpu->hmr0.s.vmx.fLazyMsrs);
5030 }
5031 else
5032 pVCpu->hmr0.s.vmx.fLazyMsrs = 0;
5033
5034 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
5035 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
5036
5037 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry);
5038 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatImportGuestState);
5039 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExportGuestState);
5040 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatPreExit);
5041 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitHandling);
5042 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitIO);
5043 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitMovCRx);
5044 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitXcptNmi);
5045 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitVmentry);
5046 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
5047
5048 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
5049
5050 /** @todo This partially defeats the purpose of having preemption hooks.
5051 * The problem is that the hooks should only be deregistered when the EMT is
5052 * about to be destroyed, not every time we leave HM
5053 * context.
5054 */
5055 int rc = hmR0VmxClearVmcs(pVmcsInfo);
5056 AssertRCReturn(rc, rc);
5057
5058#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5059 /*
5060 * A valid shadow VMCS is made active as part of VM-entry. It is necessary to
5061 * clear a shadow VMCS before allowing that VMCS to become active on another
5062 * logical processor. We may or may not be importing guest state which clears
5063 * it, so cover for it here.
5064 *
5065 * See Intel spec. 24.11.1 "Software Use of Virtual-Machine Control Structures".
5066 */
5067 if ( pVmcsInfo->pvShadowVmcs
5068 && pVmcsInfo->fShadowVmcsState != VMX_V_VMCS_LAUNCH_STATE_CLEAR)
5069 {
5070 rc = vmxHCClearShadowVmcs(pVmcsInfo);
5071 AssertRCReturn(rc, rc);
5072 }
5073
5074 /*
5075 * Flag that we need to re-export the host state if we switch to this VMCS before
5076 * executing guest or nested-guest code.
5077 */
5078 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
5079#endif
5080
5081 Log4Func(("Cleared Vmcs. HostCpuId=%u\n", idCpu));
5082 NOREF(idCpu);
5083 return VINF_SUCCESS;
5084}
5085
5086
5087/**
5088 * Leaves the VT-x session.
5089 *
5090 * @returns VBox status code.
5091 * @param pVCpu The cross context virtual CPU structure.
5092 *
5093 * @remarks No-long-jmp zone!!!
5094 */
5095static int hmR0VmxLeaveSession(PVMCPUCC pVCpu)
5096{
5097 HM_DISABLE_PREEMPT(pVCpu);
5098 HMVMX_ASSERT_CPU_SAFE(pVCpu);
5099 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5100 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5101
5102 /* When thread-context hooks are used, we can avoid doing the leave again if we were preempted before
5103 and already did it from the VMXR0ThreadCtxCallback(). */
5104 if (!pVCpu->hmr0.s.fLeaveDone)
5105 {
5106 int rc2 = hmR0VmxLeave(pVCpu, true /* fImportState */);
5107 AssertRCReturnStmt(rc2, HM_RESTORE_PREEMPT(), rc2);
5108 pVCpu->hmr0.s.fLeaveDone = true;
5109 }
5110 Assert(!pVCpu->cpum.GstCtx.fExtrn);
5111
5112 /*
5113 * !!! IMPORTANT !!!
5114 * If you modify code here, make sure to check whether VMXR0CallRing3Callback() needs to be updated too.
5115 */
5116
5117 /* Deregister hook now that we've left HM context before re-enabling preemption. */
5118 /** @todo Deregistering here means we need to VMCLEAR always
5119 * (longjmp/exit-to-r3) in VT-x which is not efficient, eliminate need
5120 * for calling VMMR0ThreadCtxHookDisable here! */
5121 VMMR0ThreadCtxHookDisable(pVCpu);
5122
5123 /* Leave HM context. This takes care of local init (term) and deregistering the longjmp-to-ring-3 callback. */
5124 int rc = HMR0LeaveCpu(pVCpu);
5125 HM_RESTORE_PREEMPT();
5126 return rc;
5127}
5128
5129
5130/**
5131 * Takes the necessary actions before going back to ring-3.
5132 *
5133 * An action requires us to go back to ring-3. This function does the necessary
5134 * steps before we can safely return to ring-3. This is not the same as longjmps
5135 * to ring-3; this is voluntary and prepares the guest so it may continue
5136 * executing outside HM (recompiler/IEM).
5137 *
5138 * @returns VBox status code.
5139 * @param pVCpu The cross context virtual CPU structure.
5140 * @param rcExit The reason for exiting to ring-3. Can be
5141 * VINF_VMM_UNKNOWN_RING3_CALL.
5142 */
5143static int hmR0VmxExitToRing3(PVMCPUCC pVCpu, VBOXSTRICTRC rcExit)
5144{
5145 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5146
5147 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
5148 if (RT_UNLIKELY(rcExit == VERR_VMX_INVALID_VMCS_PTR))
5149 {
5150 VMXGetCurrentVmcs(&pVCpu->hm.s.vmx.LastError.HCPhysCurrentVmcs);
5151 pVCpu->hm.s.vmx.LastError.u32VmcsRev = *(uint32_t *)pVmcsInfo->pvVmcs;
5152 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
5153 /* LastError.idCurrentCpu was updated in hmR0VmxPreRunGuestCommitted(). */
5154 }
5155
5156 /* Please, no longjumps here (a log flush could jump back to ring-3). NO LOGGING BEFORE THIS POINT! */
5157 VMMRZCallRing3Disable(pVCpu);
5158 Log4Func(("rcExit=%d\n", VBOXSTRICTRC_VAL(rcExit)));
5159
5160 /*
5161 * Convert any pending HM events back to TRPM due to premature exits to ring-3.
5162 * We need to do this only on returns to ring-3 and not for longjmps to ring3.
5163 *
5164 * This is because execution may continue from ring-3 and we would need to inject
5165 * the event from there (hence place it back in TRPM).
5166 */
5167 if (pVCpu->hm.s.Event.fPending)
5168 {
5169 vmxHCPendingEventToTrpmTrap(pVCpu);
5170 Assert(!pVCpu->hm.s.Event.fPending);
5171
5172 /* Clear the events from the VMCS. */
5173 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0); AssertRC(rc);
5174 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, 0); AssertRC(rc);
5175 }
5176#ifdef VBOX_STRICT
5177 /*
5178 * We check for rcExit here since for errors like VERR_VMX_UNABLE_TO_START_VM (which are
5179 * fatal), we don't care about verifying duplicate injection of events. Errors like
5180 * VERR_EM_INTERPRET are converted to their VINF_* counterparts -prior- to calling this
5181 * function so those should and will be checked below.
5182 */
5183 else if (RT_SUCCESS(rcExit))
5184 {
5185 /*
5186 * Ensure we don't accidentally clear a pending HM event without clearing the VMCS.
5187 * This can be pretty hard to debug otherwise, interrupts might get injected twice
5188 * occasionally, see @bugref{9180#c42}.
5189 *
5190 * However, if the VM-entry failed, any VM entry-interruption info. field would
5191 * be left unmodified as the event would not have been injected to the guest. In
5192 * such cases, don't assert, we're not going to continue guest execution anyway.
5193 */
5194 uint32_t uExitReason;
5195 uint32_t uEntryIntInfo;
5196 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
5197 rc |= VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &uEntryIntInfo);
5198 AssertRC(rc);
5199 AssertMsg(VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason) || !VMX_ENTRY_INT_INFO_IS_VALID(uEntryIntInfo),
5200 ("uExitReason=%#RX32 uEntryIntInfo=%#RX32 rcExit=%d\n", uExitReason, uEntryIntInfo, VBOXSTRICTRC_VAL(rcExit)));
5201 }
5202#endif
5203
5204 /*
5205 * Clear the interrupt-window and NMI-window VMCS controls as we could have got
5206 * a VM-exit with higher priority than interrupt-window or NMI-window VM-exits
5207 * (e.g. TPR below threshold).
5208 */
5209 if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
5210 {
5211 vmxHCClearIntWindowExitVmcs(pVCpu, pVmcsInfo);
5212 vmxHCClearNmiWindowExitVmcs(pVCpu, pVmcsInfo);
5213 }
5214
5215 /* If we're emulating an instruction, we shouldn't have any TRPM traps pending
5216 and if we're injecting an event we should have a TRPM trap pending. */
5217 AssertMsg(rcExit != VINF_EM_RAW_INJECT_TRPM_EVENT || TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
5218#ifndef DEBUG_bird /* Triggered after firing an NMI against NT4SP1, possibly a triple fault in progress. */
5219 AssertMsg(rcExit != VINF_EM_RAW_EMULATE_INSTR || !TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
5220#endif
5221
5222 /* Save guest state and restore host state bits. */
5223 int rc = hmR0VmxLeaveSession(pVCpu);
5224 AssertRCReturn(rc, rc);
5225 STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
5226
5227 /* Thread-context hooks are unregistered at this point!!! */
5228 /* Ring-3 callback notifications are unregistered at this point!!! */
5229
5230 /* Sync recompiler state. */
5231 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
5232 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR
5233 | CPUM_CHANGED_LDTR
5234 | CPUM_CHANGED_GDTR
5235 | CPUM_CHANGED_IDTR
5236 | CPUM_CHANGED_TR
5237 | CPUM_CHANGED_HIDDEN_SEL_REGS);
5238 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
5239 && CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx))
5240 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
5241
5242 Assert(!pVCpu->hmr0.s.fClearTrapFlag);
5243
5244 /* Update the exit-to-ring 3 reason. */
5245 pVCpu->hm.s.rcLastExitToR3 = VBOXSTRICTRC_VAL(rcExit);
5246
5247 /* On our way back from ring-3 reload the guest state if there is a possibility of it being changed. */
5248 if ( rcExit != VINF_EM_RAW_INTERRUPT
5249 || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
5250 {
5251 Assert(!(pVCpu->cpum.GstCtx.fExtrn & HMVMX_CPUMCTX_EXTRN_ALL));
5252 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
5253 }
5254
5255 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3);
5256 VMMRZCallRing3Enable(pVCpu);
5257 return rc;
5258}
5259
5260
5261/**
5262 * VMMRZCallRing3() callback wrapper which saves the guest state before we
5263 * longjump due to a ring-0 assertion.
5264 *
5265 * @returns VBox status code.
5266 * @param pVCpu The cross context virtual CPU structure.
5267 */
5268VMMR0DECL(int) VMXR0AssertionCallback(PVMCPUCC pVCpu)
5269{
5270 /*
5271 * !!! IMPORTANT !!!
5272 * If you modify code here, check whether hmR0VmxLeave() and hmR0VmxLeaveSession() needs to be updated too.
5273 * This is a stripped down version which gets out ASAP, trying to not trigger any further assertions.
5274 */
5275 VMMR0AssertionRemoveNotification(pVCpu);
5276 VMMRZCallRing3Disable(pVCpu);
5277 HM_DISABLE_PREEMPT(pVCpu);
5278
5279 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
5280 vmxHCImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
5281 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
5282 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
5283
5284 /* Restore host-state bits that VT-x only restores partially. */
5285 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
5286 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
5287 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
5288
5289 /* Restore the lazy host MSRs as we're leaving VT-x context. */
5290 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
5291 hmR0VmxLazyRestoreHostMsrs(pVCpu);
5292
5293 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
5294 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
5295 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
5296
5297 /* Clear the current VMCS data back to memory (the shadow VMCS, if any, would have
5298 been cleared as part of importing the guest state above). */
5299 hmR0VmxClearVmcs(pVmcsInfo);
5300
5301 /** @todo eliminate the need for calling VMMR0ThreadCtxHookDisable here! */
5302 VMMR0ThreadCtxHookDisable(pVCpu);
5303
5304 /* Leave HM context. This takes care of local init (term). */
5305 HMR0LeaveCpu(pVCpu);
5306 HM_RESTORE_PREEMPT();
5307 return VINF_SUCCESS;
5308}
5309
5310
5311/**
5312 * Enters the VT-x session.
5313 *
5314 * @returns VBox status code.
5315 * @param pVCpu The cross context virtual CPU structure.
5316 */
5317VMMR0DECL(int) VMXR0Enter(PVMCPUCC pVCpu)
5318{
5319 AssertPtr(pVCpu);
5320 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fSupported);
5321 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5322
5323 LogFlowFunc(("pVCpu=%p\n", pVCpu));
5324 Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
5325 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
5326
5327#ifdef VBOX_STRICT
5328 /* At least verify VMX is enabled, since we can't check if we're in VMX root mode without #GP'ing. */
5329 RTCCUINTREG uHostCr4 = ASMGetCR4();
5330 if (!(uHostCr4 & X86_CR4_VMXE))
5331 {
5332 LogRelFunc(("X86_CR4_VMXE bit in CR4 is not set!\n"));
5333 return VERR_VMX_X86_CR4_VMXE_CLEARED;
5334 }
5335#endif
5336
5337 /*
5338 * Do the EMT scheduled L1D and MDS flush here if needed.
5339 */
5340 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
5341 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
5342 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
5343 hmR0MdsClear();
5344
5345 /*
5346 * Load the appropriate VMCS as the current and active one.
5347 */
5348 PVMXVMCSINFO pVmcsInfo;
5349 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx);
5350 if (!fInNestedGuestMode)
5351 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfo;
5352 else
5353 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
5354 int rc = hmR0VmxLoadVmcs(pVmcsInfo);
5355 if (RT_SUCCESS(rc))
5356 {
5357 pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs = fInNestedGuestMode;
5358 pVCpu->hm.s.vmx.fSwitchedToNstGstVmcsCopyForRing3 = fInNestedGuestMode;
5359 pVCpu->hmr0.s.fLeaveDone = false;
5360 Log4Func(("Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
5361 }
5362 return rc;
5363}
5364
5365
5366/**
5367 * The thread-context callback.
5368 *
5369 * This is used together with RTThreadCtxHookCreate() on platforms which
5370 * support it, and directly from VMMR0EmtPrepareForBlocking() and
5371 * VMMR0EmtResumeAfterBlocking() on platforms which don't.
5372 *
5373 * @param enmEvent The thread-context event.
5374 * @param pVCpu The cross context virtual CPU structure.
5375 * @param fGlobalInit Whether global VT-x/AMD-V init. was used.
5376 * @thread EMT(pVCpu)
5377 */
5378VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPUCC pVCpu, bool fGlobalInit)
5379{
5380 AssertPtr(pVCpu);
5381 RT_NOREF1(fGlobalInit);
5382
5383 switch (enmEvent)
5384 {
5385 case RTTHREADCTXEVENT_OUT:
5386 {
5387 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5388 VMCPU_ASSERT_EMT(pVCpu);
5389
5390 /* No longjmps (logger flushes, locks) in this fragile context. */
5391 VMMRZCallRing3Disable(pVCpu);
5392 Log4Func(("Preempting: HostCpuId=%u\n", RTMpCpuId()));
5393
5394 /* Restore host-state (FPU, debug etc.) */
5395 if (!pVCpu->hmr0.s.fLeaveDone)
5396 {
5397 /*
5398 * Do -not- import the guest-state here as we might already be in the middle of importing
5399 * it, esp. bad if we're holding the PGM lock, see comment in hmR0VmxImportGuestState().
5400 */
5401 hmR0VmxLeave(pVCpu, false /* fImportState */);
5402 pVCpu->hmr0.s.fLeaveDone = true;
5403 }
5404
5405 /* Leave HM context, takes care of local init (term). */
5406 int rc = HMR0LeaveCpu(pVCpu);
5407 AssertRC(rc);
5408
5409 /* Restore longjmp state. */
5410 VMMRZCallRing3Enable(pVCpu);
5411 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatSwitchPreempt);
5412 break;
5413 }
5414
5415 case RTTHREADCTXEVENT_IN:
5416 {
5417 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5418 VMCPU_ASSERT_EMT(pVCpu);
5419
5420 /* Do the EMT scheduled L1D and MDS flush here if needed. */
5421 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
5422 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
5423 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
5424 hmR0MdsClear();
5425
5426 /* No longjmps here, as we don't want to trigger preemption (& its hook) while resuming. */
5427 VMMRZCallRing3Disable(pVCpu);
5428 Log4Func(("Resumed: HostCpuId=%u\n", RTMpCpuId()));
5429
5430 /* Initialize the bare minimum state required for HM. This takes care of
5431 initializing VT-x if necessary (onlined CPUs, local init etc.) */
5432 int rc = hmR0EnterCpu(pVCpu);
5433 AssertRC(rc);
5434 Assert( (pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
5435 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
5436
5437 /* Load the active VMCS as the current one. */
5438 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
5439 rc = hmR0VmxLoadVmcs(pVmcsInfo);
5440 AssertRC(rc);
5441 Log4Func(("Resumed: Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
5442 pVCpu->hmr0.s.fLeaveDone = false;
5443
5444 /* Restore longjmp state. */
5445 VMMRZCallRing3Enable(pVCpu);
5446 break;
5447 }
5448
5449 default:
5450 break;
5451 }
5452}
5453
5454
5455/**
5456 * Exports the host state into the VMCS host-state area.
5457 * Sets up the VM-exit MSR-load area.
5458 *
5459 * The CPU state will be loaded from these fields on every successful VM-exit.
5460 *
5461 * @returns VBox status code.
5462 * @param pVCpu The cross context virtual CPU structure.
5463 *
5464 * @remarks No-long-jump zone!!!
5465 */
5466static int hmR0VmxExportHostState(PVMCPUCC pVCpu)
5467{
5468 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5469
5470 int rc = VINF_SUCCESS;
5471 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
5472 {
5473 uint64_t uHostCr4 = hmR0VmxExportHostControlRegs();
5474
5475 rc = hmR0VmxExportHostSegmentRegs(pVCpu, uHostCr4);
5476 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5477
5478 hmR0VmxExportHostMsrs(pVCpu);
5479
5480 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_HOST_CONTEXT;
5481 }
5482 return rc;
5483}
5484
5485
5486/**
5487 * Saves the host state in the VMCS host-state.
5488 *
5489 * @returns VBox status code.
5490 * @param pVCpu The cross context virtual CPU structure.
5491 *
5492 * @remarks No-long-jump zone!!!
5493 */
5494VMMR0DECL(int) VMXR0ExportHostState(PVMCPUCC pVCpu)
5495{
5496 AssertPtr(pVCpu);
5497 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5498
5499 /*
5500 * Export the host state here while entering HM context.
5501 * When thread-context hooks are used, we might get preempted and have to re-save the host
5502 * state but most of the time we won't be, so do it here before we disable interrupts.
5503 */
5504 return hmR0VmxExportHostState(pVCpu);
5505}
5506
5507
5508/**
5509 * Exports the guest state into the VMCS guest-state area.
5510 *
5511 * This will typically be done before VM-entry when the guest-CPU state and the
5512 * VMCS state may potentially be out of sync.
5513 *
5514 * Sets up the VM-entry MSR-load and VM-exit MSR-store areas. Sets up the
5515 * VM-entry controls.
5516 * Sets up the appropriate VMX non-root function to execute guest code based on
5517 * the guest CPU mode.
5518 *
5519 * @returns VBox strict status code.
5520 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
5521 * without unrestricted guest execution and the VMMDev is not presently
5522 * mapped (e.g. EFI32).
5523 *
5524 * @param pVCpu The cross context virtual CPU structure.
5525 * @param pVmxTransient The VMX-transient structure.
5526 *
5527 * @remarks No-long-jump zone!!!
5528 */
5529static VBOXSTRICTRC hmR0VmxExportGuestState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5530{
5531 AssertPtr(pVCpu);
5532 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5533 LogFlowFunc(("pVCpu=%p\n", pVCpu));
5534
5535 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExportGuestState, x);
5536
5537 /*
5538 * Determine real-on-v86 mode.
5539 * Used when the guest is in real-mode and unrestricted guest execution is not used.
5540 */
5541 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
5542 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUnrestrictedGuest
5543 || !CPUMIsGuestInRealModeEx(&pVCpu->cpum.GstCtx))
5544 pVmcsInfoShared->RealMode.fRealOnV86Active = false;
5545 else
5546 {
5547 Assert(!pVmxTransient->fIsNestedGuest);
5548 pVmcsInfoShared->RealMode.fRealOnV86Active = true;
5549 }
5550
5551 /*
5552 * Any ordering dependency among the sub-functions below must be explicitly stated using comments.
5553 * Ideally, assert that the cross-dependent bits are up-to-date at the point of using it.
5554 */
5555 int rc = vmxHCExportGuestEntryExitCtls(pVCpu, pVmxTransient);
5556 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5557
5558 rc = vmxHCExportGuestCR0(pVCpu, pVmxTransient);
5559 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5560
5561 VBOXSTRICTRC rcStrict = vmxHCExportGuestCR3AndCR4(pVCpu, pVmxTransient);
5562 if (rcStrict == VINF_SUCCESS)
5563 { /* likely */ }
5564 else
5565 {
5566 Assert(rcStrict == VINF_EM_RESCHEDULE_REM || RT_FAILURE_NP(rcStrict));
5567 return rcStrict;
5568 }
5569
5570 rc = vmxHCExportGuestSegRegsXdtr(pVCpu, pVmxTransient);
5571 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5572
5573 rc = hmR0VmxExportGuestMsrs(pVCpu, pVmxTransient);
5574 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5575
5576 vmxHCExportGuestApicTpr(pVCpu, pVmxTransient);
5577 vmxHCExportGuestXcptIntercepts(pVCpu, pVmxTransient);
5578 vmxHCExportGuestRip(pVCpu);
5579 hmR0VmxExportGuestRsp(pVCpu);
5580 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5581
5582 rc = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
5583 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5584
5585 /* Clear any bits that may be set but are exported unconditionally, as well as unused/reserved bits. */
5586 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~( (HM_CHANGED_GUEST_GPRS_MASK & ~HM_CHANGED_GUEST_RSP)
5587 | HM_CHANGED_GUEST_CR2
5588 | (HM_CHANGED_GUEST_DR_MASK & ~HM_CHANGED_GUEST_DR7)
5589 | HM_CHANGED_GUEST_X87
5590 | HM_CHANGED_GUEST_SSE_AVX
5591 | HM_CHANGED_GUEST_OTHER_XSAVE
5592 | HM_CHANGED_GUEST_XCRx
5593 | HM_CHANGED_GUEST_KERNEL_GS_BASE /* Part of lazy or auto load-store MSRs. */
5594 | HM_CHANGED_GUEST_SYSCALL_MSRS /* Part of lazy or auto load-store MSRs. */
5595 | HM_CHANGED_GUEST_TSC_AUX
5596 | HM_CHANGED_GUEST_OTHER_MSRS
5597 | (HM_CHANGED_KEEPER_STATE_MASK & ~HM_CHANGED_VMX_MASK)));
5598
5599 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExportGuestState, x);
5600 return rc;
5601}
5602
5603
5604/**
5605 * Exports the state shared between the host and guest into the VMCS.
5606 *
5607 * @param pVCpu The cross context virtual CPU structure.
5608 * @param pVmxTransient The VMX-transient structure.
5609 *
5610 * @remarks No-long-jump zone!!!
5611 */
5612static void hmR0VmxExportSharedState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5613{
5614 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5615 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5616
5617 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_DR_MASK)
5618 {
5619 int rc = hmR0VmxExportSharedDebugState(pVCpu, pVmxTransient);
5620 AssertRC(rc);
5621 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_GUEST_DR_MASK;
5622
5623 /* Loading shared debug bits might have changed eflags.TF bit for debugging purposes. */
5624 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_RFLAGS)
5625 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5626 }
5627
5628 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_GUEST_LAZY_MSRS)
5629 {
5630 hmR0VmxLazyLoadGuestMsrs(pVCpu);
5631 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_VMX_GUEST_LAZY_MSRS;
5632 }
5633
5634 AssertMsg(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE),
5635 ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
5636}
5637
5638
5639/**
5640 * Worker for loading the guest-state bits in the inner VT-x execution loop.
5641 *
5642 * @returns Strict VBox status code (i.e. informational status codes too).
5643 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
5644 * without unrestricted guest execution and the VMMDev is not presently
5645 * mapped (e.g. EFI32).
5646 *
5647 * @param pVCpu The cross context virtual CPU structure.
5648 * @param pVmxTransient The VMX-transient structure.
5649 *
5650 * @remarks No-long-jump zone!!!
5651 */
5652static VBOXSTRICTRC hmR0VmxExportGuestStateOptimal(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5653{
5654 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5655 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5656
5657#ifdef HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
5658 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
5659#endif
5660
5661 /*
5662 * For many VM-exits only RIP/RSP/RFLAGS (and HWVIRT state when executing a nested-guest)
5663 * changes. First try to export only these without going through all other changed-flag checks.
5664 */
5665 VBOXSTRICTRC rcStrict;
5666 uint64_t const fCtxMask = HM_CHANGED_ALL_GUEST & ~HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE;
5667 uint64_t const fMinimalMask = HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT;
5668 uint64_t const fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
5669
5670 /* If only RIP/RSP/RFLAGS/HWVIRT changed, export only those (quicker, happens more often).*/
5671 if ( (fCtxChanged & fMinimalMask)
5672 && !(fCtxChanged & (fCtxMask & ~fMinimalMask)))
5673 {
5674 vmxHCExportGuestRip(pVCpu);
5675 hmR0VmxExportGuestRsp(pVCpu);
5676 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5677 rcStrict = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
5678 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportMinimal);
5679 }
5680 /* If anything else also changed, go through the full export routine and export as required. */
5681 else if (fCtxChanged & fCtxMask)
5682 {
5683 rcStrict = hmR0VmxExportGuestState(pVCpu, pVmxTransient);
5684 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
5685 { /* likely */}
5686 else
5687 {
5688 AssertMsg(rcStrict == VINF_EM_RESCHEDULE_REM, ("Failed to export guest state! rc=%Rrc\n",
5689 VBOXSTRICTRC_VAL(rcStrict)));
5690 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5691 return rcStrict;
5692 }
5693 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportFull);
5694 }
5695 /* Nothing changed, nothing to load here. */
5696 else
5697 rcStrict = VINF_SUCCESS;
5698
5699#ifdef VBOX_STRICT
5700 /* All the guest state bits should be loaded except maybe the host context and/or the shared host/guest bits. */
5701 uint64_t const fCtxChangedCur = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
5702 AssertMsg(!(fCtxChangedCur & fCtxMask), ("fCtxChangedCur=%#RX64\n", fCtxChangedCur));
5703#endif
5704 return rcStrict;
5705}
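/*
 * Illustrative sketch (not part of the real flow): the export decision above
 * reduced to a tiny classifier. Returns 0 when nothing needs exporting, 1 for
 * the quick RIP/RSP/RFLAGS/HWVIRT-only path and 2 for the full export path.
 * The HM_CHANGED_XXX masks are the same ones used in the function above.
 */
static unsigned exampleClassifyGuestStateExport(uint64_t fCtxChanged)
{
    uint64_t const fCtxMask     = HM_CHANGED_ALL_GUEST & ~HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE;
    uint64_t const fMinimalMask = HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP
                                | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT;
    if (   (fCtxChanged & fMinimalMask)
        && !(fCtxChanged & (fCtxMask & ~fMinimalMask)))
        return 1; /* Only the frequently changing registers are dirty. */
    if (fCtxChanged & fCtxMask)
        return 2; /* Something else changed as well, go through the full export. */
    return 0;     /* Nothing to export. */
}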
5706
5707
5708/**
5709 * Map the APIC-access page for virtualizing APIC accesses.
5710 *
5711 * This can cause a longjump to R3 due to the acquisition of the PGM lock. Hence,
5712 * this is not done as part of exporting guest state; see @bugref{8721}.
5713 *
5714 * @returns VBox status code.
5715 * @param pVCpu The cross context virtual CPU structure.
5716 * @param GCPhysApicBase The guest-physical address of the APIC access page.
5717 */
5718static int hmR0VmxMapHCApicAccessPage(PVMCPUCC pVCpu, RTGCPHYS GCPhysApicBase)
5719{
5720 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
5721 Assert(GCPhysApicBase);
5722
5723 LogFunc(("Mapping HC APIC-access page at %#RGp\n", GCPhysApicBase));
5724
5725 /* Unalias the existing mapping. */
5726 int rc = PGMHandlerPhysicalReset(pVM, GCPhysApicBase);
5727 AssertRCReturn(rc, rc);
5728
5729 /* Map the HC APIC-access page in place of the MMIO page, also updates the shadow page tables if necessary. */
5730 Assert(pVM->hmr0.s.vmx.HCPhysApicAccess != NIL_RTHCPHYS);
5731 rc = IOMR0MmioMapMmioHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hmr0.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
5732 AssertRCReturn(rc, rc);
5733
5734 return VINF_SUCCESS;
5735}
5736
5737
5738/**
5739 * Worker function passed to RTMpOnSpecific() that is to be called on the target
5740 * CPU.
5741 *
5742 * @param idCpu The ID for the CPU the function is called on.
5743 * @param pvUser1 Null, not used.
5744 * @param pvUser2 Null, not used.
5745 */
5746static DECLCALLBACK(void) hmR0DispatchHostNmi(RTCPUID idCpu, void *pvUser1, void *pvUser2)
5747{
5748 RT_NOREF3(idCpu, pvUser1, pvUser2);
5749 VMXDispatchHostNmi();
5750}
5751
5752
5753/**
5754 * Dispatches an NMI on the host CPU that received it.
5755 *
5756 * @returns VBox status code.
5757 * @param pVCpu The cross context virtual CPU structure.
5758 * @param pVmcsInfo The VMCS info. object corresponding to the VMCS that was
5759 * executing when receiving the host NMI in VMX non-root
5760 * operation.
5761 */
5762static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
5763{
5764 RTCPUID const idCpu = pVmcsInfo->idHostCpuExec;
5765 Assert(idCpu != NIL_RTCPUID);
5766
5767 /*
5768 * We don't want to delay dispatching the NMI any more than we have to. However,
5769 * we have already chosen -not- to dispatch NMIs when interrupts were still disabled
5770 * after executing guest or nested-guest code for the following reasons:
5771 *
5772 * - We would need to perform VMREADs with interrupts disabled, which is orders of
5773 * magnitude worse when we run as a nested hypervisor without VMCS shadowing
5774 * supported by the host hypervisor.
5775 *
5776 * - It affects the common VM-exit scenario and keeps interrupts disabled for a
5777 * longer period of time just for handling an edge case like host NMIs which do
5778 * not occur nearly as frequently as other VM-exits.
5779 *
5780 * Let's cover the most likely scenario first. Check if we are on the target CPU
5781 * and dispatch the NMI right away. This should be much faster than calling into
5782 * RTMpOnSpecific() machinery.
5783 */
5784 bool fDispatched = false;
5785 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
5786 if (idCpu == RTMpCpuId())
5787 {
5788 VMXDispatchHostNmi();
5789 fDispatched = true;
5790 }
5791 ASMSetFlags(fEFlags);
5792 if (fDispatched)
5793 {
5794 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC);
5795 return VINF_SUCCESS;
5796 }
5797
5798 /*
5799 * RTMpOnSpecific() waits until the worker function has run on the target CPU. So
5800 * there should be no race or recursion even if we are unlucky enough to be preempted
5801 * (to the target CPU) without dispatching the host NMI above.
5802 */
5803 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGCIpi);
5804 return RTMpOnSpecific(idCpu, &hmR0DispatchHostNmi, NULL /* pvUser1 */, NULL /* pvUser2 */);
5805}
5806
5807
5808#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5809/**
5810 * Merges the guest with the nested-guest MSR bitmap in preparation of executing the
5811 * nested-guest using hardware-assisted VMX.
5812 *
5813 * @param pVCpu The cross context virtual CPU structure.
5814 * @param pVmcsInfoNstGst The nested-guest VMCS info. object.
5815 * @param pVmcsInfoGst The guest VMCS info. object.
5816 */
5817static void hmR0VmxMergeMsrBitmapNested(PCVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfoNstGst, PCVMXVMCSINFO pVmcsInfoGst)
5818{
5819 uint32_t const cbMsrBitmap = X86_PAGE_4K_SIZE;
5820 uint64_t *pu64MsrBitmap = (uint64_t *)pVmcsInfoNstGst->pvMsrBitmap;
5821 Assert(pu64MsrBitmap);
5822
5823 /*
5824 * We merge the guest MSR bitmap with the nested-guest MSR bitmap such that any
5825 * MSR that is intercepted by the guest is also intercepted while executing the
5826 * nested-guest using hardware-assisted VMX.
5827 *
5828 * Note! If the nested-guest is not using an MSR bitmap, every MSR must cause a
5829 * nested-guest VM-exit even if the outer guest is not intercepting some
5830 * MSRs. We cannot assume the caller has initialized the nested-guest
5831 * MSR bitmap in this case.
5832 *
5833 * The nested hypervisor may also switch whether it uses MSR bitmaps for
5834 * each of its VM-entries, hence initializing it once per-VM while setting
5835 * up the nested-guest VMCS is not sufficient.
5836 */
5837 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
5838 if (pVmcsNstGst->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
5839 {
5840 uint64_t const *pu64MsrBitmapNstGst = (uint64_t const *)&pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap[0];
5841 uint64_t const *pu64MsrBitmapGst = (uint64_t const *)pVmcsInfoGst->pvMsrBitmap;
5842 Assert(pu64MsrBitmapNstGst);
5843 Assert(pu64MsrBitmapGst);
5844
5845 /** @todo Detect and use EVEX.POR? */
5846 uint32_t const cFrags = cbMsrBitmap / sizeof(uint64_t);
5847 for (uint32_t i = 0; i < cFrags; i++)
5848 pu64MsrBitmap[i] = pu64MsrBitmapNstGst[i] | pu64MsrBitmapGst[i];
5849 }
5850 else
5851 ASMMemFill32(pu64MsrBitmap, cbMsrBitmap, UINT32_C(0xffffffff));
5852}
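/*
 * Minimal sketch of the merge above, on plain buffers: an MSR intercepted by
 * either the guest or the nested-guest must be intercepted in the merged
 * bitmap, hence the bitwise OR. The function and parameter names are made up
 * for the example; the real code operates on the VMCS info. objects.
 */
static void exampleMergeMsrBitmaps(uint64_t *pau64Dst, uint64_t const *pau64NstGst, uint64_t const *pau64Gst)
{
    uint32_t const cFrags = X86_PAGE_4K_SIZE / sizeof(uint64_t);
    for (uint32_t i = 0; i < cFrags; i++)
        pau64Dst[i] = pau64NstGst[i] | pau64Gst[i];
}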
5853
5854
5855/**
5856 * Merges the guest VMCS in to the nested-guest VMCS controls in preparation of
5857 * hardware-assisted VMX execution of the nested-guest.
5858 *
5859 * For a guest, we don't modify these controls once we set up the VMCS and hence
5860 * this function is never called.
5861 *
5862 * For nested-guests, since the nested hypervisor provides these controls on every
5863 * nested-guest VM-entry and could potentially change them each time, we need to
5864 * merge them before every nested-guest VM-entry.
5865 *
5866 * @returns VBox status code.
5867 * @param pVCpu The cross context virtual CPU structure.
5868 */
5869static int hmR0VmxMergeVmcsNested(PVMCPUCC pVCpu)
5870{
5871 PVMCC const pVM = pVCpu->CTX_SUFF(pVM);
5872 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
5873 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
5874
5875 /*
5876 * Merge the controls with the requirements of the guest VMCS.
5877 *
5878 * We do not need to validate the nested-guest VMX features specified in the nested-guest
5879 * VMCS with the features supported by the physical CPU as it's already done by the
5880 * VMLAUNCH/VMRESUME instruction emulation.
5881 *
5882 * This is because the VMX features exposed by CPUM (through CPUID/MSRs) to the guest are
5883 * derived from the VMX features supported by the physical CPU.
5884 */
5885
5886 /* Pin-based VM-execution controls. */
5887 uint32_t const u32PinCtls = pVmcsNstGst->u32PinCtls | pVmcsInfoGst->u32PinCtls;
5888
5889 /* Processor-based VM-execution controls. */
5890 uint32_t u32ProcCtls = (pVmcsNstGst->u32ProcCtls & ~VMX_PROC_CTLS_USE_IO_BITMAPS)
5891 | (pVmcsInfoGst->u32ProcCtls & ~( VMX_PROC_CTLS_INT_WINDOW_EXIT
5892 | VMX_PROC_CTLS_NMI_WINDOW_EXIT
5893 | VMX_PROC_CTLS_MOV_DR_EXIT
5894 | VMX_PROC_CTLS_USE_TPR_SHADOW
5895 | VMX_PROC_CTLS_MONITOR_TRAP_FLAG));
5896
5897 /* Secondary processor-based VM-execution controls. */
5898 uint32_t const u32ProcCtls2 = (pVmcsNstGst->u32ProcCtls2 & ~VMX_PROC_CTLS2_VPID)
5899 | (pVmcsInfoGst->u32ProcCtls2 & ~( VMX_PROC_CTLS2_VIRT_APIC_ACCESS
5900 | VMX_PROC_CTLS2_INVPCID
5901 | VMX_PROC_CTLS2_VMCS_SHADOWING
5902 | VMX_PROC_CTLS2_RDTSCP
5903 | VMX_PROC_CTLS2_XSAVES_XRSTORS
5904 | VMX_PROC_CTLS2_APIC_REG_VIRT
5905 | VMX_PROC_CTLS2_VIRT_INT_DELIVERY
5906 | VMX_PROC_CTLS2_VMFUNC));
5907
5908 /*
5909 * VM-entry controls:
5910 * These controls contain state that depends on the nested-guest state (primarily
5911 * EFER MSR) and is thus not constant between VMLAUNCH/VMRESUME and the nested-guest
5912 * VM-exit. Although the nested hypervisor cannot change it, we need to in order to
5913 * properly continue executing the nested-guest if the EFER MSR changes but does not
5914 * cause a nested-guest VM-exit.
5915 *
5916 * VM-exit controls:
5917 * These controls specify the host state on return. We cannot use the controls from
5918 * the nested hypervisor state as-is, as it would contain the guest state rather than
5919 * the host state. Since the host state is subject to change (e.g. preemption, trips
5920 * to ring-3, longjmp and rescheduling to a different host CPU) they are not constant
5921 * through VMLAUNCH/VMRESUME and the nested-guest VM-exit.
5922 *
5923 * VM-entry MSR-load:
5924 * The guest MSRs from the VM-entry MSR-load area are already loaded into the guest-CPU
5925 * context by the VMLAUNCH/VMRESUME instruction emulation.
5926 *
5927 * VM-exit MSR-store:
5928 * The VM-exit emulation will take care of populating the MSRs from the guest-CPU context
5929 * back into the VM-exit MSR-store area.
5930 *
5931 * VM-exit MSR-load areas:
5932 * This must contain the real host MSRs with hardware-assisted VMX execution. Hence, we
5933 * can entirely ignore what the nested hypervisor wants to load here.
5934 */
5935
5936 /*
5937 * Exception bitmap.
5938 *
5939 * We could remove #UD from the guest bitmap and merge it with the nested-guest bitmap
5940 * here (and avoid doing anything while exporting nested-guest state), but to keep the
5941 * code more flexible if intercepting exceptions becomes more dynamic in the future, we do
5942 * it as part of exporting the nested-guest state.
5943 */
5944 uint32_t const u32XcptBitmap = pVmcsNstGst->u32XcptBitmap | pVmcsInfoGst->u32XcptBitmap;
5945
5946 /*
5947 * CR0/CR4 guest/host mask.
5948 *
5949 * Modifications by the nested-guest to CR0/CR4 bits owned by the host and the guest must
5950 * cause VM-exits, so we need to merge them here.
5951 */
5952 uint64_t const u64Cr0Mask = pVmcsNstGst->u64Cr0Mask.u | pVmcsInfoGst->u64Cr0Mask;
5953 uint64_t const u64Cr4Mask = pVmcsNstGst->u64Cr4Mask.u | pVmcsInfoGst->u64Cr4Mask;
5954
5955 /*
5956 * Page-fault error-code mask and match.
5957 *
5958 * Although we require unrestricted guest execution (and thereby nested-paging) for
5959 * hardware-assisted VMX execution of nested-guests and thus the outer guest doesn't
5960 * normally intercept #PFs, it might intercept them for debugging purposes.
5961 *
5962 * If the outer guest is not intercepting #PFs, we can use the nested-guest #PF filters.
5963 * If the outer guest is intercepting #PFs, we must intercept all #PFs.
5964 */
5965 uint32_t u32XcptPFMask;
5966 uint32_t u32XcptPFMatch;
5967 if (!(pVmcsInfoGst->u32XcptBitmap & RT_BIT(X86_XCPT_PF)))
5968 {
5969 u32XcptPFMask = pVmcsNstGst->u32XcptPFMask;
5970 u32XcptPFMatch = pVmcsNstGst->u32XcptPFMatch;
5971 }
5972 else
5973 {
5974 u32XcptPFMask = 0;
5975 u32XcptPFMatch = 0;
5976 }
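    /*
     * For reference: with bit 14 (#PF) set in the exception bitmap, a page fault causes a
     * VM-exit if and only if (PFEC & PFEC_MASK) == PFEC_MATCH; with bit 14 clear the
     * condition is inverted. Hence mask=match=0 while intercepting #PF traps every #PF.
     */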
5977
5978 /*
5979 * Pause-Loop exiting.
5980 */
5981 /** @todo r=bird: given that both pVM->hm.s.vmx.cPleGapTicks and
5982 * pVM->hm.s.vmx.cPleWindowTicks defaults to zero, I cannot see how
5983 * this will work... */
5984 uint32_t const cPleGapTicks = RT_MIN(pVM->hm.s.vmx.cPleGapTicks, pVmcsNstGst->u32PleGap);
5985 uint32_t const cPleWindowTicks = RT_MIN(pVM->hm.s.vmx.cPleWindowTicks, pVmcsNstGst->u32PleWindow);
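    /*
     * For reference: with pause-loop exiting, PAUSE executions no more than PLE_GAP TSC ticks
     * apart are considered part of one loop, and a VM-exit is triggered once such a loop has
     * been spinning for more than PLE_WINDOW ticks.
     */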
5986
5987 /*
5988 * Pending debug exceptions.
5989 * Currently just copy whatever the nested-guest provides us.
5990 */
5991 uint64_t const uPendingDbgXcpts = pVmcsNstGst->u64GuestPendingDbgXcpts.u;
5992
5993 /*
5994 * I/O Bitmap.
5995 *
5996 * We do not use the I/O bitmap that may be provided by the nested hypervisor as we always
5997 * intercept all I/O port accesses.
5998 */
5999 Assert(u32ProcCtls & VMX_PROC_CTLS_UNCOND_IO_EXIT);
6000 Assert(!(u32ProcCtls & VMX_PROC_CTLS_USE_IO_BITMAPS));
6001
6002 /*
6003 * VMCS shadowing.
6004 *
6005 * We do not yet expose VMCS shadowing to the guest and thus VMCS shadowing should not be
6006 * enabled while executing the nested-guest.
6007 */
6008 Assert(!(u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING));
6009
6010 /*
6011 * APIC-access page.
6012 */
6013 RTHCPHYS HCPhysApicAccess;
6014 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
6015 {
6016 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
6017 RTGCPHYS const GCPhysApicAccess = pVmcsNstGst->u64AddrApicAccess.u;
6018
6019 /* Disabling this since we adjust the shadow page table permissions in (SyncHandlerPte). */
6020#if 0
6021 /** @todo NSTVMX: This is not really correct but currently is required to make
6022 * things work. We need to re-enable the page handler when we fallback to
6023 * IEM execution of the nested-guest! */
6024 PGMHandlerPhysicalPageTempOff(pVM, GCPhysApicAccess, GCPhysApicAccess);
6025#endif
6026
6027 void *pvPage;
6028 PGMPAGEMAPLOCK PgLockApicAccess;
6029 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysApicAccess, &pvPage, &PgLockApicAccess);
6030 if (RT_SUCCESS(rc))
6031 {
6032 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysApicAccess, &HCPhysApicAccess);
6033 AssertMsgRCReturn(rc, ("Failed to get host-physical address for APIC-access page at %#RGp\n", GCPhysApicAccess), rc);
6034
6035 /** @todo Handle proper releasing of page-mapping lock later. */
6036 PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockApicAccess);
6037 }
6038 else
6039 return rc;
6040 }
6041 else
6042 HCPhysApicAccess = 0;
6043
6044 /*
6045 * Virtual-APIC page and TPR threshold.
6046 */
6047 RTHCPHYS HCPhysVirtApic;
6048 uint32_t u32TprThreshold;
6049 if (u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
6050 {
6051 Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW);
6052 RTGCPHYS const GCPhysVirtApic = pVmcsNstGst->u64AddrVirtApic.u;
6053
6054 void *pvPage;
6055 PGMPAGEMAPLOCK PgLockVirtApic;
6056 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysVirtApic, &pvPage, &PgLockVirtApic);
6057 if (RT_SUCCESS(rc))
6058 {
6059 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysVirtApic, &HCPhysVirtApic);
6060 AssertMsgRCReturn(rc, ("Failed to get host-physical address for virtual-APIC page at %#RGp\n", GCPhysVirtApic), rc);
6061
6062 /** @todo Handle proper releasing of page-mapping lock later. */
6063 PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockVirtApic);
6064 }
6065 else
6066 return rc;
6067
6068 u32TprThreshold = pVmcsNstGst->u32TprThreshold;
6069 }
6070 else
6071 {
6072 HCPhysVirtApic = 0;
6073 u32TprThreshold = 0;
6074
6075 /*
6076 * We must make sure CR8 reads/writes cause VM-exits when TPR shadowing is not used
6077 * by the nested hypervisor; CR8 is only accessible in 64-bit mode, hence the check
6078 * below. Preventing MMIO accesses to the physical APIC will be taken care of by EPT/shadow paging.
6079 */
6080 if (pVM->hmr0.s.fAllow64BitGuests)
6081 u32ProcCtls |= VMX_PROC_CTLS_CR8_STORE_EXIT
6082 | VMX_PROC_CTLS_CR8_LOAD_EXIT;
6083 }
6084
6085 /*
6086 * Validate basic assumptions.
6087 */
6088 PVMXVMCSINFO pVmcsInfoNstGst = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
6089 Assert(pVM->hmr0.s.vmx.fUnrestrictedGuest);
6090 Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
6091 Assert(hmGetVmxActiveVmcsInfo(pVCpu) == pVmcsInfoNstGst);
6092
6093 /*
6094 * Commit it to the nested-guest VMCS.
6095 */
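    /* Note: we compare against the values cached in the nested-guest VMCS info and only
       VMWRITE the fields that actually changed, keeping VMWRITEs down on every merge. */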
6096 int rc = VINF_SUCCESS;
6097 if (pVmcsInfoNstGst->u32PinCtls != u32PinCtls)
6098 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, u32PinCtls);
6099 if (pVmcsInfoNstGst->u32ProcCtls != u32ProcCtls)
6100 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, u32ProcCtls);
6101 if (pVmcsInfoNstGst->u32ProcCtls2 != u32ProcCtls2)
6102 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, u32ProcCtls2);
6103 if (pVmcsInfoNstGst->u32XcptBitmap != u32XcptBitmap)
6104 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32XcptBitmap);
6105 if (pVmcsInfoNstGst->u64Cr0Mask != u64Cr0Mask)
6106 rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask);
6107 if (pVmcsInfoNstGst->u64Cr4Mask != u64Cr4Mask)
6108 rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask);
6109 if (pVmcsInfoNstGst->u32XcptPFMask != u32XcptPFMask)
6110 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, u32XcptPFMask);
6111 if (pVmcsInfoNstGst->u32XcptPFMatch != u32XcptPFMatch)
6112 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, u32XcptPFMatch);
6113 if ( !(u32ProcCtls & VMX_PROC_CTLS_PAUSE_EXIT)
6114 && (u32ProcCtls2 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT))
6115 {
6116 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT);
6117 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, cPleGapTicks);
6118 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, cPleWindowTicks);
6119 }
6120 if (u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
6121 {
6122 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, u32TprThreshold);
6123 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
6124 }
6125 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
6126 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
6127 rc |= VMXWriteVmcsNw(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, uPendingDbgXcpts);
6128 AssertRC(rc);
6129
6130 /*
6131 * Update the nested-guest VMCS cache.
6132 */
6133 pVmcsInfoNstGst->u32PinCtls = u32PinCtls;
6134 pVmcsInfoNstGst->u32ProcCtls = u32ProcCtls;
6135 pVmcsInfoNstGst->u32ProcCtls2 = u32ProcCtls2;
6136 pVmcsInfoNstGst->u32XcptBitmap = u32XcptBitmap;
6137 pVmcsInfoNstGst->u64Cr0Mask = u64Cr0Mask;
6138 pVmcsInfoNstGst->u64Cr4Mask = u64Cr4Mask;
6139 pVmcsInfoNstGst->u32XcptPFMask = u32XcptPFMask;
6140 pVmcsInfoNstGst->u32XcptPFMatch = u32XcptPFMatch;
6141 pVmcsInfoNstGst->HCPhysVirtApic = HCPhysVirtApic;
6142
6143 /*
6144 * We need to flush the TLB if we are switching the APIC-access page address.
6145 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
6146 */
6147 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
6148 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = true;
6149
6150 /*
6151 * MSR bitmap.
6152 *
6153 * The MSR bitmap address has already been initialized while setting up the nested-guest
6154 * VMCS; here we need to merge the MSR bitmaps.
6155 */
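    /* Conceptually, an MSR access must be intercepted if either the outer guest or the nested
       hypervisor wants it intercepted, so the merged bitmap is (at least) the union of both. */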
6156 if (u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
6157 hmR0VmxMergeMsrBitmapNested(pVCpu, pVmcsInfoNstGst, pVmcsInfoGst);
6158
6159 return VINF_SUCCESS;
6160}
6161#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
6162
6163
6164/**
6165 * Does the preparations before executing guest code in VT-x.
6166 *
6167 * This may cause longjmps to ring-3 and may even result in rescheduling to the
6168 * recompiler/IEM. We must be cautious about what we commit to the VMCS here,
6169 * since we cannot assume that we will assuredly go on to execute the guest in
6170 * VT-x mode.
6171 *
6172 * If we fall back to the recompiler/IEM after updating the VMCS and clearing
6173 * the common-state (TRPM/forceflags), we must undo those changes so that the
6174 * recompiler/IEM can (and should) use them when it resumes guest execution.
6175 * Otherwise such operations must be done when we can no longer exit to ring-3.
6176 *
6177 * @returns Strict VBox status code (i.e. informational status codes too).
6178 * @retval VINF_SUCCESS if we can proceed with running the guest, interrupts
6179 * have been disabled.
6180 * @retval VINF_VMX_VMEXIT if a nested-guest VM-exit occurs (e.g., while evaluating
6181 * pending events).
6182 * @retval VINF_EM_RESET if a triple-fault occurs while injecting a
6183 * double-fault into the guest.
6184 * @retval VINF_EM_DBG_STEPPED if @a fStepping is true and an event was
6185 * dispatched directly.
6186 * @retval VINF_* scheduling changes, we have to go back to ring-3.
6187 *
6188 * @param pVCpu The cross context virtual CPU structure.
6189 * @param pVmxTransient The VMX-transient structure.
6190 * @param fStepping Whether we are single-stepping the guest in the
6191 * hypervisor debugger. Makes us ignore some of the reasons
6192 * for returning to ring-3, and return VINF_EM_DBG_STEPPED
6193 * if event dispatching took place.
6194 */
6195static VBOXSTRICTRC hmR0VmxPreRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, bool fStepping)
6196{
6197 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6198
6199 Log4Func(("fIsNested=%RTbool fStepping=%RTbool\n", pVmxTransient->fIsNestedGuest, fStepping));
6200
6201#ifdef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
6202 if (pVmxTransient->fIsNestedGuest)
6203 {
6204 RT_NOREF2(pVCpu, fStepping);
6205 Log2Func(("Rescheduling to IEM due to nested-hwvirt or forced IEM exec -> VINF_EM_RESCHEDULE_REM\n"));
6206 return VINF_EM_RESCHEDULE_REM;
6207 }
6208#endif
6209
6210 /*
6211 * Check and process force flag actions, some of which might require us to go back to ring-3.
6212 */
6213 VBOXSTRICTRC rcStrict = vmxHCCheckForceFlags(pVCpu, pVmxTransient->fIsNestedGuest, fStepping);
6214 if (rcStrict == VINF_SUCCESS)
6215 {
6216 /* FFs don't get set all the time. */
6217#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6218 if ( pVmxTransient->fIsNestedGuest
6219 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
6220 {
6221 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
6222 return VINF_VMX_VMEXIT;
6223 }
6224#endif
6225 }
6226 else
6227 return rcStrict;
6228
6229 /*
6230 * Virtualize memory-mapped accesses to the physical APIC (may take locks).
6231 */
6232 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
6233 if ( !pVCpu->hm.s.vmx.u64GstMsrApicBase
6234 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
6235 && PDMHasApic(pVM))
6236 {
6237 /* Get the APIC base MSR from the virtual APIC device. */
6238 uint64_t const uApicBaseMsr = APICGetBaseMsrNoCheck(pVCpu);
6239
6240 /* Map the APIC access page. */
6241 int rc = hmR0VmxMapHCApicAccessPage(pVCpu, uApicBaseMsr & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
6242 AssertRCReturn(rc, rc);
6243
6244 /* Update the per-VCPU cache of the APIC base MSR corresponding to the mapped APIC access page. */
6245 pVCpu->hm.s.vmx.u64GstMsrApicBase = uApicBaseMsr;
6246 }
6247
6248#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6249 /*
6250 * Merge guest VMCS controls with the nested-guest VMCS controls.
6251 *
6252 * Even if we have not executed the guest prior to this (e.g. when resuming from a
6253 * saved state), we should be okay with merging controls as we initialize the
6254 * guest VMCS controls as part of VM setup phase.
6255 */
6256 if ( pVmxTransient->fIsNestedGuest
6257 && !pVCpu->hm.s.vmx.fMergedNstGstCtls)
6258 {
6259 int rc = hmR0VmxMergeVmcsNested(pVCpu);
6260 AssertRCReturn(rc, rc);
6261 pVCpu->hm.s.vmx.fMergedNstGstCtls = true;
6262 }
6263#endif
6264
6265 /*
6266 * Evaluate events to be injected into the guest.
6267 *
6268 * Events in TRPM can be injected without inspecting the guest state.
6269 * If any new events (interrupts/NMI) are pending currently, we try to set up the
6270 * guest to cause a VM-exit the next time they are ready to receive the event.
6271 */
6272 if (TRPMHasTrap(pVCpu))
6273 vmxHCTrpmTrapToPendingEvent(pVCpu);
6274
6275 uint32_t fIntrState;
6276 rcStrict = vmxHCEvaluatePendingEvent(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->fIsNestedGuest,
6277 &fIntrState);
6278
6279#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6280 /*
6281 * If something failed while evaluating pending events (unlikely) or if we were
6282 * preparing to run a nested-guest but performed a nested-guest VM-exit instead, we should bail.
6283 */
6284 if (rcStrict != VINF_SUCCESS)
6285 return rcStrict;
6286 if ( pVmxTransient->fIsNestedGuest
6287 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
6288 {
6289 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
6290 return VINF_VMX_VMEXIT;
6291 }
6292#else
6293 Assert(rcStrict == VINF_SUCCESS);
6294#endif
6295
6296 /*
6297 * Event injection may take locks (currently the PGM lock for real-on-v86 case) and thus
6298 * needs to be done with longjmps or interrupts + preemption enabled. Event injection might
6299 * also result in triple-faulting the VM.
6300 *
6301 * With nested-guests, the above does not apply since unrestricted guest execution is a
6302 * requirement. Regardless, we do this here to avoid duplicating code elsewhere.
6303 */
6304 rcStrict = vmxHCInjectPendingEvent(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->fIsNestedGuest,
6305 fIntrState, fStepping);
6306 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
6307 { /* likely */ }
6308 else
6309 {
6310 AssertMsg(rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping),
6311 ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
6312 return rcStrict;
6313 }
6314
6315 /*
6316 * A longjmp might result in importing CR3 even for VM-exits that don't necessarily
6317 * import CR3 themselves. We will need to update PGM's copy here, since even a call as
6318 * late as vmxHCInjectPendingEvent() above may lazily import guest-CPU state on demand,
6319 * causing the force flag checked below to be set.
6320 */
6321 if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
6322 {
6323 Assert(!(ASMAtomicUoReadU64(&pVCpu->cpum.GstCtx.fExtrn) & CPUMCTX_EXTRN_CR3));
6324 int rc2 = PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
6325 AssertMsgReturn(rc2 == VINF_SUCCESS || rc2 == VINF_PGM_SYNC_CR3,
6326 ("%Rrc\n", rc2), RT_FAILURE_NP(rc2) ? rc2 : VERR_IPE_UNEXPECTED_INFO_STATUS);
6327 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
6328 }
6329
6330#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6331 /* Paranoia. */
6332 Assert(!pVmxTransient->fIsNestedGuest || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6333#endif
6334
6335 /*
6336 * No longjmps to ring-3 from this point on!!!
6337 * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic.
6338 * This also disables flushing of the R0-logger instance (if any).
6339 */
6340 VMMRZCallRing3Disable(pVCpu);
6341
6342 /*
6343 * Export the guest state bits.
6344 *
6345 * We cannot perform longjmps while loading the guest state because we do not preserve the
6346 * host/guest state (although the VMCS will be preserved) across longjmps which can cause
6347 * CPU migration.
6348 *
6349 * If we are injecting events to a real-on-v86 mode guest, we would have updated RIP and some segment
6350 * registers. Hence, exporting of the guest state needs to be done -after- injection of events.
6351 */
6352 rcStrict = hmR0VmxExportGuestStateOptimal(pVCpu, pVmxTransient);
6353 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
6354 { /* likely */ }
6355 else
6356 {
6357 VMMRZCallRing3Enable(pVCpu);
6358 return rcStrict;
6359 }
6360
6361 /*
6362 * We disable interrupts so that we don't miss any interrupts that would flag preemption
6363 * (IPI/timers etc.) when thread-context hooks aren't used and we've been running with
6364 * preemption disabled for a while. Since this is purely to aid the
6365 * RTThreadPreemptIsPending() code, it doesn't matter that it may temporarily reenable and
6366 * RTThreadPreemptIsPending() code, it doesn't matter that it may temporarily reenable and
6367 * disable interrupts on NT.
6368 *
6369 * We need to check for force-flags that could've possibly been altered since we last
6370 * see @bugref{6398}).
6371 *
6372 * We also check a couple of other force-flags as a last opportunity to get the EMT back
6373 * to ring-3 before executing guest code.
6374 */
6375 pVmxTransient->fEFlags = ASMIntDisableFlags();
6376
6377 if ( ( !VM_FF_IS_ANY_SET(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
6378 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
6379 || ( fStepping /* Optimized for the non-stepping case, so a bit of unnecessary work when stepping. */
6380 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK & ~(VMCPU_FF_TIMER | VMCPU_FF_PDM_CRITSECT))) )
6381 {
6382 if (!RTThreadPreemptIsPending(NIL_RTTHREAD))
6383 {
6384#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6385 /*
6386 * If we are executing a nested-guest, make sure that we intercept subsequent
6387 * events. The one we are injecting might be part of VM-entry. This is mainly to keep
6388 * the VM-exit instruction emulation happy.
6389 */
6390 if (pVmxTransient->fIsNestedGuest)
6391 CPUMSetGuestVmxInterceptEvents(&pVCpu->cpum.GstCtx, true);
6392#endif
6393
6394 /*
6395 * We've injected any pending events. This is really the point of no return (to ring-3).
6396 *
6397 * Note! The caller expects to continue with interrupts & longjmps disabled on successful
6398 * returns from this function, so do -not- enable them here.
6399 */
6400 pVCpu->hm.s.Event.fPending = false;
6401 return VINF_SUCCESS;
6402 }
6403
6404 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPendingHostIrq);
6405 rcStrict = VINF_EM_RAW_INTERRUPT;
6406 }
6407 else
6408 {
6409 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
6410 rcStrict = VINF_EM_RAW_TO_R3;
6411 }
6412
6413 ASMSetFlags(pVmxTransient->fEFlags);
6414 VMMRZCallRing3Enable(pVCpu);
6415
6416 return rcStrict;
6417}
6418
6419
6420/**
6421 * Final preparations before executing guest code using hardware-assisted VMX.
6422 *
6423 * We can no longer get preempted to a different host CPU and there are no returns
6424 * to ring-3. We ignore any errors that may happen from this point (e.g. VMWRITE
6425 * failures); this function is not intended to fail barring unrecoverable hardware
6426 * errors.
6427 *
6428 * @param pVCpu The cross context virtual CPU structure.
6429 * @param pVmxTransient The VMX-transient structure.
6430 *
6431 * @remarks Called with preemption disabled.
6432 * @remarks No-long-jump zone!!!
6433 */
6434static void hmR0VmxPreRunGuestCommitted(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
6435{
6436 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6437 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6438 Assert(!pVCpu->hm.s.Event.fPending);
6439
6440 /*
6441 * Indicate start of guest execution and where poking EMT out of guest-context is recognized.
6442 */
6443 VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
6444 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
6445
6446 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
6447 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
6448 PHMPHYSCPU pHostCpu = hmR0GetCurrentCpu();
6449 RTCPUID const idCurrentCpu = pHostCpu->idCpu;
6450
6451 if (!CPUMIsGuestFPUStateActive(pVCpu))
6452 {
6453 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestFpuState, x);
6454 if (CPUMR0LoadGuestFPU(pVM, pVCpu) == VINF_CPUM_HOST_CR0_MODIFIED)
6455 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT;
6456 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestFpuState, x);
6457 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadGuestFpu);
6458 }
6459
6460 /*
6461 * Re-export the host state bits as we may've been preempted (only happens when
6462 * thread-context hooks are used or when the VM start function changes) or if
6463 * the host CR0 is modified while loading the guest FPU state above.
6464 *
6465 * The 64-on-32 switcher saves the (64-bit) host state into the VMCS and if we
6466 * changed the switcher back to 32-bit, we *must* save the 32-bit host state here,
6467 * see @bugref{8432}.
6468 *
6469 * This may also happen when switching to/from a nested-guest VMCS without leaving
6470 * ring-0.
6471 */
6472 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
6473 {
6474 hmR0VmxExportHostState(pVCpu);
6475 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportHostState);
6476 }
6477 Assert(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT));
6478
6479 /*
6480 * Export the state shared between host and guest (FPU, debug, lazy MSRs).
6481 */
6482 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)
6483 hmR0VmxExportSharedState(pVCpu, pVmxTransient);
6484 AssertMsg(!pVCpu->hm.s.fCtxChanged, ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
6485
6486 /*
6487 * Store status of the shared guest/host debug state at the time of VM-entry.
6488 */
6489 pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu);
6490 pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu);
6491
6492 /*
6493 * Always cache the TPR-shadow if the virtual-APIC page exists, thereby skipping
6494 * more than one conditional check. The post-run side of our code shall determine
6495 * if it needs to sync. the virtual APIC TPR with the TPR-shadow.
6496 */
6497 if (pVmcsInfo->pbVirtApic)
6498 pVmxTransient->u8GuestTpr = pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR];
6499
6500 /*
6501 * Update the host MSRs values in the VM-exit MSR-load area.
6502 */
6503 if (!pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs)
6504 {
6505 if (pVmcsInfo->cExitMsrLoad > 0)
6506 hmR0VmxUpdateAutoLoadHostMsrs(pVCpu, pVmcsInfo);
6507 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = true;
6508 }
6509
6510 /*
6511 * Evaluate if we need to intercept guest RDTSC/P accesses. Set up the
6512 * VMX-preemption timer based on the next virtual sync clock deadline.
6513 */
6514 if ( !pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer
6515 || idCurrentCpu != pVCpu->hmr0.s.idLastCpu)
6516 {
6517 hmR0VmxUpdateTscOffsettingAndPreemptTimer(pVCpu, pVmxTransient, idCurrentCpu);
6518 pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = true;
6519 }
6520
6521 /* Record statistics of how often we use TSC offsetting as opposed to intercepting RDTSC/P. */
6522 bool const fIsRdtscIntercepted = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT);
6523 if (!fIsRdtscIntercepted)
6524 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
6525 else
6526 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
6527
6528 ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB flushing, set this across the world switch. */
6529 hmR0VmxFlushTaggedTlb(pHostCpu, pVCpu, pVmcsInfo); /* Invalidate the appropriate guest entries from the TLB. */
6530 Assert(idCurrentCpu == pVCpu->hmr0.s.idLastCpu);
6531 pVCpu->hm.s.vmx.LastError.idCurrentCpu = idCurrentCpu; /* Record the error reporting info. with the current host CPU. */
6532 pVmcsInfo->idHostCpuState = idCurrentCpu; /* Record the CPU for which the host-state has been exported. */
6533 pVmcsInfo->idHostCpuExec = idCurrentCpu; /* Record the CPU on which we shall execute. */
6534
6535 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
6536
6537 TMNotifyStartOfExecution(pVM, pVCpu); /* Notify TM to resume its clocks when TSC is tied to execution,
6538 as we're about to start executing the guest. */
6539
6540 /*
6541 * Load the guest TSC_AUX MSR when we are not intercepting RDTSCP.
6542 *
6543 * This is done this late as updating the TSC offsetting/preemption timer above
6544 * figures out if we can skip intercepting RDTSCP by calculating the number of
6545 * host CPU ticks till the next virtual sync deadline (for the dynamic case).
6546 */
6547 if ( (pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_RDTSCP)
6548 && !fIsRdtscIntercepted)
6549 {
6550 vmxHCImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_TSC_AUX);
6551
6552 /* NB: Because we call hmR0VmxAddAutoLoadStoreMsr with fUpdateHostMsr=true,
6553 it's safe even after hmR0VmxUpdateAutoLoadHostMsrs has already been done. */
6554 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX, CPUMGetGuestTscAux(pVCpu),
6555 true /* fSetReadWrite */, true /* fUpdateHostMsr */);
6556 AssertRC(rc);
6557 Assert(!pVmxTransient->fRemoveTscAuxMsr);
6558 pVmxTransient->fRemoveTscAuxMsr = true;
6559 }
6560
6561#ifdef VBOX_STRICT
6562 Assert(pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs);
6563 hmR0VmxCheckAutoLoadStoreMsrs(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest);
6564 hmR0VmxCheckHostEferMsr(pVmcsInfo);
6565 AssertRC(vmxHCCheckCachedVmcsCtls(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest));
6566#endif
6567
6568#ifdef HMVMX_ALWAYS_CHECK_GUEST_STATE
6569 /** @todo r=ramshankar: We can now probably use iemVmxVmentryCheckGuestState here.
6570 * Add a PVMXMSRS parameter to it, so that IEM can look at the host MSRs,
6571 * see @bugref{9180#c54}. */
6572 uint32_t const uInvalidReason = hmR0VmxCheckGuestState(pVCpu, pVmcsInfo);
6573 if (uInvalidReason != VMX_IGS_REASON_NOT_FOUND)
6574 Log4(("hmR0VmxCheckGuestState returned %#x\n", uInvalidReason));
6575#endif
6576}
6577
6578
6579/**
6580 * First C routine invoked after running guest code using hardware-assisted VMX.
6581 *
6582 * @param pVCpu The cross context virtual CPU structure.
6583 * @param pVmxTransient The VMX-transient structure.
6584 * @param rcVMRun Return code of VMLAUNCH/VMRESUME.
6585 *
6586 * @remarks Called with interrupts disabled, and returns with interrupts enabled!
6587 *
6588 * @remarks No-long-jump zone!!! This function will however re-enable longjmps
6589 * unconditionally when it is safe to do so.
6590 */
6591static void hmR0VmxPostRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, int rcVMRun)
6592{
6593 ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB flushing. */
6594 ASMAtomicIncU32(&pVCpu->hmr0.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for EMT poking. */
6595 pVCpu->hm.s.fCtxChanged = 0; /* Exits/longjmps to ring-3 requires saving the guest state. */
6596 pVmxTransient->fVmcsFieldsRead = 0; /* Transient fields need to be read from the VMCS. */
6597 pVmxTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */
6598 pVmxTransient->fVectoringDoublePF = false; /* Vectoring double page-fault needs to be determined later. */
6599
6600 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
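    /*
     * When RDTSC/P wasn't intercepted, derive the guest TSC at the time of VM-exit: with TSC
     * offsetting the guest observes host-TSC + VMCS TSC-offset, and for a nested-guest the
     * offset applied by the nested hypervisor is removed again before informing TM.
     */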
6601 if (!(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT))
6602 {
6603 uint64_t uGstTsc;
6604 if (!pVmxTransient->fIsNestedGuest)
6605 uGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
6606 else
6607 {
6608 uint64_t const uNstGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
6609 uGstTsc = CPUMRemoveNestedGuestTscOffset(pVCpu, uNstGstTsc);
6610 }
6611 TMCpuTickSetLastSeen(pVCpu, uGstTsc); /* Update TM with the guest TSC. */
6612 }
6613
6614 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatPreExit, x);
6615 TMNotifyEndOfExecution(pVCpu->CTX_SUFF(pVM), pVCpu, pVCpu->hmr0.s.uTscExit); /* Notify TM that the guest is no longer running. */
6616 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
6617
6618 pVCpu->hmr0.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_REQUIRED; /* Some host state messed up by VMX needs restoring. */
6619 pVmcsInfo->fVmcsState |= VMX_V_VMCS_LAUNCH_STATE_LAUNCHED; /* Use VMRESUME instead of VMLAUNCH in the next run. */
6620#ifdef VBOX_STRICT
6621 hmR0VmxCheckHostEferMsr(pVmcsInfo); /* Verify that the host EFER MSR wasn't modified. */
6622#endif
6623 Assert(!ASMIntAreEnabled());
6624 ASMSetFlags(pVmxTransient->fEFlags); /* Enable interrupts. */
6625 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6626
6627#ifdef HMVMX_ALWAYS_CLEAN_TRANSIENT
6628 /*
6629 * Clean all the VMCS fields in the transient structure before reading
6630 * anything from the VMCS.
6631 */
6632 pVmxTransient->uExitReason = 0;
6633 pVmxTransient->uExitIntErrorCode = 0;
6634 pVmxTransient->uExitQual = 0;
6635 pVmxTransient->uGuestLinearAddr = 0;
6636 pVmxTransient->uExitIntInfo = 0;
6637 pVmxTransient->cbExitInstr = 0;
6638 pVmxTransient->ExitInstrInfo.u = 0;
6639 pVmxTransient->uEntryIntInfo = 0;
6640 pVmxTransient->uEntryXcptErrorCode = 0;
6641 pVmxTransient->cbEntryInstr = 0;
6642 pVmxTransient->uIdtVectoringInfo = 0;
6643 pVmxTransient->uIdtVectoringErrorCode = 0;
6644#endif
6645
6646 /*
6647 * Save the basic VM-exit reason and check if the VM-entry failed.
6648 * See Intel spec. 24.9.1 "Basic VM-exit Information".
6649 */
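    /* Bits 15:0 of the exit-reason field hold the basic exit reason while bit 31 signals a
       VM-entry failure during or after loading guest state; the two macros below extract these. */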
6650 uint32_t uExitReason;
6651 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
6652 AssertRC(rc);
6653 pVmxTransient->uExitReason = VMX_EXIT_REASON_BASIC(uExitReason);
6654 pVmxTransient->fVMEntryFailed = VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason);
6655
6656 /*
6657 * Log the VM-exit before logging anything else as otherwise it might be a
6658 * tad confusing what happens before and after the world-switch.
6659 */
6660 HMVMX_LOG_EXIT(pVCpu, uExitReason);
6661
6662 /*
6663 * Remove the TSC_AUX MSR from the auto-load/store MSR area and reset any MSR
6664 * bitmap permissions, if it was added before VM-entry.
6665 */
6666 if (pVmxTransient->fRemoveTscAuxMsr)
6667 {
6668 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX);
6669 pVmxTransient->fRemoveTscAuxMsr = false;
6670 }
6671
6672 /*
6673 * Check if VMLAUNCH/VMRESUME succeeded.
6674 * If this failed, we cause a guru meditation and cease further execution.
6675 */
6676 if (RT_LIKELY(rcVMRun == VINF_SUCCESS))
6677 {
6678 /*
6679 * Update the VM-exit history array here even if the VM-entry failed due to:
6680 * - Invalid guest state.
6681 * - MSR loading.
6682 * - Machine-check event.
6683 *
6684 * In any of the above cases we will still have a "valid" VM-exit reason
6685 * despite @a fVMEntryFailed being true.
6686 *
6687 * See Intel spec. 26.7 "VM-Entry failures during or after loading guest state".
6688 *
6689 * Note! We don't have CS or RIP at this point. Will probably address that later
6690 * by amending the history entry added here.
6691 */
6692 EMHistoryAddExit(pVCpu, EMEXIT_MAKE_FT(EMEXIT_F_KIND_VMX, pVmxTransient->uExitReason & EMEXIT_F_TYPE_MASK),
6693 UINT64_MAX, pVCpu->hmr0.s.uTscExit);
6694
6695 if (RT_LIKELY(!pVmxTransient->fVMEntryFailed))
6696 {
6697 VMMRZCallRing3Enable(pVCpu);
6698 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
6699
6700#ifdef HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
6701 hmR0VmxReadAllRoFieldsVmcs(pVmxTransient);
6702#endif
6703
6704 /*
6705 * Always import the guest-interruptibility state as we need it while evaluating
6706 * injecting events on re-entry.
6707 *
6708 * We don't import CR0 (when unrestricted guest execution is unavailable) despite
6709 * checking for real-mode while exporting the state because all bits that cause
6710 * mode changes wrt CR0 are intercepted.
6711 */
6712 uint64_t const fImportMask = CPUMCTX_EXTRN_INHIBIT_INT
6713 | CPUMCTX_EXTRN_INHIBIT_NMI
6714#if defined(HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE) || defined(HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE)
6715 | HMVMX_CPUMCTX_EXTRN_ALL
6716#elif defined(HMVMX_ALWAYS_SAVE_GUEST_RFLAGS)
6717 | CPUMCTX_EXTRN_RFLAGS
6718#endif
6719 ;
6720 rc = vmxHCImportGuestState(pVCpu, pVmcsInfo, fImportMask);
6721 AssertRC(rc);
6722
6723 /*
6724 * Sync the TPR shadow with our APIC state.
6725 */
6726 if ( !pVmxTransient->fIsNestedGuest
6727 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW))
6728 {
6729 Assert(pVmcsInfo->pbVirtApic);
6730 if (pVmxTransient->u8GuestTpr != pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR])
6731 {
6732 rc = APICSetTpr(pVCpu, pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR]);
6733 AssertRC(rc);
6734 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
6735 }
6736 }
6737
6738 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6739 Assert( pVmxTransient->fWasGuestDebugStateActive == false
6740 || pVmxTransient->fWasHyperDebugStateActive == false);
6741 return;
6742 }
6743 }
6744#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6745 else if (pVmxTransient->fIsNestedGuest)
6746 AssertMsgFailed(("VMLAUNCH/VMRESUME failed but shouldn't happen when VMLAUNCH/VMRESUME was emulated in IEM!\n"));
6747#endif
6748 else
6749 Log4Func(("VM-entry failure: rcVMRun=%Rrc fVMEntryFailed=%RTbool\n", rcVMRun, pVmxTransient->fVMEntryFailed));
6750
6751 VMMRZCallRing3Enable(pVCpu);
6752}
6753
6754
6755/**
6756 * Runs the guest code using hardware-assisted VMX the normal way.
6757 *
6758 * @returns VBox status code.
6759 * @param pVCpu The cross context virtual CPU structure.
6760 * @param pcLoops Pointer to the number of executed loops.
6761 */
6762static VBOXSTRICTRC hmR0VmxRunGuestCodeNormal(PVMCPUCC pVCpu, uint32_t *pcLoops)
6763{
6764 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6765 Assert(pcLoops);
6766 Assert(*pcLoops <= cMaxResumeLoops);
6767 Assert(!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6768
6769#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6770 /*
6771 * Switch to the guest VMCS as we may have transitioned from executing the nested-guest
6772 * without leaving ring-0. Otherwise, if we came from ring-3 we would have loaded the
6773 * guest VMCS while entering the VMX ring-0 session.
6774 */
6775 if (pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
6776 {
6777 int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, false /* fSwitchToNstGstVmcs */);
6778 if (RT_SUCCESS(rc))
6779 { /* likely */ }
6780 else
6781 {
6782 LogRelFunc(("Failed to switch to the guest VMCS. rc=%Rrc\n", rc));
6783 return rc;
6784 }
6785 }
6786#endif
6787
6788 VMXTRANSIENT VmxTransient;
6789 RT_ZERO(VmxTransient);
6790 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6791
6792 /* Paranoia. */
6793 Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfo);
6794
6795 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
6796 for (;;)
6797 {
6798 Assert(!HMR0SuspendPending());
6799 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6800 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6801
6802 /*
6803 * Preparatory work for running guest code, this may force us to
6804 * return to ring-3.
6805 *
6806 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6807 */
6808 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
6809 if (rcStrict != VINF_SUCCESS)
6810 break;
6811
6812 /* Interrupts are disabled at this point! */
6813 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6814 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6815 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6816 /* Interrupts are re-enabled at this point! */
6817
6818 /*
6819 * Check for errors with running the VM (VMLAUNCH/VMRESUME).
6820 */
6821 if (RT_SUCCESS(rcRun))
6822 { /* very likely */ }
6823 else
6824 {
6825 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6826 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6827 return rcRun;
6828 }
6829
6830 /*
6831 * Profile the VM-exit.
6832 */
6833 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6834 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
6835 STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6836 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6837 HMVMX_START_EXIT_DISPATCH_PROF();
6838
6839 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6840
6841 /*
6842 * Handle the VM-exit.
6843 */
6844#ifdef HMVMX_USE_FUNCTION_TABLE
6845 rcStrict = g_aVMExitHandlers[VmxTransient.uExitReason].pfn(pVCpu, &VmxTransient);
6846#else
6847 rcStrict = hmR0VmxHandleExit(pVCpu, &VmxTransient);
6848#endif
6849 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6850 if (rcStrict == VINF_SUCCESS)
6851 {
6852 if (++(*pcLoops) <= cMaxResumeLoops)
6853 continue;
6854 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6855 rcStrict = VINF_EM_RAW_INTERRUPT;
6856 }
6857 break;
6858 }
6859
6860 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6861 return rcStrict;
6862}
6863
6864
6865#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6866/**
6867 * Runs the nested-guest code using hardware-assisted VMX.
6868 *
6869 * @returns VBox status code.
6870 * @param pVCpu The cross context virtual CPU structure.
6871 * @param pcLoops Pointer to the number of executed loops.
6872 *
6873 * @sa hmR0VmxRunGuestCodeNormal.
6874 */
6875static VBOXSTRICTRC hmR0VmxRunGuestCodeNested(PVMCPUCC pVCpu, uint32_t *pcLoops)
6876{
6877 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6878 Assert(pcLoops);
6879 Assert(*pcLoops <= cMaxResumeLoops);
6880 Assert(CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6881
6882 /*
6883 * Switch to the nested-guest VMCS as we may have transitioned from executing the
6884 * guest without leaving ring-0. Otherwise, if we came from ring-3 we would have
6885 * loaded the nested-guest VMCS while entering the VMX ring-0 session.
6886 */
6887 if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
6888 {
6889 int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, true /* fSwitchToNstGstVmcs */);
6890 if (RT_SUCCESS(rc))
6891 { /* likely */ }
6892 else
6893 {
6894 LogRelFunc(("Failed to switch to the nested-guest VMCS. rc=%Rrc\n", rc));
6895 return rc;
6896 }
6897 }
6898
6899 VMXTRANSIENT VmxTransient;
6900 RT_ZERO(VmxTransient);
6901 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6902 VmxTransient.fIsNestedGuest = true;
6903
6904 /* Paranoia. */
6905 Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfoNstGst);
6906
6907 /* Setup pointer so PGM/IEM can query VM-exit auxiliary info. on demand in ring-0. */
6908 pVCpu->hmr0.s.vmx.pVmxTransient = &VmxTransient;
6909
6910 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
6911 for (;;)
6912 {
6913 Assert(!HMR0SuspendPending());
6914 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6915 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6916
6917 /*
6918 * Preparatory work for running guest code, this may force us to
6919 * return to ring-3.
6920 *
6921 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6922 */
6923 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
6924 if (rcStrict != VINF_SUCCESS)
6925 break;
6926
6927 /* Interrupts are disabled at this point! */
6928 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6929 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6930 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6931 /* Interrupts are re-enabled at this point! */
6932
6933 /*
6934 * Check for errors with running the VM (VMLAUNCH/VMRESUME).
6935 */
6936 if (RT_SUCCESS(rcRun))
6937 { /* very likely */ }
6938 else
6939 {
6940 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6941 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6942 rcStrict = rcRun;
6943 break;
6944 }
6945
6946 /* Disabling this since we adjust the shadow page table permissions in (SyncHandlerPte). */
6947#if 0
6948 /*
6949 * Undo temporary disabling of the APIC-access page monitoring we did in hmR0VmxMergeVmcsNested.
6950 * This is needed for NestedTrap0eHandler (and IEM) to cause nested-guest APIC-access VM-exits.
6951 */
6952 if (VmxTransient.pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
6953 {
6954 PVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
6955 RTGCPHYS const GCPhysApicAccess = pVmcsNstGst->u64AddrApicAccess.u;
6956 PGMHandlerPhysicalReset(pVCpu->CTX_SUFF(pVM), GCPhysApicAccess);
6957 }
6958#endif
6959
6960 /*
6961 * Profile the VM-exit.
6962 */
6963 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6964 STAM_COUNTER_INC(&pVCpu->hm.s.StatNestedExitAll);
6965 STAM_COUNTER_INC(&pVCpu->hm.s.aStatNestedExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6966 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6967 HMVMX_START_EXIT_DISPATCH_PROF();
6968
6969 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6970
6971 /*
6972 * Handle the VM-exit.
6973 */
6974 rcStrict = vmxHCHandleExitNested(pVCpu, &VmxTransient);
6975 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6976 if (rcStrict == VINF_SUCCESS)
6977 {
6978 if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
6979 {
6980 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
6981 rcStrict = VINF_VMX_VMEXIT;
6982 }
6983 else
6984 {
6985 if (++(*pcLoops) <= cMaxResumeLoops)
6986 continue;
6987 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6988 rcStrict = VINF_EM_RAW_INTERRUPT;
6989 }
6990 }
6991 else
6992 Assert(rcStrict != VINF_VMX_VMEXIT);
6993 break;
6994 }
6995
6996 /* Ensure VM-exit auxiliary info. is no longer available. */
6997 pVCpu->hmr0.s.vmx.pVmxTransient = NULL;
6998
6999 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
7000 return rcStrict;
7001}
7002#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
7003
7004
7005/** @name Execution loop for single stepping, DBGF events and expensive Dtrace
7006 * probes.
7007 *
7008 * The following few functions and associated structure contain the bloat
7009 * necessary for providing detailed debug events and dtrace probes as well as
7010 * reliable host side single stepping. This works on the principle of
7011 * "subclassing" the normal execution loop and workers. We replace the loop
7012 * method completely and override selected helpers to add necessary adjustments
7013 * to their core operation.
7014 *
7015 * The goal is to keep the "parent" code lean and mean, so as not to sacrifice
7016 * any performance for debug and analysis features.
7017 *
7018 * @{
7019 */
7020
7021/**
7022 * Single steps guest code using hardware-assisted VMX.
7023 *
7024 * This is -not- the same as the guest single-stepping itself (say using EFLAGS.TF)
7025 * but single-stepping through the hypervisor debugger.
7026 *
7027 * @returns Strict VBox status code (i.e. informational status codes too).
7028 * @param pVCpu The cross context virtual CPU structure.
7029 * @param pcLoops Pointer to the number of executed loops.
7030 *
7031 * @note Mostly the same as hmR0VmxRunGuestCodeNormal().
7032 */
7033static VBOXSTRICTRC hmR0VmxRunGuestCodeDebug(PVMCPUCC pVCpu, uint32_t *pcLoops)
7034{
7035 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
7036 Assert(pcLoops);
7037 Assert(*pcLoops <= cMaxResumeLoops);
7038
7039 VMXTRANSIENT VmxTransient;
7040 RT_ZERO(VmxTransient);
7041 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
7042
7043 /* Set HMCPU indicators. */
7044 bool const fSavedSingleInstruction = pVCpu->hm.s.fSingleInstruction;
7045 pVCpu->hm.s.fSingleInstruction = pVCpu->hm.s.fSingleInstruction || DBGFIsStepping(pVCpu);
7046 pVCpu->hmr0.s.fDebugWantRdTscExit = false;
7047 pVCpu->hmr0.s.fUsingDebugLoop = true;
7048
7049 /* State we keep to help modify and later restore the VMCS fields we alter, and for detecting steps. */
7050 VMXRUNDBGSTATE DbgState;
7051 vmxHCRunDebugStateInit(pVCpu, &VmxTransient, &DbgState);
7052 vmxHCPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
7053
7054 /*
7055 * The loop.
7056 */
7057 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
7058 for (;;)
7059 {
7060 Assert(!HMR0SuspendPending());
7061 HMVMX_ASSERT_CPU_SAFE(pVCpu);
7062 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
7063 bool fStepping = pVCpu->hm.s.fSingleInstruction;
7064
7065 /* Set up VM-execution controls the next two can respond to. */
7066 vmxHCPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
7067
7068 /*
7069 * Preparatory work for running guest code, this may force us to
7070 * return to ring-3.
7071 *
7072 * Warning! This bugger disables interrupts on VINF_SUCCESS!
7073 */
7074 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, fStepping);
7075 if (rcStrict != VINF_SUCCESS)
7076 break;
7077
7078 /* Interrupts are disabled at this point! */
7079 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
7080
7081 /* Override any obnoxious code in the above two calls. */
7082 vmxHCPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
7083
7084 /*
7085 * Finally execute the guest.
7086 */
7087 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
7088
7089 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
7090 /* Interrupts are re-enabled at this point! */
7091
7092 /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */
7093 if (RT_SUCCESS(rcRun))
7094 { /* very likely */ }
7095 else
7096 {
7097 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
7098 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
7099 return rcRun;
7100 }
7101
7102 /* Profile the VM-exit. */
7103 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
7104 STAM_COUNTER_INC(&pVCpu->hm.s.StatDebugExitAll);
7105 STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
7106 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
7107 HMVMX_START_EXIT_DISPATCH_PROF();
7108
7109 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
7110
7111 /*
7112 * Handle the VM-exit - we quit earlier on certain VM-exits, see vmxHCRunDebugHandleExit().
7113 */
7114 rcStrict = vmxHCRunDebugHandleExit(pVCpu, &VmxTransient, &DbgState);
7115 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
7116 if (rcStrict != VINF_SUCCESS)
7117 break;
7118 if (++(*pcLoops) > cMaxResumeLoops)
7119 {
7120 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
7121 rcStrict = VINF_EM_RAW_INTERRUPT;
7122 break;
7123 }
7124
7125 /*
7126 * Stepping: Did the RIP change, if so, consider it a single step.
7127 * Otherwise, make sure one of the TFs gets set.
7128 */
7129 if (fStepping)
7130 {
7131 int rc = hmR0VmxImportGuestState(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
7132 AssertRC(rc);
7133 if ( pVCpu->cpum.GstCtx.rip != DbgState.uRipStart
7134 || pVCpu->cpum.GstCtx.cs.Sel != DbgState.uCsStart)
7135 {
7136 rcStrict = VINF_EM_DBG_STEPPED;
7137 break;
7138 }
7139 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR7);
7140 }
7141
7142 /*
7143 * Update when dtrace settings changes (DBGF kicks us, so no need to check).
7144 */
7145 if (VBOXVMM_GET_SETTINGS_SEQ_NO() != DbgState.uDtraceSettingsSeqNo)
7146 vmxHCPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
7147
7148 /* Restore all controls applied by vmxHCPreRunGuestDebugStateApply above. */
7149 rcStrict = vmxHCRunDebugStateRevert(pVCpu, &VmxTransient, &DbgState, rcStrict);
7150 Assert(rcStrict == VINF_SUCCESS);
7151 }
7152
7153 /*
7154 * Clear the X86_EFL_TF if necessary.
7155 */
7156 if (pVCpu->hmr0.s.fClearTrapFlag)
7157 {
7158 int rc = hmR0VmxImportGuestState(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_RFLAGS);
7159 AssertRC(rc);
7160 pVCpu->hmr0.s.fClearTrapFlag = false;
7161 pVCpu->cpum.GstCtx.eflags.Bits.u1TF = 0;
7162 }
7163 /** @todo there seem to be issues with the resume flag when the monitor trap
7164 * flag is pending without being used. Seen early in bios init when
7165 * accessing APIC page in protected mode. */
7166
7167/** @todo we need to do hmR0VmxRunDebugStateRevert here too, in case we broke
7168 * out of the above loop. */
7169
7170 /* Restore HMCPU indicators. */
7171 pVCpu->hmr0.s.fUsingDebugLoop = false;
7172 pVCpu->hmr0.s.fDebugWantRdTscExit = false;
7173 pVCpu->hm.s.fSingleInstruction = fSavedSingleInstruction;
7174
7175 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
7176 return rcStrict;
7177}
7178
7179/** @} */
7180
7181
7182/**
7183 * Checks if any expensive dtrace probes are enabled and we should go to the
7184 * debug loop.
7185 *
7186 * @returns true if we should use debug loop, false if not.
7187 */
7188static bool hmR0VmxAnyExpensiveProbesEnabled(void)
7189{
7190 /* It's probably faster to OR the raw 32-bit counter variables together.
7191 Since the variables are in an array and the probes are next to one
7192 another (more or less), we have good locality. So, better to read
7193 eight or nine cache lines every time and only have one conditional, than
7194 128+ conditionals, right? */
7195 return ( VBOXVMM_R0_HMVMX_VMEXIT_ENABLED_RAW() /* expensive too due to context */
7196 | VBOXVMM_XCPT_DE_ENABLED_RAW()
7197 | VBOXVMM_XCPT_DB_ENABLED_RAW()
7198 | VBOXVMM_XCPT_BP_ENABLED_RAW()
7199 | VBOXVMM_XCPT_OF_ENABLED_RAW()
7200 | VBOXVMM_XCPT_BR_ENABLED_RAW()
7201 | VBOXVMM_XCPT_UD_ENABLED_RAW()
7202 | VBOXVMM_XCPT_NM_ENABLED_RAW()
7203 | VBOXVMM_XCPT_DF_ENABLED_RAW()
7204 | VBOXVMM_XCPT_TS_ENABLED_RAW()
7205 | VBOXVMM_XCPT_NP_ENABLED_RAW()
7206 | VBOXVMM_XCPT_SS_ENABLED_RAW()
7207 | VBOXVMM_XCPT_GP_ENABLED_RAW()
7208 | VBOXVMM_XCPT_PF_ENABLED_RAW()
7209 | VBOXVMM_XCPT_MF_ENABLED_RAW()
7210 | VBOXVMM_XCPT_AC_ENABLED_RAW()
7211 | VBOXVMM_XCPT_XF_ENABLED_RAW()
7212 | VBOXVMM_XCPT_VE_ENABLED_RAW()
7213 | VBOXVMM_XCPT_SX_ENABLED_RAW()
7214 | VBOXVMM_INT_SOFTWARE_ENABLED_RAW()
7215 | VBOXVMM_INT_HARDWARE_ENABLED_RAW()
7216 ) != 0
7217 || ( VBOXVMM_INSTR_HALT_ENABLED_RAW()
7218 | VBOXVMM_INSTR_MWAIT_ENABLED_RAW()
7219 | VBOXVMM_INSTR_MONITOR_ENABLED_RAW()
7220 | VBOXVMM_INSTR_CPUID_ENABLED_RAW()
7221 | VBOXVMM_INSTR_INVD_ENABLED_RAW()
7222 | VBOXVMM_INSTR_WBINVD_ENABLED_RAW()
7223 | VBOXVMM_INSTR_INVLPG_ENABLED_RAW()
7224 | VBOXVMM_INSTR_RDTSC_ENABLED_RAW()
7225 | VBOXVMM_INSTR_RDTSCP_ENABLED_RAW()
7226 | VBOXVMM_INSTR_RDPMC_ENABLED_RAW()
7227 | VBOXVMM_INSTR_RDMSR_ENABLED_RAW()
7228 | VBOXVMM_INSTR_WRMSR_ENABLED_RAW()
7229 | VBOXVMM_INSTR_CRX_READ_ENABLED_RAW()
7230 | VBOXVMM_INSTR_CRX_WRITE_ENABLED_RAW()
7231 | VBOXVMM_INSTR_DRX_READ_ENABLED_RAW()
7232 | VBOXVMM_INSTR_DRX_WRITE_ENABLED_RAW()
7233 | VBOXVMM_INSTR_PAUSE_ENABLED_RAW()
7234 | VBOXVMM_INSTR_XSETBV_ENABLED_RAW()
7235 | VBOXVMM_INSTR_SIDT_ENABLED_RAW()
7236 | VBOXVMM_INSTR_LIDT_ENABLED_RAW()
7237 | VBOXVMM_INSTR_SGDT_ENABLED_RAW()
7238 | VBOXVMM_INSTR_LGDT_ENABLED_RAW()
7239 | VBOXVMM_INSTR_SLDT_ENABLED_RAW()
7240 | VBOXVMM_INSTR_LLDT_ENABLED_RAW()
7241 | VBOXVMM_INSTR_STR_ENABLED_RAW()
7242 | VBOXVMM_INSTR_LTR_ENABLED_RAW()
7243 | VBOXVMM_INSTR_GETSEC_ENABLED_RAW()
7244 | VBOXVMM_INSTR_RSM_ENABLED_RAW()
7245 | VBOXVMM_INSTR_RDRAND_ENABLED_RAW()
7246 | VBOXVMM_INSTR_RDSEED_ENABLED_RAW()
7247 | VBOXVMM_INSTR_XSAVES_ENABLED_RAW()
7248 | VBOXVMM_INSTR_XRSTORS_ENABLED_RAW()
7249 | VBOXVMM_INSTR_VMM_CALL_ENABLED_RAW()
7250 | VBOXVMM_INSTR_VMX_VMCLEAR_ENABLED_RAW()
7251 | VBOXVMM_INSTR_VMX_VMLAUNCH_ENABLED_RAW()
7252 | VBOXVMM_INSTR_VMX_VMPTRLD_ENABLED_RAW()
7253 | VBOXVMM_INSTR_VMX_VMPTRST_ENABLED_RAW()
7254 | VBOXVMM_INSTR_VMX_VMREAD_ENABLED_RAW()
7255 | VBOXVMM_INSTR_VMX_VMRESUME_ENABLED_RAW()
7256 | VBOXVMM_INSTR_VMX_VMWRITE_ENABLED_RAW()
7257 | VBOXVMM_INSTR_VMX_VMXOFF_ENABLED_RAW()
7258 | VBOXVMM_INSTR_VMX_VMXON_ENABLED_RAW()
7259 | VBOXVMM_INSTR_VMX_VMFUNC_ENABLED_RAW()
7260 | VBOXVMM_INSTR_VMX_INVEPT_ENABLED_RAW()
7261 | VBOXVMM_INSTR_VMX_INVVPID_ENABLED_RAW()
7262 | VBOXVMM_INSTR_VMX_INVPCID_ENABLED_RAW()
7263 ) != 0
7264 || ( VBOXVMM_EXIT_TASK_SWITCH_ENABLED_RAW()
7265 | VBOXVMM_EXIT_HALT_ENABLED_RAW()
7266 | VBOXVMM_EXIT_MWAIT_ENABLED_RAW()
7267 | VBOXVMM_EXIT_MONITOR_ENABLED_RAW()
7268 | VBOXVMM_EXIT_CPUID_ENABLED_RAW()
7269 | VBOXVMM_EXIT_INVD_ENABLED_RAW()
7270 | VBOXVMM_EXIT_WBINVD_ENABLED_RAW()
7271 | VBOXVMM_EXIT_INVLPG_ENABLED_RAW()
7272 | VBOXVMM_EXIT_RDTSC_ENABLED_RAW()
7273 | VBOXVMM_EXIT_RDTSCP_ENABLED_RAW()
7274 | VBOXVMM_EXIT_RDPMC_ENABLED_RAW()
7275 | VBOXVMM_EXIT_RDMSR_ENABLED_RAW()
7276 | VBOXVMM_EXIT_WRMSR_ENABLED_RAW()
7277 | VBOXVMM_EXIT_CRX_READ_ENABLED_RAW()
7278 | VBOXVMM_EXIT_CRX_WRITE_ENABLED_RAW()
7279 | VBOXVMM_EXIT_DRX_READ_ENABLED_RAW()
7280 | VBOXVMM_EXIT_DRX_WRITE_ENABLED_RAW()
7281 | VBOXVMM_EXIT_PAUSE_ENABLED_RAW()
7282 | VBOXVMM_EXIT_XSETBV_ENABLED_RAW()
7283 | VBOXVMM_EXIT_SIDT_ENABLED_RAW()
7284 | VBOXVMM_EXIT_LIDT_ENABLED_RAW()
7285 | VBOXVMM_EXIT_SGDT_ENABLED_RAW()
7286 | VBOXVMM_EXIT_LGDT_ENABLED_RAW()
7287 | VBOXVMM_EXIT_SLDT_ENABLED_RAW()
7288 | VBOXVMM_EXIT_LLDT_ENABLED_RAW()
7289 | VBOXVMM_EXIT_STR_ENABLED_RAW()
7290 | VBOXVMM_EXIT_LTR_ENABLED_RAW()
7291 | VBOXVMM_EXIT_GETSEC_ENABLED_RAW()
7292 | VBOXVMM_EXIT_RSM_ENABLED_RAW()
7293 | VBOXVMM_EXIT_RDRAND_ENABLED_RAW()
7294 | VBOXVMM_EXIT_RDSEED_ENABLED_RAW()
7295 | VBOXVMM_EXIT_XSAVES_ENABLED_RAW()
7296 | VBOXVMM_EXIT_XRSTORS_ENABLED_RAW()
7297 | VBOXVMM_EXIT_VMM_CALL_ENABLED_RAW()
7298 | VBOXVMM_EXIT_VMX_VMCLEAR_ENABLED_RAW()
7299 | VBOXVMM_EXIT_VMX_VMLAUNCH_ENABLED_RAW()
7300 | VBOXVMM_EXIT_VMX_VMPTRLD_ENABLED_RAW()
7301 | VBOXVMM_EXIT_VMX_VMPTRST_ENABLED_RAW()
7302 | VBOXVMM_EXIT_VMX_VMREAD_ENABLED_RAW()
7303 | VBOXVMM_EXIT_VMX_VMRESUME_ENABLED_RAW()
7304 | VBOXVMM_EXIT_VMX_VMWRITE_ENABLED_RAW()
7305 | VBOXVMM_EXIT_VMX_VMXOFF_ENABLED_RAW()
7306 | VBOXVMM_EXIT_VMX_VMXON_ENABLED_RAW()
7307 | VBOXVMM_EXIT_VMX_VMFUNC_ENABLED_RAW()
7308 | VBOXVMM_EXIT_VMX_INVEPT_ENABLED_RAW()
7309 | VBOXVMM_EXIT_VMX_INVVPID_ENABLED_RAW()
7310 | VBOXVMM_EXIT_VMX_INVPCID_ENABLED_RAW()
7311 | VBOXVMM_EXIT_VMX_EPT_VIOLATION_ENABLED_RAW()
7312 | VBOXVMM_EXIT_VMX_EPT_MISCONFIG_ENABLED_RAW()
7313 | VBOXVMM_EXIT_VMX_VAPIC_ACCESS_ENABLED_RAW()
7314 | VBOXVMM_EXIT_VMX_VAPIC_WRITE_ENABLED_RAW()
7315 ) != 0;
7316}
7317
7318
7319/**
7320 * Runs the guest using hardware-assisted VMX.
7321 *
7322 * @returns Strict VBox status code (i.e. informational status codes too).
7323 * @param pVCpu The cross context virtual CPU structure.
7324 */
7325VMMR0DECL(VBOXSTRICTRC) VMXR0RunGuestCode(PVMCPUCC pVCpu)
7326{
7327 AssertPtr(pVCpu);
7328 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
7329 Assert(VMMRZCallRing3IsEnabled(pVCpu));
7330 Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
7331 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
7332
7333 VBOXSTRICTRC rcStrict;
7334 uint32_t cLoops = 0;
7335 for (;;)
7336 {
7337#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
7338 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(pCtx);
7339#else
7340 NOREF(pCtx);
7341 bool const fInNestedGuestMode = false;
7342#endif
7343 if (!fInNestedGuestMode)
7344 {
7345 if ( !pVCpu->hm.s.fUseDebugLoop
7346 && (!VBOXVMM_ANY_PROBES_ENABLED() || !hmR0VmxAnyExpensiveProbesEnabled())
7347 && !DBGFIsStepping(pVCpu)
7348 && !pVCpu->CTX_SUFF(pVM)->dbgf.ro.cEnabledInt3Breakpoints)
7349 rcStrict = hmR0VmxRunGuestCodeNormal(pVCpu, &cLoops);
7350 else
7351 rcStrict = hmR0VmxRunGuestCodeDebug(pVCpu, &cLoops);
7352 }
7353#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
7354 else
7355 rcStrict = hmR0VmxRunGuestCodeNested(pVCpu, &cLoops);
7356
7357 if (rcStrict == VINF_VMX_VMLAUNCH_VMRESUME)
7358 {
7359 Assert(CPUMIsGuestInVmxNonRootMode(pCtx));
7360 continue;
7361 }
7362 if (rcStrict == VINF_VMX_VMEXIT)
7363 {
7364 Assert(!CPUMIsGuestInVmxNonRootMode(pCtx));
7365 continue;
7366 }
7367#endif
7368 break;
7369 }
7370
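    /* Translate a couple of status codes that the exit handlers use internally into what the
       rest of the VMM expects from this entry point. */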
7371 int const rcLoop = VBOXSTRICTRC_VAL(rcStrict);
7372 switch (rcLoop)
7373 {
7374 case VERR_EM_INTERPRETER: rcStrict = VINF_EM_RAW_EMULATE_INSTR; break;
7375 case VINF_EM_RESET: rcStrict = VINF_EM_TRIPLE_FAULT; break;
7376 }
7377
7378 int rc2 = hmR0VmxExitToRing3(pVCpu, rcStrict);
7379 if (RT_FAILURE(rc2))
7380 {
7381 pVCpu->hm.s.u32HMError = (uint32_t)VBOXSTRICTRC_VAL(rcStrict);
7382 rcStrict = rc2;
7383 }
7384 Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
7385 Assert(!VMMR0AssertionIsNotificationSet(pVCpu));
7386 return rcStrict;
7387}
7388